data-designer-engine 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/engine/__init__.py +2 -0
- data_designer/engine/_version.py +34 -0
- data_designer/engine/analysis/column_profilers/base.py +49 -0
- data_designer/engine/analysis/column_profilers/judge_score_profiler.py +153 -0
- data_designer/engine/analysis/column_profilers/registry.py +22 -0
- data_designer/engine/analysis/column_statistics.py +145 -0
- data_designer/engine/analysis/dataset_profiler.py +149 -0
- data_designer/engine/analysis/errors.py +9 -0
- data_designer/engine/analysis/utils/column_statistics_calculations.py +234 -0
- data_designer/engine/analysis/utils/judge_score_processing.py +132 -0
- data_designer/engine/column_generators/__init__.py +2 -0
- data_designer/engine/column_generators/generators/__init__.py +2 -0
- data_designer/engine/column_generators/generators/base.py +122 -0
- data_designer/engine/column_generators/generators/embedding.py +35 -0
- data_designer/engine/column_generators/generators/expression.py +55 -0
- data_designer/engine/column_generators/generators/llm_completion.py +116 -0
- data_designer/engine/column_generators/generators/samplers.py +69 -0
- data_designer/engine/column_generators/generators/seed_dataset.py +144 -0
- data_designer/engine/column_generators/generators/validation.py +140 -0
- data_designer/engine/column_generators/registry.py +60 -0
- data_designer/engine/column_generators/utils/errors.py +15 -0
- data_designer/engine/column_generators/utils/generator_classification.py +43 -0
- data_designer/engine/column_generators/utils/judge_score_factory.py +58 -0
- data_designer/engine/column_generators/utils/prompt_renderer.py +100 -0
- data_designer/engine/compiler.py +97 -0
- data_designer/engine/configurable_task.py +71 -0
- data_designer/engine/dataset_builders/artifact_storage.py +283 -0
- data_designer/engine/dataset_builders/column_wise_builder.py +354 -0
- data_designer/engine/dataset_builders/errors.py +15 -0
- data_designer/engine/dataset_builders/multi_column_configs.py +46 -0
- data_designer/engine/dataset_builders/utils/__init__.py +2 -0
- data_designer/engine/dataset_builders/utils/concurrency.py +212 -0
- data_designer/engine/dataset_builders/utils/config_compiler.py +62 -0
- data_designer/engine/dataset_builders/utils/dag.py +62 -0
- data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +200 -0
- data_designer/engine/dataset_builders/utils/errors.py +15 -0
- data_designer/engine/dataset_builders/utils/progress_tracker.py +122 -0
- data_designer/engine/errors.py +51 -0
- data_designer/engine/model_provider.py +77 -0
- data_designer/engine/models/__init__.py +2 -0
- data_designer/engine/models/errors.py +300 -0
- data_designer/engine/models/facade.py +284 -0
- data_designer/engine/models/factory.py +42 -0
- data_designer/engine/models/litellm_overrides.py +179 -0
- data_designer/engine/models/parsers/__init__.py +2 -0
- data_designer/engine/models/parsers/errors.py +34 -0
- data_designer/engine/models/parsers/parser.py +235 -0
- data_designer/engine/models/parsers/postprocessors.py +93 -0
- data_designer/engine/models/parsers/tag_parsers.py +62 -0
- data_designer/engine/models/parsers/types.py +84 -0
- data_designer/engine/models/recipes/base.py +81 -0
- data_designer/engine/models/recipes/response_recipes.py +293 -0
- data_designer/engine/models/registry.py +151 -0
- data_designer/engine/models/telemetry.py +362 -0
- data_designer/engine/models/usage.py +73 -0
- data_designer/engine/models/utils.py +101 -0
- data_designer/engine/processing/ginja/__init__.py +2 -0
- data_designer/engine/processing/ginja/ast.py +65 -0
- data_designer/engine/processing/ginja/environment.py +463 -0
- data_designer/engine/processing/ginja/exceptions.py +56 -0
- data_designer/engine/processing/ginja/record.py +32 -0
- data_designer/engine/processing/gsonschema/__init__.py +2 -0
- data_designer/engine/processing/gsonschema/exceptions.py +15 -0
- data_designer/engine/processing/gsonschema/schema_transformers.py +83 -0
- data_designer/engine/processing/gsonschema/types.py +10 -0
- data_designer/engine/processing/gsonschema/validators.py +202 -0
- data_designer/engine/processing/processors/base.py +13 -0
- data_designer/engine/processing/processors/drop_columns.py +42 -0
- data_designer/engine/processing/processors/registry.py +25 -0
- data_designer/engine/processing/processors/schema_transform.py +71 -0
- data_designer/engine/processing/utils.py +169 -0
- data_designer/engine/registry/base.py +99 -0
- data_designer/engine/registry/data_designer_registry.py +39 -0
- data_designer/engine/registry/errors.py +12 -0
- data_designer/engine/resources/managed_dataset_generator.py +39 -0
- data_designer/engine/resources/managed_dataset_repository.py +197 -0
- data_designer/engine/resources/managed_storage.py +65 -0
- data_designer/engine/resources/resource_provider.py +77 -0
- data_designer/engine/resources/seed_reader.py +154 -0
- data_designer/engine/sampling_gen/column.py +91 -0
- data_designer/engine/sampling_gen/constraints.py +100 -0
- data_designer/engine/sampling_gen/data_sources/base.py +217 -0
- data_designer/engine/sampling_gen/data_sources/errors.py +12 -0
- data_designer/engine/sampling_gen/data_sources/sources.py +347 -0
- data_designer/engine/sampling_gen/entities/__init__.py +2 -0
- data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
- data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +90 -0
- data_designer/engine/sampling_gen/entities/email_address_utils.py +171 -0
- data_designer/engine/sampling_gen/entities/errors.py +10 -0
- data_designer/engine/sampling_gen/entities/national_id_utils.py +102 -0
- data_designer/engine/sampling_gen/entities/person.py +144 -0
- data_designer/engine/sampling_gen/entities/phone_number.py +128 -0
- data_designer/engine/sampling_gen/errors.py +26 -0
- data_designer/engine/sampling_gen/generator.py +122 -0
- data_designer/engine/sampling_gen/jinja_utils.py +64 -0
- data_designer/engine/sampling_gen/people_gen.py +199 -0
- data_designer/engine/sampling_gen/person_constants.py +56 -0
- data_designer/engine/sampling_gen/schema.py +147 -0
- data_designer/engine/sampling_gen/schema_builder.py +61 -0
- data_designer/engine/sampling_gen/utils.py +46 -0
- data_designer/engine/secret_resolver.py +82 -0
- data_designer/engine/testing/__init__.py +12 -0
- data_designer/engine/testing/stubs.py +133 -0
- data_designer/engine/testing/utils.py +20 -0
- data_designer/engine/validation.py +367 -0
- data_designer/engine/validators/__init__.py +19 -0
- data_designer/engine/validators/base.py +38 -0
- data_designer/engine/validators/local_callable.py +39 -0
- data_designer/engine/validators/python.py +254 -0
- data_designer/engine/validators/remote.py +89 -0
- data_designer/engine/validators/sql.py +65 -0
- data_designer_engine-0.4.0.dist-info/METADATA +50 -0
- data_designer_engine-0.4.0.dist-info/RECORD +114 -0
- data_designer_engine-0.4.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
from typing import Iterator
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, ConfigDict
|
|
10
|
+
from typing_extensions import Self
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ValidationOutput(BaseModel):
    """Outcome of validating a single record.

    Extra keys are accepted (``extra="allow"``), so a validator can attach
    additional fields next to the verdict.
    """

    # Configuration first: undeclared fields are kept instead of rejected.
    model_config = ConfigDict(extra="allow")

    # Verdict for the record; ``None`` is a permitted value.
    is_valid: bool | None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ValidationResult(BaseModel):
    """Ordered collection of per-record validation outputs.

    ``len()``, indexing and iteration all delegate to the ``data`` list.
    """

    data: list[ValidationOutput]

    @classmethod
    def empty(cls, size: int) -> Self:
        """Build a result holding ``size`` outputs whose ``is_valid`` is ``None``."""
        placeholders = []
        for _ in range(size):
            placeholders.append(ValidationOutput(is_valid=None))
        return cls(data=placeholders)

    def __iter__(self) -> Iterator[ValidationOutput]:
        """Iterate over the outputs in list order."""
        return iter(self.data)

    def __getitem__(self, index: int) -> ValidationOutput:
        """Return the output stored at position ``index``."""
        return self.data[index]

    def __len__(self) -> int:
        """Return the number of outputs held."""
        return len(self.data)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class BaseValidator(ABC):
    """Abstract interface that every validator implements."""

    @abstractmethod
    def run_validation(self, data: list[dict]) -> ValidationResult:
        """Validate ``data`` and return the corresponding ``ValidationResult``.

        Args:
            data: The records to validate, one dict per record.

        Returns:
            The validation result produced by the concrete validator.
        """
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
from data_designer.config.validator_params import LocalCallableValidatorParams
|
|
10
|
+
from data_designer.engine.errors import LocalCallableValidationError
|
|
11
|
+
from data_designer.engine.processing.gsonschema.validators import validate
|
|
12
|
+
from data_designer.engine.validators.base import BaseValidator, ValidationOutput, ValidationResult
|
|
13
|
+
from data_designer.lazy_heavy_imports import pd
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class LocalCallableValidator(BaseValidator):
    """Validator that delegates to a user-supplied Python callable.

    The callable is invoked with the records as a pandas DataFrame; its return
    value must expose ``to_dict(orient="records")`` (i.e. behave like a
    DataFrame) so each row can be parsed into a ``ValidationOutput``.
    """

    def __init__(self, config: LocalCallableValidatorParams):
        """Store the validation callable and the optional output schema.

        Args:
            config: Parameters providing ``validation_function`` and ``output_schema``.
        """
        self.validation_function = config.validation_function
        self.output_schema = config.output_schema

    def run_validation(self, data: list[dict]) -> ValidationResult:
        """Run the configured callable over ``data``.

        Args:
            data: The records to validate, one dict per record.

        Returns:
            One ``ValidationOutput`` per row returned by the callable.

        Raises:
            LocalCallableValidationError: If the callable raises. The original
                exception is chained as ``__cause__`` so its traceback is kept.
        """
        df = pd.DataFrame(data)

        try:
            result_as_df = self.validation_function(df)
        except Exception as e:
            logger.error(f"Callback validator failed: {e}")
            # Chain explicitly so the root cause stays attached to the domain error.
            raise LocalCallableValidationError(str(e)) from e

        records = result_as_df.to_dict(orient="records")
        result = ValidationResult(data=[ValidationOutput.model_validate(record) for record in records])
        if self.output_schema:
            # The schema is checked against the JSON-mode dump of the whole result.
            validate(result.model_dump(mode="json"), self.output_schema, no_extra_properties=True)
        return result
|
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import ast
|
|
7
|
+
import json
|
|
8
|
+
import logging
|
|
9
|
+
import subprocess
|
|
10
|
+
import tempfile
|
|
11
|
+
from collections import defaultdict
|
|
12
|
+
from pathlib import Path
|
|
13
|
+
from typing import TYPE_CHECKING
|
|
14
|
+
from uuid import uuid4
|
|
15
|
+
|
|
16
|
+
from pydantic import BaseModel
|
|
17
|
+
from ruff.__main__ import find_ruff_bin
|
|
18
|
+
|
|
19
|
+
from data_designer.config.validator_params import CodeValidatorParams
|
|
20
|
+
from data_designer.engine.validators.base import BaseValidator, ValidationOutput, ValidationResult
|
|
21
|
+
from data_designer.lazy_heavy_imports import pd
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
import pandas as pd
|
|
25
|
+
|
|
26
|
+
logger = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
# Linter message categories, most severe first; a message's position in this
# list is used as its sort rank.
PYLINT_ERROR_CATEGORIES_ORDERED = ["fatal", "error", "warning", "convention", "refactor"]

# Severities for which linted code is still reported as valid.
PYLINT_VALID_LEVELS = {"none", "warning", "convention", "refactor"}

# Maps the alphabetic part of a linter rule code to a message category.
TYPE_FROM_SYMBOL = {
    "E": "refactor",
    "F": "error",
    "SIM": "refactor",
    "PLC": "convention",
    "PLE": "error",
    "PLR": "refactor",
    "PLW": "warning",
    "SyntaxError": "fatal",
}

# Output key holding the list of linter messages for a record.
PYTHON_MESSAGES_FIELD = "python_linter_messages"

# Name of the temporary column carrying a unique id per record.
RECORD_ID_COLUMN_NAME = "internal_code_record_id"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class PythonValidationStat(BaseModel):
    """Per-module linter message counts by category, plus the statement count."""

    fatal: int = 0
    error: int = 0
    warning: int = 0
    refactor: int = 0
    convention: int = 0
    statement: int = 0

    @property
    def score(self) -> float:
        """Return the lint score for these counts, never below 0.

        The formula follows pylint's default evaluation expression: any fatal
        message yields 0; otherwise the score is 10 minus ten times the
        weighted message count per statement (errors weigh 5, all other
        categories 1).

        Reading the score does not modify the model.
        """
        # https://pylint.pycqa.org/en/latest/user_guide/configuration/all-options.html#evaluation
        if self.fatal:
            return 0

        # Local divisor instead of writing back to ``self.statement``: a property
        # read must not change the stored statistics. Clamped to 1 to avoid a
        # division by zero for modules without countable statements.
        statements = max(1, self.statement)
        weighted_issues = float(5 * self.error + self.warning + self.refactor + self.convention)
        return max(0, 10.0 - (weighted_issues / statements) * 10)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class PythonLinterMessage(BaseModel):
    """A single linter diagnostic and its position in the linted source."""

    type: str
    symbol: str
    line: int
    column: int
    message: str

    @property
    def type_sort_order(self) -> int:
        """Index of ``type`` in ``PYLINT_ERROR_CATEGORIES_ORDERED``.

        Raises:
            ValueError: If ``type`` is not one of the listed categories.
        """
        rank = PYLINT_ERROR_CATEGORIES_ORDERED.index(self.type)
        return rank
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
class PythonLinterMessages(BaseModel):
    """Collection of linter messages for one module."""

    _messages: list[PythonLinterMessage] = []

    def add(self, message: PythonLinterMessage) -> None:
        """Append ``message`` to the collection."""
        self._messages.append(message)

    @property
    def messages(self) -> list[PythonLinterMessage]:
        """Return a new list of the messages, by severity rank then line number."""

        def _rank_then_line(msg: PythonLinterMessage) -> tuple[int, int]:
            return (msg.type_sort_order, msg.line)

        return sorted(self._messages, key=_rank_then_line)

    def get_count_by_type(self) -> dict[str, int]:
        """Return how many messages exist for each message type."""
        tally = defaultdict(int)
        for entry in self.messages:
            tally[entry.type] = tally[entry.type] + 1
        return dict(tally)

    @property
    def is_empty(self) -> bool:
        """Whether no message has been added."""
        return not self.messages

    @property
    def severity(self) -> str:
        """Type of the first message in sorted order, or ``"none"`` when empty."""
        ordered = self.messages
        if not ordered:
            return "none"
        return ordered[0].type

    @property
    def is_valid(self) -> bool:
        """True when empty or when the first sorted message's type is a valid level."""
        ordered = self.messages
        if not ordered:
            return True
        return ordered[0].type in PYLINT_VALID_LEVELS
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
class PythonValidator(BaseValidator):
    """Validate Python code snippets by linting them with ruff.

    Each record's code is written to its own file inside a temporary
    directory, ruff is run once over that directory, and the diagnostics are
    turned into a score, a severity and a validity flag per record.
    """

    def __init__(self, config: CodeValidatorParams):
        """Keep the validator configuration.

        Args:
            config: Code validator parameters.
        """
        self.config = config

    def run_validation(self, data: list[dict]) -> ValidationResult:
        """Lint the single code column in ``data``.

        Args:
            data: Records to validate; all records together must expose at
                most one column, which holds the code to lint.

        Returns:
            One ``ValidationOutput`` per input record, in input order.

        Raises:
            ValueError: If the records contain more than one column.
            RuntimeError: If the ruff output cannot be parsed as JSON.
        """
        df = pd.DataFrame(data)

        if len(df.columns) > 1:
            raise ValueError("Python validator assumes single column input")
        if len(df.columns) == 0:
            # No column to lint (e.g. empty input): indexing df.columns[0]
            # would raise IndexError, so return placeholder outputs instead.
            return ValidationResult.empty(len(data))
        target_column = df.columns[0]

        # Ids are stored as strings, matching the ``str`` contract of
        # ``_get_module_name``; the resulting file names are unchanged.
        df.loc[:, RECORD_ID_COLUMN_NAME] = [str(uuid4()) for _ in range(df.shape[0])]

        with tempfile.TemporaryDirectory() as temp_dir:
            for _, row in df.iterrows():
                self._write_code_to_file(row, target_column, temp_dir)
            results = self._validate_files_in_path(path=temp_dir)

        ordered_results = []
        for record in df.to_dict(orient="records"):
            module_id = self._get_module_name(record[RECORD_ID_COLUMN_NAME], target_column)
            result = results.get(module_id)
            if result is None:
                # Keep the output aligned with the input rows rather than
                # silently dropping a record that has no lint result.
                result = ValidationOutput(is_valid=None)
            ordered_results.append(result)

        return ValidationResult(data=ordered_results)

    def _validate_files_in_path(self, path: str) -> dict[str, ValidationOutput]:
        """Lint every ``*.py`` file in ``path`` and build one output per module.

        Args:
            path: Directory containing the files to lint.

        Returns:
            Mapping of module name (file stem) to its ``ValidationOutput``,
            carrying the score, the severity and the list of messages.
        """
        lint_results = self._run_linter(path)

        scores_by_module = self._get_scores(
            {
                module: messages.get_count_by_type()
                | {"statement": self._count_python_statements(f"{path}/{module}.py")}
                for module, messages in lint_results.items()
            }
        )

        validation_result = {}
        for module, messages in lint_results.items():
            metadata = {
                "python_linter_score": scores_by_module[module],
                "python_linter_severity": messages.severity,
                PYTHON_MESSAGES_FIELD: [m.model_dump() for m in messages.messages],
            }
            validation_result[module] = ValidationOutput(is_valid=messages.is_valid, **metadata)
        return validation_result

    def _write_code_to_file(self, row: pd.Series, code_column: str, path: str) -> None:
        """Write the code held in ``row[code_column]`` to ``<path>/<module>.py``.

        The file is written as UTF-8 because ``_count_python_statements``
        reads it back as UTF-8; relying on the platform default encoding
        could make the two disagree for non-ASCII source.
        """
        module_name = self._get_module_name(row[RECORD_ID_COLUMN_NAME], code_column)
        with open(Path(path) / f"{module_name}.py", "w", encoding="utf-8") as file:
            file.write(row[code_column])

    @staticmethod
    def _get_module_name(record_id: str, column_name: str) -> str:
        """Return the module name (file stem) used for a record."""
        return f"{record_id}_{column_name}"

    @staticmethod
    def _run_linter(codebase_path: str) -> dict[str, PythonLinterMessages]:
        """Run ruff over ``codebase_path`` and group its diagnostics by module.

        Args:
            codebase_path: Directory containing the ``*.py`` files to lint.

        Returns:
            Mapping of file stem to the messages reported for that file; files
            without diagnostics map to an empty collection.

        Raises:
            RuntimeError: If ruff's stdout is not valid JSON.
        """
        # Every file gets an entry up front so clean files are reported too.
        processed = {file.stem: PythonLinterMessages() for file in Path(codebase_path).glob("*.py")}

        # Run ruff linter with JSON output
        ruff_bin = find_ruff_bin()

        ruff_exec = subprocess.run(
            [
                ruff_bin,
                "check",
                "--select",
                "E,F6,F7,F8,SIM,PLC,PLE,PLR,PLW",
                "--output-format=json",
                codebase_path,
            ],
            text=True,
            # Decode explicitly as UTF-8 rather than with the locale encoding.
            # NOTE(review): assumes ruff emits UTF-8 JSON - confirm.
            encoding="utf-8",
            capture_output=True,
            check=False,  # a non-zero exit code is not treated as a failure here
            cwd=Path.cwd(),
        )
        ruff_output = ruff_exec.stdout

        # Parse JSON output
        try:
            diagnostics = json.loads(ruff_output)
        except json.JSONDecodeError as e:
            raise RuntimeError(f"Failed to parse ruff JSON output: {e}") from e

        if not diagnostics:
            return processed  # no errors or warnings

        for diagnostic in diagnostics:
            filename = diagnostic["filename"]
            code = diagnostic.get("code")
            location = diagnostic["location"]
            message = diagnostic["message"]

            if code is None or code == "invalid-syntax":
                # NOTE(review): ruff appears to report syntax errors without a
                # rule code (null), or presumably as "invalid-syntax" in some
                # modes - confirm against the pinned ruff version. A null code
                # would otherwise raise TypeError in the prefix extraction.
                symbol = code or "SyntaxError"
                error_type = TYPE_FROM_SYMBOL["SyntaxError"]
            else:
                symbol = code
                # Extract alphabetic prefix from code for type mapping
                alpha_prefix = "".join(c for c in code if c.isalpha())
                error_type = TYPE_FROM_SYMBOL.get(alpha_prefix, "warning")

            processed.setdefault(Path(filename).stem, PythonLinterMessages()).add(
                PythonLinterMessage(
                    type=error_type,
                    symbol=symbol,
                    line=location["row"],
                    column=location["column"],
                    message=message,
                )
            )

        return processed

    @staticmethod
    def _get_scores(stats_by_module: dict[str, dict[str, int]]) -> dict[str, float]:
        """Compute the lint score of each module from its message statistics."""
        return {module: PythonValidationStat(**stats).score for module, stats in stats_by_module.items()}

    @staticmethod
    def _count_python_statements(file_path: str) -> int:
        """Count the number of statements in a Python file.

        Returns 0 when the file cannot be read or parsed.
        """
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                tree = ast.parse(f.read())
            return sum(1 for node in ast.walk(tree) if isinstance(node, ast.stmt))
        except Exception:
            # Deliberately best-effort: unparsable code simply has no countable statements.
            return 0
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
from httpx_retries import Retry, RetryTransport
|
|
10
|
+
|
|
11
|
+
from data_designer.config.validator_params import RemoteValidatorParams
|
|
12
|
+
from data_designer.engine.errors import RemoteValidationSchemaError
|
|
13
|
+
from data_designer.engine.processing.gsonschema.exceptions import JSONSchemaValidationError
|
|
14
|
+
from data_designer.engine.processing.gsonschema.validators import validate
|
|
15
|
+
from data_designer.engine.validators.base import BaseValidator, ValidationResult
|
|
16
|
+
from data_designer.lazy_heavy_imports import httpx
|
|
17
|
+
|
|
18
|
+
if TYPE_CHECKING:
|
|
19
|
+
import httpx
|
|
20
|
+
|
|
21
|
+
logger = logging.getLogger(__name__)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class RemoteEndpointClient:
    """HTTP client that POSTs JSON to a remote endpoint with retries and a timeout."""

    def __init__(
        self,
        config: RemoteValidatorParams,
    ):
        """
        Initialize the remote endpoint client.

        Args:
            config: Remote validator parameters; the endpoint URL, output
                schema, timeout, retry count and retry backoff are copied from it.
        """
        self.endpoint_url = config.endpoint_url
        self.output_schema = config.output_schema
        self.timeout = config.timeout
        self.max_retries = config.max_retries
        self.retry_backoff = config.retry_backoff

    def post_to_remote_endpoint(self, content: dict) -> dict:
        """
        POST ``content`` as JSON to the endpoint, retrying on selected status codes.

        Args:
            content: The content to be posted to the remote endpoint

        Returns:
            The JSON response from the remote endpoint

        Raises:
            httpx.RequestError: If all retry attempts fail
            httpx.HTTPStatusError: If the server returns an error status
            RemoteValidationSchemaError: If an output schema is configured and
                the response does not conform to it
        """
        retry_policy = Retry(
            total=self.max_retries,
            backoff_factor=self.retry_backoff,
            status_forcelist=[429, 500, 502, 503, 504],
        )
        client = httpx.Client(
            timeout=httpx.Timeout(self.timeout),
            transport=RetryTransport(retry=retry_policy),
        )

        with client as http_client:
            response = http_client.post(self.endpoint_url, json=content)
            response.raise_for_status()

            response_json = response.json()
            if not self.output_schema:
                return response_json

            # A schema is configured: reject responses that do not match it.
            try:
                validate(response_json, self.output_schema, no_extra_properties=True)
            except JSONSchemaValidationError as exc:
                raise RemoteValidationSchemaError(str(exc)) from exc
            return response_json
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class RemoteValidator(BaseValidator):
    """Validator that sends the records to a remote endpoint for validation."""

    def __init__(self, config: RemoteValidatorParams):
        """Create the endpoint client from ``config``."""
        self.remote_endpoint_client = RemoteEndpointClient(config=config)

    def run_validation(self, data: list[dict]) -> ValidationResult:
        """Post ``data`` under the ``"data"`` key and parse the response.

        Args:
            data: The records to validate, one dict per record.

        Returns:
            The endpoint's JSON response validated into a ``ValidationResult``.
        """
        payload = {"data": data}
        response_body = self.remote_endpoint_client.post_to_remote_endpoint(content=payload)
        return ValidationResult.model_validate(response_body)
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
import re
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
from data_designer.config.utils.code_lang import CodeLang
|
|
11
|
+
from data_designer.config.validator_params import CodeValidatorParams
|
|
12
|
+
from data_designer.engine.validators.base import BaseValidator, ValidationOutput, ValidationResult
|
|
13
|
+
from data_designer.lazy_heavy_imports import pd, sqlfluff
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
import pandas as pd
|
|
17
|
+
import sqlfluff
|
|
18
|
+
|
|
19
|
+
# Raise the threshold of the "sqlfluff" logger to WARNING at import time, so
# records below that level from that logger are not emitted.
sqlfluff_logger = logging.getLogger("sqlfluff")
sqlfluff_logger.setLevel(logging.WARNING)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class SQLValidator(BaseValidator):
    """Validate SQL snippets by linting them with sqlfluff.

    A query is invalid when sqlfluff reports any ``PRS``-prefixed code for it,
    when it contains a ``DECIMAL(n)`` declaration without a scale, or when
    linting raises.
    """

    # Matches DECIMAL(<digits>) declarations that omit the scale; compiled once.
    _DECIMAL_WITHOUT_SCALE = re.compile(r"DECIMAL\(\d+\)")

    def __init__(self, config: CodeValidatorParams):
        """Keep the validator configuration (``code_lang`` selects the dialect)."""
        self.config = config

    def run_validation(self, data: list[dict]) -> ValidationResult:
        """Lint the single SQL column in ``data``.

        Args:
            data: Records to validate; all records together must expose at
                most one column, which holds the SQL text.

        Returns:
            One ``ValidationOutput`` per input record, in input order.

        Raises:
            ValueError: If the records contain more than one column.
        """
        df = pd.DataFrame(data)

        if len(df.columns) > 1:
            raise ValueError("SQL validator assumes single column input")
        if len(df.columns) == 0:
            # No column to lint (e.g. empty input): indexing df.columns[0]
            # would raise IndexError, so return placeholder outputs instead.
            return ValidationResult.empty(len(data))
        target_column = df.columns[0]

        records = df.to_dict(orient="records")
        return ValidationResult(data=[self._validate_query(record[target_column]) for record in records])

    def _validate_query(self, content: str) -> ValidationOutput:
        """Lint one SQL statement and report whether it is valid.

        Args:
            content: The SQL text to check.

        Returns:
            A ``ValidationOutput`` with ``is_valid`` and an ``error_messages``
            string (newline-separated; empty when the query is valid).
        """
        try:
            result = sqlfluff.lint(
                content,
                dialect=CodeLang.parse_dialect(self.config.code_lang),
            )
            # Only PRS-prefixed codes count as errors; other findings are ignored.
            issues = [f"{res['code']}: {res['description']}" for res in result if res["code"].startswith("PRS")]
            if self._DECIMAL_WITHOUT_SCALE.search(content):
                issues.append("Custom Check: Found DECIMAL definitions without a scale, which may be incorrect.")
            if issues:
                # Joining a list avoids a stray leading newline when the
                # DECIMAL check is the only finding.
                return ValidationOutput(
                    is_valid=False,
                    error_messages="\n".join(issues),
                )
            return ValidationOutput(is_valid=True, error_messages="")
        except Exception as e:
            # Best-effort: any failure while linting marks the query invalid
            # instead of aborting the whole validation run.
            return ValidationOutput(
                is_valid=False,
                error_messages=f"Exception during SQL parsing: {e}",
            )
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: data-designer-engine
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Generation engine for DataDesigner synthetic data generation
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Classifier: Development Status :: 4 - Beta
|
|
7
|
+
Classifier: Intended Audience :: Developers
|
|
8
|
+
Classifier: Intended Audience :: Science/Research
|
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Requires-Python: >=3.10
|
|
16
|
+
Requires-Dist: anyascii<1,>=0.3.3
|
|
17
|
+
Requires-Dist: data-designer-config
|
|
18
|
+
Requires-Dist: duckdb<2,>=1.1.3
|
|
19
|
+
Requires-Dist: faker<21,>=20.1.0
|
|
20
|
+
Requires-Dist: httpx-retries<1,>=0.4.2
|
|
21
|
+
Requires-Dist: httpx<1,>=0.27.2
|
|
22
|
+
Requires-Dist: huggingface-hub<2,>=1.0.1
|
|
23
|
+
Requires-Dist: json-repair<1,>=0.48.0
|
|
24
|
+
Requires-Dist: jsonpath-rust-bindings<2,>=1.0
|
|
25
|
+
Requires-Dist: jsonschema<5,>=4.0.0
|
|
26
|
+
Requires-Dist: litellm<1.80.12,>=1.73.6
|
|
27
|
+
Requires-Dist: lxml<7,>=6.0.2
|
|
28
|
+
Requires-Dist: marko<3,>=2.1.2
|
|
29
|
+
Requires-Dist: networkx<4,>=3.0
|
|
30
|
+
Requires-Dist: ruff<1,>=0.14.10
|
|
31
|
+
Requires-Dist: scipy<2,>=1.11.0
|
|
32
|
+
Requires-Dist: sqlfluff<4,>=3.2.0
|
|
33
|
+
Requires-Dist: tiktoken<1,>=0.8.0
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
# data-designer-engine
|
|
37
|
+
|
|
38
|
+
Generation engine for NeMo Data Designer synthetic data generation framework.
|
|
39
|
+
|
|
40
|
+
This package contains the execution engine that powers Data Designer. It depends on `data-designer-config` and includes heavy dependencies like pandas, numpy, and LLM integration via litellm.
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install data-designer-engine
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
This automatically installs `data-designer-config` as a dependency.
|
|
49
|
+
|
|
50
|
+
See the main [README.md](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/README.md) for more information.
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
data_designer/engine/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
|
|
2
|
+
data_designer/engine/_version.py,sha256=2_0GUP7yBCXRus-qiJKxQD62z172WSs1sQ6DVpPsbmM,704
|
|
3
|
+
data_designer/engine/compiler.py,sha256=4QAeCJjINtH0afSXygdhiKMyq2KIfaDthK3ApZLgrQ0,4152
|
|
4
|
+
data_designer/engine/configurable_task.py,sha256=6R4FPXPzIeK0lqNVSEXzRDtK14B3dFz38lplr-nkvRE,2539
|
|
5
|
+
data_designer/engine/errors.py,sha256=YXI7ny83BQ16sOK43CpTm384hJTKuZkPTEAjlHlDIfA,1303
|
|
6
|
+
data_designer/engine/model_provider.py,sha256=_uU5Bw7yrGlMROjHL4dN1mMTg1eN-LVW5JWcQxovhAA,2823
|
|
7
|
+
data_designer/engine/secret_resolver.py,sha256=srIAnwbTfsDfgzhWojGTR1u8Vx6SY4vSp0_hJU0_i9A,2468
|
|
8
|
+
data_designer/engine/validation.py,sha256=q9wZqCcRAFoW8p1BtkblFQ3CWgeBHT5JTKVCoYlqZeA,14544
|
|
9
|
+
data_designer/engine/analysis/column_statistics.py,sha256=UW14ooahDgeEdkurgj2d0L6DIX4qce1faWSss_2IR6M,5843
|
|
10
|
+
data_designer/engine/analysis/dataset_profiler.py,sha256=DB-c4m4R39PXapY3CsUZvMGM_E-LByRMQMZBrDfIQY0,7323
|
|
11
|
+
data_designer/engine/analysis/errors.py,sha256=QRmvkNjcMpQ5QDlM7YOSbR2key4d6dsiknk994Ewvv4,296
|
|
12
|
+
data_designer/engine/analysis/column_profilers/base.py,sha256=jgUfoR0fQYG5JMo7KaJktNKao0YSQbgO-ge214popd8,1711
|
|
13
|
+
data_designer/engine/analysis/column_profilers/judge_score_profiler.py,sha256=nSkdb7OumaOWFRq64Abiii43G9MgF3OeOvOH9XpdqFg,6572
|
|
14
|
+
data_designer/engine/analysis/column_profilers/registry.py,sha256=yFEE3gwNUzPI8WMGKfNcObtJSs1b3a87GKrL_ksIqBs,923
|
|
15
|
+
data_designer/engine/analysis/utils/column_statistics_calculations.py,sha256=ry0QxRqLFRn7N4OAn6z7TqSAPEGwxiiUEUtsG_bI-98,8958
|
|
16
|
+
data_designer/engine/analysis/utils/judge_score_processing.py,sha256=QkFMHp0WFhxW3YwwmAnKoEFTULSCxnJ2DSkq8v9kiaE,4884
|
|
17
|
+
data_designer/engine/column_generators/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
|
|
18
|
+
data_designer/engine/column_generators/registry.py,sha256=c4WKk1XwXCasDd4sR0EJqM0Lb1T1UP87rI3LPgbNaUQ,3101
|
|
19
|
+
data_designer/engine/column_generators/generators/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
|
|
20
|
+
data_designer/engine/column_generators/generators/base.py,sha256=QElk5KsaUQ3EYwlv40NcZgQsw3HIkX3YQV_0S3erl7Q,4209
|
|
21
|
+
data_designer/engine/column_generators/generators/embedding.py,sha256=uB0jgHlCgctgIUf9ZfMqG1YThbJ0g-GCX3VdNbdDSko,1407
|
|
22
|
+
data_designer/engine/column_generators/generators/expression.py,sha256=BiQcfVTinvQl3OI9nkdhB9B7FGBueWiHJwxTA8uNVuY,2330
|
|
23
|
+
data_designer/engine/column_generators/generators/llm_completion.py,sha256=gMOOdd0_BY-RLXrArx1u8GL7YJfVvKceTqn_Zg1xHPI,4897
|
|
24
|
+
data_designer/engine/column_generators/generators/samplers.py,sha256=gNzURmu9K8Zb5MHamKvZPIxmWlFgl2W4FIVgaFcy4f0,3371
|
|
25
|
+
data_designer/engine/column_generators/generators/seed_dataset.py,sha256=CoQPbz4Ww7pBLaGw8-CYqIk1sjfkBaoRMKZQexdfgKY,6824
|
|
26
|
+
data_designer/engine/column_generators/generators/validation.py,sha256=YfYbk-8_ZUye0No6_Q7hIqpZv_tunnEZ6HkLSMFXlDE,6659
|
|
27
|
+
data_designer/engine/column_generators/utils/errors.py,sha256=NSAOupF13NU20qRN9_Is5AwiL_8l1IJur0TnuQEiJhw,406
|
|
28
|
+
data_designer/engine/column_generators/utils/generator_classification.py,sha256=XBA_vagEXKBQK54OHANKeHw6Mm2B4RuAmXu0QrRdEEo,1958
|
|
29
|
+
data_designer/engine/column_generators/utils/judge_score_factory.py,sha256=gESiqMrQzbbcFpZas0sAAAkrH2DL0Z4Nq5ywBO-pQ6k,2141
|
|
30
|
+
data_designer/engine/column_generators/utils/prompt_renderer.py,sha256=LATVAlDYwL7HyM7Nogd6n9XTTk-j9s64o4z0LpKHMhQ,4819
|
|
31
|
+
data_designer/engine/dataset_builders/artifact_storage.py,sha256=CKpTBtJTde7OQvsFZQa1v1autVz5yUxlBHkIKeATFnE,10999
|
|
32
|
+
data_designer/engine/dataset_builders/column_wise_builder.py,sha256=UAfl-iejVYqvmVx2anGmtPKfmqztM5o8nvyVzxYrM_0,16581
|
|
33
|
+
data_designer/engine/dataset_builders/errors.py,sha256=gLXtPcGSMBG10PzQ85dOXskdA0mKbBQrHa_VtP9sbVY,400
|
|
34
|
+
data_designer/engine/dataset_builders/multi_column_configs.py,sha256=U4Pg0ETCBq5phRhb2zt8IFa4fRx-aTMakomKOBnrs0U,1660
|
|
35
|
+
data_designer/engine/dataset_builders/utils/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
|
|
36
|
+
data_designer/engine/dataset_builders/utils/concurrency.py,sha256=Lga_xd8i3ZAPqJlKCB4GHG7uxWxws1m-UGAz9UeqU_8,8283
|
|
37
|
+
data_designer/engine/dataset_builders/utils/config_compiler.py,sha256=NGI6U0vgG88d5YKj7oW_SIJ4-_fhA6VFhPbjqGRHea4,2441
|
|
38
|
+
data_designer/engine/dataset_builders/utils/dag.py,sha256=RIEI75OtiphkuDl1vfI_MQC1xMiiIg29s-0C_fNZkWQ,2613
|
|
39
|
+
data_designer/engine/dataset_builders/utils/dataset_batch_manager.py,sha256=IfWd_HcfEzIPhgFp2dJaxNIKRlrPsHqYATFXauvCfaw,8133
|
|
40
|
+
data_designer/engine/dataset_builders/utils/errors.py,sha256=G1MIkQDXguSqHK1EP-60FkG_bys7bJ1UgJnSvcNgtt8,411
|
|
41
|
+
data_designer/engine/dataset_builders/utils/progress_tracker.py,sha256=3zSljzDHwhqgP9IqPUR3XbwC231JvLNWslpmhqKIbUg,4255
|
|
42
|
+
data_designer/engine/models/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
|
|
43
|
+
data_designer/engine/models/errors.py,sha256=k9oZnmk8DRD8U2SVKJJRLwrcdsCcVoJiOb_Q7ZyEdvg,12271
|
|
44
|
+
data_designer/engine/models/facade.py,sha256=ckwFxcMHC23-qKU8bdBC0eWKYx6vfVjvp9-0AtCXMX0,12497
|
|
45
|
+
data_designer/engine/models/factory.py,sha256=2NjI0iiGv8ayQ1c249lsJtha4pDmvmtSjdwvlvitRds,1581
|
|
46
|
+
data_designer/engine/models/litellm_overrides.py,sha256=e9IZCFQ6BhNWlOTncm8ErL8w4rtE1_4USh2mtUYxCZI,6207
|
|
47
|
+
data_designer/engine/models/registry.py,sha256=Bid7Mv_ebzbTrlfzN-1wbcFxp_qQwilL0h2iwN5UPJ0,7099
|
|
48
|
+
data_designer/engine/models/telemetry.py,sha256=_VZR6Iatr6-5Hypw3bes5Jr4y7Y3VagxFEVAv36eHcE,12733
|
|
49
|
+
data_designer/engine/models/usage.py,sha256=A0LV9Ycuj_7snOsaqnirs4mlkAjozv2mzj2om2FpDoU,2410
|
|
50
|
+
data_designer/engine/models/utils.py,sha256=Szy3lOg_E14DRAx6U2Dpr3HXPg09xIr3VUnoREiZ1mw,3807
|
|
51
|
+
data_designer/engine/models/parsers/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
|
|
52
|
+
data_designer/engine/models/parsers/errors.py,sha256=ODcZ4TOsmZyH4-MoNkKXhjiMm_4gLWPsz90qKtNF9_Q,1053
|
|
53
|
+
data_designer/engine/models/parsers/parser.py,sha256=XkdDt2WEnolvsv2bArq4hhujfJ3kLmG6G2jkRXMYA8c,9489
|
|
54
|
+
data_designer/engine/models/parsers/postprocessors.py,sha256=GwgPAdaz8GNahnXbyzQmvo_fpHZDK8ddmrM7-dn9X48,2896
|
|
55
|
+
data_designer/engine/models/parsers/tag_parsers.py,sha256=HNAIBfXW1Wjdkw4IX-P9sHodir1UUt-4Lp91Tz0XWPA,2036
|
|
56
|
+
data_designer/engine/models/parsers/types.py,sha256=wEt80al1FykbMplZVjJ5uXFtacMx-a9GE4_QoqDJ6Us,2631
|
|
57
|
+
data_designer/engine/models/recipes/base.py,sha256=AQg3Ay_E0hBEVg-sqSNVVZNMJfJ3r1eT14-b9yqymnQ,2630
|
|
58
|
+
data_designer/engine/models/recipes/response_recipes.py,sha256=UX9m-8RTDj3sXkzEdKpkSj5z7jO-fQhdca3MSByb_Js,10189
|
|
59
|
+
data_designer/engine/processing/utils.py,sha256=g82KsdDR20g_isadpmgHnneQSX0W21aCVhkp5TIWEhw,5443
|
|
60
|
+
data_designer/engine/processing/ginja/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
|
|
61
|
+
data_designer/engine/processing/ginja/ast.py,sha256=w62yt434RDnJYrcfofIDThGv0C5H9XJE3VHOnxEzJVM,1964
|
|
62
|
+
data_designer/engine/processing/ginja/environment.py,sha256=wJRbzPuUCQGvCi4zS4g8sYzihgu_6fn-tE_nYSL1AoU,18974
|
|
63
|
+
data_designer/engine/processing/ginja/exceptions.py,sha256=o1ogMKtItC336cu_sBWHAGSVhWCKNHEIqd7dNN_13DA,1926
|
|
64
|
+
data_designer/engine/processing/ginja/record.py,sha256=eD6M0CUbEtElYLEtxjyYFlx3yuwHFSgJVWDmsGbTlBQ,1100
|
|
65
|
+
data_designer/engine/processing/gsonschema/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
|
|
66
|
+
data_designer/engine/processing/gsonschema/exceptions.py,sha256=iiWHnOtrRs1sEsQ8K5HQUl_M_IS0rTAelHkCgKKKQ3A,425
|
|
67
|
+
data_designer/engine/processing/gsonschema/schema_transformers.py,sha256=P2qmeVBF385Dz2GSbMuQYTGMCIbXgw_R4I7ZqB0wcjQ,3107
|
|
68
|
+
data_designer/engine/processing/gsonschema/types.py,sha256=nCrs0d7NADz0YzflhYmWpmKpq-F3ukUWFIISc_q1Kso,354
|
|
69
|
+
data_designer/engine/processing/gsonschema/validators.py,sha256=ui3PzGjIclI6Hlw48GUDuAJ3_cgyQr4NANMn4NVxBKE,6960
|
|
70
|
+
data_designer/engine/processing/processors/base.py,sha256=bkAQO0yK6ATJ3zTwS7F9FXobenJqydCyfijSP2MM-70,472
|
|
71
|
+
data_designer/engine/processing/processors/drop_columns.py,sha256=xT7ym2pQc-R0-YHIuYDQGFn2uAf74309-pV4H878Wlk,1866
|
|
72
|
+
data_designer/engine/processing/processors/registry.py,sha256=ewuFY8QeXpql5CNTZZa_87aYPGPNv1H0hpJR7CBVuzI,1097
|
|
73
|
+
data_designer/engine/processing/processors/schema_transform.py,sha256=cpN5XAg_YNKpne_Ed3Vhk8_yuoTUiUy_pINgPZF2ASk,2822
|
|
74
|
+
data_designer/engine/registry/base.py,sha256=eACpE7o_c2btiiXrOFJw7o0VvACo7DSqhj8AntkNkCQ,3579
|
|
75
|
+
data_designer/engine/registry/data_designer_registry.py,sha256=mz8ksE49pS1JRVDNubYSxTs0j-8Q6sd08F_dYyTCWSE,1528
|
|
76
|
+
data_designer/engine/registry/errors.py,sha256=k1EaV7egNQwNmRsI8EfymTfeNprcDutPf2M6Vc1nbn8,350
|
|
77
|
+
data_designer/engine/resources/managed_dataset_generator.py,sha256=2wGc-tH5usXAPXgDkXzslLsCkAsAQgYa3uIYJC5_Oa0,1495
|
|
78
|
+
data_designer/engine/resources/managed_dataset_repository.py,sha256=lx8NTtAPxheZdqkgilYSmqZv4Nd_CeHXXUaXHzGLLVk,7684
|
|
79
|
+
data_designer/engine/resources/managed_storage.py,sha256=8tLJjKGvDbuHnsESL2VZVu9vfEH3--OLZaiZe-LZo_8,2120
|
|
80
|
+
data_designer/engine/resources/resource_provider.py,sha256=1D-a4g1s5r_ECZE5-mR4TwFLxWPH8GLcaBZnd-j51E8,3047
|
|
81
|
+
data_designer/engine/resources/seed_reader.py,sha256=GQiOqf9t-yRag2g5Io3-kQPhpyKJbXgHn2YTUoAgftI,5717
|
|
82
|
+
data_designer/engine/sampling_gen/column.py,sha256=0aQzeJtcM0DNEaarG1ybXV4LLJH0iiOaXvi46Ay4qOE,3987
|
|
83
|
+
data_designer/engine/sampling_gen/constraints.py,sha256=AvFoyZ1QU--R9kGyIaPHClm3mG_ZoPuOE3IQQqYUPqw,3157
|
|
84
|
+
data_designer/engine/sampling_gen/errors.py,sha256=42shYMUNk5bd3FxTOCsBWXa7jlgMZ1ZyE9yyhFzwE7g,869
|
|
85
|
+
data_designer/engine/sampling_gen/generator.py,sha256=olwpzBwSNEerppReBzRXlcoO3Ts3fZxEwVki5Hem50Y,5501
|
|
86
|
+
data_designer/engine/sampling_gen/jinja_utils.py,sha256=DMMunGEonyXUaKd_WyAg9yo39RL65346DYTUJKp7dP4,2136
|
|
87
|
+
data_designer/engine/sampling_gen/people_gen.py,sha256=vplDwZ66VnjMM7AGX0odKPXY__cktlC-nW-z6aMHCKc,8417
|
|
88
|
+
data_designer/engine/sampling_gen/person_constants.py,sha256=jp6SJ9NwAObu31wqE7WC6hBRUGEAOelaDUmqbxkLpcs,1202
|
|
89
|
+
data_designer/engine/sampling_gen/schema.py,sha256=qdgKGNFwdiHvYl4ZhjRv0P8857wAdDKiekpTH8nL43Y,6240
|
|
90
|
+
data_designer/engine/sampling_gen/schema_builder.py,sha256=PvVyWbo3T8zgiVonecD1ST10uwNGO02KtDcczonhCRE,2313
|
|
91
|
+
data_designer/engine/sampling_gen/utils.py,sha256=wdgTQgsKoQWCqE3rnocCZdzHXWWCkD7nR3n9rmQ6C9w,1500
|
|
92
|
+
data_designer/engine/sampling_gen/data_sources/base.py,sha256=zUG5XTplD5pgHh4ytCMFumeuU2O8jr39bxgpGaA3oVc,7447
|
|
93
|
+
data_designer/engine/sampling_gen/data_sources/errors.py,sha256=_9rbwUpaz0Pd2Ods4AVDQ7Uq4JvPyfHhTp51BdtJDto,367
|
|
94
|
+
data_designer/engine/sampling_gen/data_sources/sources.py,sha256=53KVPp7REjNKA0rajGmT_tBkxwQqwrcIKhcijBGcfcs,13647
|
|
95
|
+
data_designer/engine/sampling_gen/entities/__init__.py,sha256=ObZ6NUPeEvvpGTJ5WIGKUyIrIjaI747OM6ErweRtHxQ,137
|
|
96
|
+
data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py,sha256=r8qXWe8EquJognihPNGzma4fFuSQAAZHlkYVsGmcX2w,2006
|
|
97
|
+
data_designer/engine/sampling_gen/entities/email_address_utils.py,sha256=THfD7muq5tMHkRWOATN-N3iSFgkKjT4e8hKquDFMTlU,5272
|
|
98
|
+
data_designer/engine/sampling_gen/entities/errors.py,sha256=SbtwwG6JgoY4k6pq2-y-lD60nX_pqjf5QftmwgXt0us,352
|
|
99
|
+
data_designer/engine/sampling_gen/entities/national_id_utils.py,sha256=XUFB6RhfLGFQUNyy0B6BSgtrG9NdEnIjfSALBwJplho,2652
|
|
100
|
+
data_designer/engine/sampling_gen/entities/person.py,sha256=9S-xAj6_8ZaFX4G_I7CMMKN2Ju_0YeDSnq1ajIIAdhE,5719
|
|
101
|
+
data_designer/engine/sampling_gen/entities/phone_number.py,sha256=dGY5LRwCz19RBH0mJDTpnBb0a98piDSNgkQRemgwqV0,4818
|
|
102
|
+
data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet,sha256=L6G4laXExB7uRCWHlF4XGDk0yMh41jbDnp9LIy7jNHM,576064
|
|
103
|
+
data_designer/engine/testing/__init__.py,sha256=ICYoOcewhvzZmuaF4A8fn-LDaaOBGhBQf-RQ9QyNdhs,396
|
|
104
|
+
data_designer/engine/testing/stubs.py,sha256=_fmsaqIVY3BLlXHZg3oJ-yqhM61bPWcjgXE5SQSZrvU,3917
|
|
105
|
+
data_designer/engine/testing/utils.py,sha256=a9LEgK827cnIzHEkgXOdgywrKDLBE36cyttrpG1ctT4,973
|
|
106
|
+
data_designer/engine/validators/__init__.py,sha256=uT0CTJF9Ce97zoAdMSWvfYn7mO5ja0lIgyPRKJLcsOU,693
|
|
107
|
+
data_designer/engine/validators/base.py,sha256=XfDDMMP0PusoKAjM9rXdIYkyWlLiQPAJChMgtkcdspw,1005
|
|
108
|
+
data_designer/engine/validators/local_callable.py,sha256=JaL-yOXrTFpubiO2QlSt4QbiJzD_ddChmfcHyMhbgaQ,1531
|
|
109
|
+
data_designer/engine/validators/python.py,sha256=omXjwMaomQYiyq4g6XqKt2wexVuI_rWue9Dk-CYc-do,8039
|
|
110
|
+
data_designer/engine/validators/remote.py,sha256=rythhIrH2GvqncMQeF3FiJa9Om0KZWeK3cWjW-ZubaM,3077
|
|
111
|
+
data_designer/engine/validators/sql.py,sha256=AMaEdA-gj9j0zwVp809x3ycKltd51wVEhI8mMYGyxd4,2408
|
|
112
|
+
data_designer_engine-0.4.0.dist-info/METADATA,sha256=hHuNlKxfNErQUPbmwmBkux0M2q9ebuFna97Xoe8y2lc,1873
|
|
113
|
+
data_designer_engine-0.4.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
114
|
+
data_designer_engine-0.4.0.dist-info/RECORD,,
|