data-designer 0.3.4__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/__init__.py +2 -0
- data_designer/_version.py +2 -2
- data_designer/cli/__init__.py +2 -0
- data_designer/cli/commands/download.py +2 -0
- data_designer/cli/commands/list.py +2 -0
- data_designer/cli/commands/models.py +2 -0
- data_designer/cli/commands/providers.py +2 -0
- data_designer/cli/commands/reset.py +2 -0
- data_designer/cli/controllers/__init__.py +2 -0
- data_designer/cli/controllers/download_controller.py +2 -0
- data_designer/cli/controllers/model_controller.py +6 -1
- data_designer/cli/controllers/provider_controller.py +6 -1
- data_designer/cli/forms/__init__.py +2 -0
- data_designer/cli/forms/builder.py +2 -0
- data_designer/cli/forms/field.py +2 -0
- data_designer/cli/forms/form.py +2 -0
- data_designer/cli/forms/model_builder.py +2 -0
- data_designer/cli/forms/provider_builder.py +2 -0
- data_designer/cli/main.py +2 -0
- data_designer/cli/repositories/__init__.py +2 -0
- data_designer/cli/repositories/base.py +2 -0
- data_designer/cli/repositories/model_repository.py +2 -0
- data_designer/cli/repositories/persona_repository.py +2 -0
- data_designer/cli/repositories/provider_repository.py +2 -0
- data_designer/cli/services/__init__.py +2 -0
- data_designer/cli/services/download_service.py +2 -0
- data_designer/cli/services/model_service.py +2 -0
- data_designer/cli/services/provider_service.py +2 -0
- data_designer/cli/ui.py +2 -0
- data_designer/cli/utils.py +2 -0
- data_designer/config/analysis/column_profilers.py +2 -0
- data_designer/config/analysis/column_statistics.py +8 -5
- data_designer/config/analysis/dataset_profiler.py +9 -3
- data_designer/config/analysis/utils/errors.py +2 -0
- data_designer/config/analysis/utils/reporting.py +7 -3
- data_designer/config/column_configs.py +77 -7
- data_designer/config/column_types.py +33 -36
- data_designer/config/dataset_builders.py +2 -0
- data_designer/config/default_model_settings.py +1 -0
- data_designer/config/errors.py +2 -0
- data_designer/config/exports.py +2 -0
- data_designer/config/interface.py +3 -2
- data_designer/config/models.py +7 -2
- data_designer/config/preview_results.py +7 -3
- data_designer/config/processors.py +2 -0
- data_designer/config/run_config.py +2 -0
- data_designer/config/sampler_constraints.py +2 -0
- data_designer/config/sampler_params.py +7 -2
- data_designer/config/seed.py +2 -0
- data_designer/config/seed_source.py +7 -2
- data_designer/config/seed_source_types.py +2 -0
- data_designer/config/utils/constants.py +2 -0
- data_designer/config/utils/errors.py +2 -0
- data_designer/config/utils/info.py +2 -0
- data_designer/config/utils/io_helpers.py +8 -3
- data_designer/config/utils/misc.py +2 -2
- data_designer/config/utils/numerical_helpers.py +2 -0
- data_designer/config/utils/type_helpers.py +2 -0
- data_designer/config/utils/visualization.py +8 -4
- data_designer/config/validator_params.py +2 -0
- data_designer/engine/analysis/column_profilers/base.py +9 -8
- data_designer/engine/analysis/column_profilers/judge_score_profiler.py +15 -19
- data_designer/engine/analysis/column_profilers/registry.py +2 -0
- data_designer/engine/analysis/column_statistics.py +5 -2
- data_designer/engine/analysis/dataset_profiler.py +12 -9
- data_designer/engine/analysis/errors.py +2 -0
- data_designer/engine/analysis/utils/column_statistics_calculations.py +7 -4
- data_designer/engine/analysis/utils/judge_score_processing.py +7 -3
- data_designer/engine/column_generators/generators/base.py +26 -14
- data_designer/engine/column_generators/generators/embedding.py +4 -11
- data_designer/engine/column_generators/generators/expression.py +7 -16
- data_designer/engine/column_generators/generators/llm_completion.py +11 -37
- data_designer/engine/column_generators/generators/samplers.py +8 -14
- data_designer/engine/column_generators/generators/seed_dataset.py +9 -15
- data_designer/engine/column_generators/generators/validation.py +8 -20
- data_designer/engine/column_generators/registry.py +2 -0
- data_designer/engine/column_generators/utils/errors.py +2 -0
- data_designer/engine/column_generators/utils/generator_classification.py +2 -0
- data_designer/engine/column_generators/utils/judge_score_factory.py +2 -0
- data_designer/engine/column_generators/utils/prompt_renderer.py +4 -2
- data_designer/engine/compiler.py +3 -6
- data_designer/engine/configurable_task.py +12 -13
- data_designer/engine/dataset_builders/artifact_storage.py +87 -8
- data_designer/engine/dataset_builders/column_wise_builder.py +32 -34
- data_designer/engine/dataset_builders/errors.py +2 -0
- data_designer/engine/dataset_builders/multi_column_configs.py +2 -0
- data_designer/engine/dataset_builders/utils/config_compiler.py +2 -0
- data_designer/engine/dataset_builders/utils/dag.py +7 -2
- data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +9 -6
- data_designer/engine/dataset_builders/utils/errors.py +2 -0
- data_designer/engine/errors.py +2 -0
- data_designer/engine/model_provider.py +2 -0
- data_designer/engine/models/errors.py +23 -31
- data_designer/engine/models/facade.py +12 -9
- data_designer/engine/models/factory.py +42 -0
- data_designer/engine/models/litellm_overrides.py +16 -11
- data_designer/engine/models/parsers/errors.py +2 -0
- data_designer/engine/models/parsers/parser.py +2 -2
- data_designer/engine/models/parsers/postprocessors.py +1 -0
- data_designer/engine/models/parsers/tag_parsers.py +2 -0
- data_designer/engine/models/parsers/types.py +2 -0
- data_designer/engine/models/recipes/base.py +2 -0
- data_designer/engine/models/recipes/response_recipes.py +2 -0
- data_designer/engine/models/registry.py +11 -18
- data_designer/engine/models/telemetry.py +6 -2
- data_designer/engine/processing/ginja/ast.py +2 -0
- data_designer/engine/processing/ginja/environment.py +2 -0
- data_designer/engine/processing/ginja/exceptions.py +2 -0
- data_designer/engine/processing/ginja/record.py +2 -0
- data_designer/engine/processing/gsonschema/exceptions.py +9 -2
- data_designer/engine/processing/gsonschema/schema_transformers.py +2 -0
- data_designer/engine/processing/gsonschema/types.py +2 -0
- data_designer/engine/processing/gsonschema/validators.py +10 -6
- data_designer/engine/processing/processors/base.py +1 -5
- data_designer/engine/processing/processors/drop_columns.py +7 -10
- data_designer/engine/processing/processors/registry.py +2 -0
- data_designer/engine/processing/processors/schema_transform.py +7 -10
- data_designer/engine/processing/utils.py +7 -3
- data_designer/engine/registry/base.py +2 -0
- data_designer/engine/registry/data_designer_registry.py +2 -0
- data_designer/engine/registry/errors.py +2 -0
- data_designer/engine/resources/managed_dataset_generator.py +6 -2
- data_designer/engine/resources/managed_dataset_repository.py +8 -5
- data_designer/engine/resources/managed_storage.py +2 -0
- data_designer/engine/resources/resource_provider.py +8 -1
- data_designer/engine/resources/seed_reader.py +7 -2
- data_designer/engine/sampling_gen/column.py +2 -0
- data_designer/engine/sampling_gen/constraints.py +8 -2
- data_designer/engine/sampling_gen/data_sources/base.py +10 -7
- data_designer/engine/sampling_gen/data_sources/errors.py +2 -0
- data_designer/engine/sampling_gen/data_sources/sources.py +27 -22
- data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +2 -2
- data_designer/engine/sampling_gen/entities/email_address_utils.py +2 -0
- data_designer/engine/sampling_gen/entities/errors.py +2 -0
- data_designer/engine/sampling_gen/entities/national_id_utils.py +2 -0
- data_designer/engine/sampling_gen/entities/person.py +2 -0
- data_designer/engine/sampling_gen/entities/phone_number.py +8 -1
- data_designer/engine/sampling_gen/errors.py +2 -0
- data_designer/engine/sampling_gen/generator.py +5 -4
- data_designer/engine/sampling_gen/jinja_utils.py +7 -3
- data_designer/engine/sampling_gen/people_gen.py +7 -7
- data_designer/engine/sampling_gen/person_constants.py +2 -0
- data_designer/engine/sampling_gen/schema.py +5 -1
- data_designer/engine/sampling_gen/schema_builder.py +2 -0
- data_designer/engine/sampling_gen/utils.py +7 -1
- data_designer/engine/secret_resolver.py +2 -0
- data_designer/engine/validation.py +2 -2
- data_designer/engine/validators/__init__.py +2 -0
- data_designer/engine/validators/base.py +2 -0
- data_designer/engine/validators/local_callable.py +7 -2
- data_designer/engine/validators/python.py +7 -1
- data_designer/engine/validators/remote.py +7 -1
- data_designer/engine/validators/sql.py +8 -3
- data_designer/errors.py +2 -0
- data_designer/essentials/__init__.py +2 -0
- data_designer/interface/data_designer.py +23 -17
- data_designer/interface/errors.py +2 -0
- data_designer/interface/results.py +5 -2
- data_designer/lazy_heavy_imports.py +54 -0
- data_designer/logging.py +2 -0
- data_designer/plugins/__init__.py +2 -0
- data_designer/plugins/errors.py +2 -0
- data_designer/plugins/plugin.py +0 -1
- data_designer/plugins/registry.py +2 -0
- data_designer/plugins/testing/__init__.py +2 -0
- data_designer/plugins/testing/stubs.py +21 -43
- data_designer/plugins/testing/utils.py +2 -0
- {data_designer-0.3.4.dist-info → data_designer-0.3.5.dist-info}/METADATA +12 -5
- data_designer-0.3.5.dist-info/RECORD +196 -0
- data_designer-0.3.4.dist-info/RECORD +0 -194
- {data_designer-0.3.4.dist-info → data_designer-0.3.5.dist-info}/WHEEL +0 -0
- {data_designer-0.3.4.dist-info → data_designer-0.3.5.dist-info}/entry_points.txt +0 -0
- {data_designer-0.3.4.dist-info → data_designer-0.3.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
import logging
|
|
5
7
|
from pathlib import Path
|
|
6
|
-
|
|
7
|
-
import pandas as pd
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
8
9
|
|
|
9
10
|
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
|
|
10
11
|
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
12
|
+
from data_designer.config.data_designer_config import DataDesignerConfig
|
|
11
13
|
from data_designer.config.default_model_settings import (
|
|
12
14
|
get_default_model_configs,
|
|
13
15
|
get_default_model_providers_missing_api_keys,
|
|
@@ -29,14 +31,10 @@ from data_designer.config.utils.constants import (
|
|
|
29
31
|
PREDEFINED_PROVIDERS,
|
|
30
32
|
)
|
|
31
33
|
from data_designer.config.utils.info import InfoType, InterfaceInfo
|
|
32
|
-
from data_designer.engine.analysis.dataset_profiler import (
|
|
33
|
-
DataDesignerDatasetProfiler,
|
|
34
|
-
DatasetProfilerConfig,
|
|
35
|
-
)
|
|
34
|
+
from data_designer.engine.analysis.dataset_profiler import DataDesignerDatasetProfiler, DatasetProfilerConfig
|
|
36
35
|
from data_designer.engine.compiler import compile_data_designer_config
|
|
37
36
|
from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage
|
|
38
37
|
from data_designer.engine.dataset_builders.column_wise_builder import ColumnWiseDatasetBuilder
|
|
39
|
-
from data_designer.engine.dataset_builders.utils.config_compiler import compile_dataset_builder_column_configs
|
|
40
38
|
from data_designer.engine.model_provider import resolve_model_provider_registry
|
|
41
39
|
from data_designer.engine.resources.managed_storage import init_managed_blob_storage
|
|
42
40
|
from data_designer.engine.resources.resource_provider import ResourceProvider, create_resource_provider
|
|
@@ -58,10 +56,17 @@ from data_designer.interface.errors import (
|
|
|
58
56
|
DataDesignerProfilingError,
|
|
59
57
|
)
|
|
60
58
|
from data_designer.interface.results import DatasetCreationResults
|
|
59
|
+
from data_designer.lazy_heavy_imports import pd
|
|
61
60
|
from data_designer.logging import RandomEmoji
|
|
62
61
|
from data_designer.plugins.plugin import PluginType
|
|
63
62
|
from data_designer.plugins.registry import PluginRegistry
|
|
64
63
|
|
|
64
|
+
if TYPE_CHECKING:
|
|
65
|
+
import pandas as pd
|
|
66
|
+
|
|
67
|
+
logger = logging.getLogger(__name__)
|
|
68
|
+
|
|
69
|
+
|
|
65
70
|
DEFAULT_SECRET_RESOLVER = CompositeResolver([EnvironmentResolver(), PlaintextResolver()])
|
|
66
71
|
|
|
67
72
|
DEFAULT_SEED_READERS = [
|
|
@@ -72,8 +77,6 @@ DEFAULT_SEED_READERS = [
|
|
|
72
77
|
for plugin in PluginRegistry().get_plugins(PluginType.SEED_READER):
|
|
73
78
|
DEFAULT_SEED_READERS.append(plugin.impl_cls())
|
|
74
79
|
|
|
75
|
-
logger = logging.getLogger(__name__)
|
|
76
|
-
|
|
77
80
|
|
|
78
81
|
class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
79
82
|
"""Main interface for creating datasets with Data Designer.
|
|
@@ -162,7 +165,7 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
162
165
|
|
|
163
166
|
resource_provider = self._create_resource_provider(dataset_name, config_builder)
|
|
164
167
|
|
|
165
|
-
builder = self._create_dataset_builder(config_builder, resource_provider)
|
|
168
|
+
builder = self._create_dataset_builder(config_builder.build(), resource_provider)
|
|
166
169
|
|
|
167
170
|
try:
|
|
168
171
|
builder.build(num_records=num_records)
|
|
@@ -180,6 +183,12 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
180
183
|
|
|
181
184
|
dataset_metadata = resource_provider.get_dataset_metadata()
|
|
182
185
|
|
|
186
|
+
# Update metadata with column statistics from analysis
|
|
187
|
+
if analysis:
|
|
188
|
+
builder.artifact_storage.update_metadata(
|
|
189
|
+
{"column_statistics": [stat.model_dump(mode="json") for stat in analysis.column_statistics]}
|
|
190
|
+
)
|
|
191
|
+
|
|
183
192
|
return DatasetCreationResults(
|
|
184
193
|
artifact_storage=builder.artifact_storage,
|
|
185
194
|
analysis=analysis,
|
|
@@ -210,7 +219,7 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
210
219
|
logger.info(f"{RandomEmoji.previewing()} Preview generation in progress")
|
|
211
220
|
|
|
212
221
|
resource_provider = self._create_resource_provider("preview-dataset", config_builder)
|
|
213
|
-
builder = self._create_dataset_builder(config_builder, resource_provider)
|
|
222
|
+
builder = self._create_dataset_builder(config_builder.build(), resource_provider)
|
|
214
223
|
|
|
215
224
|
try:
|
|
216
225
|
raw_dataset = builder.build_preview(num_records=num_records)
|
|
@@ -274,7 +283,7 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
274
283
|
InvalidConfigError: If the configuration is invalid.
|
|
275
284
|
"""
|
|
276
285
|
resource_provider = self._create_resource_provider("validate-configuration", config_builder)
|
|
277
|
-
compile_data_designer_config(config_builder, resource_provider)
|
|
286
|
+
compile_data_designer_config(config_builder.build(), resource_provider)
|
|
278
287
|
|
|
279
288
|
def get_default_model_configs(self) -> list[ModelConfig]:
|
|
280
289
|
"""Get the default model configurations.
|
|
@@ -339,14 +348,11 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
339
348
|
|
|
340
349
|
def _create_dataset_builder(
|
|
341
350
|
self,
|
|
342
|
-
|
|
351
|
+
data_designer_config: DataDesignerConfig,
|
|
343
352
|
resource_provider: ResourceProvider,
|
|
344
353
|
) -> ColumnWiseDatasetBuilder:
|
|
345
|
-
config = compile_data_designer_config(config_builder, resource_provider)
|
|
346
|
-
|
|
347
354
|
return ColumnWiseDatasetBuilder(
|
|
348
|
-
|
|
349
|
-
processor_configs=config.processors or [],
|
|
355
|
+
data_designer_config=data_designer_config,
|
|
350
356
|
resource_provider=resource_provider,
|
|
351
357
|
)
|
|
352
358
|
|
|
@@ -4,8 +4,7 @@
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
from pathlib import Path
|
|
7
|
-
|
|
8
|
-
import pandas as pd
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
9
8
|
|
|
10
9
|
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
|
|
11
10
|
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
@@ -13,6 +12,10 @@ from data_designer.config.dataset_metadata import DatasetMetadata
|
|
|
13
12
|
from data_designer.config.utils.visualization import WithRecordSamplerMixin
|
|
14
13
|
from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage
|
|
15
14
|
from data_designer.engine.dataset_builders.errors import ArtifactStorageError
|
|
15
|
+
from data_designer.lazy_heavy_imports import pd
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
import pandas as pd
|
|
16
19
|
|
|
17
20
|
|
|
18
21
|
class DatasetCreationResults(WithRecordSamplerMixin):
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
Lazy imports facade for heavy third-party dependencies.
|
|
6
|
+
|
|
7
|
+
This module provides a centralized facade that lazily imports heavy dependencies
|
|
8
|
+
only when accessed, significantly improving import performance.
|
|
9
|
+
|
|
10
|
+
Usage:
|
|
11
|
+
from data_designer.lazy_heavy_imports import pd, np, faker, litellm
|
|
12
|
+
|
|
13
|
+
df = pd.DataFrame(...)
|
|
14
|
+
arr = np.array([1, 2, 3])
|
|
15
|
+
fake = faker.Faker()
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import importlib
|
|
21
|
+
|
|
22
|
+
# Mapping of lazy import names to their actual module paths
|
|
23
|
+
_LAZY_IMPORTS = {
|
|
24
|
+
"pd": "pandas",
|
|
25
|
+
"np": "numpy",
|
|
26
|
+
"pq": "pyarrow.parquet",
|
|
27
|
+
"pa": "pyarrow",
|
|
28
|
+
"faker": "faker",
|
|
29
|
+
"litellm": "litellm",
|
|
30
|
+
"sqlfluff": "sqlfluff",
|
|
31
|
+
"httpx": "httpx",
|
|
32
|
+
"duckdb": "duckdb",
|
|
33
|
+
"nx": "networkx",
|
|
34
|
+
"scipy": "scipy",
|
|
35
|
+
"jsonschema": "jsonschema",
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def __getattr__(name: str) -> object:
|
|
40
|
+
"""Lazily import heavy third-party dependencies when accessed.
|
|
41
|
+
|
|
42
|
+
This allows fast imports of data_designer while deferring loading of heavy
|
|
43
|
+
libraries until they're actually needed.
|
|
44
|
+
"""
|
|
45
|
+
if name in _LAZY_IMPORTS:
|
|
46
|
+
module_name = _LAZY_IMPORTS[name]
|
|
47
|
+
return importlib.import_module(module_name)
|
|
48
|
+
|
|
49
|
+
raise AttributeError(f"module 'data_designer.lazy_heavy_imports' has no attribute {name!r}")
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def __dir__() -> list[str]:
|
|
53
|
+
"""Return list of available lazy imports."""
|
|
54
|
+
return list(_LAZY_IMPORTS.keys())
|
data_designer/logging.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
from data_designer.plugins.plugin import Plugin, PluginType
|
|
5
7
|
|
|
6
8
|
__all__ = ["Plugin", "PluginType"]
|
data_designer/plugins/errors.py
CHANGED
data_designer/plugins/plugin.py
CHANGED
|
@@ -70,7 +70,6 @@ class Plugin(BaseModel):
|
|
|
70
70
|
..., description="The fully-qualified name o the config class object, e.g. 'my_plugin.config.MyConfig'"
|
|
71
71
|
)
|
|
72
72
|
plugin_type: PluginType = Field(..., description="The type of plugin")
|
|
73
|
-
emoji: str = Field(default="🔌", description="The emoji to use in logs related to the plugin")
|
|
74
73
|
|
|
75
74
|
@property
|
|
76
75
|
def config_type_as_class_name(self) -> str:
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
from data_designer.plugins.testing.utils import assert_valid_plugin
|
|
5
7
|
|
|
6
8
|
__all__ = [
|
|
@@ -1,11 +1,13 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
from typing import Literal
|
|
5
7
|
|
|
6
8
|
from data_designer.config.base import ConfigBase
|
|
7
9
|
from data_designer.config.column_configs import SingleColumnConfig
|
|
8
|
-
from data_designer.engine.
|
|
10
|
+
from data_designer.engine.column_generators.generators.base import ColumnGeneratorCellByCell
|
|
9
11
|
from data_designer.plugins.plugin import Plugin, PluginType
|
|
10
12
|
|
|
11
13
|
MODULE_NAME = __name__
|
|
@@ -18,15 +20,11 @@ class ValidTestConfig(SingleColumnConfig):
|
|
|
18
20
|
name: str
|
|
19
21
|
|
|
20
22
|
|
|
21
|
-
class ValidTestTask(
|
|
23
|
+
class ValidTestTask(ColumnGeneratorCellByCell[ValidTestConfig]):
|
|
22
24
|
"""Valid task for testing plugin creation."""
|
|
23
25
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
return ConfigurableTaskMetadata(
|
|
27
|
-
name="test_generator",
|
|
28
|
-
description="Test generator",
|
|
29
|
-
)
|
|
26
|
+
def generate(self, data: dict) -> dict:
|
|
27
|
+
return data
|
|
30
28
|
|
|
31
29
|
|
|
32
30
|
class ConfigWithoutDiscriminator(ConfigBase):
|
|
@@ -53,22 +51,14 @@ class StubPluginConfigB(SingleColumnConfig):
|
|
|
53
51
|
column_type: Literal["test-plugin-b"] = "test-plugin-b"
|
|
54
52
|
|
|
55
53
|
|
|
56
|
-
class StubPluginTaskA(
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
return ConfigurableTaskMetadata(
|
|
60
|
-
name="test_plugin_a",
|
|
61
|
-
description="Test plugin A",
|
|
62
|
-
)
|
|
54
|
+
class StubPluginTaskA(ColumnGeneratorCellByCell[StubPluginConfigA]):
|
|
55
|
+
def generate(self, data: dict) -> dict:
|
|
56
|
+
return data
|
|
63
57
|
|
|
64
58
|
|
|
65
|
-
class StubPluginTaskB(
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
return ConfigurableTaskMetadata(
|
|
69
|
-
name="test_plugin_b",
|
|
70
|
-
description="Test plugin B",
|
|
71
|
-
)
|
|
59
|
+
class StubPluginTaskB(ColumnGeneratorCellByCell[StubPluginConfigB]):
|
|
60
|
+
def generate(self, data: dict) -> dict:
|
|
61
|
+
return data
|
|
72
62
|
|
|
73
63
|
|
|
74
64
|
# Stub plugins requiring different combinations of resources
|
|
@@ -86,31 +76,19 @@ class StubPluginConfigBlobsAndSeeds(SingleColumnConfig):
|
|
|
86
76
|
column_type: Literal["test-plugin-blobs-and-seeds"] = "test-plugin-blobs-and-seeds"
|
|
87
77
|
|
|
88
78
|
|
|
89
|
-
class StubPluginTaskModels(
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
return ConfigurableTaskMetadata(
|
|
93
|
-
name="test_plugin_models",
|
|
94
|
-
description="Test plugin requiring models",
|
|
95
|
-
)
|
|
79
|
+
class StubPluginTaskModels(ColumnGeneratorCellByCell[StubPluginConfigModels]):
|
|
80
|
+
def generate(self, data: dict) -> dict:
|
|
81
|
+
return data
|
|
96
82
|
|
|
97
83
|
|
|
98
|
-
class StubPluginTaskModelsAndBlobs(
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
return ConfigurableTaskMetadata(
|
|
102
|
-
name="test_plugin_models_and_blobs",
|
|
103
|
-
description="Test plugin requiring models and blobs",
|
|
104
|
-
)
|
|
84
|
+
class StubPluginTaskModelsAndBlobs(ColumnGeneratorCellByCell[StubPluginConfigModelsAndBlobs]):
|
|
85
|
+
def generate(self, data: dict) -> dict:
|
|
86
|
+
return data
|
|
105
87
|
|
|
106
88
|
|
|
107
|
-
class StubPluginTaskBlobsAndSeeds(
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
return ConfigurableTaskMetadata(
|
|
111
|
-
name="test_plugin_blobs_and_seeds",
|
|
112
|
-
description="Test plugin requiring blobs and seeds",
|
|
113
|
-
)
|
|
89
|
+
class StubPluginTaskBlobsAndSeeds(ColumnGeneratorCellByCell[StubPluginConfigBlobsAndSeeds]):
|
|
90
|
+
def generate(self, data: dict) -> dict:
|
|
91
|
+
return data
|
|
114
92
|
|
|
115
93
|
|
|
116
94
|
plugin_none = Plugin(
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
from data_designer.config.base import ConfigBase
|
|
5
7
|
from data_designer.engine.configurable_task import ConfigurableTask
|
|
6
8
|
from data_designer.engine.resources.seed_reader import SeedReader
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: data-designer
|
|
3
|
-
Version: 0.3.4
|
|
3
|
+
Version: 0.3.5
|
|
4
4
|
Summary: General framework for synthetic data generation
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -87,12 +87,19 @@ make install
|
|
|
87
87
|
|
|
88
88
|
### 2. Set your API key
|
|
89
89
|
|
|
90
|
-
|
|
90
|
+
Start with one of our default model providers:
|
|
91
91
|
|
|
92
|
+
- [NVIDIA Build API](https://build.nvidia.com)
|
|
93
|
+
- [OpenAI](https://platform.openai.com/api-keys)
|
|
94
|
+
- [OpenRouter](https://openrouter.ai)
|
|
95
|
+
|
|
96
|
+
Grab your API key(s) using the above links and set one or more of the following environment variables:
|
|
92
97
|
```bash
|
|
93
98
|
export NVIDIA_API_KEY="your-api-key-here"
|
|
94
|
-
|
|
99
|
+
|
|
95
100
|
export OPENAI_API_KEY="your-openai-api-key-here"
|
|
101
|
+
|
|
102
|
+
export OPENROUTER_API_KEY="your-openrouter-api-key-here"
|
|
96
103
|
```
|
|
97
104
|
|
|
98
105
|
### 3. Start generating data!
|
|
@@ -127,7 +134,7 @@ config_builder.add_column(
|
|
|
127
134
|
LLMTextColumnConfig(
|
|
128
135
|
name="review",
|
|
129
136
|
model_alias="nvidia-text",
|
|
130
|
-
prompt="
|
|
137
|
+
prompt="Write a brief product review for a {{ product_category }} item you recently purchased.",
|
|
131
138
|
)
|
|
132
139
|
)
|
|
133
140
|
|
|
@@ -195,7 +202,7 @@ To disable telemetry capture, set `NEMO_TELEMETRY_ENABLED=false`.
|
|
|
195
202
|
|
|
196
203
|
### Top Models
|
|
197
204
|
|
|
198
|
-
This chart represents the breakdown of models used for Data Designer across all synthetic data generation jobs from 12/18/2025 to 1/14/2026.
|
|
205
|
+
This chart represents the breakdown of models used for Data Designer across all synthetic data generation jobs from 12/18/2025 to 1/14/2026.
|
|
199
206
|
|
|
200
207
|

|
|
201
208
|
|
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
data_designer/__init__.py,sha256=iLr6FpW41-DFbGexuXCJ6gN1xBMNUZ2jfj9XxySmQhk,502
|
|
2
|
+
data_designer/_version.py,sha256=UAb2Toi6SAdScDfq1uKRRv5QpMUuRtJqqwNxTMGe5Q4,704
|
|
3
|
+
data_designer/errors.py,sha256=r1pBvmvRBAsPmb7oF_veubhkxZ2uPo9cGEDwykLziX4,220
|
|
4
|
+
data_designer/lazy_heavy_imports.py,sha256=wULSEPQRUOZXvOnb0tdf6wNbRBpaaczYfAjY-pstCBM,1512
|
|
5
|
+
data_designer/logging.py,sha256=gRi9BOqm95UC1-u4pn6n-G4EySy9HhwKVyKLRO4aqm4,5382
|
|
6
|
+
data_designer/plugin_manager.py,sha256=C2ZkZiXlcMRiaxfrrho5Shz6DKdExVeBha7ch-d4CnU,2695
|
|
7
|
+
data_designer/cli/README.md,sha256=uPE3KdlF5Y3H8pQc8c6ZZ3h6YSFXNQW-iEXGQJuVnI4,9026
|
|
8
|
+
data_designer/cli/__init__.py,sha256=--5yQzMciTX8-vroyXyFNBCqQ0HQd67GWCwnIoIHhJ4,251
|
|
9
|
+
data_designer/cli/main.py,sha256=1klKdUKPZTgmUbduHSzEFueQHWkc-42Gcbri25cjiHo,1974
|
|
10
|
+
data_designer/cli/ui.py,sha256=IgpV_Ht6qmLFrT3ybgOoADTQthoSGJxrwds38o1Zz10,17632
|
|
11
|
+
data_designer/cli/utils.py,sha256=yyKZfr4ndcsngKgmpj5r4fN7fP6ouX-Nwx1Go5s6SdM,2151
|
|
12
|
+
data_designer/cli/commands/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
13
|
+
data_designer/cli/commands/download.py,sha256=bTynzORVj1rftrrQhmTj6se-ITi2_L7Z3qtio1mLvXU,1770
|
|
14
|
+
data_designer/cli/commands/list.py,sha256=Lu02qFTkhEkLX2e7ak_rHmoO8_4Jjrgy4Yua-EAtyHo,4091
|
|
15
|
+
data_designer/cli/commands/models.py,sha256=Ot4eWyEbCS7heG_bylBdWZ1qj4CILv_hTddm2VdY0Dc,428
|
|
16
|
+
data_designer/cli/commands/providers.py,sha256=-zVNtE_0A0hifcUk6n3c_v_Olcd14mHt3N8_HahHTQ4,491
|
|
17
|
+
data_designer/cli/commands/reset.py,sha256=iCNjkFNdGU6Y7rv-Fprl9ZW60riseL_R7CrYi6DrwR0,3514
|
|
18
|
+
data_designer/cli/controllers/__init__.py,sha256=70il4GIKebdau43nCXyu4VcQj7IFNoxxjEo1Z3hm8_M,491
|
|
19
|
+
data_designer/cli/controllers/download_controller.py,sha256=9lQo-njn890WJiewGazfd6SrBBA4Rj8LYFkXZG_phPI,8117
|
|
20
|
+
data_designer/cli/controllers/model_controller.py,sha256=CZimP1npWwH8UrJXlfMIfbNEn9pcJKtg14CqgArbqQM,9020
|
|
21
|
+
data_designer/cli/controllers/provider_controller.py,sha256=mSLHkc60lu9VsXJE2NNpFZ6zHkasz6UQLtoTBYoFtkA,12293
|
|
22
|
+
data_designer/cli/forms/__init__.py,sha256=UpTr7s5q2GFFssNz3229Kb5JxvFOqtZ55XpifB9a15w,713
|
|
23
|
+
data_designer/cli/forms/builder.py,sha256=Juem3wB2j1KXtZZY7wVP0-eWKK_tj_0-L8Zq9EAS0-k,1731
|
|
24
|
+
data_designer/cli/forms/field.py,sha256=TYEQLqjMvYBS_ftf6Ms-D5J6TOIK9NNe-Ydvo5Nkq50,7543
|
|
25
|
+
data_designer/cli/forms/form.py,sha256=wFdKS0WfuhfotRtwWZgJyN2HrTthI7Kx07NUoQV2DtM,2066
|
|
26
|
+
data_designer/cli/forms/model_builder.py,sha256=DPggV2cl-XQPUiVhrrGO_4_d7jTn5_kBeBn9oTw-V1U,13354
|
|
27
|
+
data_designer/cli/forms/provider_builder.py,sha256=YA6IoLwV39Sh6w0lZYoF25m-ryhnqBqysXLUo4V5X-w,2936
|
|
28
|
+
data_designer/cli/repositories/__init__.py,sha256=ukHlLpOimH9CCJsdW5U9tooV_oFWQ4iLGK5GNi5YXtM,475
|
|
29
|
+
data_designer/cli/repositories/base.py,sha256=ofOAHeAYAL6Bm4EJjSOFaNPD-odm2PlrW6quRkQQzaw,1095
|
|
30
|
+
data_designer/cli/repositories/model_repository.py,sha256=oaa5ISP8Y-BRzXOdzAhDHf0FqmWngSEJ8RGOYWeXi4M,1487
|
|
31
|
+
data_designer/cli/repositories/persona_repository.py,sha256=3ZRarD6BYAKVYFs_r9hDkh2nfkKW7BA8KJyfcYP0RRc,2683
|
|
32
|
+
data_designer/cli/repositories/provider_repository.py,sha256=hG6tYbjR3gT8DmXL7usRvMrc6ILws4ECyTZ5imENpuQ,1556
|
|
33
|
+
data_designer/cli/services/__init__.py,sha256=2ycyikXx-8gbYZm-xl6IMyKXLwR4REU5heg6BkUW6qo,455
|
|
34
|
+
data_designer/cli/services/download_service.py,sha256=m_wtDfxAA80tZdIf9kUS3ye8fzKG-3DjfDnm5u0-mJE,3519
|
|
35
|
+
data_designer/cli/services/model_service.py,sha256=cFiP9ZQIprPdrVibUC6uwL-NuCYRgx8XVIjxDV-TznU,3926
|
|
36
|
+
data_designer/cli/services/provider_service.py,sha256=5cou_EWU0RwE9p2PWpRBM9HcPqdENLpkHUuGzQ-l9J4,3957
|
|
37
|
+
data_designer/config/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
38
|
+
data_designer/config/base.py,sha256=IGj6sy_GnKzC94uu2rdxe12EqR_AmGJ6O3rl2MxOv6g,2449
|
|
39
|
+
data_designer/config/column_configs.py,sha256=JBYIeqmIiwdZzA-NXTw4qghs2Y30RZPQ_-koqPVcQ9g,20384
|
|
40
|
+
data_designer/config/column_types.py,sha256=xGXuu0EBy3Y5Jd74f2VM6x5jHq72GmK9leA6qOnAz8c,5423
|
|
41
|
+
data_designer/config/config_builder.py,sha256=vuPibkodbJxbCXdaI1tt1Uyo1SVCnAOfLBAW1AmhajI,24707
|
|
42
|
+
data_designer/config/data_designer_config.py,sha256=qOojviug05vHR2S4800sjd4OmxhSVi6kB8SAFXLlPog,1891
|
|
43
|
+
data_designer/config/dataset_builders.py,sha256=jdCujJYFlKAiSkPNX2Qeyrs683GrRcCDv_m8ZZhtg64,368
|
|
44
|
+
data_designer/config/dataset_metadata.py,sha256=UTlEgnHWgjwPuc7bP95T7gaKmcr7pIhFMy9vvbUwMV4,647
|
|
45
|
+
data_designer/config/default_model_settings.py,sha256=d9ZuTDGMtS1rZpIDqoSQjCiD5tcHrUOr22X0-mGQspc,4497
|
|
46
|
+
data_designer/config/errors.py,sha256=JhvUYecfLmP0gZjQzqA3OmfaSs9TRlC5E-ubnV_-3gs,560
|
|
47
|
+
data_designer/config/exports.py,sha256=lNwteK4djETKXrMKh5PPeHeZvPAZ5RpnJt2otpoaUz0,4756
|
|
48
|
+
data_designer/config/interface.py,sha256=ikmpm_KwencTpM-yg0auo7XMgcmMSa67S75IqdpFLfk,1676
|
|
49
|
+
data_designer/config/models.py,sha256=OekrXEVnI9WdHzEVk-8fO0NtxLZtjKVtCL03RY8qwYs,15457
|
|
50
|
+
data_designer/config/preview_results.py,sha256=WnPlDcHElIHNfjV_P-nLu_Dpul8D3Eyb5qyi3E173Gs,1744
|
|
51
|
+
data_designer/config/processors.py,sha256=lnyUZA1EhO9NWjjVFFioYxSgeYpoAaM1J7UzwOYkvms,6028
|
|
52
|
+
data_designer/config/run_config.py,sha256=5TA1PSmZ3Ca5V0GA1KTds3xrEwGPFZY9C35Vf_1cAs0,2429
|
|
53
|
+
data_designer/config/sampler_constraints.py,sha256=tQI1XLF5bS4TnyKMLo0nArvefnXI8dWCzov38r4qNCQ,1197
|
|
54
|
+
data_designer/config/sampler_params.py,sha256=Gio-53vjSYOdPhF2CEq4HSWCXCaZMy4WpGPbuFVcWOM,27965
|
|
55
|
+
data_designer/config/seed.py,sha256=eShSqOcSUzfCEZBnqY-rB0qZpRGxjeOE3fSaJAwacec,4668
|
|
56
|
+
data_designer/config/seed_source.py,sha256=ufcZdibP3aeruswC1lfh-JJcr5NjK_Ht50uY6-wnl8E,2635
|
|
57
|
+
data_designer/config/seed_source_types.py,sha256=sxu6EOVr4ChZFvv2dI1-F9AZg_9fnv8UJ0dGVbsWQ6E,715
|
|
58
|
+
data_designer/config/validator_params.py,sha256=xm5H1IgphK61aMFoH2FOu4MROlvxeL84CajI8DTPv6Y,3947
|
|
59
|
+
data_designer/config/analysis/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
60
|
+
data_designer/config/analysis/column_profilers.py,sha256=sgfHrHYRZgtTDrHtfkDtu6F9iZUwD3ISc3m9kka0UUE,6256
|
|
61
|
+
data_designer/config/analysis/column_statistics.py,sha256=g3ipgwHMLyTLhvJrB7St0SYhyvIe6ENfTCEJKoePetc,16885
|
|
62
|
+
data_designer/config/analysis/dataset_profiler.py,sha256=-5eX55IXivwUBMg2pI-d_3e7nbJb83a0tyxL-WzL-MY,4174
|
|
63
|
+
data_designer/config/analysis/utils/errors.py,sha256=pvmdQ_YuIlWW4NFw-cX_rOoQf-GG8y_FiQzNctB__DQ,331
|
|
64
|
+
data_designer/config/analysis/utils/reporting.py,sha256=teTzd1OHtpI4vbIinGOGsKXyNldO3F5eqbNdAztF0_s,7066
|
|
65
|
+
data_designer/config/utils/code_lang.py,sha256=EqMJh1GL5ysUZIoyqx_6vmqenUKHm4J-RQtKXiA4EPg,2354
|
|
66
|
+
data_designer/config/utils/constants.py,sha256=KU4ZCIe18gXdBp2N_BgZlRW90FIqjFPYmJtqgVY3Ink,8925
|
|
67
|
+
data_designer/config/utils/errors.py,sha256=HCjer0YrF0bMn5j8gmgWaLb0395LAr_hxMD1ftOsOc8,520
|
|
68
|
+
data_designer/config/utils/info.py,sha256=yOa4U8kI_CY4OfCKZxCm2okU8klAiThvyjKM5tG-F0A,3469
|
|
69
|
+
data_designer/config/utils/io_helpers.py,sha256=kzvOR7QgqijkqU-O2enIlpCWwHvzc3oRaEl4Lsjh1Do,8466
|
|
70
|
+
data_designer/config/utils/misc.py,sha256=7n_0txc78IoK6V39CwZY-65KtYcjh38WDl0Q1bQM-EA,2481
|
|
71
|
+
data_designer/config/utils/numerical_helpers.py,sha256=DIubKzc8q2_Bw7xRjyOGwxYulTV3dt3JxCdpH560dak,838
|
|
72
|
+
data_designer/config/utils/type_helpers.py,sha256=XyVup24F4Bl7uNze_yUW9oD6EzFbfsJWKhpeMN2901A,4059
|
|
73
|
+
data_designer/config/utils/visualization.py,sha256=_0Mn-jva0Oz1tVTQH1mnWSARpqZ2kh1JSzJEuikyy9s,18491
|
|
74
|
+
data_designer/engine/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
75
|
+
data_designer/engine/compiler.py,sha256=4a6ayCQjpULrGU2CXaBMDs-RU0TszT2oEkMK-vn51zk,2757
|
|
76
|
+
data_designer/engine/configurable_task.py,sha256=6R4FPXPzIeK0lqNVSEXzRDtK14B3dFz38lplr-nkvRE,2539
|
|
77
|
+
data_designer/engine/errors.py,sha256=YXI7ny83BQ16sOK43CpTm384hJTKuZkPTEAjlHlDIfA,1303
|
|
78
|
+
data_designer/engine/model_provider.py,sha256=_uU5Bw7yrGlMROjHL4dN1mMTg1eN-LVW5JWcQxovhAA,2823
|
|
79
|
+
data_designer/engine/secret_resolver.py,sha256=srIAnwbTfsDfgzhWojGTR1u8Vx6SY4vSp0_hJU0_i9A,2468
|
|
80
|
+
data_designer/engine/validation.py,sha256=q9wZqCcRAFoW8p1BtkblFQ3CWgeBHT5JTKVCoYlqZeA,14544
|
|
81
|
+
data_designer/engine/analysis/column_statistics.py,sha256=UW14ooahDgeEdkurgj2d0L6DIX4qce1faWSss_2IR6M,5843
|
|
82
|
+
data_designer/engine/analysis/dataset_profiler.py,sha256=DB-c4m4R39PXapY3CsUZvMGM_E-LByRMQMZBrDfIQY0,7323
|
|
83
|
+
data_designer/engine/analysis/errors.py,sha256=QRmvkNjcMpQ5QDlM7YOSbR2key4d6dsiknk994Ewvv4,296
|
|
84
|
+
data_designer/engine/analysis/column_profilers/base.py,sha256=jgUfoR0fQYG5JMo7KaJktNKao0YSQbgO-ge214popd8,1711
|
|
85
|
+
data_designer/engine/analysis/column_profilers/judge_score_profiler.py,sha256=nSkdb7OumaOWFRq64Abiii43G9MgF3OeOvOH9XpdqFg,6572
|
|
86
|
+
data_designer/engine/analysis/column_profilers/registry.py,sha256=yFEE3gwNUzPI8WMGKfNcObtJSs1b3a87GKrL_ksIqBs,923
|
|
87
|
+
data_designer/engine/analysis/utils/column_statistics_calculations.py,sha256=ry0QxRqLFRn7N4OAn6z7TqSAPEGwxiiUEUtsG_bI-98,8958
|
|
88
|
+
data_designer/engine/analysis/utils/judge_score_processing.py,sha256=QkFMHp0WFhxW3YwwmAnKoEFTULSCxnJ2DSkq8v9kiaE,4884
|
|
89
|
+
data_designer/engine/column_generators/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
90
|
+
data_designer/engine/column_generators/registry.py,sha256=c4WKk1XwXCasDd4sR0EJqM0Lb1T1UP87rI3LPgbNaUQ,3101
|
|
91
|
+
data_designer/engine/column_generators/generators/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
92
|
+
data_designer/engine/column_generators/generators/base.py,sha256=QElk5KsaUQ3EYwlv40NcZgQsw3HIkX3YQV_0S3erl7Q,4209
|
|
93
|
+
data_designer/engine/column_generators/generators/embedding.py,sha256=uB0jgHlCgctgIUf9ZfMqG1YThbJ0g-GCX3VdNbdDSko,1407
|
|
94
|
+
data_designer/engine/column_generators/generators/expression.py,sha256=BiQcfVTinvQl3OI9nkdhB9B7FGBueWiHJwxTA8uNVuY,2330
|
|
95
|
+
data_designer/engine/column_generators/generators/llm_completion.py,sha256=TGVCV0Sp2AI5KwJ7lG9Co7-zF6gVy-vmVg9eEKmiazE,3873
|
|
96
|
+
data_designer/engine/column_generators/generators/samplers.py,sha256=gNzURmu9K8Zb5MHamKvZPIxmWlFgl2W4FIVgaFcy4f0,3371
|
|
97
|
+
data_designer/engine/column_generators/generators/seed_dataset.py,sha256=CoQPbz4Ww7pBLaGw8-CYqIk1sjfkBaoRMKZQexdfgKY,6824
|
|
98
|
+
data_designer/engine/column_generators/generators/validation.py,sha256=YfYbk-8_ZUye0No6_Q7hIqpZv_tunnEZ6HkLSMFXlDE,6659
|
|
99
|
+
data_designer/engine/column_generators/utils/errors.py,sha256=NSAOupF13NU20qRN9_Is5AwiL_8l1IJur0TnuQEiJhw,406
|
|
100
|
+
data_designer/engine/column_generators/utils/generator_classification.py,sha256=XBA_vagEXKBQK54OHANKeHw6Mm2B4RuAmXu0QrRdEEo,1958
|
|
101
|
+
data_designer/engine/column_generators/utils/judge_score_factory.py,sha256=gESiqMrQzbbcFpZas0sAAAkrH2DL0Z4Nq5ywBO-pQ6k,2141
|
|
102
|
+
data_designer/engine/column_generators/utils/prompt_renderer.py,sha256=LATVAlDYwL7HyM7Nogd6n9XTTk-j9s64o4z0LpKHMhQ,4819
|
|
103
|
+
data_designer/engine/dataset_builders/artifact_storage.py,sha256=CKpTBtJTde7OQvsFZQa1v1autVz5yUxlBHkIKeATFnE,10999
|
|
104
|
+
data_designer/engine/dataset_builders/column_wise_builder.py,sha256=_3_JJJG-tA9qLhNiEKbHxl1EHYBbMVAGUtaAdqO_wsc,15736
|
|
105
|
+
data_designer/engine/dataset_builders/errors.py,sha256=gLXtPcGSMBG10PzQ85dOXskdA0mKbBQrHa_VtP9sbVY,400
|
|
106
|
+
data_designer/engine/dataset_builders/multi_column_configs.py,sha256=U4Pg0ETCBq5phRhb2zt8IFa4fRx-aTMakomKOBnrs0U,1660
|
|
107
|
+
data_designer/engine/dataset_builders/utils/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
108
|
+
data_designer/engine/dataset_builders/utils/concurrency.py,sha256=Q0ro9UY-3-FFzfi3MZ29nMTSiDZgg1Um6y_HQFztDhk,8338
|
|
109
|
+
data_designer/engine/dataset_builders/utils/config_compiler.py,sha256=NGI6U0vgG88d5YKj7oW_SIJ4-_fhA6VFhPbjqGRHea4,2441
|
|
110
|
+
data_designer/engine/dataset_builders/utils/dag.py,sha256=RIEI75OtiphkuDl1vfI_MQC1xMiiIg29s-0C_fNZkWQ,2613
|
|
111
|
+
data_designer/engine/dataset_builders/utils/dataset_batch_manager.py,sha256=IfWd_HcfEzIPhgFp2dJaxNIKRlrPsHqYATFXauvCfaw,8133
|
|
112
|
+
data_designer/engine/dataset_builders/utils/errors.py,sha256=G1MIkQDXguSqHK1EP-60FkG_bys7bJ1UgJnSvcNgtt8,411
|
|
113
|
+
data_designer/engine/models/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
114
|
+
data_designer/engine/models/errors.py,sha256=k9oZnmk8DRD8U2SVKJJRLwrcdsCcVoJiOb_Q7ZyEdvg,12271
|
|
115
|
+
data_designer/engine/models/facade.py,sha256=UBMpw_o2JcsWpJsPdpTPKfFZCh_i0eeG_oaWi1XeKds,12582
|
|
116
|
+
data_designer/engine/models/factory.py,sha256=2NjI0iiGv8ayQ1c249lsJtha4pDmvmtSjdwvlvitRds,1581
|
|
117
|
+
data_designer/engine/models/litellm_overrides.py,sha256=4qsTLZ2UHmh1YxdgqdvDBPt1taM348CVhtNZ5omnQRM,5742
|
|
118
|
+
data_designer/engine/models/registry.py,sha256=7hZ6TQwwZf259yRZmc3ZI20a4wAo3PCOozPi9Mc5KLo,6827
|
|
119
|
+
data_designer/engine/models/telemetry.py,sha256=wmuekvPRZjNz7p7ImKx5H_hqDRhTv_dSB-u2S6Ze3uo,12502
|
|
120
|
+
data_designer/engine/models/usage.py,sha256=A0LV9Ycuj_7snOsaqnirs4mlkAjozv2mzj2om2FpDoU,2410
|
|
121
|
+
data_designer/engine/models/utils.py,sha256=HS5pXAAz7IcOcijeClC-xxq6R6DUmC2ykZu8Vr33Ivk,1259
|
|
122
|
+
data_designer/engine/models/parsers/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
123
|
+
data_designer/engine/models/parsers/errors.py,sha256=ODcZ4TOsmZyH4-MoNkKXhjiMm_4gLWPsz90qKtNF9_Q,1053
|
|
124
|
+
data_designer/engine/models/parsers/parser.py,sha256=XkdDt2WEnolvsv2bArq4hhujfJ3kLmG6G2jkRXMYA8c,9489
|
|
125
|
+
data_designer/engine/models/parsers/postprocessors.py,sha256=GwgPAdaz8GNahnXbyzQmvo_fpHZDK8ddmrM7-dn9X48,2896
|
|
126
|
+
data_designer/engine/models/parsers/tag_parsers.py,sha256=HNAIBfXW1Wjdkw4IX-P9sHodir1UUt-4Lp91Tz0XWPA,2036
|
|
127
|
+
data_designer/engine/models/parsers/types.py,sha256=wEt80al1FykbMplZVjJ5uXFtacMx-a9GE4_QoqDJ6Us,2631
|
|
128
|
+
data_designer/engine/models/recipes/base.py,sha256=AQg3Ay_E0hBEVg-sqSNVVZNMJfJ3r1eT14-b9yqymnQ,2630
|
|
129
|
+
data_designer/engine/models/recipes/response_recipes.py,sha256=UX9m-8RTDj3sXkzEdKpkSj5z7jO-fQhdca3MSByb_Js,10189
|
|
130
|
+
data_designer/engine/processing/utils.py,sha256=iu7JJ4foI3Gfd29ppIBGn9c0syO64PTyvW9CiaLVAHE,5201
|
|
131
|
+
data_designer/engine/processing/ginja/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
132
|
+
data_designer/engine/processing/ginja/ast.py,sha256=w62yt434RDnJYrcfofIDThGv0C5H9XJE3VHOnxEzJVM,1964
|
|
133
|
+
data_designer/engine/processing/ginja/environment.py,sha256=wJRbzPuUCQGvCi4zS4g8sYzihgu_6fn-tE_nYSL1AoU,18974
|
|
134
|
+
data_designer/engine/processing/ginja/exceptions.py,sha256=o1ogMKtItC336cu_sBWHAGSVhWCKNHEIqd7dNN_13DA,1926
|
|
135
|
+
data_designer/engine/processing/ginja/record.py,sha256=eD6M0CUbEtElYLEtxjyYFlx3yuwHFSgJVWDmsGbTlBQ,1100
|
|
136
|
+
data_designer/engine/processing/gsonschema/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
137
|
+
data_designer/engine/processing/gsonschema/exceptions.py,sha256=iiWHnOtrRs1sEsQ8K5HQUl_M_IS0rTAelHkCgKKKQ3A,425
|
|
138
|
+
data_designer/engine/processing/gsonschema/schema_transformers.py,sha256=P2qmeVBF385Dz2GSbMuQYTGMCIbXgw_R4I7ZqB0wcjQ,3107
|
|
139
|
+
data_designer/engine/processing/gsonschema/types.py,sha256=nCrs0d7NADz0YzflhYmWpmKpq-F3ukUWFIISc_q1Kso,354
|
|
140
|
+
data_designer/engine/processing/gsonschema/validators.py,sha256=ui3PzGjIclI6Hlw48GUDuAJ3_cgyQr4NANMn4NVxBKE,6960
|
|
141
|
+
data_designer/engine/processing/processors/base.py,sha256=bkAQO0yK6ATJ3zTwS7F9FXobenJqydCyfijSP2MM-70,472
|
|
142
|
+
data_designer/engine/processing/processors/drop_columns.py,sha256=xT7ym2pQc-R0-YHIuYDQGFn2uAf74309-pV4H878Wlk,1866
|
|
143
|
+
data_designer/engine/processing/processors/registry.py,sha256=ewuFY8QeXpql5CNTZZa_87aYPGPNv1H0hpJR7CBVuzI,1097
|
|
144
|
+
data_designer/engine/processing/processors/schema_transform.py,sha256=RhLXXKoj9MFpOqsXZ2hfSaTr7_yUUNI3gmFBS4XtEy4,2006
|
|
145
|
+
data_designer/engine/registry/base.py,sha256=eACpE7o_c2btiiXrOFJw7o0VvACo7DSqhj8AntkNkCQ,3579
|
|
146
|
+
data_designer/engine/registry/data_designer_registry.py,sha256=mz8ksE49pS1JRVDNubYSxTs0j-8Q6sd08F_dYyTCWSE,1528
|
|
147
|
+
data_designer/engine/registry/errors.py,sha256=k1EaV7egNQwNmRsI8EfymTfeNprcDutPf2M6Vc1nbn8,350
|
|
148
|
+
data_designer/engine/resources/managed_dataset_generator.py,sha256=2wGc-tH5usXAPXgDkXzslLsCkAsAQgYa3uIYJC5_Oa0,1495
|
|
149
|
+
data_designer/engine/resources/managed_dataset_repository.py,sha256=lx8NTtAPxheZdqkgilYSmqZv4Nd_CeHXXUaXHzGLLVk,7684
|
|
150
|
+
data_designer/engine/resources/managed_storage.py,sha256=8tLJjKGvDbuHnsESL2VZVu9vfEH3--OLZaiZe-LZo_8,2120
|
|
151
|
+
data_designer/engine/resources/resource_provider.py,sha256=1D-a4g1s5r_ECZE5-mR4TwFLxWPH8GLcaBZnd-j51E8,3047
|
|
152
|
+
data_designer/engine/resources/seed_reader.py,sha256=GQiOqf9t-yRag2g5Io3-kQPhpyKJbXgHn2YTUoAgftI,5717
|
|
153
|
+
data_designer/engine/sampling_gen/column.py,sha256=0aQzeJtcM0DNEaarG1ybXV4LLJH0iiOaXvi46Ay4qOE,3987
|
|
154
|
+
data_designer/engine/sampling_gen/constraints.py,sha256=AvFoyZ1QU--R9kGyIaPHClm3mG_ZoPuOE3IQQqYUPqw,3157
|
|
155
|
+
data_designer/engine/sampling_gen/errors.py,sha256=42shYMUNk5bd3FxTOCsBWXa7jlgMZ1ZyE9yyhFzwE7g,869
|
|
156
|
+
data_designer/engine/sampling_gen/generator.py,sha256=olwpzBwSNEerppReBzRXlcoO3Ts3fZxEwVki5Hem50Y,5501
|
|
157
|
+
data_designer/engine/sampling_gen/jinja_utils.py,sha256=DMMunGEonyXUaKd_WyAg9yo39RL65346DYTUJKp7dP4,2136
|
|
158
|
+
data_designer/engine/sampling_gen/people_gen.py,sha256=vplDwZ66VnjMM7AGX0odKPXY__cktlC-nW-z6aMHCKc,8417
|
|
159
|
+
data_designer/engine/sampling_gen/person_constants.py,sha256=jp6SJ9NwAObu31wqE7WC6hBRUGEAOelaDUmqbxkLpcs,1202
|
|
160
|
+
data_designer/engine/sampling_gen/schema.py,sha256=qdgKGNFwdiHvYl4ZhjRv0P8857wAdDKiekpTH8nL43Y,6240
|
|
161
|
+
data_designer/engine/sampling_gen/schema_builder.py,sha256=PvVyWbo3T8zgiVonecD1ST10uwNGO02KtDcczonhCRE,2313
|
|
162
|
+
data_designer/engine/sampling_gen/utils.py,sha256=wdgTQgsKoQWCqE3rnocCZdzHXWWCkD7nR3n9rmQ6C9w,1500
|
|
163
|
+
data_designer/engine/sampling_gen/data_sources/base.py,sha256=zUG5XTplD5pgHh4ytCMFumeuU2O8jr39bxgpGaA3oVc,7447
|
|
164
|
+
data_designer/engine/sampling_gen/data_sources/errors.py,sha256=_9rbwUpaz0Pd2Ods4AVDQ7Uq4JvPyfHhTp51BdtJDto,367
|
|
165
|
+
data_designer/engine/sampling_gen/data_sources/sources.py,sha256=53KVPp7REjNKA0rajGmT_tBkxwQqwrcIKhcijBGcfcs,13647
|
|
166
|
+
data_designer/engine/sampling_gen/entities/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
167
|
+
data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py,sha256=0_eUTtrWFGxTfTfqlz9ig9bJEtYeckb50J7w5LhYTr8,1883
|
|
168
|
+
data_designer/engine/sampling_gen/entities/email_address_utils.py,sha256=THfD7muq5tMHkRWOATN-N3iSFgkKjT4e8hKquDFMTlU,5272
|
|
169
|
+
data_designer/engine/sampling_gen/entities/errors.py,sha256=SbtwwG6JgoY4k6pq2-y-lD60nX_pqjf5QftmwgXt0us,352
|
|
170
|
+
data_designer/engine/sampling_gen/entities/national_id_utils.py,sha256=XUFB6RhfLGFQUNyy0B6BSgtrG9NdEnIjfSALBwJplho,2652
|
|
171
|
+
data_designer/engine/sampling_gen/entities/person.py,sha256=9S-xAj6_8ZaFX4G_I7CMMKN2Ju_0YeDSnq1ajIIAdhE,5719
|
|
172
|
+
data_designer/engine/sampling_gen/entities/phone_number.py,sha256=dGY5LRwCz19RBH0mJDTpnBb0a98piDSNgkQRemgwqV0,4818
|
|
173
|
+
data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet,sha256=L6G4laXExB7uRCWHlF4XGDk0yMh41jbDnp9LIy7jNHM,576064
|
|
174
|
+
data_designer/engine/validators/__init__.py,sha256=uT0CTJF9Ce97zoAdMSWvfYn7mO5ja0lIgyPRKJLcsOU,693
|
|
175
|
+
data_designer/engine/validators/base.py,sha256=XfDDMMP0PusoKAjM9rXdIYkyWlLiQPAJChMgtkcdspw,1005
|
|
176
|
+
data_designer/engine/validators/local_callable.py,sha256=JaL-yOXrTFpubiO2QlSt4QbiJzD_ddChmfcHyMhbgaQ,1531
|
|
177
|
+
data_designer/engine/validators/python.py,sha256=omXjwMaomQYiyq4g6XqKt2wexVuI_rWue9Dk-CYc-do,8039
|
|
178
|
+
data_designer/engine/validators/remote.py,sha256=rythhIrH2GvqncMQeF3FiJa9Om0KZWeK3cWjW-ZubaM,3077
|
|
179
|
+
data_designer/engine/validators/sql.py,sha256=AMaEdA-gj9j0zwVp809x3ycKltd51wVEhI8mMYGyxd4,2408
|
|
180
|
+
data_designer/essentials/__init__.py,sha256=dIGYH9s0_VQJ1lG8S-ElZiISz59LHo9v7Y5upizcA1M,1135
|
|
181
|
+
data_designer/interface/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
182
|
+
data_designer/interface/data_designer.py,sha256=aX3Etg1qGpjivQQdplQ0Oi3aB7_6jzClk_4yrpcE5fY,17322
|
|
183
|
+
data_designer/interface/errors.py,sha256=Ft9GMeIrOHJv_PC_1rU6hWcNyq1GHdsFYZSc9HnUrxU,606
|
|
184
|
+
data_designer/interface/results.py,sha256=3fGwlhif4ufqUGh-EgsGccrob4S6a7WZ6BgFiszTo_A,3871
|
|
185
|
+
data_designer/plugins/__init__.py,sha256=qe1alcTEtnMSMdzknjb57vvjqKgFE5cEHXxBj8tPWMI,275
|
|
186
|
+
data_designer/plugins/errors.py,sha256=d7FMed3ueQvZHwuhwyPLzF4E34bO1mdj3aBVEw6p34o,386
|
|
187
|
+
data_designer/plugins/plugin.py,sha256=TVyyOaQBWAt0FQwUmtihTZ9MDJD85HwggrQ3L9CviPQ,5367
|
|
188
|
+
data_designer/plugins/registry.py,sha256=Cnt33Q25o9bS2v2YDbV3QPM57VNrtIBKAb4ERQRE_dY,3053
|
|
189
|
+
data_designer/plugins/testing/__init__.py,sha256=yyxrrH_i3q0Xb56QO9Ma35WtHlQ5PJF1b2pQoKa16xU,296
|
|
190
|
+
data_designer/plugins/testing/stubs.py,sha256=9tUF209ayZR6f0Q1LsRDW4kEOTgPoIxV8jlq4QoWuW0,3498
|
|
191
|
+
data_designer/plugins/testing/utils.py,sha256=a9LEgK827cnIzHEkgXOdgywrKDLBE36cyttrpG1ctT4,973
|
|
192
|
+
data_designer-0.3.5.dist-info/METADATA,sha256=W1hr3pOCTlU0ZAfXpWZfOBLcvvWNlSLY5p-AtaW5dwQ,8119
|
|
193
|
+
data_designer-0.3.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
194
|
+
data_designer-0.3.5.dist-info/entry_points.txt,sha256=NWWWidyDxN6CYX6y664PhBYMhbaYTQTyprqfYAgkyCg,57
|
|
195
|
+
data_designer-0.3.5.dist-info/licenses/LICENSE,sha256=cSWJDwVqHyQgly8Zmt3pqXJ2eQbZVYwN9qd0NMssxXY,11336
|
|
196
|
+
data_designer-0.3.5.dist-info/RECORD,,
|