data-designer 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/__init__.py +15 -0
- data_designer/_version.py +34 -0
- data_designer/cli/README.md +236 -0
- data_designer/cli/__init__.py +6 -0
- data_designer/cli/commands/__init__.py +2 -0
- data_designer/cli/commands/list.py +130 -0
- data_designer/cli/commands/models.py +10 -0
- data_designer/cli/commands/providers.py +11 -0
- data_designer/cli/commands/reset.py +100 -0
- data_designer/cli/controllers/__init__.py +7 -0
- data_designer/cli/controllers/model_controller.py +246 -0
- data_designer/cli/controllers/provider_controller.py +317 -0
- data_designer/cli/forms/__init__.py +20 -0
- data_designer/cli/forms/builder.py +51 -0
- data_designer/cli/forms/field.py +180 -0
- data_designer/cli/forms/form.py +59 -0
- data_designer/cli/forms/model_builder.py +125 -0
- data_designer/cli/forms/provider_builder.py +76 -0
- data_designer/cli/main.py +44 -0
- data_designer/cli/repositories/__init__.py +8 -0
- data_designer/cli/repositories/base.py +39 -0
- data_designer/cli/repositories/model_repository.py +42 -0
- data_designer/cli/repositories/provider_repository.py +43 -0
- data_designer/cli/services/__init__.py +7 -0
- data_designer/cli/services/model_service.py +116 -0
- data_designer/cli/services/provider_service.py +111 -0
- data_designer/cli/ui.py +448 -0
- data_designer/cli/utils.py +47 -0
- data_designer/config/__init__.py +2 -0
- data_designer/config/analysis/column_profilers.py +89 -0
- data_designer/config/analysis/column_statistics.py +274 -0
- data_designer/config/analysis/dataset_profiler.py +60 -0
- data_designer/config/analysis/utils/errors.py +8 -0
- data_designer/config/analysis/utils/reporting.py +188 -0
- data_designer/config/base.py +68 -0
- data_designer/config/column_configs.py +354 -0
- data_designer/config/column_types.py +168 -0
- data_designer/config/config_builder.py +660 -0
- data_designer/config/data_designer_config.py +40 -0
- data_designer/config/dataset_builders.py +11 -0
- data_designer/config/datastore.py +151 -0
- data_designer/config/default_model_settings.py +123 -0
- data_designer/config/errors.py +19 -0
- data_designer/config/interface.py +54 -0
- data_designer/config/models.py +231 -0
- data_designer/config/preview_results.py +32 -0
- data_designer/config/processors.py +41 -0
- data_designer/config/sampler_constraints.py +51 -0
- data_designer/config/sampler_params.py +604 -0
- data_designer/config/seed.py +145 -0
- data_designer/config/utils/code_lang.py +83 -0
- data_designer/config/utils/constants.py +313 -0
- data_designer/config/utils/errors.py +19 -0
- data_designer/config/utils/info.py +88 -0
- data_designer/config/utils/io_helpers.py +273 -0
- data_designer/config/utils/misc.py +81 -0
- data_designer/config/utils/numerical_helpers.py +28 -0
- data_designer/config/utils/type_helpers.py +100 -0
- data_designer/config/utils/validation.py +336 -0
- data_designer/config/utils/visualization.py +427 -0
- data_designer/config/validator_params.py +96 -0
- data_designer/engine/__init__.py +2 -0
- data_designer/engine/analysis/column_profilers/base.py +55 -0
- data_designer/engine/analysis/column_profilers/judge_score_profiler.py +160 -0
- data_designer/engine/analysis/column_profilers/registry.py +20 -0
- data_designer/engine/analysis/column_statistics.py +142 -0
- data_designer/engine/analysis/dataset_profiler.py +125 -0
- data_designer/engine/analysis/errors.py +7 -0
- data_designer/engine/analysis/utils/column_statistics_calculations.py +209 -0
- data_designer/engine/analysis/utils/judge_score_processing.py +128 -0
- data_designer/engine/column_generators/__init__.py +2 -0
- data_designer/engine/column_generators/generators/__init__.py +2 -0
- data_designer/engine/column_generators/generators/base.py +61 -0
- data_designer/engine/column_generators/generators/expression.py +63 -0
- data_designer/engine/column_generators/generators/llm_generators.py +172 -0
- data_designer/engine/column_generators/generators/samplers.py +75 -0
- data_designer/engine/column_generators/generators/seed_dataset.py +149 -0
- data_designer/engine/column_generators/generators/validation.py +147 -0
- data_designer/engine/column_generators/registry.py +56 -0
- data_designer/engine/column_generators/utils/errors.py +13 -0
- data_designer/engine/column_generators/utils/judge_score_factory.py +57 -0
- data_designer/engine/column_generators/utils/prompt_renderer.py +98 -0
- data_designer/engine/configurable_task.py +82 -0
- data_designer/engine/dataset_builders/artifact_storage.py +181 -0
- data_designer/engine/dataset_builders/column_wise_builder.py +287 -0
- data_designer/engine/dataset_builders/errors.py +13 -0
- data_designer/engine/dataset_builders/multi_column_configs.py +44 -0
- data_designer/engine/dataset_builders/utils/__init__.py +2 -0
- data_designer/engine/dataset_builders/utils/concurrency.py +184 -0
- data_designer/engine/dataset_builders/utils/config_compiler.py +60 -0
- data_designer/engine/dataset_builders/utils/dag.py +56 -0
- data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +190 -0
- data_designer/engine/dataset_builders/utils/errors.py +13 -0
- data_designer/engine/errors.py +49 -0
- data_designer/engine/model_provider.py +75 -0
- data_designer/engine/models/__init__.py +2 -0
- data_designer/engine/models/errors.py +308 -0
- data_designer/engine/models/facade.py +225 -0
- data_designer/engine/models/litellm_overrides.py +162 -0
- data_designer/engine/models/parsers/__init__.py +2 -0
- data_designer/engine/models/parsers/errors.py +34 -0
- data_designer/engine/models/parsers/parser.py +236 -0
- data_designer/engine/models/parsers/postprocessors.py +93 -0
- data_designer/engine/models/parsers/tag_parsers.py +60 -0
- data_designer/engine/models/parsers/types.py +82 -0
- data_designer/engine/models/recipes/base.py +79 -0
- data_designer/engine/models/recipes/response_recipes.py +291 -0
- data_designer/engine/models/registry.py +118 -0
- data_designer/engine/models/usage.py +75 -0
- data_designer/engine/models/utils.py +38 -0
- data_designer/engine/processing/ginja/__init__.py +2 -0
- data_designer/engine/processing/ginja/ast.py +64 -0
- data_designer/engine/processing/ginja/environment.py +461 -0
- data_designer/engine/processing/ginja/exceptions.py +54 -0
- data_designer/engine/processing/ginja/record.py +30 -0
- data_designer/engine/processing/gsonschema/__init__.py +2 -0
- data_designer/engine/processing/gsonschema/exceptions.py +8 -0
- data_designer/engine/processing/gsonschema/schema_transformers.py +81 -0
- data_designer/engine/processing/gsonschema/types.py +8 -0
- data_designer/engine/processing/gsonschema/validators.py +143 -0
- data_designer/engine/processing/processors/base.py +15 -0
- data_designer/engine/processing/processors/drop_columns.py +46 -0
- data_designer/engine/processing/processors/registry.py +20 -0
- data_designer/engine/processing/utils.py +120 -0
- data_designer/engine/registry/base.py +97 -0
- data_designer/engine/registry/data_designer_registry.py +37 -0
- data_designer/engine/registry/errors.py +10 -0
- data_designer/engine/resources/managed_dataset_generator.py +35 -0
- data_designer/engine/resources/managed_dataset_repository.py +194 -0
- data_designer/engine/resources/managed_storage.py +63 -0
- data_designer/engine/resources/resource_provider.py +46 -0
- data_designer/engine/resources/seed_dataset_data_store.py +66 -0
- data_designer/engine/sampling_gen/column.py +89 -0
- data_designer/engine/sampling_gen/constraints.py +95 -0
- data_designer/engine/sampling_gen/data_sources/base.py +214 -0
- data_designer/engine/sampling_gen/data_sources/errors.py +10 -0
- data_designer/engine/sampling_gen/data_sources/sources.py +342 -0
- data_designer/engine/sampling_gen/entities/__init__.py +2 -0
- data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
- data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +64 -0
- data_designer/engine/sampling_gen/entities/email_address_utils.py +169 -0
- data_designer/engine/sampling_gen/entities/errors.py +8 -0
- data_designer/engine/sampling_gen/entities/national_id_utils.py +100 -0
- data_designer/engine/sampling_gen/entities/person.py +142 -0
- data_designer/engine/sampling_gen/entities/phone_number.py +122 -0
- data_designer/engine/sampling_gen/errors.py +24 -0
- data_designer/engine/sampling_gen/generator.py +121 -0
- data_designer/engine/sampling_gen/jinja_utils.py +60 -0
- data_designer/engine/sampling_gen/people_gen.py +203 -0
- data_designer/engine/sampling_gen/person_constants.py +54 -0
- data_designer/engine/sampling_gen/schema.py +143 -0
- data_designer/engine/sampling_gen/schema_builder.py +59 -0
- data_designer/engine/sampling_gen/utils.py +40 -0
- data_designer/engine/secret_resolver.py +80 -0
- data_designer/engine/validators/__init__.py +17 -0
- data_designer/engine/validators/base.py +36 -0
- data_designer/engine/validators/local_callable.py +34 -0
- data_designer/engine/validators/python.py +245 -0
- data_designer/engine/validators/remote.py +83 -0
- data_designer/engine/validators/sql.py +60 -0
- data_designer/errors.py +5 -0
- data_designer/essentials/__init__.py +137 -0
- data_designer/interface/__init__.py +2 -0
- data_designer/interface/data_designer.py +351 -0
- data_designer/interface/errors.py +16 -0
- data_designer/interface/results.py +55 -0
- data_designer/logging.py +161 -0
- data_designer/plugin_manager.py +83 -0
- data_designer/plugins/__init__.py +6 -0
- data_designer/plugins/errors.py +10 -0
- data_designer/plugins/plugin.py +69 -0
- data_designer/plugins/registry.py +86 -0
- data_designer-0.1.0.dist-info/METADATA +173 -0
- data_designer-0.1.0.dist-info/RECORD +177 -0
- data_designer-0.1.0.dist-info/WHEEL +4 -0
- data_designer-0.1.0.dist-info/entry_points.txt +2 -0
- data_designer-0.1.0.dist-info/licenses/LICENSE +201 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from enum import Enum
|
|
5
|
+
from typing import Literal, Type, get_origin
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, model_validator
|
|
8
|
+
from typing_extensions import Self
|
|
9
|
+
|
|
10
|
+
from data_designer.config.base import ConfigBase
|
|
11
|
+
from data_designer.engine.configurable_task import ConfigurableTask
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class PluginType(str, Enum):
    """Kinds of plugins that can be registered; each member's value is its hyphenated name."""

    COLUMN_GENERATOR = "column-generator"

    @property
    def discriminator_field(self) -> str:
        """Name of the config field used as the discriminator for this plugin type.

        Raises:
            ValueError: If the member has no discriminator field mapping.
        """
        # Guard clause: anything other than the known member is rejected up front.
        if self != PluginType.COLUMN_GENERATOR:
            raise ValueError(f"Invalid plugin type: {self.value}")
        return "column_type"

    @property
    def display_name(self) -> str:
        """The member value with hyphens turned into spaces, for human-readable output."""
        words = self.value.split("-")
        return " ".join(words)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
# Plugin pairs a task class with its config class and a plugin type. The plugin's
# name is not stored; it is read from the default value of the config class's
# discriminator field, which the validator below requires to be a Literal-typed
# string that maps onto a valid Python identifier.
class Plugin(BaseModel):
    task_cls: Type[ConfigurableTask]
    config_cls: Type[ConfigBase]
    plugin_type: PluginType
    emoji: str = "🔌"

    @property
    def discriminator_field(self) -> str:
        """Name of the discriminator field, as dictated by the plugin type."""
        return self.plugin_type.discriminator_field

    @property
    def name(self) -> str:
        """Plugin name: the default value of the config class's discriminator field."""
        discriminator_info = self.config_cls.model_fields[self.discriminator_field]
        return discriminator_info.default

    @property
    def enum_key_name(self) -> str:
        """Plugin name with hyphens replaced by underscores, upper-cased."""
        underscored = self.name.replace("-", "_")
        return underscored.upper()

    @property
    def config_type_as_class_name(self) -> str:
        """Enum key name title-cased with the underscores removed."""
        title_cased = self.enum_key_name.title()
        return title_cased.replace("_", "")

    @model_validator(mode="after")
    def validate_discriminator_field(self) -> Self:
        """Check that the config class declares a usable discriminator field.

        The field must exist on the config class, be annotated as a ``Literal``,
        have a string default, and that default (hyphens to underscores,
        upper-cased) must be a valid Python identifier.

        Raises:
            ValueError: If any of the checks above fails.
        """
        config_name = self.config_cls.__name__
        discriminator = self.plugin_type.discriminator_field
        declared_fields = self.config_cls.model_fields

        if discriminator not in declared_fields:
            raise ValueError(f"Discriminator field '{discriminator}' not found in config class {config_name}")

        info = declared_fields[discriminator]
        annotation = info.annotation
        default = info.default

        if get_origin(annotation) is not Literal:
            raise ValueError(f"Field '{discriminator}' of {config_name} must be a Literal type, not {annotation}.")
        if not isinstance(default, str):
            raise ValueError(f"The default of '{discriminator}' must be a string, not {type(default)}.")

        # Same transformation as ``enum_key_name``, applied to the raw default.
        candidate_key = default.replace("-", "_").upper()
        if candidate_key.isidentifier():
            return self

        raise ValueError(
            f"The default value '{default}' for discriminator field '{discriminator}' "
            f"cannot be converted to a valid enum key. The converted key '{candidate_key}' "
            "must be a valid Python identifier."
        )
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from importlib.metadata import entry_points
|
|
5
|
+
import logging
|
|
6
|
+
import os
|
|
7
|
+
import threading
|
|
8
|
+
from typing import Type, TypeAlias
|
|
9
|
+
|
|
10
|
+
from typing_extensions import Self
|
|
11
|
+
|
|
12
|
+
from data_designer.plugins.errors import PluginNotFoundError
|
|
13
|
+
from data_designer.plugins.plugin import Plugin, PluginType
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Switch that makes plugin discovery a no-op. Read once at import time; it is True
# only when the DISABLE_DATA_DESIGNER_PLUGINS environment variable equals "true"
# (compared case-insensitively). Any other value, or an unset variable, leaves
# discovery enabled.
PLUGINS_DISABLED = os.getenv("DISABLE_DATA_DESIGNER_PLUGINS", "false").lower() == "true"
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class PluginRegistry:
    """Singleton registry of plugins loaded from the ``data_designer.plugins`` entry-point group.

    Every ``PluginRegistry()`` call returns the same instance. Discovery runs on
    construction until it has completed once; ``reset()`` drops the instance and
    the registered plugins so that the next construction discovers again.
    """

    _instance = None
    _plugins_discovered = False
    _lock = threading.Lock()

    # Registered plugins keyed by plugin name; shared at class level.
    _plugins: dict[str, Plugin] = {}

    def __new__(cls, *args, **kwargs):
        """Plugin manager is a singleton."""
        # Check, lock, then check again so the instance is only created once.
        # Identity comparison: "no instance yet" means exactly ``None``.
        if cls._instance is None:
            with cls._lock:
                if cls._instance is None:
                    cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self):
        """Run plugin discovery unless it has already completed."""
        # __init__ runs on every ``PluginRegistry()`` call, hence the flag check.
        with self._lock:
            if not self._plugins_discovered:
                self._discover()

    @classmethod
    def reset(cls) -> None:
        """Forget the singleton instance, the discovery flag and all registered plugins."""
        with cls._lock:
            cls._instance = None
            cls._plugins_discovered = False
            cls._plugins = {}

    def add_plugin_types_to_union(self, type_union: Type[TypeAlias], plugin_type: PluginType) -> Type[TypeAlias]:
        """Return ``type_union`` extended with the config class of every plugin of ``plugin_type``.

        Config classes that are already members of the union are not added again.

        Args:
            type_union: A union of types, or a single class.
            plugin_type: Only plugins of this type contribute their config class.

        Returns:
            The (possibly widened) union.
        """
        for plugin in self.get_plugins(plugin_type):
            # A one-member ``Union`` collapses to the plain class, which has no
            # ``__args__``; treat that case as "no members listed" instead of
            # failing with AttributeError.
            existing_members = getattr(type_union, "__args__", ())
            if plugin.config_cls not in existing_members:
                type_union |= plugin.config_cls
        return type_union

    def get_plugin(self, plugin_name: str) -> Plugin:
        """Return the plugin registered under ``plugin_name``.

        Raises:
            PluginNotFoundError: If no plugin with that name is registered.
        """
        try:
            return self._plugins[plugin_name]
        except KeyError:
            raise PluginNotFoundError(f"Plugin {plugin_name!r} not found.") from None

    def get_plugins(self, plugin_type: PluginType) -> list[Plugin]:
        """Return all registered plugins whose type equals ``plugin_type``."""
        return [plugin for plugin in self._plugins.values() if plugin.plugin_type == plugin_type]

    def get_plugin_names(self, plugin_type: PluginType) -> list[str]:
        """Return the names of all registered plugins of ``plugin_type``."""
        return [plugin.name for plugin in self.get_plugins(plugin_type)]

    def num_plugins(self, plugin_type: PluginType) -> int:
        """Return how many plugins of ``plugin_type`` are registered."""
        return len(self.get_plugins(plugin_type))

    def plugin_exists(self, plugin_name: str) -> bool:
        """Return True if a plugin is registered under ``plugin_name``."""
        return plugin_name in self._plugins

    def _discover(self) -> Self:
        """Load plugins from the ``data_designer.plugins`` entry-point group.

        Does nothing when ``PLUGINS_DISABLED`` is set. A failure while loading one
        entry point is logged as a warning and does not stop the others from
        loading. Entry points that load to something other than a ``Plugin``
        instance are skipped with a warning.

        Returns:
            This registry, to allow chaining.
        """
        if PLUGINS_DISABLED:
            return self
        for ep in entry_points(group="data_designer.plugins"):
            try:
                plugin = ep.load()
                if isinstance(plugin, Plugin):
                    logger.info(
                        f"🔌 Plugin discovered ➜ {plugin.plugin_type.display_name} "
                        f"{plugin.enum_key_name} is now available ⚡️"
                    )
                    self._plugins[plugin.name] = plugin
                else:
                    # Previously dropped without a trace, which made a misdeclared
                    # entry point (e.g. pointing at a class or module) hard to diagnose.
                    logger.warning(
                        f"🛑 Ignoring entry point {ep.name!r}: expected a Plugin instance, "
                        f"got {type(plugin).__name__}"
                    )
            except Exception as e:
                # Best effort by design: one broken plugin must not block the rest.
                logger.warning(f"🛑 Failed to load plugin from entry point {ep.name!r}: {e}")
        # Stored on the instance, shadowing the class-level default. reset() drops
        # the instance, so the next construction sees the class value again.
        self._plugins_discovered = True
        return self
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: data-designer
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: General framework for synthetic data generation
|
|
5
|
+
License-File: LICENSE
|
|
6
|
+
Classifier: Development Status :: 4 - Beta
|
|
7
|
+
Classifier: Intended Audience :: Developers
|
|
8
|
+
Classifier: Intended Audience :: Science/Research
|
|
9
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Human Machine Interfaces
|
|
16
|
+
Classifier: Topic :: Software Development
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Requires-Dist: anyascii<1.0,>=0.3.3
|
|
19
|
+
Requires-Dist: datasets>=4.0.0
|
|
20
|
+
Requires-Dist: duckdb==1.1.3
|
|
21
|
+
Requires-Dist: faker==20.1.0
|
|
22
|
+
Requires-Dist: httpx-retries>=0.4.2
|
|
23
|
+
Requires-Dist: httpx>=0.27.2
|
|
24
|
+
Requires-Dist: huggingface-hub>=0.34.4
|
|
25
|
+
Requires-Dist: jinja2<4,>=3.1.6
|
|
26
|
+
Requires-Dist: json-repair==0.48.0
|
|
27
|
+
Requires-Dist: jsonpath-rust-bindings>=1.0
|
|
28
|
+
Requires-Dist: litellm==1.73.6
|
|
29
|
+
Requires-Dist: lxml>=6.0.2
|
|
30
|
+
Requires-Dist: marko==2.1.2
|
|
31
|
+
Requires-Dist: networkx==3.0
|
|
32
|
+
Requires-Dist: numpy>=1.23.5
|
|
33
|
+
Requires-Dist: pandas>=1.5.3
|
|
34
|
+
Requires-Dist: prompt-toolkit>=3.0.0
|
|
35
|
+
Requires-Dist: pyarrow>=19.0.1
|
|
36
|
+
Requires-Dist: pydantic>=2.9.2
|
|
37
|
+
Requires-Dist: pydantic[email]>=2.9.2
|
|
38
|
+
Requires-Dist: pygments>=2.19.2
|
|
39
|
+
Requires-Dist: python-json-logger==2.0.7
|
|
40
|
+
Requires-Dist: pyyaml>=6.0.1
|
|
41
|
+
Requires-Dist: requests<3,>=2.32.2
|
|
42
|
+
Requires-Dist: rich>=13.7.1
|
|
43
|
+
Requires-Dist: ruff==0.12.3
|
|
44
|
+
Requires-Dist: scipy>=1.11.0
|
|
45
|
+
Requires-Dist: sqlfluff==3.2.0
|
|
46
|
+
Requires-Dist: tiktoken>=0.8.0
|
|
47
|
+
Requires-Dist: typer>=0.12.0
|
|
48
|
+
Description-Content-Type: text/markdown
|
|
49
|
+
|
|
50
|
+
# 🎨 NeMo Data Designer
|
|
51
|
+
|
|
52
|
+
[CI status](https://github.com/NVIDIA-NeMo/DataDesigner/actions/workflows/ci.yml)
|
|
53
|
+
[License: Apache 2.0](https://opensource.org/licenses/Apache-2.0)
|
|
54
|
+
[Python downloads](https://www.python.org/downloads/) [NeMo Microservices documentation](https://docs.nvidia.com/nemo/microservices/latest/index.html)
|
|
55
|
+
|
|
56
|
+
**Generate high-quality synthetic datasets from scratch or using your own seed data.**
|
|
57
|
+
|
|
58
|
+
---
|
|
59
|
+
|
|
60
|
+
## Welcome!
|
|
61
|
+
|
|
62
|
+
Data Designer helps you create synthetic datasets that go beyond simple LLM prompting. Whether you need diverse statistical distributions, meaningful correlations between fields, or validated high-quality outputs, Data Designer provides a flexible framework for building production-grade synthetic data.
|
|
63
|
+
|
|
64
|
+
## What can you do with Data Designer?
|
|
65
|
+
|
|
66
|
+
- **Generate diverse data** using statistical samplers, LLMs, or existing seed datasets
|
|
67
|
+
- **Control relationships** between fields with dependency-aware generation
|
|
68
|
+
- **Validate quality** with built-in Python, SQL, and custom local and remote validators
|
|
69
|
+
- **Score outputs** using LLM-as-a-judge for quality assessment
|
|
70
|
+
- **Iterate quickly** with preview mode before full-scale generation
|
|
71
|
+
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
## Quick Start
|
|
75
|
+
|
|
76
|
+
### 1. Install
|
|
77
|
+
|
|
78
|
+
```bash
|
|
79
|
+
pip install data-designer
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
Or install from source:
|
|
83
|
+
|
|
84
|
+
```bash
|
|
85
|
+
git clone https://github.com/NVIDIA-NeMo/DataDesigner.git
|
|
86
|
+
cd DataDesigner
|
|
87
|
+
make install
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### 2. Set your API key
|
|
91
|
+
|
|
92
|
+
Get your API key from [build.nvidia.com](https://build.nvidia.com) or [OpenAI](https://platform.openai.com/api-keys):
|
|
93
|
+
|
|
94
|
+
```bash
|
|
95
|
+
export NVIDIA_API_KEY="your-api-key-here"
|
|
96
|
+
# Or use OpenAI
|
|
97
|
+
export OPENAI_API_KEY="your-openai-api-key-here"
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### 3. Generate your first dataset
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
from data_designer.essentials import (
|
|
104
|
+
CategorySamplerParams,
|
|
105
|
+
DataDesigner,
|
|
106
|
+
DataDesignerConfigBuilder,
|
|
107
|
+
LLMTextColumnConfig,
|
|
108
|
+
PersonSamplerParams,
|
|
109
|
+
SamplerColumnConfig,
|
|
110
|
+
SamplerType,
|
|
111
|
+
)
|
|
112
|
+
|
|
113
|
+
# Initialize with default settings
|
|
114
|
+
data_designer = DataDesigner()
|
|
115
|
+
config_builder = DataDesignerConfigBuilder()
|
|
116
|
+
|
|
117
|
+
# Add a product category
|
|
118
|
+
config_builder.add_column(
|
|
119
|
+
SamplerColumnConfig(
|
|
120
|
+
name="product_category",
|
|
121
|
+
sampler_type=SamplerType.CATEGORY,
|
|
122
|
+
params=CategorySamplerParams(
|
|
123
|
+
values=["Electronics", "Clothing", "Home & Kitchen", "Books"],
|
|
124
|
+
),
|
|
125
|
+
)
|
|
126
|
+
)
|
|
127
|
+
|
|
128
|
+
# Generate personalized customer reviews
|
|
129
|
+
config_builder.add_column(
|
|
130
|
+
LLMTextColumnConfig(
|
|
131
|
+
name="review",
|
|
132
|
+
model_alias="nvidia-text",
|
|
133
|
+
prompt="""Write a brief product review for a {{ product_category }} item you recently purchased.""",
|
|
134
|
+
)
|
|
135
|
+
)
|
|
136
|
+
|
|
137
|
+
# Preview your dataset
|
|
138
|
+
preview = data_designer.preview(config_builder=config_builder)
|
|
139
|
+
preview.display_sample_record()
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
**That's it!** You've created a dataset.
|
|
143
|
+
|
|
144
|
+
---
|
|
145
|
+
|
|
146
|
+
## What's next?
|
|
147
|
+
|
|
148
|
+
### 📚 Learn more
|
|
149
|
+
|
|
150
|
+
- **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner)** – Detailed walkthrough with more examples
|
|
151
|
+
- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/1-the-basics/)** – Step-by-step interactive tutorials
|
|
152
|
+
- **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
|
|
153
|
+
- **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/models/model-configs/)** – Configure custom models and providers
|
|
154
|
+
|
|
155
|
+
### 🔧 Configure models via CLI
|
|
156
|
+
|
|
157
|
+
```bash
|
|
158
|
+
data-designer config providers # Configure model providers
|
|
159
|
+
data-designer config models # Set up your model configurations
|
|
160
|
+
data-designer config list # View current settings
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### 🤝 Get involved
|
|
164
|
+
|
|
165
|
+
- **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING.md)** – Help improve Data Designer
|
|
166
|
+
- **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or request features
|
|
167
|
+
- **[GitHub Discussions](https://github.com/NVIDIA-NeMo/DataDesigner/discussions)** – Ask questions and share ideas
|
|
168
|
+
|
|
169
|
+
---
|
|
170
|
+
|
|
171
|
+
## License
|
|
172
|
+
|
|
173
|
+
Apache License 2.0 – see [LICENSE](LICENSE) for details.
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
data_designer/__init__.py,sha256=iCeqRnb640RrL2QpA630GY5Ng7JiDt83Vq0DwLnNugU,461
|
|
2
|
+
data_designer/_version.py,sha256=5jwwVncvCiTnhOedfkzzxmxsggwmTBORdFL_4wq0ZeY,704
|
|
3
|
+
data_designer/errors.py,sha256=Z4eN9XwzZvGRdBluSNoSqQYkPPzNQIDf0ET_OqWRZh8,179
|
|
4
|
+
data_designer/logging.py,sha256=O6LlQRj4IdkvEEYiMkKfMb_ZDgN1YpkGQUCqcp7nY6w,5354
|
|
5
|
+
data_designer/plugin_manager.py,sha256=jWoo80x0oCiOIJMA43t-vK-_hVv9_xt4WhBcurYoDqw,3098
|
|
6
|
+
data_designer/cli/README.md,sha256=YRmrABHqesBmDG9KJvG8jNrhfekoKwya4YizFFAde9U,8463
|
|
7
|
+
data_designer/cli/__init__.py,sha256=kTfolrDol0izniNPXtuaUJ_oXRfJ-jGUPuVR5IwibEM,210
|
|
8
|
+
data_designer/cli/main.py,sha256=oTx-x0lIkgpa37ZBt12a4Cbj6uNcUowFxPUBG5B1pfk,1581
|
|
9
|
+
data_designer/cli/ui.py,sha256=5eHP42hFBF6TwLa-LmwL8reUWd8Lov_OCeZQp7XHBQY,13702
|
|
10
|
+
data_designer/cli/utils.py,sha256=6_ulYvu7DVsYzi1crl7dkx7McMOmQiQGFicvuRiw8Oc,1292
|
|
11
|
+
data_designer/cli/commands/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
12
|
+
data_designer/cli/commands/list.py,sha256=z6253gk37HZPGGRuoZgK3nAg0wyarnx5hidPJt-zJiA,4758
|
|
13
|
+
data_designer/cli/commands/models.py,sha256=QgWv-648eUN0yrVfUaqa2QpMEe4T54GZMwfwvglwNzc,387
|
|
14
|
+
data_designer/cli/commands/providers.py,sha256=BAtVUBGmUw4d4YFpUZNduIXtvIHZumxqoFs1strJWqI,450
|
|
15
|
+
data_designer/cli/commands/reset.py,sha256=ggRHpzQ616-881LqsPTifkFBLcbcraq-j15r0egFnYM,3473
|
|
16
|
+
data_designer/cli/controllers/__init__.py,sha256=j-wWZcZVsQNyKj1doKCtizBOTIVe6I3Y1cFloeT-zdI,347
|
|
17
|
+
data_designer/cli/controllers/model_controller.py,sha256=xkY_DPaLGY8i6tpfdcuCCS0VjJbqii3pvhH6xJLnaw0,8837
|
|
18
|
+
data_designer/cli/controllers/provider_controller.py,sha256=xq8pjxp10vdg2ZdFM_LMssSFHpev-70TcJtGQOiwa-Y,12196
|
|
19
|
+
data_designer/cli/forms/__init__.py,sha256=BGLbNJCHCgYiQWoAdTbUjzqgVlJymTQOV8sNWwKn-iI,672
|
|
20
|
+
data_designer/cli/forms/builder.py,sha256=QMCutZb7l3DeL4nXFGCUaiS1bxBu1BdaBWwlb1rmiIE,1690
|
|
21
|
+
data_designer/cli/forms/field.py,sha256=hWLTtxxRqpLBqFypxtghCcV7bH5yvVekLD99MvvjTZY,5505
|
|
22
|
+
data_designer/cli/forms/form.py,sha256=f6_LdSlk4kddB9a4mGotA-VlR2mlXAU_9RtLkbliI38,2025
|
|
23
|
+
data_designer/cli/forms/model_builder.py,sha256=wdKw933VCFRRlF8J2onzlYIVZxeDLGpJ1PP-RlFNeEU,4722
|
|
24
|
+
data_designer/cli/forms/provider_builder.py,sha256=xphQlNlnfDLm0XwqbPC6SJ3wXwlU45xVo_35Pe1EBdU,2895
|
|
25
|
+
data_designer/cli/repositories/__init__.py,sha256=RBOWAkIOOpr-L-kVz-PDIPmMXdlGLCinxkwvKS6bAB4,434
|
|
26
|
+
data_designer/cli/repositories/base.py,sha256=LQ0i_KrTdhS5o48qphlr4uWoAVrz02Lf_ZH1JIhcuBQ,1054
|
|
27
|
+
data_designer/cli/repositories/model_repository.py,sha256=yy4FQw_g-0y8d4OIv86CaxjxIY8Mt5GNXFpuB_nAnJ8,1446
|
|
28
|
+
data_designer/cli/repositories/provider_repository.py,sha256=GDVd6J_zqUFC4wxe78EC8VFD5wp2urIjfzqu01LbENY,1515
|
|
29
|
+
data_designer/cli/services/__init__.py,sha256=9FrVkrrLA4Oyv4lXoNXowJWUnjE5BhY6zejdkpVRppE,323
|
|
30
|
+
data_designer/cli/services/model_service.py,sha256=Fn3c0qMZqFAEqzBr0haLjp-nLKAkkaJEHACp8aG5eok,3885
|
|
31
|
+
data_designer/cli/services/provider_service.py,sha256=pdD2_C4yK0YBabcuan95H86UreZJ5zWFGI3Ue99mXXo,3916
|
|
32
|
+
data_designer/config/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
33
|
+
data_designer/config/base.py,sha256=xCbvwxXKRityWqeGP4zTXVuPHAOoUdpuQr8_t8vY8f8,2423
|
|
34
|
+
data_designer/config/column_configs.py,sha256=QG65him__Xj4d47YX8x7jgVOZz81FrB9C8hpWTGzxLM,16640
|
|
35
|
+
data_designer/config/column_types.py,sha256=V0Ijwb-asYOX-GQyG9W-X_A-FIbFSajKuus58sG8CSM,6774
|
|
36
|
+
data_designer/config/config_builder.py,sha256=NlAe6cwN6IAE90A8uPLsOdABmmYyUt6UnGYZwgmf_xE,27288
|
|
37
|
+
data_designer/config/data_designer_config.py,sha256=cvIXMVQzYn9vC4GINPz972pDBmt-HrV5dvw1568LVmE,1719
|
|
38
|
+
data_designer/config/dataset_builders.py,sha256=1pNFy_pkQ5lJ6AVZ43AeTuSbz6yC_l7Ndcyp5yaT8hQ,327
|
|
39
|
+
data_designer/config/datastore.py,sha256=oPC4jeupalPexhe8K2BkMSlPvDaOZWAyoDuaq9m-Uo4,6272
|
|
40
|
+
data_designer/config/default_model_settings.py,sha256=TMnxGQNAE7ipTmPF1R0qJBEUX199FWdTnjNiy5oR1Bo,4668
|
|
41
|
+
data_designer/config/errors.py,sha256=XneHH6tKHG2sZ71HzmPr7k3UBZ_psnSANknT30n-aa8,449
|
|
42
|
+
data_designer/config/interface.py,sha256=2_tHvxtKAv0C5L7K4ztm-Xa1A-u9Njlwo2drdPa2qmk,1499
|
|
43
|
+
data_designer/config/models.py,sha256=5Cy55BnKYyr-I1UHLUTqZxe6Ca9uVQWpUiwt9X0ZlrU,7521
|
|
44
|
+
data_designer/config/preview_results.py,sha256=H6ETFI6L1TW8MEC9KYsJ1tXGIC5cloCggBCCZd6jiEE,1087
|
|
45
|
+
data_designer/config/processors.py,sha256=qOF_plBoh6UEFNwUpyDgkqIuSDUaSM2S7k-kSAEB5p8,1328
|
|
46
|
+
data_designer/config/sampler_constraints.py,sha256=4JxP-nge5KstqtctJnVg5RLM1w9mA7qFi_BjgTJl9CE,1167
|
|
47
|
+
data_designer/config/sampler_params.py,sha256=rrub7LPnXb032ClEZfo0eB0WhMekW8DFH8yr20xSz3s,25759
|
|
48
|
+
data_designer/config/seed.py,sha256=g-iUToYSIFuTv3sbwSG_dF-9RwC8r8AvCD-vS8c_jDg,5487
|
|
49
|
+
data_designer/config/validator_params.py,sha256=sNxFIF2bk_N4jJD-aMH1N5MQynDip08AoMI1ajxtRdc,3909
|
|
50
|
+
data_designer/config/analysis/column_profilers.py,sha256=Qss9gr7oHNcjijW_MMIX9JkFX-V9v5vPwYWCnxLjMDY,2749
|
|
51
|
+
data_designer/config/analysis/column_statistics.py,sha256=399XYRzUPc4mEPEuU9zMCX_rN2VxkI39tYbcqFgY39k,10516
|
|
52
|
+
data_designer/config/analysis/dataset_profiler.py,sha256=BKHpz_Yx85MKaK4X2-dduABl9UJDuatVSWkn0qpU5JI,2766
|
|
53
|
+
data_designer/config/analysis/utils/errors.py,sha256=1b50TtzSBdJU5gMlskVKXGFh_VprXN0fQOodfpGRrVs,279
|
|
54
|
+
data_designer/config/analysis/utils/reporting.py,sha256=bX3CzxWT5lrJLXKXVA1THNMLXo_vWnyELCTnn2gMzSo,6958
|
|
55
|
+
data_designer/config/utils/code_lang.py,sha256=NtBOfK08D6EjKSYvUo5Qo61SQxbtW4cieyaMKYFpZD8,2410
|
|
56
|
+
data_designer/config/utils/constants.py,sha256=POo0bU0brd1Irv-EUlvN5CFarScjU9k4ZK7-4ky1ks0,6547
|
|
57
|
+
data_designer/config/utils/errors.py,sha256=zKx3NDnvrr4iNaAqb9H1lu1EfyvFAOjjumqSG111jjI,468
|
|
58
|
+
data_designer/config/utils/info.py,sha256=4amyxkLjsQqKcjUt9foDE-PepyPUqwgOgdLtNNnMjnI,3321
|
|
59
|
+
data_designer/config/utils/io_helpers.py,sha256=-lKVbB1csM5vS_zQOfdJ_WZHS9qhyfgPerGvH2BWqIU,9162
|
|
60
|
+
data_designer/config/utils/misc.py,sha256=9wDWldkZXA_Ixc6U82pb7TChrc1uPB1g9_2FOrwR-Cs,2492
|
|
61
|
+
data_designer/config/utils/numerical_helpers.py,sha256=SPJNUKpw8H5zmqbD9sCcaziUibGcAL6accM3QwvEWhw,777
|
|
62
|
+
data_designer/config/utils/type_helpers.py,sha256=C5G1wXFm3OV_Nx4zvqZcSEizdpq6nW7CyV_lV_KcjDo,3963
|
|
63
|
+
data_designer/config/utils/validation.py,sha256=1Tj-YeUQf6YJnoruFDZoO7mcz-dydfpB365Ee2qyzSg,12970
|
|
64
|
+
data_designer/config/utils/visualization.py,sha256=Nx_bAysdFOlXWFqrrxSnqSKEcHLDJZSSfRpfEMX4qXo,15611
|
|
65
|
+
data_designer/engine/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
66
|
+
data_designer/engine/configurable_task.py,sha256=GnaBG6xVBQ1ELpzumNctwKYZJvKKjh2LMKhws4W2GS4,3124
|
|
67
|
+
data_designer/engine/errors.py,sha256=8XevBxlUtQbwm5FbuEihWWNKwETH6AffJCD93-66aZ4,1250
|
|
68
|
+
data_designer/engine/model_provider.py,sha256=w_7EZpDKgOLdzmCWJ6v6oKdM0GdRijir3iK102bBtg8,2782
|
|
69
|
+
data_designer/engine/secret_resolver.py,sha256=wPseqPoLgOtyufyq6e8lN0wR0W8ZhEXh2PbjxHDZUrc,2427
|
|
70
|
+
data_designer/engine/analysis/column_statistics.py,sha256=_4coay-IeVViRtQI8JybD_TNVtsVuT7hh6yDdb8Td7Y,5748
|
|
71
|
+
data_designer/engine/analysis/dataset_profiler.py,sha256=sSOMMQLRtdbL8czQ92dAfOPzgE9pFNYnslGs9jcZLrU,5849
|
|
72
|
+
data_designer/engine/analysis/errors.py,sha256=VBeKREcPcInWhjAo3U2x_9UnJBi8zcGnUjLXNippPtA,255
|
|
73
|
+
data_designer/engine/analysis/column_profilers/base.py,sha256=lm3ZvMGfX_gvclOkzGxdH0lC0H0B2gjvxmKh6rKITPg,1921
|
|
74
|
+
data_designer/engine/analysis/column_profilers/judge_score_profiler.py,sha256=Ekk8wslnoIwrha4ynxL3RC7Hwd3PXbKSs7MjDybjN9Q,6823
|
|
75
|
+
data_designer/engine/analysis/column_profilers/registry.py,sha256=GpudnadaJxb8ub_YZzFpu1JzLYe8zdMjYGCPSHtMdJs,882
|
|
76
|
+
data_designer/engine/analysis/utils/column_statistics_calculations.py,sha256=XgpIBEVZn0A7NEGFSkl4QEe4Vu9Z_R0ugqU9b9pqLng,8199
|
|
77
|
+
data_designer/engine/analysis/utils/judge_score_processing.py,sha256=lkZ0jQh7WqjJeB_mkIFQXNZNMJksHFKFRIP2jK5b7Dc,4798
|
|
78
|
+
data_designer/engine/column_generators/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
79
|
+
data_designer/engine/column_generators/registry.py,sha256=EG2yxV_Zvp3xlQTkEBaoNscYk0SiZ-oIYB9H2264Yv8,2836
|
|
80
|
+
data_designer/engine/column_generators/generators/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
81
|
+
data_designer/engine/column_generators/generators/base.py,sha256=RhFyAKDEEwvwnsQbvnQhevikmCNxoVTDaZOt4_V0WVo,1834
|
|
82
|
+
data_designer/engine/column_generators/generators/expression.py,sha256=7xniEj8aPscWDYLrnNbG2mF3s08C7aR8ZgNUCzr_x8g,2539
|
|
83
|
+
data_designer/engine/column_generators/generators/llm_generators.py,sha256=jXbUCn3ZOk3e8ZE8nrQ1zR-PyIBuogtdvZfZeZqtZsU,6780
|
|
84
|
+
data_designer/engine/column_generators/generators/samplers.py,sha256=UyoDcuS6nKw7HulycqvRWEjcwSQsjWdQPvs8Yx9yLhs,3579
|
|
85
|
+
data_designer/engine/column_generators/generators/seed_dataset.py,sha256=QUegAT55AxyBHY5VhAtJKv9BRgGJ2jxN0Yff7YvkLDI,7018
|
|
86
|
+
data_designer/engine/column_generators/generators/validation.py,sha256=MbDFXzieftv6-77rRdltNUnquUe5FxCVkBEHsAwvwh4,6591
|
|
87
|
+
data_designer/engine/column_generators/utils/errors.py,sha256=ugNwaqnPdrPZI7YnKLbYwFjYUSm0WAzgaVu_u6i5Rc8,365
|
|
88
|
+
data_designer/engine/column_generators/utils/judge_score_factory.py,sha256=JRoaZgRGK24dH0zx7MNGSccK196tQK_l0sbwNkurg7c,2132
|
|
89
|
+
data_designer/engine/column_generators/utils/prompt_renderer.py,sha256=d4tbyPsgmFDikW3nxL5is9RNaajMkoPDCrfkQkxw7rc,4760
|
|
90
|
+
data_designer/engine/dataset_builders/artifact_storage.py,sha256=NlO8H4g4ZaI5iDwI-xnhyyKGTdLX5JunqQuiQNXW-yI,7303
|
|
91
|
+
data_designer/engine/dataset_builders/column_wise_builder.py,sha256=pu7mJIc5Ld4TLeTDsh9sCzKHgCbe7cC5PDF4RmxXw8o,13077
|
|
92
|
+
data_designer/engine/dataset_builders/errors.py,sha256=1kChleChG4rASWIiL4Bel6Ox6aFZjQUrh5ogPt1CDWo,359
|
|
93
|
+
data_designer/engine/dataset_builders/multi_column_configs.py,sha256=t28fhI-WRIBohFnAJ80l5EAETEDB5rJ5RSWInMiRfyE,1619
|
|
94
|
+
data_designer/engine/dataset_builders/utils/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
95
|
+
data_designer/engine/dataset_builders/utils/concurrency.py,sha256=n85ei99y5kHy6TrdQ8L0bUO90GmTZSckH5_FDz4sZpU,7361
|
|
96
|
+
data_designer/engine/dataset_builders/utils/config_compiler.py,sha256=verC3CBA0MjuTQN32RBX10fFvVOefG-DnPDF5Ql2hjg,2402
|
|
97
|
+
data_designer/engine/dataset_builders/utils/dag.py,sha256=8h7jEu0XiYGSKHIe4CGFi6SC9HGyAgvkD23ZECNWDC0,2388
|
|
98
|
+
data_designer/engine/dataset_builders/utils/dataset_batch_manager.py,sha256=oMzMrUbHVosbdq44FsT20zp_csYwbTWAMhkeLiJF7aM,7769
|
|
99
|
+
data_designer/engine/dataset_builders/utils/errors.py,sha256=qW_TFOKNVODbb8bYrUlbqMAkheDAg12DDo3RmAhHrCg,370
|
|
100
|
+
data_designer/engine/models/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
101
|
+
data_designer/engine/models/errors.py,sha256=nS39JvQG0aKFQGds8cizjbSE5zRZvb-l-40odtP02Vc,12174
|
|
102
|
+
data_designer/engine/models/facade.py,sha256=MFYj0C4jn4o2ezIFP4IKScmSkFn815kbRQueRYM1fx4,9877
|
|
103
|
+
data_designer/engine/models/litellm_overrides.py,sha256=Rm6wIQrbVQsVcAOd70LxjuuWv2LdsJQdTDuTtK0OrfQ,5563
|
|
104
|
+
data_designer/engine/models/registry.py,sha256=KuEiZ5c-0mg9nt6EOMK8sCRoaBcPXV2KXz3sNTqDjJA,4741
|
|
105
|
+
data_designer/engine/models/usage.py,sha256=HOwZxAuG5NNoJx8ZxzXkwJYlbP5Juprpy6STotJ6d14,2470
|
|
106
|
+
data_designer/engine/models/utils.py,sha256=91oPXpHsnER1rEWxIkBhphlln8VOuTOoLGJL9eyWYBo,1254
|
|
107
|
+
data_designer/engine/models/parsers/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
108
|
+
data_designer/engine/models/parsers/errors.py,sha256=JhhZNY4USK1lPUhwOLxFCJXn0TvwSWwvEwBZzZd-T5o,1050
|
|
109
|
+
data_designer/engine/models/parsers/parser.py,sha256=RPdTcBK-aupnoL8DJT_ria12XdY_E-UtwI6qiRBJZko,9484
|
|
110
|
+
data_designer/engine/models/parsers/postprocessors.py,sha256=6oBqeP-_kgll_X5Y9o8Wmt0FGSY5L8_H3a3CAWzr-Is,2893
|
|
111
|
+
data_designer/engine/models/parsers/tag_parsers.py,sha256=Aa0XpZzM4LkQ_vt3GsemaxPNWgOEzrBijAN1p-ub8w0,1995
|
|
112
|
+
data_designer/engine/models/parsers/types.py,sha256=pPc1PoOzBHZG02rdKJfb18ZO8Am4PP93837RKgCWSbE,2608
|
|
113
|
+
data_designer/engine/models/recipes/base.py,sha256=fKez8YoLJrsKYfJDvFN9YyxNYIwvgf50r53A72ESIG8,2589
|
|
114
|
+
data_designer/engine/models/recipes/response_recipes.py,sha256=f_t_rtU_ZbfHvPgTIFyh4RNH7LCMxsr8cV_5-n-mniQ,10148
|
|
115
|
+
data_designer/engine/processing/utils.py,sha256=79Omd7g9OB3sswme7kbw5E3dRtIBBgPdAh8fyyaamA4,4002
|
|
116
|
+
data_designer/engine/processing/ginja/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
117
|
+
data_designer/engine/processing/ginja/ast.py,sha256=JsF8uAVwG5fZ9hYvYH9b8zwdQ6ccKCXkVHqORUZHI8o,1960
|
|
118
|
+
data_designer/engine/processing/ginja/environment.py,sha256=69Cm75MGOoGGn99PaUjeJf-O4k5X1bE1S-EwbxZpK08,18933
|
|
119
|
+
data_designer/engine/processing/ginja/exceptions.py,sha256=4XsrC7cCGX1JbM6Pid_MhsuIw7YgdRgw99HCHQJ3pCM,1885
|
|
120
|
+
data_designer/engine/processing/ginja/record.py,sha256=ulEmWvjJispH_VaIuvAloSFlV-HABQ0y76s-RiTV4qw,1059
|
|
121
|
+
data_designer/engine/processing/gsonschema/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
122
|
+
data_designer/engine/processing/gsonschema/exceptions.py,sha256=IoMlQE-eRJcBUlzKnkCCBSVSlGjsoYZSE0OVwcikxlI,281
|
|
123
|
+
data_designer/engine/processing/gsonschema/schema_transformers.py,sha256=__-dfrCFxDs5-XcTzi1Z-FZL9z0eWUS7Zppr32OxgiY,3066
|
|
124
|
+
data_designer/engine/processing/gsonschema/types.py,sha256=-x_K2HrVnZ_Z7fzYl4T2Gd7QHf6B6ADvn7E7iYvw5Kc,313
|
|
125
|
+
data_designer/engine/processing/gsonschema/validators.py,sha256=S7exUs9MIvu-vvIyRiDiepxCrVGZM2bcpytoAu4AX5w,4704
|
|
126
|
+
data_designer/engine/processing/processors/base.py,sha256=WJl7_0dtiUppjfY-lrQ3lDiIgYqRDSEYUwSAQNN7nFE,548
|
|
127
|
+
data_designer/engine/processing/processors/drop_columns.py,sha256=-ATddFz8efrM2jwiG6w7vgtj48VVy4ZoDvSbGY0aZfY,2050
|
|
128
|
+
data_designer/engine/processing/processors/registry.py,sha256=2zr91IjEMy7duN43fkborPekXohA_X1J8BSKVc1rJKk,804
|
|
129
|
+
data_designer/engine/registry/base.py,sha256=8h5MRPccLGSGcss3qFoQ-i7XGzvn8gdiRR0tYr7mDgk,3544
|
|
130
|
+
data_designer/engine/registry/data_designer_registry.py,sha256=0nO7JEezwc2wnnDRKAX5BZz6RhBI3-kNU3Eb1WAdCFI,1487
|
|
131
|
+
data_designer/engine/registry/errors.py,sha256=nO794QVy4DovKGKWEjycVDN9cdDlH-skbZLTb354M3Y,309
|
|
132
|
+
data_designer/engine/resources/managed_dataset_generator.py,sha256=KXrWdgod-NFaCZvmWSwoJKp2daQgqf8XBIVXvrk6fHI,1369
|
|
133
|
+
data_designer/engine/resources/managed_dataset_repository.py,sha256=lqVxuoCxc07QTrhnAR1mgDiHFkzjjkx2IwcrxrdbloY,7547
|
|
134
|
+
data_designer/engine/resources/managed_storage.py,sha256=jRnGeCTGlu6FxC6tOCssPiSpbHEf0mbqFfm3mM0utdA,2079
|
|
135
|
+
data_designer/engine/resources/resource_provider.py,sha256=CbB2D538ECGkvyHF1V63_TDn-wStCoklV7bF0y4mabY,1859
|
|
136
|
+
data_designer/engine/resources/seed_dataset_data_store.py,sha256=uD8g_7dmVvGmOIG5NMnkMok_0zSdEHVQ1kQcfFqWIG4,2226
|
|
137
|
+
data_designer/engine/sampling_gen/column.py,sha256=gDIPth7vK2797rGtLhf_kVGMAC-khefKHodeeDoqV-I,3946
|
|
138
|
+
data_designer/engine/sampling_gen/constraints.py,sha256=RyhRF9KeUOwEiHr_TN3QwLWOVLTpuCFpCI_3Qr-9Whs,3028
|
|
139
|
+
data_designer/engine/sampling_gen/errors.py,sha256=UBZBtosD07EisCdeo8r-Uq4h0QL3tYS1qwtEmca8_jM,828
|
|
140
|
+
data_designer/engine/sampling_gen/generator.py,sha256=Oz84bVLKE7nhh_PsonizZaa0JIhtoKqQ80OepmVwTQk,5428
|
|
141
|
+
data_designer/engine/sampling_gen/jinja_utils.py,sha256=7495GZHfp6jht48hKOwxq1AGU78QLEH2yDyPef9IRm8,2010
|
|
142
|
+
data_designer/engine/sampling_gen/people_gen.py,sha256=N8K2MEofaBcaVuJGfym6SyJW-N-Y9D2qd3sXe1Fo7PI,8552
|
|
143
|
+
data_designer/engine/sampling_gen/person_constants.py,sha256=e7xjnt_db2dJFS9a9kGRUezEdaLyPU_-XDNBbWwwE0E,1161
|
|
144
|
+
data_designer/engine/sampling_gen/schema.py,sha256=_qhmBB7a-nUYUG79mwNjFPQvk50LYVCl-qBlQ8Gq_Ns,6131
|
|
145
|
+
data_designer/engine/sampling_gen/schema_builder.py,sha256=8glDmLQg0G9xfXXYz-z_1zJHJBnRtXXrFwsLhOw8boc,2272
|
|
146
|
+
data_designer/engine/sampling_gen/utils.py,sha256=c-42r4VNiZG28wWqeenAFh-7CZufAGRxnihxj6C_O14,1355
|
|
147
|
+
data_designer/engine/sampling_gen/data_sources/base.py,sha256=BRU9pzDvgB5B1Mgtj8UT6x_C0qPLEwtBBWzgYgQ9sNw,7364
|
|
148
|
+
data_designer/engine/sampling_gen/data_sources/errors.py,sha256=5pq42e5yvUqaH-g09jWvJolYCO2I2Rdrqo1O0gwet8Y,326
|
|
149
|
+
data_designer/engine/sampling_gen/data_sources/sources.py,sha256=63YaRau37NIc2TDn8JvTOsd0zfnY4_aaF9UOU5ryKSo,13387
|
|
150
|
+
data_designer/engine/sampling_gen/entities/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
151
|
+
data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py,sha256=-_ebkhKeRYtlGpY8ZKGuc40aJfeWQahW2L-BBRxRnO0,1316
|
|
152
|
+
data_designer/engine/sampling_gen/entities/email_address_utils.py,sha256=-V4zuuFq1t3nzzO_FqzCWApPcWNKAh-ZQYFMmCiu5RE,5231
|
|
153
|
+
data_designer/engine/sampling_gen/entities/errors.py,sha256=QEq-6Ld9OlModEYbse0pvY21OC5CyO-OalrL03-iLME,311
|
|
154
|
+
data_designer/engine/sampling_gen/entities/national_id_utils.py,sha256=vxxHnrfQP98W8dWGysCjvfIT-h1xEGdfxn5xF_-UeXw,2611
|
|
155
|
+
data_designer/engine/sampling_gen/entities/person.py,sha256=CMzpjg4-y84VTb1mFjJwjjzMiNPHe2Nmo6GQJUO-fsA,5678
|
|
156
|
+
data_designer/engine/sampling_gen/entities/phone_number.py,sha256=fbGE_dbJD6ESlQwAY-PY1lLjjetb7mYQjHlgvXiM5XM,4703
|
|
157
|
+
data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet,sha256=L6G4laXExB7uRCWHlF4XGDk0yMh41jbDnp9LIy7jNHM,576064
|
|
158
|
+
data_designer/engine/validators/__init__.py,sha256=lMouN5BTbDNi31KfTQNjV7qrL46q-ssejXNT24iDTGI,652
|
|
159
|
+
data_designer/engine/validators/base.py,sha256=85wlVW0NOil223-WQkT7g8Ckxo8zUFv5kFKl-o9Ggec,977
|
|
160
|
+
data_designer/engine/validators/local_callable.py,sha256=oCUXj_NRt0gVqUIh0fLrvw-iURDR6OHFrVi5GOMhXj8,1387
|
|
161
|
+
data_designer/engine/validators/python.py,sha256=_k5WFY7eFlUkZqJUFw5mL9GrdLGgBqq9f3LfFBXrs08,7871
|
|
162
|
+
data_designer/engine/validators/remote.py,sha256=jtDIvWzfHh17m2ac_Fp93p49Th8RlkBzzih2jiqD7gk,2929
|
|
163
|
+
data_designer/engine/validators/sql.py,sha256=bxbyxPxDT9yuwjhABVEY40iR1pzWRFi65WU4tPgG2bE,2250
|
|
164
|
+
data_designer/essentials/__init__.py,sha256=zrDZ7hahOmOhCPdfoj0z9ALN10lXIesfwd2qXRqTcdY,4125
|
|
165
|
+
data_designer/interface/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
166
|
+
data_designer/interface/data_designer.py,sha256=yh_lqEvL0LoqXX-KYDflVjVp8yGFkhSUe_yzZxtV__Q,14904
|
|
167
|
+
data_designer/interface/errors.py,sha256=jagKT3tPUnYq4e3e6AkTnBkcayHyEfxjPMBzx-GEKe4,565
|
|
168
|
+
data_designer/interface/results.py,sha256=qFxa8SuCXeADiRpaCMBwJcExkJBCfUPeGCdcJSTjoTc,2111
|
|
169
|
+
data_designer/plugins/__init__.py,sha256=c_V7q4QhfVoNf_uc9UwmXCsWqwtyWogI7YoN_0PzzE4,234
|
|
170
|
+
data_designer/plugins/errors.py,sha256=yPIHpSddEr-o9ZcNVibb2hI-73O15Kg_Od8SlmQlnRs,297
|
|
171
|
+
data_designer/plugins/plugin.py,sha256=7ErdUyrTdOb5PCBE3msdhTOrvQpldjOQw90-Bu4Bosc,2522
|
|
172
|
+
data_designer/plugins/registry.py,sha256=iPDTh4duV1cKt7H1fXkj1bKLG6SyUKmzQ9xh-vjEoaM,3018
|
|
173
|
+
data_designer-0.1.0.dist-info/METADATA,sha256=pW_EXcja79dhuYz8nL5RuenZqpBSEnS8r85TY6B87dc,5918
|
|
174
|
+
data_designer-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
175
|
+
data_designer-0.1.0.dist-info/entry_points.txt,sha256=NWWWidyDxN6CYX6y664PhBYMhbaYTQTyprqfYAgkyCg,57
|
|
176
|
+
data_designer-0.1.0.dist-info/licenses/LICENSE,sha256=cSWJDwVqHyQgly8Zmt3pqXJ2eQbZVYwN9qd0NMssxXY,11336
|
|
177
|
+
data_designer-0.1.0.dist-info/RECORD,,
|