data-designer 0.3.8rc1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/cli/commands/__init__.py +1 -1
- data_designer/interface/__init__.py +21 -1
- data_designer/{_version.py → interface/_version.py} +2 -2
- data_designer/interface/data_designer.py +8 -11
- {data_designer-0.3.8rc1.dist-info → data_designer-0.4.0.dist-info}/METADATA +10 -42
- data_designer-0.4.0.dist-info/RECORD +39 -0
- data_designer/__init__.py +0 -17
- data_designer/config/__init__.py +0 -2
- data_designer/config/analysis/__init__.py +0 -2
- data_designer/config/analysis/column_profilers.py +0 -159
- data_designer/config/analysis/column_statistics.py +0 -421
- data_designer/config/analysis/dataset_profiler.py +0 -84
- data_designer/config/analysis/utils/errors.py +0 -10
- data_designer/config/analysis/utils/reporting.py +0 -192
- data_designer/config/base.py +0 -69
- data_designer/config/column_configs.py +0 -470
- data_designer/config/column_types.py +0 -141
- data_designer/config/config_builder.py +0 -595
- data_designer/config/data_designer_config.py +0 -40
- data_designer/config/dataset_builders.py +0 -13
- data_designer/config/dataset_metadata.py +0 -18
- data_designer/config/default_model_settings.py +0 -121
- data_designer/config/errors.py +0 -24
- data_designer/config/exports.py +0 -145
- data_designer/config/interface.py +0 -55
- data_designer/config/models.py +0 -455
- data_designer/config/preview_results.py +0 -41
- data_designer/config/processors.py +0 -148
- data_designer/config/run_config.py +0 -48
- data_designer/config/sampler_constraints.py +0 -52
- data_designer/config/sampler_params.py +0 -639
- data_designer/config/seed.py +0 -116
- data_designer/config/seed_source.py +0 -84
- data_designer/config/seed_source_types.py +0 -19
- data_designer/config/utils/code_lang.py +0 -82
- data_designer/config/utils/constants.py +0 -363
- data_designer/config/utils/errors.py +0 -21
- data_designer/config/utils/info.py +0 -94
- data_designer/config/utils/io_helpers.py +0 -258
- data_designer/config/utils/misc.py +0 -78
- data_designer/config/utils/numerical_helpers.py +0 -30
- data_designer/config/utils/type_helpers.py +0 -106
- data_designer/config/utils/visualization.py +0 -482
- data_designer/config/validator_params.py +0 -94
- data_designer/engine/__init__.py +0 -2
- data_designer/engine/analysis/column_profilers/base.py +0 -49
- data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -153
- data_designer/engine/analysis/column_profilers/registry.py +0 -22
- data_designer/engine/analysis/column_statistics.py +0 -145
- data_designer/engine/analysis/dataset_profiler.py +0 -149
- data_designer/engine/analysis/errors.py +0 -9
- data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -234
- data_designer/engine/analysis/utils/judge_score_processing.py +0 -132
- data_designer/engine/column_generators/__init__.py +0 -2
- data_designer/engine/column_generators/generators/__init__.py +0 -2
- data_designer/engine/column_generators/generators/base.py +0 -122
- data_designer/engine/column_generators/generators/embedding.py +0 -35
- data_designer/engine/column_generators/generators/expression.py +0 -55
- data_designer/engine/column_generators/generators/llm_completion.py +0 -113
- data_designer/engine/column_generators/generators/samplers.py +0 -69
- data_designer/engine/column_generators/generators/seed_dataset.py +0 -144
- data_designer/engine/column_generators/generators/validation.py +0 -140
- data_designer/engine/column_generators/registry.py +0 -60
- data_designer/engine/column_generators/utils/errors.py +0 -15
- data_designer/engine/column_generators/utils/generator_classification.py +0 -43
- data_designer/engine/column_generators/utils/judge_score_factory.py +0 -58
- data_designer/engine/column_generators/utils/prompt_renderer.py +0 -100
- data_designer/engine/compiler.py +0 -97
- data_designer/engine/configurable_task.py +0 -71
- data_designer/engine/dataset_builders/artifact_storage.py +0 -283
- data_designer/engine/dataset_builders/column_wise_builder.py +0 -338
- data_designer/engine/dataset_builders/errors.py +0 -15
- data_designer/engine/dataset_builders/multi_column_configs.py +0 -46
- data_designer/engine/dataset_builders/utils/__init__.py +0 -2
- data_designer/engine/dataset_builders/utils/concurrency.py +0 -215
- data_designer/engine/dataset_builders/utils/config_compiler.py +0 -62
- data_designer/engine/dataset_builders/utils/dag.py +0 -62
- data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -200
- data_designer/engine/dataset_builders/utils/errors.py +0 -15
- data_designer/engine/errors.py +0 -51
- data_designer/engine/model_provider.py +0 -77
- data_designer/engine/models/__init__.py +0 -2
- data_designer/engine/models/errors.py +0 -300
- data_designer/engine/models/facade.py +0 -287
- data_designer/engine/models/factory.py +0 -42
- data_designer/engine/models/litellm_overrides.py +0 -179
- data_designer/engine/models/parsers/__init__.py +0 -2
- data_designer/engine/models/parsers/errors.py +0 -34
- data_designer/engine/models/parsers/parser.py +0 -235
- data_designer/engine/models/parsers/postprocessors.py +0 -93
- data_designer/engine/models/parsers/tag_parsers.py +0 -62
- data_designer/engine/models/parsers/types.py +0 -84
- data_designer/engine/models/recipes/base.py +0 -81
- data_designer/engine/models/recipes/response_recipes.py +0 -293
- data_designer/engine/models/registry.py +0 -146
- data_designer/engine/models/telemetry.py +0 -359
- data_designer/engine/models/usage.py +0 -73
- data_designer/engine/models/utils.py +0 -38
- data_designer/engine/processing/ginja/__init__.py +0 -2
- data_designer/engine/processing/ginja/ast.py +0 -65
- data_designer/engine/processing/ginja/environment.py +0 -463
- data_designer/engine/processing/ginja/exceptions.py +0 -56
- data_designer/engine/processing/ginja/record.py +0 -32
- data_designer/engine/processing/gsonschema/__init__.py +0 -2
- data_designer/engine/processing/gsonschema/exceptions.py +0 -15
- data_designer/engine/processing/gsonschema/schema_transformers.py +0 -83
- data_designer/engine/processing/gsonschema/types.py +0 -10
- data_designer/engine/processing/gsonschema/validators.py +0 -202
- data_designer/engine/processing/processors/base.py +0 -13
- data_designer/engine/processing/processors/drop_columns.py +0 -42
- data_designer/engine/processing/processors/registry.py +0 -25
- data_designer/engine/processing/processors/schema_transform.py +0 -49
- data_designer/engine/processing/utils.py +0 -169
- data_designer/engine/registry/base.py +0 -99
- data_designer/engine/registry/data_designer_registry.py +0 -39
- data_designer/engine/registry/errors.py +0 -12
- data_designer/engine/resources/managed_dataset_generator.py +0 -39
- data_designer/engine/resources/managed_dataset_repository.py +0 -197
- data_designer/engine/resources/managed_storage.py +0 -65
- data_designer/engine/resources/resource_provider.py +0 -77
- data_designer/engine/resources/seed_reader.py +0 -154
- data_designer/engine/sampling_gen/column.py +0 -91
- data_designer/engine/sampling_gen/constraints.py +0 -100
- data_designer/engine/sampling_gen/data_sources/base.py +0 -217
- data_designer/engine/sampling_gen/data_sources/errors.py +0 -12
- data_designer/engine/sampling_gen/data_sources/sources.py +0 -347
- data_designer/engine/sampling_gen/entities/__init__.py +0 -2
- data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
- data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -86
- data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -171
- data_designer/engine/sampling_gen/entities/errors.py +0 -10
- data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -102
- data_designer/engine/sampling_gen/entities/person.py +0 -144
- data_designer/engine/sampling_gen/entities/phone_number.py +0 -128
- data_designer/engine/sampling_gen/errors.py +0 -26
- data_designer/engine/sampling_gen/generator.py +0 -122
- data_designer/engine/sampling_gen/jinja_utils.py +0 -64
- data_designer/engine/sampling_gen/people_gen.py +0 -199
- data_designer/engine/sampling_gen/person_constants.py +0 -56
- data_designer/engine/sampling_gen/schema.py +0 -147
- data_designer/engine/sampling_gen/schema_builder.py +0 -61
- data_designer/engine/sampling_gen/utils.py +0 -46
- data_designer/engine/secret_resolver.py +0 -82
- data_designer/engine/validation.py +0 -367
- data_designer/engine/validators/__init__.py +0 -19
- data_designer/engine/validators/base.py +0 -38
- data_designer/engine/validators/local_callable.py +0 -39
- data_designer/engine/validators/python.py +0 -254
- data_designer/engine/validators/remote.py +0 -89
- data_designer/engine/validators/sql.py +0 -65
- data_designer/errors.py +0 -7
- data_designer/essentials/__init__.py +0 -33
- data_designer/lazy_heavy_imports.py +0 -54
- data_designer/logging.py +0 -163
- data_designer/plugin_manager.py +0 -78
- data_designer/plugins/__init__.py +0 -8
- data_designer/plugins/errors.py +0 -15
- data_designer/plugins/plugin.py +0 -141
- data_designer/plugins/registry.py +0 -88
- data_designer/plugins/testing/__init__.py +0 -10
- data_designer/plugins/testing/stubs.py +0 -116
- data_designer/plugins/testing/utils.py +0 -20
- data_designer-0.3.8rc1.dist-info/RECORD +0 -196
- data_designer-0.3.8rc1.dist-info/licenses/LICENSE +0 -201
- {data_designer-0.3.8rc1.dist-info → data_designer-0.4.0.dist-info}/WHEEL +0 -0
- {data_designer-0.3.8rc1.dist-info → data_designer-0.4.0.dist-info}/entry_points.txt +0 -0
|
@@ -1,121 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
import logging
|
|
7
|
-
import os
|
|
8
|
-
from functools import lru_cache
|
|
9
|
-
from pathlib import Path
|
|
10
|
-
from typing import Any, Literal
|
|
11
|
-
|
|
12
|
-
from data_designer.config.models import (
|
|
13
|
-
ChatCompletionInferenceParams,
|
|
14
|
-
EmbeddingInferenceParams,
|
|
15
|
-
InferenceParamsT,
|
|
16
|
-
ModelConfig,
|
|
17
|
-
ModelProvider,
|
|
18
|
-
)
|
|
19
|
-
from data_designer.config.utils.constants import (
|
|
20
|
-
MANAGED_ASSETS_PATH,
|
|
21
|
-
MODEL_CONFIGS_FILE_PATH,
|
|
22
|
-
MODEL_PROVIDERS_FILE_PATH,
|
|
23
|
-
PREDEFINED_PROVIDERS,
|
|
24
|
-
PREDEFINED_PROVIDERS_MODEL_MAP,
|
|
25
|
-
)
|
|
26
|
-
from data_designer.config.utils.io_helpers import load_config_file, save_config_file
|
|
27
|
-
|
|
28
|
-
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
def get_default_inference_parameters(
    model_alias: Literal["text", "reasoning", "vision", "embedding"],
    inference_parameters: dict[str, Any],
) -> InferenceParamsT:
    """Build the inference-parameter object for a built-in model alias.

    Args:
        model_alias: Alias of the built-in model ("text", "reasoning",
            "vision" or "embedding").
        inference_parameters: Keyword arguments forwarded unchanged to the
            selected parameter class.

    Returns:
        An ``EmbeddingInferenceParams`` when ``model_alias`` is "embedding";
        a ``ChatCompletionInferenceParams`` for every other alias.
    """
    # Only the embedding alias has a dedicated parameter class; every other
    # alias is built as chat-completion parameters.
    params_cls = EmbeddingInferenceParams if model_alias == "embedding" else ChatCompletionInferenceParams
    return params_cls(**inference_parameters)
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def get_builtin_model_configs() -> list[ModelConfig]:
    """Create a ``ModelConfig`` for every predefined provider/alias pair.

    Walks ``PREDEFINED_PROVIDERS_MODEL_MAP`` (provider -> alias -> settings).
    Each config gets the alias ``"<provider>-<model_alias>"``, the model name
    from ``settings["model"]``, and inference parameters built by
    ``get_default_inference_parameters`` from
    ``settings["inference_parameters"]``.

    Returns:
        The configs in the iteration order of the map.
    """
    builtin_configs: list[ModelConfig] = []
    for provider_name, alias_settings_map in PREDEFINED_PROVIDERS_MODEL_MAP.items():
        builtin_configs.extend(
            ModelConfig(
                alias=f"{provider_name}-{alias_key}",
                model=alias_settings["model"],
                provider=provider_name,
                inference_parameters=get_default_inference_parameters(
                    alias_key, alias_settings["inference_parameters"]
                ),
            )
            for alias_key, alias_settings in alias_settings_map.items()
        )
    return builtin_configs
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def get_builtin_model_providers() -> list[ModelProvider]:
    """Validate every entry of ``PREDEFINED_PROVIDERS`` into a ``ModelProvider``.

    Returns:
        One ``ModelProvider`` per predefined entry, in the same order.
    """
    validate = ModelProvider.model_validate
    return [validate(provider_spec) for provider_spec in PREDEFINED_PROVIDERS]
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
def get_default_model_configs() -> list[ModelConfig]:
    """Load the model configs stored in the default model-configs file.

    Returns:
        The validated entries found under the "model_configs" key of the file
        at ``MODEL_CONFIGS_FILE_PATH``. An empty list is returned when the
        file does not exist or does not contain that key.
    """
    if not MODEL_CONFIGS_FILE_PATH.exists():
        return []

    loaded = load_config_file(MODEL_CONFIGS_FILE_PATH)
    if "model_configs" not in loaded:
        return []

    return [ModelConfig.model_validate(entry) for entry in loaded["model_configs"]]
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def get_default_model_providers_missing_api_keys() -> list[str]:
    """List the API-key names of predefined providers that are not set.

    Each predefined provider's "api_key" value is looked up as an
    environment-variable name.

    Returns:
        The "api_key" values for which ``os.environ`` has no entry, in the
        order of ``PREDEFINED_PROVIDERS``.
    """
    api_key_names = (provider_spec["api_key"] for provider_spec in PREDEFINED_PROVIDERS)
    return [key_name for key_name in api_key_names if os.environ.get(key_name) is None]
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def get_default_providers() -> list[ModelProvider]:
    """Load the providers stored in the default model-providers file.

    Returns:
        The validated entries under the "providers" key of the file at
        ``MODEL_PROVIDERS_FILE_PATH``, or an empty list when the key is absent.
    """
    providers_file = _get_default_providers_file_content(MODEL_PROVIDERS_FILE_PATH)
    if "providers" not in providers_file:
        return []
    return [ModelProvider.model_validate(entry) for entry in providers_file["providers"]]
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def get_default_provider_name() -> str | None:
    """Return the "default" entry of the default model-providers file.

    Returns:
        The value stored under the "default" key of the file at
        ``MODEL_PROVIDERS_FILE_PATH``, or ``None`` when the key is absent.
    """
    providers_file = _get_default_providers_file_content(MODEL_PROVIDERS_FILE_PATH)
    return providers_file.get("default")
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def resolve_seed_default_model_settings() -> None:
    """Create the default settings files and assets directory when missing.

    Three independent checks are made, each acting only if its path does not
    exist yet:

    * ``MODEL_CONFIGS_FILE_PATH`` is written with the built-in model configs
      under the "model_configs" key.
    * ``MODEL_PROVIDERS_FILE_PATH`` is written with the built-in providers
      under the "providers" key.
    * ``MANAGED_ASSETS_PATH`` is created as a directory, including parents.

    Existing files and directories are left untouched.
    """
    if not MODEL_CONFIGS_FILE_PATH.exists():
        logger.debug(
            f"🍾 Default model configs were not found, so writing the following to {str(MODEL_CONFIGS_FILE_PATH)!r}"
        )
        serialized_configs = [config.model_dump(mode="json") for config in get_builtin_model_configs()]
        save_config_file(MODEL_CONFIGS_FILE_PATH, {"model_configs": serialized_configs})

    if not MODEL_PROVIDERS_FILE_PATH.exists():
        logger.debug(
            f"🪄 Default model providers were not found, so writing the following to {str(MODEL_PROVIDERS_FILE_PATH)!r}"
        )
        serialized_providers = [provider.model_dump(mode="json") for provider in get_builtin_model_providers()]
        save_config_file(MODEL_PROVIDERS_FILE_PATH, {"providers": serialized_providers})

    if not MANAGED_ASSETS_PATH.exists():
        logger.debug(f"🏗️ Default managed assets path was not found, so creating it at {str(MANAGED_ASSETS_PATH)!r}")
        # exist_ok guards against the directory appearing between the check and the mkdir.
        MANAGED_ASSETS_PATH.mkdir(parents=True, exist_ok=True)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
@lru_cache(maxsize=1)
def _get_default_providers_file_content(file_path: Path) -> dict[str, Any]:
    """Load the providers config file, caching the most recent result.

    The single-entry ``lru_cache`` means repeated calls with the same path
    return the cached content instead of reading the file again.

    Args:
        file_path: Location of the providers config file.

    Returns:
        The content returned by ``load_config_file`` for ``file_path``.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    if not file_path.exists():
        raise FileNotFoundError(f"Default model providers file not found at {str(file_path)!r}")
    return load_config_file(file_path)
|
data_designer/config/errors.py
DELETED
|
@@ -1,24 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
from data_designer.errors import DataDesignerError
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
# Marker exception: subclasses DataDesignerError without adding state or behavior.
# Presumably signals an invalid config-builder setup — confirm at the raise sites.
class BuilderConfigurationError(DataDesignerError): ...
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
# Marker exception: subclasses DataDesignerError without adding state or behavior.
# Presumably signals a failure while serializing a config builder — confirm at the raise sites.
class BuilderSerializationError(DataDesignerError): ...
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
# Marker exception: subclasses DataDesignerError without adding state or behavior.
# Presumably signals an unknown or unsupported column type — confirm at the raise sites.
class InvalidColumnTypeError(DataDesignerError): ...
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
# Marker exception: subclasses DataDesignerError without adding state or behavior.
# Presumably signals an invalid configuration — confirm at the raise sites.
class InvalidConfigError(DataDesignerError): ...
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
# Marker exception: subclasses DataDesignerError without adding state or behavior.
# Presumably signals an unusable file path — confirm at the raise sites.
class InvalidFilePathError(DataDesignerError): ...
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
# Marker exception: subclasses DataDesignerError without adding state or behavior.
# Presumably signals an unsupported file format — confirm at the raise sites.
class InvalidFileFormatError(DataDesignerError): ...
|
data_designer/config/exports.py
DELETED
|
@@ -1,145 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
from data_designer.config.analysis.column_profilers import JudgeScoreProfilerConfig
|
|
7
|
-
from data_designer.config.column_configs import (
|
|
8
|
-
EmbeddingColumnConfig,
|
|
9
|
-
ExpressionColumnConfig,
|
|
10
|
-
LLMCodeColumnConfig,
|
|
11
|
-
LLMJudgeColumnConfig,
|
|
12
|
-
LLMStructuredColumnConfig,
|
|
13
|
-
LLMTextColumnConfig,
|
|
14
|
-
SamplerColumnConfig,
|
|
15
|
-
Score,
|
|
16
|
-
SeedDatasetColumnConfig,
|
|
17
|
-
ValidationColumnConfig,
|
|
18
|
-
)
|
|
19
|
-
from data_designer.config.column_types import DataDesignerColumnType
|
|
20
|
-
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
21
|
-
from data_designer.config.data_designer_config import DataDesignerConfig
|
|
22
|
-
from data_designer.config.dataset_builders import BuildStage
|
|
23
|
-
from data_designer.config.models import (
|
|
24
|
-
ChatCompletionInferenceParams,
|
|
25
|
-
EmbeddingInferenceParams,
|
|
26
|
-
GenerationType,
|
|
27
|
-
ImageContext,
|
|
28
|
-
ImageFormat,
|
|
29
|
-
ManualDistribution,
|
|
30
|
-
ManualDistributionParams,
|
|
31
|
-
Modality,
|
|
32
|
-
ModalityContext,
|
|
33
|
-
ModalityDataType,
|
|
34
|
-
ModelConfig,
|
|
35
|
-
ModelProvider,
|
|
36
|
-
UniformDistribution,
|
|
37
|
-
UniformDistributionParams,
|
|
38
|
-
)
|
|
39
|
-
from data_designer.config.processors import (
|
|
40
|
-
DropColumnsProcessorConfig,
|
|
41
|
-
ProcessorType,
|
|
42
|
-
SchemaTransformProcessorConfig,
|
|
43
|
-
)
|
|
44
|
-
from data_designer.config.sampler_constraints import ColumnInequalityConstraint, ScalarInequalityConstraint
|
|
45
|
-
from data_designer.config.sampler_params import (
|
|
46
|
-
BernoulliMixtureSamplerParams,
|
|
47
|
-
BernoulliSamplerParams,
|
|
48
|
-
BinomialSamplerParams,
|
|
49
|
-
CategorySamplerParams,
|
|
50
|
-
DatetimeSamplerParams,
|
|
51
|
-
GaussianSamplerParams,
|
|
52
|
-
PersonFromFakerSamplerParams,
|
|
53
|
-
PersonSamplerParams,
|
|
54
|
-
PoissonSamplerParams,
|
|
55
|
-
SamplerType,
|
|
56
|
-
ScipySamplerParams,
|
|
57
|
-
SubcategorySamplerParams,
|
|
58
|
-
TimeDeltaSamplerParams,
|
|
59
|
-
UniformSamplerParams,
|
|
60
|
-
UUIDSamplerParams,
|
|
61
|
-
)
|
|
62
|
-
from data_designer.config.seed import (
|
|
63
|
-
IndexRange,
|
|
64
|
-
PartitionBlock,
|
|
65
|
-
SamplingStrategy,
|
|
66
|
-
SeedConfig,
|
|
67
|
-
)
|
|
68
|
-
from data_designer.config.seed_source import (
|
|
69
|
-
DataFrameSeedSource,
|
|
70
|
-
HuggingFaceSeedSource,
|
|
71
|
-
LocalFileSeedSource,
|
|
72
|
-
)
|
|
73
|
-
from data_designer.config.utils.code_lang import CodeLang
|
|
74
|
-
from data_designer.config.utils.info import InfoType
|
|
75
|
-
from data_designer.config.validator_params import (
|
|
76
|
-
CodeValidatorParams,
|
|
77
|
-
RemoteValidatorParams,
|
|
78
|
-
ValidatorType,
|
|
79
|
-
)
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def get_config_exports() -> list[str]:
    """Return the class names that make up the public config export list.

    Returns:
        The ``__name__`` of each exported class, in the fixed order listed
        below.
    """
    # The order is part of the return value; keep it as-is.
    exported_types = (
        SchemaTransformProcessorConfig,
        BernoulliMixtureSamplerParams,
        BernoulliSamplerParams,
        BinomialSamplerParams,
        CategorySamplerParams,
        CodeLang,
        CodeValidatorParams,
        ColumnInequalityConstraint,
        ChatCompletionInferenceParams,
        DataDesignerColumnType,
        DataDesignerConfig,
        DataDesignerConfigBuilder,
        DataFrameSeedSource,
        BuildStage,
        DatetimeSamplerParams,
        DropColumnsProcessorConfig,
        EmbeddingColumnConfig,
        EmbeddingInferenceParams,
        ExpressionColumnConfig,
        GaussianSamplerParams,
        GenerationType,
        HuggingFaceSeedSource,
        IndexRange,
        InfoType,
        ImageContext,
        ImageFormat,
        JudgeScoreProfilerConfig,
        LLMCodeColumnConfig,
        LLMJudgeColumnConfig,
        LLMStructuredColumnConfig,
        LLMTextColumnConfig,
        LocalFileSeedSource,
        ManualDistribution,
        ManualDistributionParams,
        Modality,
        ModalityContext,
        ModalityDataType,
        ModelConfig,
        ModelProvider,
        PartitionBlock,
        PersonSamplerParams,
        PersonFromFakerSamplerParams,
        PoissonSamplerParams,
        ProcessorType,
        RemoteValidatorParams,
        SamplerColumnConfig,
        SamplerType,
        SamplingStrategy,
        ScalarInequalityConstraint,
        ScipySamplerParams,
        Score,
        SeedConfig,
        SeedDatasetColumnConfig,
        SubcategorySamplerParams,
        TimeDeltaSamplerParams,
        UniformDistribution,
        UniformDistributionParams,
        UniformSamplerParams,
        UUIDSamplerParams,
        ValidationColumnConfig,
        ValidatorType,
    )
    return [exported.__name__ for exported in exported_types]
|
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
from abc import ABC, abstractmethod
|
|
7
|
-
from typing import TYPE_CHECKING, Generic, Protocol, TypeVar
|
|
8
|
-
|
|
9
|
-
from data_designer.config.models import ModelConfig, ModelProvider
|
|
10
|
-
from data_designer.config.utils.constants import DEFAULT_NUM_RECORDS
|
|
11
|
-
from data_designer.config.utils.info import InterfaceInfo
|
|
12
|
-
from data_designer.lazy_heavy_imports import pd
|
|
13
|
-
|
|
14
|
-
if TYPE_CHECKING:
|
|
15
|
-
import pandas as pd
|
|
16
|
-
|
|
17
|
-
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
|
|
18
|
-
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
19
|
-
from data_designer.config.preview_results import PreviewResults
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
class ResultsProtocol(Protocol):
    """Structural type for result objects.

    Any class providing ``load_analysis`` and ``load_dataset`` with these
    signatures satisfies the protocol; no inheritance is required.
    """

    def load_analysis(self) -> DatasetProfilerResults:
        """Return the analysis as a ``DatasetProfilerResults``."""

    def load_dataset(self) -> pd.DataFrame:
        """Return the dataset as a pandas ``DataFrame``."""
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
# Type variable bound to ResultsProtocol: any type substituted for it must satisfy that protocol.
ResultsT = TypeVar("ResultsT", bound=ResultsProtocol)
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
class DataDesignerInterface(ABC, Generic[ResultsT]):
    """Abstract interface, generic over the results type ``ResultsT``.

    Concrete subclasses must implement every abstract member below before
    they can be instantiated.
    """

    @abstractmethod
    def create(self, config_builder: DataDesignerConfigBuilder, *, num_records: int = DEFAULT_NUM_RECORDS) -> ResultsT:
        """Run a creation job for ``config_builder`` and return its results.

        Args:
            config_builder: Builder describing the dataset configuration.
            num_records: Keyword-only record count; defaults to
                ``DEFAULT_NUM_RECORDS``.
        """

    @abstractmethod
    def preview(
        self, config_builder: DataDesignerConfigBuilder, *, num_records: int = DEFAULT_NUM_RECORDS
    ) -> PreviewResults:
        """Run a preview for ``config_builder`` and return ``PreviewResults``.

        Args:
            config_builder: Builder describing the dataset configuration.
            num_records: Keyword-only record count; defaults to
                ``DEFAULT_NUM_RECORDS``.
        """

    @abstractmethod
    def get_default_model_configs(self) -> list[ModelConfig]:
        """Return the default model configs as a list of ``ModelConfig``."""

    @abstractmethod
    def get_default_model_providers(self) -> list[ModelProvider]:
        """Return the default model providers as a list of ``ModelProvider``."""

    @property
    @abstractmethod
    def info(self) -> InterfaceInfo:
        """Return the ``InterfaceInfo`` for this interface."""
|