data-designer 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/__init__.py +1 -1
- data_designer/_version.py +2 -2
- data_designer/cli/__init__.py +1 -1
- data_designer/cli/commands/__init__.py +1 -1
- data_designer/cli/commands/download.py +1 -1
- data_designer/cli/commands/list.py +1 -1
- data_designer/cli/commands/models.py +1 -1
- data_designer/cli/commands/providers.py +1 -1
- data_designer/cli/commands/reset.py +1 -1
- data_designer/cli/controllers/__init__.py +1 -1
- data_designer/cli/controllers/download_controller.py +1 -1
- data_designer/cli/controllers/model_controller.py +1 -1
- data_designer/cli/controllers/provider_controller.py +1 -1
- data_designer/cli/forms/__init__.py +1 -1
- data_designer/cli/forms/builder.py +1 -1
- data_designer/cli/forms/field.py +1 -1
- data_designer/cli/forms/form.py +1 -1
- data_designer/cli/forms/model_builder.py +1 -1
- data_designer/cli/forms/provider_builder.py +1 -1
- data_designer/cli/main.py +1 -1
- data_designer/cli/repositories/__init__.py +1 -1
- data_designer/cli/repositories/base.py +1 -1
- data_designer/cli/repositories/model_repository.py +1 -1
- data_designer/cli/repositories/persona_repository.py +1 -1
- data_designer/cli/repositories/provider_repository.py +1 -1
- data_designer/cli/services/__init__.py +1 -1
- data_designer/cli/services/download_service.py +1 -1
- data_designer/cli/services/model_service.py +1 -1
- data_designer/cli/services/provider_service.py +1 -1
- data_designer/cli/ui.py +1 -1
- data_designer/cli/utils.py +1 -1
- data_designer/config/__init__.py +1 -1
- data_designer/config/analysis/__init__.py +1 -1
- data_designer/config/analysis/column_profilers.py +1 -1
- data_designer/config/analysis/column_statistics.py +1 -1
- data_designer/config/analysis/dataset_profiler.py +1 -1
- data_designer/config/analysis/utils/errors.py +1 -1
- data_designer/config/analysis/utils/reporting.py +1 -1
- data_designer/config/base.py +1 -1
- data_designer/config/column_configs.py +1 -1
- data_designer/config/column_types.py +1 -36
- data_designer/config/config_builder.py +4 -21
- data_designer/config/data_designer_config.py +1 -1
- data_designer/config/dataset_builders.py +1 -1
- data_designer/config/default_model_settings.py +1 -1
- data_designer/config/errors.py +1 -1
- data_designer/config/exports.py +1 -1
- data_designer/config/interface.py +1 -1
- data_designer/config/models.py +1 -1
- data_designer/config/preview_results.py +1 -1
- data_designer/config/processors.py +1 -1
- data_designer/config/run_config.py +1 -1
- data_designer/config/sampler_constraints.py +1 -1
- data_designer/config/sampler_params.py +1 -1
- data_designer/config/seed.py +2 -2
- data_designer/config/seed_source.py +2 -8
- data_designer/config/seed_source_types.py +17 -0
- data_designer/config/utils/code_lang.py +1 -1
- data_designer/config/utils/constants.py +1 -1
- data_designer/config/utils/errors.py +1 -1
- data_designer/config/utils/info.py +1 -1
- data_designer/config/utils/io_helpers.py +1 -1
- data_designer/config/utils/misc.py +1 -1
- data_designer/config/utils/numerical_helpers.py +1 -1
- data_designer/config/utils/type_helpers.py +1 -1
- data_designer/config/utils/visualization.py +1 -1
- data_designer/config/validator_params.py +1 -1
- data_designer/engine/__init__.py +1 -1
- data_designer/engine/analysis/column_profilers/base.py +1 -1
- data_designer/engine/analysis/column_profilers/judge_score_profiler.py +1 -3
- data_designer/engine/analysis/column_profilers/registry.py +1 -1
- data_designer/engine/analysis/column_statistics.py +1 -1
- data_designer/engine/analysis/dataset_profiler.py +1 -1
- data_designer/engine/analysis/errors.py +1 -1
- data_designer/engine/analysis/utils/column_statistics_calculations.py +1 -1
- data_designer/engine/analysis/utils/judge_score_processing.py +1 -1
- data_designer/engine/column_generators/__init__.py +1 -1
- data_designer/engine/column_generators/generators/__init__.py +1 -1
- data_designer/engine/column_generators/generators/base.py +26 -14
- data_designer/engine/column_generators/generators/embedding.py +3 -6
- data_designer/engine/column_generators/generators/expression.py +3 -2
- data_designer/engine/column_generators/generators/llm_completion.py +8 -13
- data_designer/engine/column_generators/generators/samplers.py +3 -3
- data_designer/engine/column_generators/generators/seed_dataset.py +4 -3
- data_designer/engine/column_generators/generators/validation.py +3 -2
- data_designer/engine/column_generators/registry.py +1 -1
- data_designer/engine/column_generators/utils/errors.py +1 -1
- data_designer/engine/column_generators/utils/generator_classification.py +41 -0
- data_designer/engine/column_generators/utils/judge_score_factory.py +1 -1
- data_designer/engine/column_generators/utils/prompt_renderer.py +1 -1
- data_designer/engine/compiler.py +1 -1
- data_designer/engine/configurable_task.py +3 -13
- data_designer/engine/dataset_builders/artifact_storage.py +1 -1
- data_designer/engine/dataset_builders/column_wise_builder.py +8 -5
- data_designer/engine/dataset_builders/errors.py +1 -1
- data_designer/engine/dataset_builders/multi_column_configs.py +1 -1
- data_designer/engine/dataset_builders/utils/__init__.py +1 -1
- data_designer/engine/dataset_builders/utils/concurrency.py +1 -1
- data_designer/engine/dataset_builders/utils/config_compiler.py +1 -1
- data_designer/engine/dataset_builders/utils/dag.py +3 -2
- data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +1 -1
- data_designer/engine/dataset_builders/utils/errors.py +1 -1
- data_designer/engine/errors.py +1 -1
- data_designer/engine/model_provider.py +1 -1
- data_designer/engine/models/__init__.py +1 -1
- data_designer/engine/models/errors.py +1 -1
- data_designer/engine/models/facade.py +1 -1
- data_designer/engine/models/litellm_overrides.py +1 -1
- data_designer/engine/models/parsers/__init__.py +1 -1
- data_designer/engine/models/parsers/errors.py +1 -1
- data_designer/engine/models/parsers/parser.py +1 -1
- data_designer/engine/models/parsers/postprocessors.py +1 -1
- data_designer/engine/models/parsers/tag_parsers.py +1 -1
- data_designer/engine/models/parsers/types.py +1 -1
- data_designer/engine/models/recipes/base.py +1 -1
- data_designer/engine/models/recipes/response_recipes.py +1 -1
- data_designer/engine/models/registry.py +1 -1
- data_designer/engine/models/telemetry.py +1 -1
- data_designer/engine/models/usage.py +1 -1
- data_designer/engine/models/utils.py +1 -1
- data_designer/engine/processing/ginja/__init__.py +1 -1
- data_designer/engine/processing/ginja/ast.py +1 -1
- data_designer/engine/processing/ginja/environment.py +1 -1
- data_designer/engine/processing/ginja/exceptions.py +1 -1
- data_designer/engine/processing/ginja/record.py +1 -1
- data_designer/engine/processing/gsonschema/__init__.py +1 -1
- data_designer/engine/processing/gsonschema/exceptions.py +1 -1
- data_designer/engine/processing/gsonschema/schema_transformers.py +1 -1
- data_designer/engine/processing/gsonschema/types.py +1 -1
- data_designer/engine/processing/gsonschema/validators.py +1 -1
- data_designer/engine/processing/processors/base.py +3 -1
- data_designer/engine/processing/processors/drop_columns.py +1 -2
- data_designer/engine/processing/processors/registry.py +1 -1
- data_designer/engine/processing/processors/schema_transform.py +1 -2
- data_designer/engine/processing/utils.py +1 -1
- data_designer/engine/registry/base.py +1 -1
- data_designer/engine/registry/data_designer_registry.py +1 -1
- data_designer/engine/registry/errors.py +1 -1
- data_designer/engine/resources/managed_dataset_generator.py +1 -1
- data_designer/engine/resources/managed_dataset_repository.py +1 -1
- data_designer/engine/resources/managed_storage.py +1 -1
- data_designer/engine/resources/resource_provider.py +1 -1
- data_designer/engine/resources/seed_reader.py +1 -1
- data_designer/engine/sampling_gen/column.py +1 -1
- data_designer/engine/sampling_gen/constraints.py +1 -1
- data_designer/engine/sampling_gen/data_sources/base.py +1 -1
- data_designer/engine/sampling_gen/data_sources/errors.py +1 -1
- data_designer/engine/sampling_gen/data_sources/sources.py +1 -1
- data_designer/engine/sampling_gen/entities/__init__.py +1 -1
- data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +1 -1
- data_designer/engine/sampling_gen/entities/email_address_utils.py +1 -1
- data_designer/engine/sampling_gen/entities/errors.py +1 -1
- data_designer/engine/sampling_gen/entities/national_id_utils.py +1 -1
- data_designer/engine/sampling_gen/entities/person.py +1 -1
- data_designer/engine/sampling_gen/entities/phone_number.py +1 -1
- data_designer/engine/sampling_gen/errors.py +1 -1
- data_designer/engine/sampling_gen/generator.py +1 -1
- data_designer/engine/sampling_gen/jinja_utils.py +1 -1
- data_designer/engine/sampling_gen/people_gen.py +1 -1
- data_designer/engine/sampling_gen/person_constants.py +1 -1
- data_designer/engine/sampling_gen/schema.py +1 -1
- data_designer/engine/sampling_gen/schema_builder.py +1 -1
- data_designer/engine/sampling_gen/utils.py +1 -1
- data_designer/engine/secret_resolver.py +1 -1
- data_designer/engine/validation.py +3 -2
- data_designer/engine/validators/__init__.py +1 -1
- data_designer/engine/validators/base.py +1 -1
- data_designer/engine/validators/local_callable.py +1 -1
- data_designer/engine/validators/python.py +1 -1
- data_designer/engine/validators/remote.py +1 -1
- data_designer/engine/validators/sql.py +1 -1
- data_designer/errors.py +1 -1
- data_designer/essentials/__init__.py +1 -1
- data_designer/interface/__init__.py +1 -1
- data_designer/interface/data_designer.py +5 -1
- data_designer/interface/errors.py +1 -1
- data_designer/interface/results.py +1 -1
- data_designer/logging.py +1 -1
- data_designer/plugin_manager.py +14 -7
- data_designer/plugins/__init__.py +1 -1
- data_designer/plugins/errors.py +1 -1
- data_designer/plugins/plugin.py +4 -1
- data_designer/plugins/registry.py +1 -1
- data_designer/plugins/testing/__init__.py +1 -1
- data_designer/plugins/testing/stubs.py +1 -8
- data_designer/plugins/testing/utils.py +10 -3
- {data_designer-0.3.1.dist-info → data_designer-0.3.2.dist-info}/METADATA +2 -2
- data_designer-0.3.2.dist-info/RECORD +193 -0
- data_designer-0.3.1.dist-info/RECORD +0 -191
- {data_designer-0.3.1.dist-info → data_designer-0.3.2.dist-info}/WHEEL +0 -0
- {data_designer-0.3.1.dist-info → data_designer-0.3.2.dist-info}/entry_points.txt +0 -0
- {data_designer-0.3.1.dist-info → data_designer-0.3.2.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
@@ -31,7 +31,6 @@ from data_designer.engine.analysis.utils.judge_score_processing import (
|
|
|
31
31
|
)
|
|
32
32
|
from data_designer.engine.models.facade import ModelFacade
|
|
33
33
|
from data_designer.engine.models.recipes.response_recipes import TextResponseRecipe
|
|
34
|
-
from data_designer.engine.resources.resource_provider import ResourceType
|
|
35
34
|
|
|
36
35
|
logger = logging.getLogger(__name__)
|
|
37
36
|
|
|
@@ -42,7 +41,6 @@ class JudgeScoreProfiler(ColumnProfiler[JudgeScoreProfilerConfig]):
|
|
|
42
41
|
return ColumnProfilerMetadata(
|
|
43
42
|
name="judge_score_profiler",
|
|
44
43
|
description="Analyzes LLM-as-judge score distributions in a Data Designer dataset.",
|
|
45
|
-
required_resources=[ResourceType.MODEL_REGISTRY],
|
|
46
44
|
applicable_column_types=[DataDesignerColumnType.LLM_JUDGE],
|
|
47
45
|
)
|
|
48
46
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.config.analysis.column_profilers import ColumnProfilerType
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.errors import DataDesignerError
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
@@ -16,6 +16,7 @@ from data_designer.engine.configurable_task import ConfigurableTask, Configurabl
|
|
|
16
16
|
if TYPE_CHECKING:
|
|
17
17
|
from data_designer.config.models import BaseInferenceParams, ModelConfig
|
|
18
18
|
from data_designer.engine.models.facade import ModelFacade
|
|
19
|
+
from data_designer.engine.models.registry import ModelRegistry
|
|
19
20
|
|
|
20
21
|
|
|
21
22
|
logger = logging.getLogger(__name__)
|
|
@@ -72,27 +73,38 @@ class FromScratchColumnGenerator(ColumnGenerator[TaskConfigT], ABC):
|
|
|
72
73
|
def generate_from_scratch(self, num_records: int) -> pd.DataFrame: ...
|
|
73
74
|
|
|
74
75
|
|
|
75
|
-
class
|
|
76
|
+
class ColumnGeneratorWithModelRegistry(ColumnGenerator[TaskConfigT], ABC):
|
|
77
|
+
@property
|
|
78
|
+
def model_registry(self) -> ModelRegistry:
|
|
79
|
+
return self.resource_provider.model_registry
|
|
80
|
+
|
|
81
|
+
def get_model(self, model_alias: str) -> ModelFacade:
|
|
82
|
+
return self.model_registry.get_model(model_alias=model_alias)
|
|
83
|
+
|
|
84
|
+
def get_model_config(self, model_alias: str) -> ModelConfig:
|
|
85
|
+
return self.model_registry.get_model_config(model_alias=model_alias)
|
|
86
|
+
|
|
87
|
+
def get_model_provider_name(self, model_alias: str) -> str:
|
|
88
|
+
provider = self.model_registry.get_model_provider(model_alias=model_alias)
|
|
89
|
+
return provider.name
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
class ColumnGeneratorWithModel(ColumnGeneratorWithModelRegistry[TaskConfigT], ABC):
|
|
76
93
|
@functools.cached_property
|
|
77
94
|
def model(self) -> ModelFacade:
|
|
78
|
-
return self.
|
|
95
|
+
return self.get_model(model_alias=self.config.model_alias)
|
|
79
96
|
|
|
80
97
|
@functools.cached_property
|
|
81
98
|
def model_config(self) -> ModelConfig:
|
|
82
|
-
return self.
|
|
99
|
+
return self.get_model_config(model_alias=self.config.model_alias)
|
|
83
100
|
|
|
84
101
|
@functools.cached_property
|
|
85
102
|
def inference_parameters(self) -> BaseInferenceParams:
|
|
86
103
|
return self.model_config.inference_parameters
|
|
87
104
|
|
|
88
105
|
def log_pre_generation(self) -> None:
|
|
89
|
-
logger.info(f"
|
|
90
|
-
logger.info(f" |--
|
|
91
|
-
logger.info(f" |-- model
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
def _get_provider_name(self) -> str:
|
|
96
|
-
model_alias = self.model_config.alias
|
|
97
|
-
provider = self.resource_provider.model_registry.get_model_provider(model_alias=model_alias)
|
|
98
|
-
return provider.name
|
|
106
|
+
logger.info(f"{self.config.column_type} model configuration for generating column '{self.config.name}'")
|
|
107
|
+
logger.info(f" |-- model: {self.model_config.model!r}")
|
|
108
|
+
logger.info(f" |-- model alias: {self.config.model_alias!r}")
|
|
109
|
+
logger.info(f" |-- model provider: {self.get_model_provider_name(model_alias=self.config.model_alias)!r}")
|
|
110
|
+
logger.info(f" |-- inference parameters: {self.inference_parameters.format_for_display()}")
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
|
|
@@ -6,13 +6,11 @@ from pydantic import BaseModel, computed_field
|
|
|
6
6
|
|
|
7
7
|
from data_designer.config.column_configs import EmbeddingColumnConfig
|
|
8
8
|
from data_designer.engine.column_generators.generators.base import (
|
|
9
|
-
|
|
9
|
+
ColumnGeneratorWithModel,
|
|
10
10
|
GenerationStrategy,
|
|
11
11
|
GeneratorMetadata,
|
|
12
|
-
WithModelGeneration,
|
|
13
12
|
)
|
|
14
13
|
from data_designer.engine.processing.utils import deserialize_json_values, parse_list_string
|
|
15
|
-
from data_designer.engine.resources.resource_provider import ResourceType
|
|
16
14
|
|
|
17
15
|
|
|
18
16
|
class EmbeddingGenerationResult(BaseModel):
|
|
@@ -27,14 +25,13 @@ class EmbeddingGenerationResult(BaseModel):
|
|
|
27
25
|
return len(self.embeddings[0]) if len(self.embeddings) > 0 else 0
|
|
28
26
|
|
|
29
27
|
|
|
30
|
-
class EmbeddingCellGenerator(
|
|
28
|
+
class EmbeddingCellGenerator(ColumnGeneratorWithModel[EmbeddingColumnConfig]):
|
|
31
29
|
@staticmethod
|
|
32
30
|
def metadata() -> GeneratorMetadata:
|
|
33
31
|
return GeneratorMetadata(
|
|
34
32
|
name="embedding_cell_generator",
|
|
35
33
|
description="Generate embeddings for a text column.",
|
|
36
34
|
generation_strategy=GenerationStrategy.CELL_BY_CELL,
|
|
37
|
-
required_resources=[ResourceType.MODEL_REGISTRY],
|
|
38
35
|
)
|
|
39
36
|
|
|
40
37
|
def generate(self, data: dict) -> dict:
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
import logging
|
|
5
7
|
|
|
6
8
|
import pandas as pd
|
|
@@ -25,7 +27,6 @@ class ExpressionColumnGenerator(WithJinja2UserTemplateRendering, ColumnGenerator
|
|
|
25
27
|
name="expression_generator",
|
|
26
28
|
description="Generate a column from a jinja2 expression.",
|
|
27
29
|
generation_strategy=GenerationStrategy.FULL_COLUMN,
|
|
28
|
-
required_resources=None,
|
|
29
30
|
)
|
|
30
31
|
|
|
31
32
|
def generate(self, data: pd.DataFrame) -> pd.DataFrame:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
import functools
|
|
@@ -12,19 +12,18 @@ from data_designer.config.column_configs import (
|
|
|
12
12
|
)
|
|
13
13
|
from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX
|
|
14
14
|
from data_designer.engine.column_generators.generators.base import (
|
|
15
|
-
|
|
15
|
+
ColumnGeneratorWithModel,
|
|
16
16
|
GenerationStrategy,
|
|
17
17
|
GeneratorMetadata,
|
|
18
|
-
WithModelGeneration,
|
|
19
18
|
)
|
|
20
19
|
from data_designer.engine.column_generators.utils.prompt_renderer import (
|
|
21
20
|
PromptType,
|
|
22
21
|
RecordBasedPromptRenderer,
|
|
23
22
|
create_response_recipe,
|
|
24
23
|
)
|
|
24
|
+
from data_designer.engine.configurable_task import TaskConfigT
|
|
25
25
|
from data_designer.engine.models.recipes.base import ResponseRecipe
|
|
26
26
|
from data_designer.engine.processing.utils import deserialize_json_values
|
|
27
|
-
from data_designer.engine.resources.resource_provider import ResourceType
|
|
28
27
|
|
|
29
28
|
logger = logging.getLogger(__name__)
|
|
30
29
|
|
|
@@ -33,7 +32,7 @@ DEFAULT_MAX_CONVERSATION_RESTARTS = 5
|
|
|
33
32
|
DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS = 0
|
|
34
33
|
|
|
35
34
|
|
|
36
|
-
class
|
|
35
|
+
class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfigT]):
|
|
37
36
|
@functools.cached_property
|
|
38
37
|
def response_recipe(self) -> ResponseRecipe:
|
|
39
38
|
return create_response_recipe(self.config, self.model_config)
|
|
@@ -92,47 +91,43 @@ class WithChatCompletionGeneration(WithModelGeneration):
|
|
|
92
91
|
return data
|
|
93
92
|
|
|
94
93
|
|
|
95
|
-
class LLMTextCellGenerator(
|
|
94
|
+
class LLMTextCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMTextColumnConfig]):
|
|
96
95
|
@staticmethod
|
|
97
96
|
def metadata() -> GeneratorMetadata:
|
|
98
97
|
return GeneratorMetadata(
|
|
99
98
|
name="llm_text_generator",
|
|
100
99
|
description="Generate a new dataset cell from a prompt template",
|
|
101
100
|
generation_strategy=GenerationStrategy.CELL_BY_CELL,
|
|
102
|
-
required_resources=[ResourceType.MODEL_REGISTRY],
|
|
103
101
|
)
|
|
104
102
|
|
|
105
103
|
|
|
106
|
-
class LLMCodeCellGenerator(
|
|
104
|
+
class LLMCodeCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMCodeColumnConfig]):
|
|
107
105
|
@staticmethod
|
|
108
106
|
def metadata() -> GeneratorMetadata:
|
|
109
107
|
return GeneratorMetadata(
|
|
110
108
|
name="llm_code_generator",
|
|
111
109
|
description="Generate a new dataset cell from a prompt template",
|
|
112
110
|
generation_strategy=GenerationStrategy.CELL_BY_CELL,
|
|
113
|
-
required_resources=[ResourceType.MODEL_REGISTRY],
|
|
114
111
|
)
|
|
115
112
|
|
|
116
113
|
|
|
117
|
-
class LLMStructuredCellGenerator(
|
|
114
|
+
class LLMStructuredCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMStructuredColumnConfig]):
|
|
118
115
|
@staticmethod
|
|
119
116
|
def metadata() -> GeneratorMetadata:
|
|
120
117
|
return GeneratorMetadata(
|
|
121
118
|
name="llm_structured_generator",
|
|
122
119
|
description="Generate a new dataset cell from a prompt template",
|
|
123
120
|
generation_strategy=GenerationStrategy.CELL_BY_CELL,
|
|
124
|
-
required_resources=[ResourceType.MODEL_REGISTRY],
|
|
125
121
|
)
|
|
126
122
|
|
|
127
123
|
|
|
128
|
-
class LLMJudgeCellGenerator(
|
|
124
|
+
class LLMJudgeCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMJudgeColumnConfig]):
|
|
129
125
|
@staticmethod
|
|
130
126
|
def metadata() -> GeneratorMetadata:
|
|
131
127
|
return GeneratorMetadata(
|
|
132
128
|
name="llm_judge_generator",
|
|
133
129
|
description="Judge a new dataset cell based on a set of rubrics",
|
|
134
130
|
generation_strategy=GenerationStrategy.CELL_BY_CELL,
|
|
135
|
-
required_resources=[ResourceType.MODEL_REGISTRY],
|
|
136
131
|
)
|
|
137
132
|
|
|
138
133
|
@property
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
import logging
|
|
5
7
|
import random
|
|
6
8
|
from functools import partial
|
|
@@ -17,7 +19,6 @@ from data_designer.engine.column_generators.generators.base import (
|
|
|
17
19
|
from data_designer.engine.dataset_builders.multi_column_configs import SamplerMultiColumnConfig
|
|
18
20
|
from data_designer.engine.processing.utils import concat_datasets
|
|
19
21
|
from data_designer.engine.resources.managed_dataset_generator import ManagedDatasetGenerator
|
|
20
|
-
from data_designer.engine.resources.resource_provider import ResourceType
|
|
21
22
|
from data_designer.engine.sampling_gen.data_sources.sources import SamplerType
|
|
22
23
|
from data_designer.engine.sampling_gen.entities.person import load_person_data_sampler
|
|
23
24
|
from data_designer.engine.sampling_gen.generator import DatasetGenerator as SamplingDatasetGenerator
|
|
@@ -32,7 +33,6 @@ class SamplerColumnGenerator(FromScratchColumnGenerator[SamplerMultiColumnConfig
|
|
|
32
33
|
name="sampler_column_generator",
|
|
33
34
|
description="Generate columns using sampling-based method.",
|
|
34
35
|
generation_strategy=GenerationStrategy.FULL_COLUMN,
|
|
35
|
-
required_resources=[ResourceType.BLOB_STORAGE],
|
|
36
36
|
)
|
|
37
37
|
|
|
38
38
|
def generate(self, data: pd.DataFrame) -> pd.DataFrame:
|
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
4
7
|
import functools
|
|
5
8
|
import logging
|
|
6
9
|
|
|
@@ -16,7 +19,6 @@ from data_designer.engine.column_generators.generators.base import (
|
|
|
16
19
|
from data_designer.engine.column_generators.utils.errors import SeedDatasetError
|
|
17
20
|
from data_designer.engine.dataset_builders.multi_column_configs import SeedDatasetMultiColumnConfig
|
|
18
21
|
from data_designer.engine.processing.utils import concat_datasets
|
|
19
|
-
from data_designer.engine.resources.resource_provider import ResourceType
|
|
20
22
|
|
|
21
23
|
MAX_ZERO_RECORD_RESPONSE_FACTOR = 2
|
|
22
24
|
|
|
@@ -30,7 +32,6 @@ class SeedDatasetColumnGenerator(FromScratchColumnGenerator[SeedDatasetMultiColu
|
|
|
30
32
|
name="seed_dataset_column_generator",
|
|
31
33
|
description="Sample columns from a seed dataset.",
|
|
32
34
|
generation_strategy=GenerationStrategy.FULL_COLUMN,
|
|
33
|
-
required_resources=[ResourceType.SEED_READER],
|
|
34
35
|
)
|
|
35
36
|
|
|
36
37
|
@property
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
import logging
|
|
5
7
|
|
|
6
8
|
import pandas as pd
|
|
@@ -50,7 +52,6 @@ class ValidationColumnGenerator(ColumnGenerator[ValidationColumnConfig]):
|
|
|
50
52
|
name="validate",
|
|
51
53
|
description="Validate data.",
|
|
52
54
|
generation_strategy=GenerationStrategy.FULL_COLUMN,
|
|
53
|
-
required_resources=None,
|
|
54
55
|
)
|
|
55
56
|
|
|
56
57
|
def generate(self, data: pd.DataFrame) -> pd.DataFrame:
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.config.base import ConfigBase
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.engine.errors import DataDesignerError
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from data_designer.config.column_types import DataDesignerColumnType
|
|
5
|
+
from data_designer.config.utils.type_helpers import resolve_string_enum
|
|
6
|
+
from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModelRegistry
|
|
7
|
+
from data_designer.plugin_manager import PluginManager
|
|
8
|
+
|
|
9
|
+
plugin_manager = PluginManager()
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def column_type_used_in_execution_dag(column_type: str | DataDesignerColumnType) -> bool:
|
|
13
|
+
"""Return True if the column type is used in the workflow execution DAG."""
|
|
14
|
+
column_type = resolve_string_enum(column_type, DataDesignerColumnType)
|
|
15
|
+
dag_column_types = {
|
|
16
|
+
DataDesignerColumnType.EXPRESSION,
|
|
17
|
+
DataDesignerColumnType.LLM_CODE,
|
|
18
|
+
DataDesignerColumnType.LLM_JUDGE,
|
|
19
|
+
DataDesignerColumnType.LLM_STRUCTURED,
|
|
20
|
+
DataDesignerColumnType.LLM_TEXT,
|
|
21
|
+
DataDesignerColumnType.VALIDATION,
|
|
22
|
+
DataDesignerColumnType.EMBEDDING,
|
|
23
|
+
}
|
|
24
|
+
dag_column_types.update(plugin_manager.get_plugin_column_types(DataDesignerColumnType))
|
|
25
|
+
return column_type in dag_column_types
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def column_type_is_model_generated(column_type: str | DataDesignerColumnType) -> bool:
|
|
29
|
+
"""Return True if the column type is a model-generated column."""
|
|
30
|
+
column_type = resolve_string_enum(column_type, DataDesignerColumnType)
|
|
31
|
+
model_generated_column_types = {
|
|
32
|
+
DataDesignerColumnType.LLM_TEXT,
|
|
33
|
+
DataDesignerColumnType.LLM_CODE,
|
|
34
|
+
DataDesignerColumnType.LLM_STRUCTURED,
|
|
35
|
+
DataDesignerColumnType.LLM_JUDGE,
|
|
36
|
+
DataDesignerColumnType.EMBEDDING,
|
|
37
|
+
}
|
|
38
|
+
for plugin in plugin_manager.get_column_generator_plugins():
|
|
39
|
+
if issubclass(plugin.impl_cls, ColumnGeneratorWithModelRegistry):
|
|
40
|
+
model_generated_column_types.add(plugin.name)
|
|
41
|
+
return column_type in model_generated_column_types
|
data_designer/engine/compiler.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from abc import ABC, abstractmethod
|
|
@@ -9,7 +9,7 @@ import pandas as pd
|
|
|
9
9
|
|
|
10
10
|
from data_designer.config.base import ConfigBase
|
|
11
11
|
from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage
|
|
12
|
-
from data_designer.engine.resources.resource_provider import ResourceProvider
|
|
12
|
+
from data_designer.engine.resources.resource_provider import ResourceProvider
|
|
13
13
|
|
|
14
14
|
DataT = TypeVar("DataT", dict, pd.DataFrame)
|
|
15
15
|
TaskConfigT = TypeVar("ConfigT", bound=ConfigBase)
|
|
@@ -18,14 +18,12 @@ TaskConfigT = TypeVar("ConfigT", bound=ConfigBase)
|
|
|
18
18
|
class ConfigurableTaskMetadata(ConfigBase):
|
|
19
19
|
name: str
|
|
20
20
|
description: str
|
|
21
|
-
required_resources: list[ResourceType] | None
|
|
22
21
|
|
|
23
22
|
|
|
24
23
|
class ConfigurableTask(ABC, Generic[TaskConfigT]):
|
|
25
|
-
def __init__(self, config: TaskConfigT,
|
|
24
|
+
def __init__(self, config: TaskConfigT, resource_provider: ResourceProvider):
|
|
26
25
|
self._config = self.get_config_type().model_validate(config)
|
|
27
26
|
self._resource_provider = resource_provider
|
|
28
|
-
self._validate_resources()
|
|
29
27
|
self._validate()
|
|
30
28
|
self._initialize()
|
|
31
29
|
|
|
@@ -61,8 +59,6 @@ class ConfigurableTask(ABC, Generic[TaskConfigT]):
|
|
|
61
59
|
|
|
62
60
|
@property
|
|
63
61
|
def resource_provider(self) -> ResourceProvider:
|
|
64
|
-
if self._resource_provider is None:
|
|
65
|
-
raise ValueError(f"No resource provider provided for the `{self.metadata().name}` task.")
|
|
66
62
|
return self._resource_provider
|
|
67
63
|
|
|
68
64
|
@staticmethod
|
|
@@ -74,9 +70,3 @@ class ConfigurableTask(ABC, Generic[TaskConfigT]):
|
|
|
74
70
|
|
|
75
71
|
def _validate(self) -> None:
|
|
76
72
|
"""An internal method for custom validation logic, which will be called in the constructor."""
|
|
77
|
-
|
|
78
|
-
def _validate_resources(self) -> None:
|
|
79
|
-
for resource in self.metadata().required_resources or []:
|
|
80
|
-
if resource is not None:
|
|
81
|
-
if getattr(self.resource_provider, ResourceType(resource).value) is None:
|
|
82
|
-
raise ValueError(f"Resource {resource} is required for the `{self.metadata().name}`")
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
3
4
|
from __future__ import annotations
|
|
4
5
|
|
|
5
6
|
import functools
|
|
@@ -13,7 +14,7 @@ from typing import TYPE_CHECKING, Callable
|
|
|
13
14
|
|
|
14
15
|
import pandas as pd
|
|
15
16
|
|
|
16
|
-
from data_designer.config.column_types import ColumnConfigT
|
|
17
|
+
from data_designer.config.column_types import ColumnConfigT
|
|
17
18
|
from data_designer.config.dataset_builders import BuildStage
|
|
18
19
|
from data_designer.config.processors import (
|
|
19
20
|
DropColumnsProcessorConfig,
|
|
@@ -22,9 +23,10 @@ from data_designer.config.processors import (
|
|
|
22
23
|
)
|
|
23
24
|
from data_designer.engine.column_generators.generators.base import (
|
|
24
25
|
ColumnGenerator,
|
|
26
|
+
ColumnGeneratorWithModel,
|
|
25
27
|
GenerationStrategy,
|
|
26
|
-
WithModelGeneration,
|
|
27
28
|
)
|
|
29
|
+
from data_designer.engine.column_generators.utils.generator_classification import column_type_is_model_generated
|
|
28
30
|
from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage
|
|
29
31
|
from data_designer.engine.dataset_builders.errors import DatasetGenerationError, DatasetProcessingError
|
|
30
32
|
from data_designer.engine.dataset_builders.multi_column_configs import (
|
|
@@ -45,6 +47,7 @@ from data_designer.engine.registry.data_designer_registry import DataDesignerReg
|
|
|
45
47
|
from data_designer.engine.resources.resource_provider import ResourceProvider
|
|
46
48
|
|
|
47
49
|
if TYPE_CHECKING:
|
|
50
|
+
from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModelRegistry
|
|
48
51
|
from data_designer.engine.models.usage import ModelUsageStats
|
|
49
52
|
|
|
50
53
|
logger = logging.getLogger(__name__)
|
|
@@ -192,7 +195,7 @@ class ColumnWiseDatasetBuilder:
|
|
|
192
195
|
|
|
193
196
|
def _run_cell_by_cell_generator(self, generator: ColumnGenerator) -> None:
|
|
194
197
|
max_workers = MAX_CONCURRENCY_PER_NON_LLM_GENERATOR
|
|
195
|
-
if isinstance(generator,
|
|
198
|
+
if isinstance(generator, ColumnGeneratorWithModel):
|
|
196
199
|
max_workers = generator.inference_parameters.max_parallel_requests
|
|
197
200
|
self._fan_out_with_threads(generator, max_workers=max_workers)
|
|
198
201
|
|
|
@@ -206,7 +209,7 @@ class ColumnWiseDatasetBuilder:
|
|
|
206
209
|
list(set(config.model_alias for config in self.llm_generated_column_configs))
|
|
207
210
|
)
|
|
208
211
|
|
|
209
|
-
def _fan_out_with_threads(self, generator:
|
|
212
|
+
def _fan_out_with_threads(self, generator: ColumnGeneratorWithModelRegistry, max_workers: int) -> None:
|
|
210
213
|
if generator.generation_strategy != GenerationStrategy.CELL_BY_CELL:
|
|
211
214
|
raise DatasetGenerationError(
|
|
212
215
|
f"Generator {generator.metadata().name} is not a {GenerationStrategy.CELL_BY_CELL} "
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.engine.errors import DataDesignerError
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.config.column_types import DataDesignerColumnType
|
|
@@ -1,11 +1,12 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
import logging
|
|
5
5
|
|
|
6
6
|
import networkx as nx
|
|
7
7
|
|
|
8
|
-
from data_designer.config.column_types import ColumnConfigT
|
|
8
|
+
from data_designer.config.column_types import ColumnConfigT
|
|
9
|
+
from data_designer.engine.column_generators.utils.generator_classification import column_type_used_in_execution_dag
|
|
9
10
|
from data_designer.engine.dataset_builders.utils.errors import DAGCircularDependencyError
|
|
10
11
|
|
|
11
12
|
logger = logging.getLogger(__name__)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.engine.errors import DataDesignerError
|
data_designer/engine/errors.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel, Field
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from functools import cached_property
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|