data-designer 0.3.0__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/__init__.py +1 -1
- data_designer/_version.py +2 -2
- data_designer/cli/__init__.py +1 -1
- data_designer/cli/commands/__init__.py +1 -1
- data_designer/cli/commands/download.py +1 -1
- data_designer/cli/commands/list.py +1 -1
- data_designer/cli/commands/models.py +1 -1
- data_designer/cli/commands/providers.py +1 -1
- data_designer/cli/commands/reset.py +1 -1
- data_designer/cli/controllers/__init__.py +1 -1
- data_designer/cli/controllers/download_controller.py +1 -1
- data_designer/cli/controllers/model_controller.py +1 -1
- data_designer/cli/controllers/provider_controller.py +1 -1
- data_designer/cli/forms/__init__.py +1 -1
- data_designer/cli/forms/builder.py +1 -1
- data_designer/cli/forms/field.py +1 -1
- data_designer/cli/forms/form.py +1 -1
- data_designer/cli/forms/model_builder.py +1 -1
- data_designer/cli/forms/provider_builder.py +1 -1
- data_designer/cli/main.py +1 -1
- data_designer/cli/repositories/__init__.py +1 -1
- data_designer/cli/repositories/base.py +1 -1
- data_designer/cli/repositories/model_repository.py +1 -1
- data_designer/cli/repositories/persona_repository.py +1 -1
- data_designer/cli/repositories/provider_repository.py +1 -1
- data_designer/cli/services/__init__.py +1 -1
- data_designer/cli/services/download_service.py +1 -1
- data_designer/cli/services/model_service.py +1 -1
- data_designer/cli/services/provider_service.py +1 -1
- data_designer/cli/ui.py +1 -1
- data_designer/cli/utils.py +1 -1
- data_designer/config/__init__.py +1 -1
- data_designer/config/analysis/__init__.py +1 -1
- data_designer/config/analysis/column_profilers.py +1 -1
- data_designer/config/analysis/column_statistics.py +1 -1
- data_designer/config/analysis/dataset_profiler.py +1 -1
- data_designer/config/analysis/utils/errors.py +1 -1
- data_designer/config/analysis/utils/reporting.py +1 -1
- data_designer/config/base.py +1 -1
- data_designer/config/column_configs.py +1 -1
- data_designer/config/column_types.py +1 -36
- data_designer/config/config_builder.py +4 -21
- data_designer/config/data_designer_config.py +1 -1
- data_designer/config/dataset_builders.py +1 -1
- data_designer/config/default_model_settings.py +1 -1
- data_designer/config/errors.py +1 -1
- data_designer/config/exports.py +1 -1
- data_designer/config/interface.py +1 -1
- data_designer/config/models.py +1 -1
- data_designer/config/preview_results.py +1 -1
- data_designer/config/processors.py +1 -1
- data_designer/config/run_config.py +1 -1
- data_designer/config/sampler_constraints.py +1 -1
- data_designer/config/sampler_params.py +1 -1
- data_designer/config/seed.py +2 -2
- data_designer/config/seed_source.py +15 -10
- data_designer/config/seed_source_types.py +17 -0
- data_designer/config/utils/code_lang.py +1 -1
- data_designer/config/utils/constants.py +1 -1
- data_designer/config/utils/errors.py +1 -1
- data_designer/config/utils/info.py +1 -1
- data_designer/config/utils/io_helpers.py +1 -1
- data_designer/config/utils/misc.py +1 -1
- data_designer/config/utils/numerical_helpers.py +1 -1
- data_designer/config/utils/type_helpers.py +1 -1
- data_designer/config/utils/visualization.py +1 -1
- data_designer/config/validator_params.py +1 -1
- data_designer/engine/__init__.py +1 -1
- data_designer/engine/analysis/column_profilers/base.py +1 -1
- data_designer/engine/analysis/column_profilers/judge_score_profiler.py +1 -3
- data_designer/engine/analysis/column_profilers/registry.py +1 -1
- data_designer/engine/analysis/column_statistics.py +1 -1
- data_designer/engine/analysis/dataset_profiler.py +1 -1
- data_designer/engine/analysis/errors.py +1 -1
- data_designer/engine/analysis/utils/column_statistics_calculations.py +1 -1
- data_designer/engine/analysis/utils/judge_score_processing.py +1 -1
- data_designer/engine/column_generators/__init__.py +1 -1
- data_designer/engine/column_generators/generators/__init__.py +1 -1
- data_designer/engine/column_generators/generators/base.py +26 -14
- data_designer/engine/column_generators/generators/embedding.py +3 -6
- data_designer/engine/column_generators/generators/expression.py +3 -2
- data_designer/engine/column_generators/generators/llm_completion.py +8 -13
- data_designer/engine/column_generators/generators/samplers.py +3 -3
- data_designer/engine/column_generators/generators/seed_dataset.py +4 -3
- data_designer/engine/column_generators/generators/validation.py +3 -2
- data_designer/engine/column_generators/registry.py +1 -1
- data_designer/engine/column_generators/utils/errors.py +1 -1
- data_designer/engine/column_generators/utils/generator_classification.py +41 -0
- data_designer/engine/column_generators/utils/judge_score_factory.py +1 -1
- data_designer/engine/column_generators/utils/prompt_renderer.py +1 -1
- data_designer/engine/compiler.py +1 -1
- data_designer/engine/configurable_task.py +3 -13
- data_designer/engine/dataset_builders/artifact_storage.py +1 -1
- data_designer/engine/dataset_builders/column_wise_builder.py +8 -5
- data_designer/engine/dataset_builders/errors.py +1 -1
- data_designer/engine/dataset_builders/multi_column_configs.py +1 -1
- data_designer/engine/dataset_builders/utils/__init__.py +1 -1
- data_designer/engine/dataset_builders/utils/concurrency.py +1 -1
- data_designer/engine/dataset_builders/utils/config_compiler.py +1 -1
- data_designer/engine/dataset_builders/utils/dag.py +3 -2
- data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +1 -1
- data_designer/engine/dataset_builders/utils/errors.py +1 -1
- data_designer/engine/errors.py +1 -1
- data_designer/engine/model_provider.py +1 -1
- data_designer/engine/models/__init__.py +1 -1
- data_designer/engine/models/errors.py +1 -1
- data_designer/engine/models/facade.py +1 -1
- data_designer/engine/models/litellm_overrides.py +1 -1
- data_designer/engine/models/parsers/__init__.py +1 -1
- data_designer/engine/models/parsers/errors.py +1 -1
- data_designer/engine/models/parsers/parser.py +1 -1
- data_designer/engine/models/parsers/postprocessors.py +1 -1
- data_designer/engine/models/parsers/tag_parsers.py +1 -1
- data_designer/engine/models/parsers/types.py +1 -1
- data_designer/engine/models/recipes/base.py +1 -1
- data_designer/engine/models/recipes/response_recipes.py +1 -1
- data_designer/engine/models/registry.py +1 -1
- data_designer/engine/models/telemetry.py +1 -1
- data_designer/engine/models/usage.py +1 -1
- data_designer/engine/models/utils.py +1 -1
- data_designer/engine/processing/ginja/__init__.py +1 -1
- data_designer/engine/processing/ginja/ast.py +1 -1
- data_designer/engine/processing/ginja/environment.py +1 -1
- data_designer/engine/processing/ginja/exceptions.py +1 -1
- data_designer/engine/processing/ginja/record.py +1 -1
- data_designer/engine/processing/gsonschema/__init__.py +1 -1
- data_designer/engine/processing/gsonschema/exceptions.py +1 -1
- data_designer/engine/processing/gsonschema/schema_transformers.py +1 -1
- data_designer/engine/processing/gsonschema/types.py +1 -1
- data_designer/engine/processing/gsonschema/validators.py +1 -1
- data_designer/engine/processing/processors/base.py +3 -1
- data_designer/engine/processing/processors/drop_columns.py +1 -2
- data_designer/engine/processing/processors/registry.py +1 -1
- data_designer/engine/processing/processors/schema_transform.py +1 -2
- data_designer/engine/processing/utils.py +1 -1
- data_designer/engine/registry/base.py +1 -1
- data_designer/engine/registry/data_designer_registry.py +1 -1
- data_designer/engine/registry/errors.py +1 -1
- data_designer/engine/resources/managed_dataset_generator.py +1 -1
- data_designer/engine/resources/managed_dataset_repository.py +1 -1
- data_designer/engine/resources/managed_storage.py +1 -1
- data_designer/engine/resources/resource_provider.py +1 -1
- data_designer/engine/resources/seed_reader.py +1 -1
- data_designer/engine/sampling_gen/column.py +1 -1
- data_designer/engine/sampling_gen/constraints.py +1 -1
- data_designer/engine/sampling_gen/data_sources/base.py +1 -1
- data_designer/engine/sampling_gen/data_sources/errors.py +1 -1
- data_designer/engine/sampling_gen/data_sources/sources.py +1 -1
- data_designer/engine/sampling_gen/entities/__init__.py +1 -1
- data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +1 -1
- data_designer/engine/sampling_gen/entities/email_address_utils.py +1 -1
- data_designer/engine/sampling_gen/entities/errors.py +1 -1
- data_designer/engine/sampling_gen/entities/national_id_utils.py +1 -1
- data_designer/engine/sampling_gen/entities/person.py +1 -1
- data_designer/engine/sampling_gen/entities/phone_number.py +1 -1
- data_designer/engine/sampling_gen/errors.py +1 -1
- data_designer/engine/sampling_gen/generator.py +1 -1
- data_designer/engine/sampling_gen/jinja_utils.py +1 -1
- data_designer/engine/sampling_gen/people_gen.py +1 -1
- data_designer/engine/sampling_gen/person_constants.py +1 -1
- data_designer/engine/sampling_gen/schema.py +1 -1
- data_designer/engine/sampling_gen/schema_builder.py +1 -1
- data_designer/engine/sampling_gen/utils.py +1 -1
- data_designer/engine/secret_resolver.py +1 -1
- data_designer/engine/validation.py +3 -2
- data_designer/engine/validators/__init__.py +1 -1
- data_designer/engine/validators/base.py +1 -1
- data_designer/engine/validators/local_callable.py +1 -1
- data_designer/engine/validators/python.py +1 -1
- data_designer/engine/validators/remote.py +1 -1
- data_designer/engine/validators/sql.py +1 -1
- data_designer/errors.py +1 -1
- data_designer/essentials/__init__.py +1 -1
- data_designer/interface/__init__.py +1 -1
- data_designer/interface/data_designer.py +5 -1
- data_designer/interface/errors.py +1 -1
- data_designer/interface/results.py +1 -1
- data_designer/logging.py +1 -1
- data_designer/plugin_manager.py +14 -7
- data_designer/plugins/__init__.py +1 -1
- data_designer/plugins/errors.py +1 -1
- data_designer/plugins/plugin.py +4 -1
- data_designer/plugins/registry.py +1 -1
- data_designer/plugins/testing/__init__.py +1 -1
- data_designer/plugins/testing/stubs.py +1 -8
- data_designer/plugins/testing/utils.py +10 -3
- {data_designer-0.3.0.dist-info → data_designer-0.3.2.dist-info}/METADATA +2 -2
- data_designer-0.3.2.dist-info/RECORD +193 -0
- data_designer-0.3.0.dist-info/RECORD +0 -191
- {data_designer-0.3.0.dist-info → data_designer-0.3.2.dist-info}/WHEEL +0 -0
- {data_designer-0.3.0.dist-info → data_designer-0.3.2.dist-info}/entry_points.txt +0 -0
- {data_designer-0.3.0.dist-info → data_designer-0.3.2.dist-info}/licenses/LICENSE +0 -0
data_designer/__init__.py
CHANGED
data_designer/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.3.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 3, 0)
|
|
31
|
+
__version__ = version = '0.3.2'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 2)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
data_designer/cli/__init__.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.cli.main import app, main
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from rich.table import Table
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.cli.controllers.model_controller import ModelController
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.cli.controllers.provider_controller import ProviderController
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.cli.controllers.download_controller import DownloadController
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from pathlib import Path
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.cli.forms.builder import FormBuilder
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from abc import ABC, abstractmethod
|
data_designer/cli/forms/field.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from abc import ABC, abstractmethod
|
data_designer/cli/forms/form.py
CHANGED
data_designer/cli/main.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.cli.repositories.base import ConfigRepository
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from abc import ABC, abstractmethod
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from pathlib import Path
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from pydantic import BaseModel
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from pathlib import Path
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.cli.services.download_service import DownloadService
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.cli.repositories.model_repository import ModelConfigRegistry, ModelRepository
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.cli.repositories.provider_repository import ModelProviderRegistry, ProviderRepository
|
data_designer/cli/ui.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from collections.abc import Callable
|
data_designer/cli/utils.py
CHANGED
data_designer/config/__init__.py
CHANGED
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
@@ -1,2 +1,2 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from functools import cached_property
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.errors import DataDesignerError
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
data_designer/config/base.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
|
|
@@ -62,41 +62,6 @@ COLUMN_TYPE_EMOJI_MAP.update(
|
|
|
62
62
|
)
|
|
63
63
|
|
|
64
64
|
|
|
65
|
-
def column_type_used_in_execution_dag(column_type: str | DataDesignerColumnType) -> bool:
|
|
66
|
-
"""Return True if the column type is used in the workflow execution DAG."""
|
|
67
|
-
column_type = resolve_string_enum(column_type, DataDesignerColumnType)
|
|
68
|
-
dag_column_types = {
|
|
69
|
-
DataDesignerColumnType.EXPRESSION,
|
|
70
|
-
DataDesignerColumnType.LLM_CODE,
|
|
71
|
-
DataDesignerColumnType.LLM_JUDGE,
|
|
72
|
-
DataDesignerColumnType.LLM_STRUCTURED,
|
|
73
|
-
DataDesignerColumnType.LLM_TEXT,
|
|
74
|
-
DataDesignerColumnType.VALIDATION,
|
|
75
|
-
DataDesignerColumnType.EMBEDDING,
|
|
76
|
-
}
|
|
77
|
-
dag_column_types.update(plugin_manager.get_plugin_column_types(DataDesignerColumnType))
|
|
78
|
-
return column_type in dag_column_types
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
def column_type_is_model_generated(column_type: str | DataDesignerColumnType) -> bool:
|
|
82
|
-
"""Return True if the column type is a model-generated column."""
|
|
83
|
-
column_type = resolve_string_enum(column_type, DataDesignerColumnType)
|
|
84
|
-
model_generated_column_types = {
|
|
85
|
-
DataDesignerColumnType.LLM_TEXT,
|
|
86
|
-
DataDesignerColumnType.LLM_CODE,
|
|
87
|
-
DataDesignerColumnType.LLM_STRUCTURED,
|
|
88
|
-
DataDesignerColumnType.LLM_JUDGE,
|
|
89
|
-
DataDesignerColumnType.EMBEDDING,
|
|
90
|
-
}
|
|
91
|
-
model_generated_column_types.update(
|
|
92
|
-
plugin_manager.get_plugin_column_types(
|
|
93
|
-
DataDesignerColumnType,
|
|
94
|
-
required_resources=["model_registry"],
|
|
95
|
-
)
|
|
96
|
-
)
|
|
97
|
-
return column_type in model_generated_column_types
|
|
98
|
-
|
|
99
|
-
|
|
100
65
|
def get_column_config_from_kwargs(name: str, column_type: DataDesignerColumnType, **kwargs) -> ColumnConfigT:
|
|
101
66
|
"""Create a Data Designer column config object from kwargs.
|
|
102
67
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
@@ -18,7 +18,6 @@ from data_designer.config.column_configs import SeedDatasetColumnConfig
|
|
|
18
18
|
from data_designer.config.column_types import (
|
|
19
19
|
ColumnConfigT,
|
|
20
20
|
DataDesignerColumnType,
|
|
21
|
-
column_type_is_model_generated,
|
|
22
21
|
get_column_config_from_kwargs,
|
|
23
22
|
get_column_display_order,
|
|
24
23
|
)
|
|
@@ -40,7 +39,8 @@ from data_designer.config.seed import (
|
|
|
40
39
|
SamplingStrategy,
|
|
41
40
|
SeedConfig,
|
|
42
41
|
)
|
|
43
|
-
from data_designer.config.seed_source import DataFrameSeedSource
|
|
42
|
+
from data_designer.config.seed_source import DataFrameSeedSource
|
|
43
|
+
from data_designer.config.seed_source_types import SeedSourceT
|
|
44
44
|
from data_designer.config.utils.constants import DEFAULT_REPR_HTML_STYLE, REPR_HTML_TEMPLATE
|
|
45
45
|
from data_designer.config.utils.info import ConfigBuilderInfo
|
|
46
46
|
from data_designer.config.utils.io_helpers import serialize_data, smart_load_yaml
|
|
@@ -422,23 +422,6 @@ class DataDesignerConfigBuilder:
|
|
|
422
422
|
"""
|
|
423
423
|
return [c for c in self._constraints if c.target_column == target_column]
|
|
424
424
|
|
|
425
|
-
def get_llm_gen_columns(self) -> list[ColumnConfigT]:
|
|
426
|
-
"""Get all model-generated column configurations.
|
|
427
|
-
|
|
428
|
-
Returns:
|
|
429
|
-
A list of column configurations that use model generation.
|
|
430
|
-
"""
|
|
431
|
-
logger.warning("get_llm_gen_columns is deprecated. Use get_model_gen_columns instead.")
|
|
432
|
-
return self.get_model_gen_columns()
|
|
433
|
-
|
|
434
|
-
def get_model_gen_columns(self) -> list[ColumnConfigT]:
|
|
435
|
-
"""Get all model-generated column configurations.
|
|
436
|
-
|
|
437
|
-
Returns:
|
|
438
|
-
A list of column configurations that use model generation.
|
|
439
|
-
"""
|
|
440
|
-
return [c for c in self._column_configs.values() if column_type_is_model_generated(c.column_type)]
|
|
441
|
-
|
|
442
425
|
def get_columns_of_type(self, column_type: DataDesignerColumnType) -> list[ColumnConfigT]:
|
|
443
426
|
"""Get all column configurations of the specified type.
|
|
444
427
|
|
|
@@ -492,7 +475,7 @@ class DataDesignerConfigBuilder:
|
|
|
492
475
|
|
|
493
476
|
def with_seed_dataset(
|
|
494
477
|
self,
|
|
495
|
-
seed_source:
|
|
478
|
+
seed_source: SeedSourceT,
|
|
496
479
|
*,
|
|
497
480
|
sampling_strategy: SamplingStrategy = SamplingStrategy.ORDERED,
|
|
498
481
|
selection_strategy: IndexRange | PartitionBlock | None = None,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
data_designer/config/errors.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.errors import DataDesignerError
|
data_designer/config/exports.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.config.analysis.column_profilers import JudgeScoreProfilerConfig
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
data_designer/config/models.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c)
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, model_validator
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from abc import ABC, abstractmethod
|
data_designer/config/seed.py
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from enum import Enum
|
|
@@ -7,7 +7,7 @@ from pydantic import Field, model_validator
|
|
|
7
7
|
from typing_extensions import Self
|
|
8
8
|
|
|
9
9
|
from data_designer.config.base import ConfigBase
|
|
10
|
-
from data_designer.config.
|
|
10
|
+
from data_designer.config.seed_source_types import SeedSourceT
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class SamplingStrategy(str, Enum):
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c)
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from abc import ABC
|
|
5
|
-
from typing import
|
|
5
|
+
from typing import Literal
|
|
6
6
|
|
|
7
7
|
import pandas as pd
|
|
8
8
|
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
@@ -53,7 +53,11 @@ class HuggingFaceSeedSource(SeedSource):
|
|
|
53
53
|
|
|
54
54
|
path: str = Field(
|
|
55
55
|
...,
|
|
56
|
-
description=
|
|
56
|
+
description=(
|
|
57
|
+
"Path to the seed data in HuggingFace. Wildcards are allowed. Examples include "
|
|
58
|
+
"'datasets/my-username/my-dataset/data/000_00000.parquet', 'datasets/my-username/my-dataset/data/*.parquet', "
|
|
59
|
+
"and 'datasets/my-username/my-dataset/**/*.parquet'"
|
|
60
|
+
),
|
|
57
61
|
)
|
|
58
62
|
token: str | None = None
|
|
59
63
|
endpoint: str = "https://huggingface.co"
|
|
@@ -64,10 +68,11 @@ class DataFrameSeedSource(SeedSource):
|
|
|
64
68
|
|
|
65
69
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
66
70
|
|
|
67
|
-
df: pd.DataFrame
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
71
|
+
df: pd.DataFrame = Field(
|
|
72
|
+
...,
|
|
73
|
+
exclude=True,
|
|
74
|
+
description=(
|
|
75
|
+
"DataFrame to use directly as the seed dataset. NOTE: if you need to write a Data Designer config, "
|
|
76
|
+
"you must use `LocalFileSeedSource` instead, since DataFrame objects are not serializable."
|
|
77
|
+
),
|
|
78
|
+
)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from typing import Annotated
|
|
5
|
+
|
|
6
|
+
from pydantic import Field
|
|
7
|
+
from typing_extensions import TypeAlias
|
|
8
|
+
|
|
9
|
+
from data_designer.config.seed_source import DataFrameSeedSource, HuggingFaceSeedSource, LocalFileSeedSource
|
|
10
|
+
from data_designer.plugin_manager import PluginManager
|
|
11
|
+
|
|
12
|
+
plugin_manager = PluginManager()
|
|
13
|
+
|
|
14
|
+
_SeedSourceT: TypeAlias = LocalFileSeedSource | HuggingFaceSeedSource | DataFrameSeedSource
|
|
15
|
+
_SeedSourceT = plugin_manager.inject_into_seed_source_type_union(_SeedSourceT)
|
|
16
|
+
|
|
17
|
+
SeedSourceT = Annotated[_SeedSourceT, Field(discriminator="seed_type")]
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.errors import DataDesignerError
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from abc import ABC, abstractmethod
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|