data-designer 0.1.3__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/_version.py +2 -2
- data_designer/config/analysis/column_profilers.py +4 -4
- data_designer/config/analysis/column_statistics.py +5 -5
- data_designer/config/analysis/dataset_profiler.py +6 -6
- data_designer/config/analysis/utils/errors.py +1 -1
- data_designer/config/analysis/utils/reporting.py +5 -5
- data_designer/config/base.py +2 -2
- data_designer/config/column_configs.py +8 -8
- data_designer/config/column_types.py +9 -5
- data_designer/config/config_builder.py +32 -27
- data_designer/config/data_designer_config.py +7 -7
- data_designer/config/datastore.py +4 -4
- data_designer/config/default_model_settings.py +4 -4
- data_designer/config/errors.py +1 -1
- data_designer/config/exports.py +133 -0
- data_designer/config/interface.py +6 -6
- data_designer/config/models.py +109 -5
- data_designer/config/preview_results.py +9 -6
- data_designer/config/processors.py +48 -4
- data_designer/config/sampler_constraints.py +1 -1
- data_designer/config/sampler_params.py +2 -2
- data_designer/config/seed.py +3 -3
- data_designer/config/utils/constants.py +1 -1
- data_designer/config/utils/errors.py +1 -1
- data_designer/config/utils/info.py +8 -4
- data_designer/config/utils/io_helpers.py +5 -5
- data_designer/config/utils/misc.py +3 -3
- data_designer/config/utils/numerical_helpers.py +1 -1
- data_designer/config/utils/type_helpers.py +7 -3
- data_designer/config/utils/validation.py +37 -6
- data_designer/config/utils/visualization.py +42 -10
- data_designer/config/validator_params.py +2 -2
- data_designer/engine/analysis/column_profilers/base.py +1 -1
- data_designer/engine/analysis/dataset_profiler.py +1 -1
- data_designer/engine/analysis/utils/judge_score_processing.py +1 -1
- data_designer/engine/column_generators/generators/samplers.py +1 -1
- data_designer/engine/dataset_builders/artifact_storage.py +16 -6
- data_designer/engine/dataset_builders/column_wise_builder.py +4 -1
- data_designer/engine/dataset_builders/utils/concurrency.py +1 -1
- data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +1 -1
- data_designer/engine/errors.py +1 -1
- data_designer/engine/models/errors.py +1 -1
- data_designer/engine/models/facade.py +1 -1
- data_designer/engine/models/parsers/parser.py +2 -2
- data_designer/engine/models/recipes/response_recipes.py +1 -1
- data_designer/engine/processing/ginja/environment.py +1 -1
- data_designer/engine/processing/gsonschema/validators.py +1 -1
- data_designer/engine/processing/processors/drop_columns.py +1 -1
- data_designer/engine/processing/processors/registry.py +3 -0
- data_designer/engine/processing/processors/schema_transform.py +53 -0
- data_designer/engine/resources/managed_dataset_repository.py +4 -4
- data_designer/engine/resources/managed_storage.py +1 -1
- data_designer/engine/sampling_gen/constraints.py +1 -1
- data_designer/engine/sampling_gen/data_sources/base.py +1 -1
- data_designer/engine/sampling_gen/entities/email_address_utils.py +1 -1
- data_designer/engine/sampling_gen/entities/national_id_utils.py +1 -1
- data_designer/engine/sampling_gen/entities/person.py +1 -1
- data_designer/engine/sampling_gen/entities/phone_number.py +1 -1
- data_designer/engine/sampling_gen/people_gen.py +3 -3
- data_designer/engine/secret_resolver.py +1 -1
- data_designer/engine/validators/python.py +2 -2
- data_designer/essentials/__init__.py +20 -128
- data_designer/interface/data_designer.py +23 -19
- data_designer/interface/results.py +36 -0
- data_designer/logging.py +2 -2
- data_designer/plugin_manager.py +14 -26
- data_designer/plugins/registry.py +1 -1
- {data_designer-0.1.3.dist-info → data_designer-0.1.5.dist-info}/METADATA +9 -9
- {data_designer-0.1.3.dist-info → data_designer-0.1.5.dist-info}/RECORD +72 -70
- {data_designer-0.1.3.dist-info → data_designer-0.1.5.dist-info}/WHEEL +0 -0
- {data_designer-0.1.3.dist-info → data_designer-0.1.5.dist-info}/entry_points.txt +0 -0
- {data_designer-0.1.3.dist-info → data_designer-0.1.5.dist-info}/licenses/LICENSE +0 -0
data_designer/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.3'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1, 3)
|
|
31
|
+
__version__ = version = '0.1.5'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 5)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -10,16 +10,16 @@ from rich.panel import Panel
|
|
|
10
10
|
from rich.table import Column, Table
|
|
11
11
|
from typing_extensions import TypeAlias
|
|
12
12
|
|
|
13
|
-
from ..base import ConfigBase
|
|
14
|
-
from ..utils.visualization import ColorPalette
|
|
15
|
-
from .column_statistics import (
|
|
13
|
+
from data_designer.config.analysis.column_statistics import (
|
|
16
14
|
CategoricalDistribution,
|
|
17
15
|
CategoricalHistogramData,
|
|
18
16
|
ColumnDistributionType,
|
|
19
17
|
MissingValue,
|
|
20
18
|
NumericalDistribution,
|
|
21
19
|
)
|
|
22
|
-
from .utils.reporting import TITLE_STYLE, create_judge_score_summary_table
|
|
20
|
+
from data_designer.config.analysis.utils.reporting import TITLE_STYLE, create_judge_score_summary_table
|
|
21
|
+
from data_designer.config.base import ConfigBase
|
|
22
|
+
from data_designer.config.utils.visualization import ColorPalette
|
|
23
23
|
|
|
24
24
|
|
|
25
25
|
class ColumnProfilerType(str, Enum):
|
|
@@ -11,11 +11,11 @@ from pandas import Series
|
|
|
11
11
|
from pydantic import BaseModel, ConfigDict, create_model, field_validator, model_validator
|
|
12
12
|
from typing_extensions import Self, TypeAlias
|
|
13
13
|
|
|
14
|
-
from
|
|
15
|
-
from
|
|
16
|
-
from
|
|
17
|
-
from
|
|
18
|
-
from
|
|
14
|
+
from data_designer.config.column_types import DataDesignerColumnType
|
|
15
|
+
from data_designer.config.sampler_params import SamplerType
|
|
16
|
+
from data_designer.config.utils.constants import EPSILON
|
|
17
|
+
from data_designer.config.utils.numerical_helpers import is_float, is_int, prepare_number_for_reporting
|
|
18
|
+
from data_designer.plugin_manager import PluginManager
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class MissingValue(str, Enum):
|
|
@@ -7,12 +7,12 @@ from typing import Annotated, Optional, Union
|
|
|
7
7
|
|
|
8
8
|
from pydantic import BaseModel, Field, field_validator
|
|
9
9
|
|
|
10
|
-
from
|
|
11
|
-
from
|
|
12
|
-
from
|
|
13
|
-
from .
|
|
14
|
-
from .
|
|
15
|
-
from .utils.
|
|
10
|
+
from data_designer.config.analysis.column_profilers import ColumnProfilerResultsT
|
|
11
|
+
from data_designer.config.analysis.column_statistics import ColumnStatisticsT
|
|
12
|
+
from data_designer.config.analysis.utils.reporting import ReportSection, generate_analysis_report
|
|
13
|
+
from data_designer.config.column_types import DataDesignerColumnType, get_column_display_order
|
|
14
|
+
from data_designer.config.utils.constants import EPSILON
|
|
15
|
+
from data_designer.config.utils.numerical_helpers import prepare_number_for_reporting
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
class DatasetProfilerResults(BaseModel):
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
-
from
|
|
4
|
+
from data_designer.errors import DataDesignerError
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class AnalysisReportError(DataDesignerError):
|
|
@@ -14,18 +14,18 @@ from rich.rule import Rule
|
|
|
14
14
|
from rich.table import Column, Table
|
|
15
15
|
from rich.text import Text
|
|
16
16
|
|
|
17
|
-
from
|
|
18
|
-
from
|
|
19
|
-
from
|
|
17
|
+
from data_designer.config.analysis.column_statistics import CategoricalHistogramData
|
|
18
|
+
from data_designer.config.analysis.utils.errors import AnalysisReportError
|
|
19
|
+
from data_designer.config.column_types import COLUMN_TYPE_EMOJI_MAP, DataDesignerColumnType, get_column_display_order
|
|
20
|
+
from data_designer.config.utils.visualization import (
|
|
20
21
|
ColorPalette,
|
|
21
22
|
convert_to_row_element,
|
|
22
23
|
create_rich_histogram_table,
|
|
23
24
|
pad_console_element,
|
|
24
25
|
)
|
|
25
|
-
from .errors import AnalysisReportError
|
|
26
26
|
|
|
27
27
|
if TYPE_CHECKING:
|
|
28
|
-
from
|
|
28
|
+
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
|
|
29
29
|
|
|
30
30
|
HEADER_STYLE = "dim"
|
|
31
31
|
RULE_STYLE = f"bold {ColorPalette.NVIDIA_GREEN.value}"
|
data_designer/config/base.py
CHANGED
|
@@ -6,10 +6,10 @@ from __future__ import annotations
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Any, Optional, Union
|
|
8
8
|
|
|
9
|
-
from pydantic import BaseModel, ConfigDict
|
|
10
9
|
import yaml
|
|
10
|
+
from pydantic import BaseModel, ConfigDict
|
|
11
11
|
|
|
12
|
-
from .utils.io_helpers import serialize_data
|
|
12
|
+
from data_designer.config.utils.io_helpers import serialize_data
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
class ConfigBase(BaseModel):
|
|
@@ -7,14 +7,14 @@ from typing import Annotated, Literal, Optional, Type, Union
|
|
|
7
7
|
from pydantic import BaseModel, Discriminator, Field, model_validator
|
|
8
8
|
from typing_extensions import Self
|
|
9
9
|
|
|
10
|
-
from .base import ConfigBase
|
|
11
|
-
from .errors import InvalidConfigError
|
|
12
|
-
from .models import ImageContext
|
|
13
|
-
from .sampler_params import SamplerParamsT, SamplerType
|
|
14
|
-
from .utils.code_lang import CodeLang
|
|
15
|
-
from .utils.constants import REASONING_TRACE_COLUMN_POSTFIX
|
|
16
|
-
from .utils.misc import assert_valid_jinja2_template, get_prompt_template_keywords
|
|
17
|
-
from .validator_params import ValidatorParamsT, ValidatorType
|
|
10
|
+
from data_designer.config.base import ConfigBase
|
|
11
|
+
from data_designer.config.errors import InvalidConfigError
|
|
12
|
+
from data_designer.config.models import ImageContext
|
|
13
|
+
from data_designer.config.sampler_params import SamplerParamsT, SamplerType
|
|
14
|
+
from data_designer.config.utils.code_lang import CodeLang
|
|
15
|
+
from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX
|
|
16
|
+
from data_designer.config.utils.misc import assert_valid_jinja2_template, get_prompt_template_keywords
|
|
17
|
+
from data_designer.config.validator_params import ValidatorParamsT, ValidatorType
|
|
18
18
|
|
|
19
19
|
|
|
20
20
|
class SingleColumnConfig(ConfigBase, ABC):
|
|
@@ -5,8 +5,7 @@ from typing import Union
|
|
|
5
5
|
|
|
6
6
|
from typing_extensions import TypeAlias
|
|
7
7
|
|
|
8
|
-
from
|
|
9
|
-
from .column_configs import (
|
|
8
|
+
from data_designer.config.column_configs import (
|
|
10
9
|
ExpressionColumnConfig,
|
|
11
10
|
LLMCodeColumnConfig,
|
|
12
11
|
LLMJudgeColumnConfig,
|
|
@@ -16,9 +15,14 @@ from .column_configs import (
|
|
|
16
15
|
SeedDatasetColumnConfig,
|
|
17
16
|
ValidationColumnConfig,
|
|
18
17
|
)
|
|
19
|
-
from .errors import InvalidColumnTypeError, InvalidConfigError
|
|
20
|
-
from .sampler_params import SamplerType
|
|
21
|
-
from .utils.type_helpers import
|
|
18
|
+
from data_designer.config.errors import InvalidColumnTypeError, InvalidConfigError
|
|
19
|
+
from data_designer.config.sampler_params import SamplerType
|
|
20
|
+
from data_designer.config.utils.type_helpers import (
|
|
21
|
+
SAMPLER_PARAMS,
|
|
22
|
+
create_str_enum_from_discriminated_type_union,
|
|
23
|
+
resolve_string_enum,
|
|
24
|
+
)
|
|
25
|
+
from data_designer.plugin_manager import PluginManager
|
|
22
26
|
|
|
23
27
|
plugin_manager = PluginManager()
|
|
24
28
|
|
|
@@ -13,30 +13,30 @@ from pygments.formatters import HtmlFormatter
|
|
|
13
13
|
from pygments.lexers import PythonLexer
|
|
14
14
|
from typing_extensions import Self
|
|
15
15
|
|
|
16
|
-
from .analysis.column_profilers import ColumnProfilerConfigT
|
|
17
|
-
from .base import ExportableConfigBase
|
|
18
|
-
from .column_configs import SeedDatasetColumnConfig
|
|
19
|
-
from .column_types import (
|
|
16
|
+
from data_designer.config.analysis.column_profilers import ColumnProfilerConfigT
|
|
17
|
+
from data_designer.config.base import ExportableConfigBase
|
|
18
|
+
from data_designer.config.column_configs import SeedDatasetColumnConfig
|
|
19
|
+
from data_designer.config.column_types import (
|
|
20
20
|
ColumnConfigT,
|
|
21
21
|
DataDesignerColumnType,
|
|
22
22
|
column_type_is_llm_generated,
|
|
23
23
|
get_column_config_from_kwargs,
|
|
24
24
|
get_column_display_order,
|
|
25
25
|
)
|
|
26
|
-
from .data_designer_config import DataDesignerConfig
|
|
27
|
-
from .dataset_builders import BuildStage
|
|
28
|
-
from .datastore import DatastoreSettings, fetch_seed_dataset_column_names
|
|
29
|
-
from .default_model_settings import get_default_model_configs
|
|
30
|
-
from .errors import BuilderConfigurationError, InvalidColumnTypeError, InvalidConfigError
|
|
31
|
-
from .models import ModelConfig, load_model_configs
|
|
32
|
-
from .processors import ProcessorConfig, ProcessorType, get_processor_config_from_kwargs
|
|
33
|
-
from .sampler_constraints import (
|
|
26
|
+
from data_designer.config.data_designer_config import DataDesignerConfig
|
|
27
|
+
from data_designer.config.dataset_builders import BuildStage
|
|
28
|
+
from data_designer.config.datastore import DatastoreSettings, fetch_seed_dataset_column_names
|
|
29
|
+
from data_designer.config.default_model_settings import get_default_model_configs
|
|
30
|
+
from data_designer.config.errors import BuilderConfigurationError, InvalidColumnTypeError, InvalidConfigError
|
|
31
|
+
from data_designer.config.models import ModelConfig, load_model_configs
|
|
32
|
+
from data_designer.config.processors import ProcessorConfig, ProcessorType, get_processor_config_from_kwargs
|
|
33
|
+
from data_designer.config.sampler_constraints import (
|
|
34
34
|
ColumnConstraintT,
|
|
35
35
|
ColumnInequalityConstraint,
|
|
36
36
|
ConstraintType,
|
|
37
37
|
ScalarInequalityConstraint,
|
|
38
38
|
)
|
|
39
|
-
from .seed import (
|
|
39
|
+
from data_designer.config.seed import (
|
|
40
40
|
DatastoreSeedDatasetReference,
|
|
41
41
|
IndexRange,
|
|
42
42
|
LocalSeedDatasetReference,
|
|
@@ -45,19 +45,15 @@ from .seed import (
|
|
|
45
45
|
SeedConfig,
|
|
46
46
|
SeedDatasetReference,
|
|
47
47
|
)
|
|
48
|
-
from .utils.constants import DEFAULT_REPR_HTML_STYLE, REPR_HTML_TEMPLATE
|
|
49
|
-
from .utils.info import ConfigBuilderInfo
|
|
50
|
-
from .utils.io_helpers import serialize_data, smart_load_yaml
|
|
51
|
-
from .utils.misc import can_run_data_designer_locally, json_indent_list_of_strings, kebab_to_snake
|
|
52
|
-
from .utils.type_helpers import resolve_string_enum
|
|
53
|
-
from .utils.validation import ViolationLevel, rich_print_violations, validate_data_designer_config
|
|
48
|
+
from data_designer.config.utils.constants import DEFAULT_REPR_HTML_STYLE, REPR_HTML_TEMPLATE
|
|
49
|
+
from data_designer.config.utils.info import ConfigBuilderInfo
|
|
50
|
+
from data_designer.config.utils.io_helpers import serialize_data, smart_load_yaml
|
|
51
|
+
from data_designer.config.utils.misc import can_run_data_designer_locally, json_indent_list_of_strings, kebab_to_snake
|
|
52
|
+
from data_designer.config.utils.type_helpers import resolve_string_enum
|
|
53
|
+
from data_designer.config.utils.validation import ViolationLevel, rich_print_violations, validate_data_designer_config
|
|
54
54
|
|
|
55
55
|
logger = logging.getLogger(__name__)
|
|
56
56
|
|
|
57
|
-
# Resolve default model settings on import to ensure they are available when the library is used.
|
|
58
|
-
if can_run_data_designer_locally():
|
|
59
|
-
resolve_seed_default_model_settings()
|
|
60
|
-
|
|
61
57
|
|
|
62
58
|
class BuilderConfig(ExportableConfigBase):
|
|
63
59
|
"""Configuration container for Data Designer builder.
|
|
@@ -143,11 +139,8 @@ class DataDesignerConfigBuilder:
|
|
|
143
139
|
- A list of ModelConfig objects
|
|
144
140
|
- A string or Path to a model configuration file
|
|
145
141
|
"""
|
|
146
|
-
if not can_run_data_designer_locally() and (model_configs is None or len(model_configs) == 0):
|
|
147
|
-
raise BuilderConfigurationError("🛑 Model configurations are required!")
|
|
148
|
-
|
|
149
142
|
self._column_configs = {}
|
|
150
|
-
self._model_configs =
|
|
143
|
+
self._model_configs = _load_model_configs(model_configs)
|
|
151
144
|
self._processor_configs: list[ProcessorConfig] = []
|
|
152
145
|
self._seed_config: Optional[SeedConfig] = None
|
|
153
146
|
self._constraints: list[ColumnConstraintT] = []
|
|
@@ -658,3 +651,15 @@ class DataDesignerConfigBuilder:
|
|
|
658
651
|
highlighted_html = highlight(repr_string, PythonLexer(), formatter)
|
|
659
652
|
css = formatter.get_style_defs(".code")
|
|
660
653
|
return REPR_HTML_TEMPLATE.format(css=css, highlighted_html=highlighted_html)
|
|
654
|
+
|
|
655
|
+
|
|
656
|
+
def _load_model_configs(model_configs: Optional[Union[list[ModelConfig], str, Path]] = None) -> list[ModelConfig]:
|
|
657
|
+
"""Resolves the provided model_configs, which may be a string or Path to a model configuration file.
|
|
658
|
+
If None or empty, returns default model configurations if possible, otherwise raises an error.
|
|
659
|
+
"""
|
|
660
|
+
if model_configs:
|
|
661
|
+
return load_model_configs(model_configs)
|
|
662
|
+
elif can_run_data_designer_locally():
|
|
663
|
+
return get_default_model_configs()
|
|
664
|
+
else:
|
|
665
|
+
raise BuilderConfigurationError("🛑 Model configurations are required!")
|
|
@@ -7,13 +7,13 @@ from typing import Annotated, Optional
|
|
|
7
7
|
|
|
8
8
|
from pydantic import Field
|
|
9
9
|
|
|
10
|
-
from .analysis.column_profilers import ColumnProfilerConfigT
|
|
11
|
-
from .base import ExportableConfigBase
|
|
12
|
-
from .column_types import ColumnConfigT
|
|
13
|
-
from .models import ModelConfig
|
|
14
|
-
from .processors import ProcessorConfig
|
|
15
|
-
from .sampler_constraints import ColumnConstraintT
|
|
16
|
-
from .seed import SeedConfig
|
|
10
|
+
from data_designer.config.analysis.column_profilers import ColumnProfilerConfigT
|
|
11
|
+
from data_designer.config.base import ExportableConfigBase
|
|
12
|
+
from data_designer.config.column_types import ColumnConfigT
|
|
13
|
+
from data_designer.config.models import ModelConfig
|
|
14
|
+
from data_designer.config.processors import ProcessorConfig
|
|
15
|
+
from data_designer.config.sampler_constraints import ColumnConstraintT
|
|
16
|
+
from data_designer.config.seed import SeedConfig
|
|
17
17
|
|
|
18
18
|
|
|
19
19
|
class DataDesignerConfig(ExportableConfigBase):
|
|
@@ -7,16 +7,16 @@ import logging
|
|
|
7
7
|
from pathlib import Path
|
|
8
8
|
from typing import TYPE_CHECKING, Optional, Union
|
|
9
9
|
|
|
10
|
-
from huggingface_hub import HfApi, HfFileSystem
|
|
11
10
|
import pandas as pd
|
|
12
11
|
import pyarrow.parquet as pq
|
|
12
|
+
from huggingface_hub import HfApi, HfFileSystem
|
|
13
13
|
from pydantic import BaseModel, Field
|
|
14
14
|
|
|
15
|
-
from .errors import InvalidConfigError, InvalidFileFormatError, InvalidFilePathError
|
|
16
|
-
from .utils.io_helpers import VALID_DATASET_FILE_EXTENSIONS, validate_path_contains_files_of_type
|
|
15
|
+
from data_designer.config.errors import InvalidConfigError, InvalidFileFormatError, InvalidFilePathError
|
|
16
|
+
from data_designer.config.utils.io_helpers import VALID_DATASET_FILE_EXTENSIONS, validate_path_contains_files_of_type
|
|
17
17
|
|
|
18
18
|
if TYPE_CHECKING:
|
|
19
|
-
from .seed import SeedDatasetReference
|
|
19
|
+
from data_designer.config.seed import SeedDatasetReference
|
|
20
20
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
|
@@ -2,21 +2,21 @@
|
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
|
|
5
|
-
from functools import lru_cache
|
|
6
5
|
import logging
|
|
7
6
|
import os
|
|
7
|
+
from functools import lru_cache
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
from typing import Any, Literal, Optional
|
|
10
10
|
|
|
11
|
-
from .models import InferenceParameters, ModelConfig, ModelProvider
|
|
12
|
-
from .utils.constants import (
|
|
11
|
+
from data_designer.config.models import InferenceParameters, ModelConfig, ModelProvider
|
|
12
|
+
from data_designer.config.utils.constants import (
|
|
13
13
|
MANAGED_ASSETS_PATH,
|
|
14
14
|
MODEL_CONFIGS_FILE_PATH,
|
|
15
15
|
MODEL_PROVIDERS_FILE_PATH,
|
|
16
16
|
PREDEFINED_PROVIDERS,
|
|
17
17
|
PREDEFINED_PROVIDERS_MODEL_MAP,
|
|
18
18
|
)
|
|
19
|
-
from .utils.io_helpers import load_config_file, save_config_file
|
|
19
|
+
from data_designer.config.utils.io_helpers import load_config_file, save_config_file
|
|
20
20
|
|
|
21
21
|
logger = logging.getLogger(__name__)
|
|
22
22
|
|
data_designer/config/errors.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
-
from
|
|
4
|
+
from data_designer.errors import DataDesignerError
|
|
5
5
|
|
|
6
6
|
|
|
7
7
|
class BuilderConfigurationError(DataDesignerError): ...
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from data_designer.config.analysis.column_profilers import JudgeScoreProfilerConfig
|
|
5
|
+
from data_designer.config.column_configs import (
|
|
6
|
+
ExpressionColumnConfig,
|
|
7
|
+
LLMCodeColumnConfig,
|
|
8
|
+
LLMJudgeColumnConfig,
|
|
9
|
+
LLMStructuredColumnConfig,
|
|
10
|
+
LLMTextColumnConfig,
|
|
11
|
+
SamplerColumnConfig,
|
|
12
|
+
Score,
|
|
13
|
+
SeedDatasetColumnConfig,
|
|
14
|
+
ValidationColumnConfig,
|
|
15
|
+
)
|
|
16
|
+
from data_designer.config.column_types import DataDesignerColumnType
|
|
17
|
+
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
18
|
+
from data_designer.config.data_designer_config import DataDesignerConfig
|
|
19
|
+
from data_designer.config.dataset_builders import BuildStage
|
|
20
|
+
from data_designer.config.datastore import DatastoreSettings
|
|
21
|
+
from data_designer.config.models import (
|
|
22
|
+
ImageContext,
|
|
23
|
+
ImageFormat,
|
|
24
|
+
InferenceParameters,
|
|
25
|
+
ManualDistribution,
|
|
26
|
+
ManualDistributionParams,
|
|
27
|
+
Modality,
|
|
28
|
+
ModalityContext,
|
|
29
|
+
ModalityDataType,
|
|
30
|
+
ModelConfig,
|
|
31
|
+
ModelProvider,
|
|
32
|
+
UniformDistribution,
|
|
33
|
+
UniformDistributionParams,
|
|
34
|
+
)
|
|
35
|
+
from data_designer.config.processors import (
|
|
36
|
+
DropColumnsProcessorConfig,
|
|
37
|
+
ProcessorType,
|
|
38
|
+
SchemaTransformProcessorConfig,
|
|
39
|
+
)
|
|
40
|
+
from data_designer.config.sampler_constraints import ColumnInequalityConstraint, ScalarInequalityConstraint
|
|
41
|
+
from data_designer.config.sampler_params import (
|
|
42
|
+
BernoulliMixtureSamplerParams,
|
|
43
|
+
BernoulliSamplerParams,
|
|
44
|
+
BinomialSamplerParams,
|
|
45
|
+
CategorySamplerParams,
|
|
46
|
+
DatetimeSamplerParams,
|
|
47
|
+
GaussianSamplerParams,
|
|
48
|
+
PersonFromFakerSamplerParams,
|
|
49
|
+
PersonSamplerParams,
|
|
50
|
+
PoissonSamplerParams,
|
|
51
|
+
SamplerType,
|
|
52
|
+
ScipySamplerParams,
|
|
53
|
+
SubcategorySamplerParams,
|
|
54
|
+
TimeDeltaSamplerParams,
|
|
55
|
+
UniformSamplerParams,
|
|
56
|
+
UUIDSamplerParams,
|
|
57
|
+
)
|
|
58
|
+
from data_designer.config.seed import (
|
|
59
|
+
DatastoreSeedDatasetReference,
|
|
60
|
+
IndexRange,
|
|
61
|
+
PartitionBlock,
|
|
62
|
+
SamplingStrategy,
|
|
63
|
+
SeedConfig,
|
|
64
|
+
)
|
|
65
|
+
from data_designer.config.utils.code_lang import CodeLang
|
|
66
|
+
from data_designer.config.utils.info import InfoType
|
|
67
|
+
from data_designer.config.validator_params import (
|
|
68
|
+
CodeValidatorParams,
|
|
69
|
+
RemoteValidatorParams,
|
|
70
|
+
ValidatorType,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def get_config_exports() -> list[str]:
|
|
75
|
+
return [
|
|
76
|
+
SchemaTransformProcessorConfig.__name__,
|
|
77
|
+
BernoulliMixtureSamplerParams.__name__,
|
|
78
|
+
BernoulliSamplerParams.__name__,
|
|
79
|
+
BinomialSamplerParams.__name__,
|
|
80
|
+
CategorySamplerParams.__name__,
|
|
81
|
+
CodeLang.__name__,
|
|
82
|
+
CodeValidatorParams.__name__,
|
|
83
|
+
ColumnInequalityConstraint.__name__,
|
|
84
|
+
DataDesignerColumnType.__name__,
|
|
85
|
+
DataDesignerConfig.__name__,
|
|
86
|
+
DataDesignerConfigBuilder.__name__,
|
|
87
|
+
BuildStage.__name__,
|
|
88
|
+
DatastoreSeedDatasetReference.__name__,
|
|
89
|
+
DatastoreSettings.__name__,
|
|
90
|
+
DatetimeSamplerParams.__name__,
|
|
91
|
+
DropColumnsProcessorConfig.__name__,
|
|
92
|
+
ExpressionColumnConfig.__name__,
|
|
93
|
+
GaussianSamplerParams.__name__,
|
|
94
|
+
IndexRange.__name__,
|
|
95
|
+
InfoType.__name__,
|
|
96
|
+
ImageContext.__name__,
|
|
97
|
+
ImageFormat.__name__,
|
|
98
|
+
InferenceParameters.__name__,
|
|
99
|
+
JudgeScoreProfilerConfig.__name__,
|
|
100
|
+
LLMCodeColumnConfig.__name__,
|
|
101
|
+
LLMJudgeColumnConfig.__name__,
|
|
102
|
+
LLMStructuredColumnConfig.__name__,
|
|
103
|
+
LLMTextColumnConfig.__name__,
|
|
104
|
+
ManualDistribution.__name__,
|
|
105
|
+
ManualDistributionParams.__name__,
|
|
106
|
+
Modality.__name__,
|
|
107
|
+
ModalityContext.__name__,
|
|
108
|
+
ModalityDataType.__name__,
|
|
109
|
+
ModelConfig.__name__,
|
|
110
|
+
ModelProvider.__name__,
|
|
111
|
+
PartitionBlock.__name__,
|
|
112
|
+
PersonSamplerParams.__name__,
|
|
113
|
+
PersonFromFakerSamplerParams.__name__,
|
|
114
|
+
PoissonSamplerParams.__name__,
|
|
115
|
+
ProcessorType.__name__,
|
|
116
|
+
RemoteValidatorParams.__name__,
|
|
117
|
+
SamplerColumnConfig.__name__,
|
|
118
|
+
SamplerType.__name__,
|
|
119
|
+
SamplingStrategy.__name__,
|
|
120
|
+
ScalarInequalityConstraint.__name__,
|
|
121
|
+
ScipySamplerParams.__name__,
|
|
122
|
+
Score.__name__,
|
|
123
|
+
SeedConfig.__name__,
|
|
124
|
+
SeedDatasetColumnConfig.__name__,
|
|
125
|
+
SubcategorySamplerParams.__name__,
|
|
126
|
+
TimeDeltaSamplerParams.__name__,
|
|
127
|
+
UniformDistribution.__name__,
|
|
128
|
+
UniformDistributionParams.__name__,
|
|
129
|
+
UniformSamplerParams.__name__,
|
|
130
|
+
UUIDSamplerParams.__name__,
|
|
131
|
+
ValidationColumnConfig.__name__,
|
|
132
|
+
ValidatorType.__name__,
|
|
133
|
+
]
|
|
@@ -8,14 +8,14 @@ from typing import TYPE_CHECKING, Generic, Protocol, TypeVar
|
|
|
8
8
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
|
|
11
|
-
from .models import ModelConfig, ModelProvider
|
|
12
|
-
from .utils.constants import DEFAULT_NUM_RECORDS
|
|
13
|
-
from .utils.info import InterfaceInfo
|
|
11
|
+
from data_designer.config.models import ModelConfig, ModelProvider
|
|
12
|
+
from data_designer.config.utils.constants import DEFAULT_NUM_RECORDS
|
|
13
|
+
from data_designer.config.utils.info import InterfaceInfo
|
|
14
14
|
|
|
15
15
|
if TYPE_CHECKING:
|
|
16
|
-
from .analysis.dataset_profiler import DatasetProfilerResults
|
|
17
|
-
from .config_builder import DataDesignerConfigBuilder
|
|
18
|
-
from .preview_results import PreviewResults
|
|
16
|
+
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
|
|
17
|
+
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
18
|
+
from data_designer.config.preview_results import PreviewResults
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
class ResultsProtocol(Protocol):
|