data-designer 0.3.3__py3-none-any.whl → 0.3.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/__init__.py +2 -0
- data_designer/_version.py +2 -2
- data_designer/cli/__init__.py +2 -0
- data_designer/cli/commands/download.py +2 -0
- data_designer/cli/commands/list.py +2 -0
- data_designer/cli/commands/models.py +2 -0
- data_designer/cli/commands/providers.py +2 -0
- data_designer/cli/commands/reset.py +2 -0
- data_designer/cli/controllers/__init__.py +2 -0
- data_designer/cli/controllers/download_controller.py +2 -0
- data_designer/cli/controllers/model_controller.py +6 -1
- data_designer/cli/controllers/provider_controller.py +6 -1
- data_designer/cli/forms/__init__.py +2 -0
- data_designer/cli/forms/builder.py +2 -0
- data_designer/cli/forms/field.py +2 -0
- data_designer/cli/forms/form.py +2 -0
- data_designer/cli/forms/model_builder.py +2 -0
- data_designer/cli/forms/provider_builder.py +2 -0
- data_designer/cli/main.py +2 -0
- data_designer/cli/repositories/__init__.py +2 -0
- data_designer/cli/repositories/base.py +2 -0
- data_designer/cli/repositories/model_repository.py +2 -0
- data_designer/cli/repositories/persona_repository.py +2 -0
- data_designer/cli/repositories/provider_repository.py +2 -0
- data_designer/cli/services/__init__.py +2 -0
- data_designer/cli/services/download_service.py +2 -0
- data_designer/cli/services/model_service.py +2 -0
- data_designer/cli/services/provider_service.py +2 -0
- data_designer/cli/ui.py +2 -0
- data_designer/cli/utils.py +2 -0
- data_designer/config/analysis/column_profilers.py +2 -0
- data_designer/config/analysis/column_statistics.py +8 -5
- data_designer/config/analysis/dataset_profiler.py +9 -3
- data_designer/config/analysis/utils/errors.py +2 -0
- data_designer/config/analysis/utils/reporting.py +7 -3
- data_designer/config/base.py +1 -0
- data_designer/config/column_configs.py +77 -7
- data_designer/config/column_types.py +33 -36
- data_designer/config/dataset_builders.py +2 -0
- data_designer/config/dataset_metadata.py +18 -0
- data_designer/config/default_model_settings.py +1 -0
- data_designer/config/errors.py +2 -0
- data_designer/config/exports.py +2 -0
- data_designer/config/interface.py +3 -2
- data_designer/config/models.py +7 -2
- data_designer/config/preview_results.py +9 -1
- data_designer/config/processors.py +2 -0
- data_designer/config/run_config.py +19 -5
- data_designer/config/sampler_constraints.py +2 -0
- data_designer/config/sampler_params.py +7 -2
- data_designer/config/seed.py +2 -0
- data_designer/config/seed_source.py +9 -3
- data_designer/config/seed_source_types.py +2 -0
- data_designer/config/utils/constants.py +2 -0
- data_designer/config/utils/errors.py +2 -0
- data_designer/config/utils/info.py +2 -0
- data_designer/config/utils/io_helpers.py +8 -3
- data_designer/config/utils/misc.py +2 -2
- data_designer/config/utils/numerical_helpers.py +2 -0
- data_designer/config/utils/type_helpers.py +2 -0
- data_designer/config/utils/visualization.py +19 -11
- data_designer/config/validator_params.py +2 -0
- data_designer/engine/analysis/column_profilers/base.py +9 -8
- data_designer/engine/analysis/column_profilers/judge_score_profiler.py +15 -19
- data_designer/engine/analysis/column_profilers/registry.py +2 -0
- data_designer/engine/analysis/column_statistics.py +5 -2
- data_designer/engine/analysis/dataset_profiler.py +12 -9
- data_designer/engine/analysis/errors.py +2 -0
- data_designer/engine/analysis/utils/column_statistics_calculations.py +7 -4
- data_designer/engine/analysis/utils/judge_score_processing.py +7 -3
- data_designer/engine/column_generators/generators/base.py +26 -14
- data_designer/engine/column_generators/generators/embedding.py +4 -11
- data_designer/engine/column_generators/generators/expression.py +7 -16
- data_designer/engine/column_generators/generators/llm_completion.py +13 -47
- data_designer/engine/column_generators/generators/samplers.py +8 -14
- data_designer/engine/column_generators/generators/seed_dataset.py +9 -15
- data_designer/engine/column_generators/generators/validation.py +9 -20
- data_designer/engine/column_generators/registry.py +2 -0
- data_designer/engine/column_generators/utils/errors.py +2 -0
- data_designer/engine/column_generators/utils/generator_classification.py +2 -0
- data_designer/engine/column_generators/utils/judge_score_factory.py +2 -0
- data_designer/engine/column_generators/utils/prompt_renderer.py +4 -2
- data_designer/engine/compiler.py +3 -6
- data_designer/engine/configurable_task.py +12 -13
- data_designer/engine/dataset_builders/artifact_storage.py +87 -8
- data_designer/engine/dataset_builders/column_wise_builder.py +34 -35
- data_designer/engine/dataset_builders/errors.py +2 -0
- data_designer/engine/dataset_builders/multi_column_configs.py +2 -0
- data_designer/engine/dataset_builders/utils/concurrency.py +13 -4
- data_designer/engine/dataset_builders/utils/config_compiler.py +2 -0
- data_designer/engine/dataset_builders/utils/dag.py +7 -2
- data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +35 -25
- data_designer/engine/dataset_builders/utils/errors.py +2 -0
- data_designer/engine/errors.py +2 -0
- data_designer/engine/model_provider.py +2 -0
- data_designer/engine/models/errors.py +23 -31
- data_designer/engine/models/facade.py +12 -9
- data_designer/engine/models/factory.py +42 -0
- data_designer/engine/models/litellm_overrides.py +16 -11
- data_designer/engine/models/parsers/errors.py +2 -0
- data_designer/engine/models/parsers/parser.py +2 -2
- data_designer/engine/models/parsers/postprocessors.py +1 -0
- data_designer/engine/models/parsers/tag_parsers.py +2 -0
- data_designer/engine/models/parsers/types.py +2 -0
- data_designer/engine/models/recipes/base.py +2 -0
- data_designer/engine/models/recipes/response_recipes.py +2 -0
- data_designer/engine/models/registry.py +11 -18
- data_designer/engine/models/telemetry.py +6 -2
- data_designer/engine/processing/ginja/ast.py +2 -0
- data_designer/engine/processing/ginja/environment.py +2 -0
- data_designer/engine/processing/ginja/exceptions.py +2 -0
- data_designer/engine/processing/ginja/record.py +2 -0
- data_designer/engine/processing/gsonschema/exceptions.py +9 -2
- data_designer/engine/processing/gsonschema/schema_transformers.py +2 -0
- data_designer/engine/processing/gsonschema/types.py +2 -0
- data_designer/engine/processing/gsonschema/validators.py +10 -6
- data_designer/engine/processing/processors/base.py +1 -5
- data_designer/engine/processing/processors/drop_columns.py +7 -10
- data_designer/engine/processing/processors/registry.py +2 -0
- data_designer/engine/processing/processors/schema_transform.py +7 -10
- data_designer/engine/processing/utils.py +7 -3
- data_designer/engine/registry/base.py +2 -0
- data_designer/engine/registry/data_designer_registry.py +2 -0
- data_designer/engine/registry/errors.py +2 -0
- data_designer/engine/resources/managed_dataset_generator.py +6 -2
- data_designer/engine/resources/managed_dataset_repository.py +8 -5
- data_designer/engine/resources/managed_storage.py +2 -0
- data_designer/engine/resources/resource_provider.py +20 -1
- data_designer/engine/resources/seed_reader.py +7 -2
- data_designer/engine/sampling_gen/column.py +2 -0
- data_designer/engine/sampling_gen/constraints.py +8 -2
- data_designer/engine/sampling_gen/data_sources/base.py +10 -7
- data_designer/engine/sampling_gen/data_sources/errors.py +2 -0
- data_designer/engine/sampling_gen/data_sources/sources.py +27 -22
- data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +2 -2
- data_designer/engine/sampling_gen/entities/email_address_utils.py +2 -0
- data_designer/engine/sampling_gen/entities/errors.py +2 -0
- data_designer/engine/sampling_gen/entities/national_id_utils.py +2 -0
- data_designer/engine/sampling_gen/entities/person.py +2 -0
- data_designer/engine/sampling_gen/entities/phone_number.py +8 -1
- data_designer/engine/sampling_gen/errors.py +2 -0
- data_designer/engine/sampling_gen/generator.py +5 -4
- data_designer/engine/sampling_gen/jinja_utils.py +7 -3
- data_designer/engine/sampling_gen/people_gen.py +7 -7
- data_designer/engine/sampling_gen/person_constants.py +2 -0
- data_designer/engine/sampling_gen/schema.py +5 -1
- data_designer/engine/sampling_gen/schema_builder.py +2 -0
- data_designer/engine/sampling_gen/utils.py +7 -1
- data_designer/engine/secret_resolver.py +2 -0
- data_designer/engine/validation.py +2 -2
- data_designer/engine/validators/__init__.py +2 -0
- data_designer/engine/validators/base.py +2 -0
- data_designer/engine/validators/local_callable.py +7 -2
- data_designer/engine/validators/python.py +7 -1
- data_designer/engine/validators/remote.py +7 -1
- data_designer/engine/validators/sql.py +8 -3
- data_designer/errors.py +2 -0
- data_designer/essentials/__init__.py +2 -0
- data_designer/interface/data_designer.py +36 -39
- data_designer/interface/errors.py +2 -0
- data_designer/interface/results.py +9 -2
- data_designer/lazy_heavy_imports.py +54 -0
- data_designer/logging.py +2 -0
- data_designer/plugins/__init__.py +2 -0
- data_designer/plugins/errors.py +2 -0
- data_designer/plugins/plugin.py +0 -1
- data_designer/plugins/registry.py +2 -0
- data_designer/plugins/testing/__init__.py +2 -0
- data_designer/plugins/testing/stubs.py +21 -43
- data_designer/plugins/testing/utils.py +2 -0
- {data_designer-0.3.3.dist-info → data_designer-0.3.5.dist-info}/METADATA +19 -4
- data_designer-0.3.5.dist-info/RECORD +196 -0
- data_designer-0.3.3.dist-info/RECORD +0 -193
- {data_designer-0.3.3.dist-info → data_designer-0.3.5.dist-info}/WHEEL +0 -0
- {data_designer-0.3.3.dist-info → data_designer-0.3.5.dist-info}/entry_points.txt +0 -0
- {data_designer-0.3.3.dist-info → data_designer-0.3.5.dist-info}/licenses/LICENSE +0 -0
|
@@ -5,44 +5,41 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
7
|
import random
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
8
9
|
|
|
9
10
|
from data_designer.config.analysis.column_profilers import (
|
|
10
11
|
JudgeScoreProfilerConfig,
|
|
11
12
|
JudgeScoreProfilerResults,
|
|
12
|
-
JudgeScoreSample,
|
|
13
13
|
JudgeScoreSummary,
|
|
14
14
|
)
|
|
15
15
|
from data_designer.config.analysis.column_statistics import (
|
|
16
|
-
CategoricalDistribution,
|
|
17
|
-
CategoricalHistogramData,
|
|
18
16
|
ColumnDistributionType,
|
|
19
17
|
MissingValue,
|
|
20
|
-
NumericalDistribution,
|
|
21
|
-
)
|
|
22
|
-
from data_designer.config.column_types import COLUMN_TYPE_EMOJI_MAP, DataDesignerColumnType
|
|
23
|
-
from data_designer.engine.analysis.column_profilers.base import (
|
|
24
|
-
ColumnConfigWithDataFrame,
|
|
25
|
-
ColumnProfiler,
|
|
26
|
-
ColumnProfilerMetadata,
|
|
27
18
|
)
|
|
19
|
+
from data_designer.config.column_types import DataDesignerColumnType
|
|
20
|
+
from data_designer.engine.analysis.column_profilers.base import ColumnConfigWithDataFrame, ColumnProfiler
|
|
28
21
|
from data_designer.engine.analysis.utils.judge_score_processing import (
|
|
29
22
|
extract_judge_score_distributions,
|
|
30
23
|
sample_scores_and_reasoning,
|
|
31
24
|
)
|
|
32
|
-
from data_designer.engine.models.facade import ModelFacade
|
|
33
25
|
from data_designer.engine.models.recipes.response_recipes import TextResponseRecipe
|
|
34
26
|
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
from data_designer.config.analysis.column_profilers import JudgeScoreSample
|
|
29
|
+
from data_designer.config.analysis.column_statistics import (
|
|
30
|
+
CategoricalDistribution,
|
|
31
|
+
CategoricalHistogramData,
|
|
32
|
+
NumericalDistribution,
|
|
33
|
+
)
|
|
34
|
+
from data_designer.engine.models.facade import ModelFacade
|
|
35
|
+
|
|
35
36
|
logger = logging.getLogger(__name__)
|
|
36
37
|
|
|
37
38
|
|
|
38
39
|
class JudgeScoreProfiler(ColumnProfiler[JudgeScoreProfilerConfig]):
|
|
39
40
|
@staticmethod
|
|
40
|
-
def metadata() -> ColumnProfilerMetadata:
|
|
41
|
-
return ColumnProfilerMetadata(
|
|
42
|
-
name="judge_score_profiler",
|
|
43
|
-
description="Analyzes LLM-as-judge score distributions in a Data Designer dataset.",
|
|
44
|
-
applicable_column_types=[DataDesignerColumnType.LLM_JUDGE],
|
|
45
|
-
)
|
|
41
|
+
def get_applicable_column_types() -> list[DataDesignerColumnType]:
|
|
42
|
+
return [DataDesignerColumnType.LLM_JUDGE]
|
|
46
43
|
|
|
47
44
|
def get_model(self, model_alias: str) -> ModelFacade:
|
|
48
45
|
return self.resource_provider.model_registry.get_model(model_alias=model_alias)
|
|
@@ -51,8 +48,7 @@ class JudgeScoreProfiler(ColumnProfiler[JudgeScoreProfilerConfig]):
|
|
|
51
48
|
column_config, df = column_config_with_df.as_tuple()
|
|
52
49
|
|
|
53
50
|
logger.info(
|
|
54
|
-
f"{
|
|
55
|
-
f"scores for column: '{column_config.name}'"
|
|
51
|
+
f"{column_config.get_column_emoji()} Analyzing LLM-as-judge scores for column: '{column_config.name}'"
|
|
56
52
|
)
|
|
57
53
|
|
|
58
54
|
score_summaries = {}
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
from data_designer.config.analysis.column_profilers import ColumnProfilerType
|
|
5
7
|
from data_designer.config.base import ConfigBase
|
|
6
8
|
from data_designer.engine.analysis.column_profilers.base import ColumnProfiler
|
|
@@ -4,9 +4,8 @@
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
|
-
from typing import Any, TypeAlias
|
|
7
|
+
from typing import TYPE_CHECKING, Any, TypeAlias
|
|
8
8
|
|
|
9
|
-
import pandas as pd
|
|
10
9
|
from pydantic import BaseModel
|
|
11
10
|
from typing_extensions import Self
|
|
12
11
|
|
|
@@ -25,6 +24,10 @@ from data_designer.engine.analysis.utils.column_statistics_calculations import (
|
|
|
25
24
|
calculate_token_stats,
|
|
26
25
|
calculate_validation_column_info,
|
|
27
26
|
)
|
|
27
|
+
from data_designer.lazy_heavy_imports import pd
|
|
28
|
+
|
|
29
|
+
if TYPE_CHECKING:
|
|
30
|
+
import pandas as pd
|
|
28
31
|
|
|
29
32
|
logger = logging.getLogger(__name__)
|
|
30
33
|
|
|
@@ -1,22 +1,20 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
import logging
|
|
5
7
|
from collections.abc import Sequence
|
|
6
8
|
from functools import cached_property
|
|
9
|
+
from typing import TYPE_CHECKING
|
|
7
10
|
|
|
8
|
-
import pandas as pd
|
|
9
|
-
import pyarrow as pa
|
|
10
11
|
from pydantic import Field, field_validator
|
|
11
12
|
|
|
12
13
|
from data_designer.config.analysis.column_profilers import ColumnProfilerConfigT
|
|
13
14
|
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
|
|
14
15
|
from data_designer.config.base import ConfigBase
|
|
15
16
|
from data_designer.config.column_configs import SingleColumnConfig
|
|
16
|
-
from data_designer.config.column_types import (
|
|
17
|
-
COLUMN_TYPE_EMOJI_MAP,
|
|
18
|
-
ColumnConfigT,
|
|
19
|
-
)
|
|
17
|
+
from data_designer.config.column_types import ColumnConfigT
|
|
20
18
|
from data_designer.engine.analysis.column_profilers.base import ColumnConfigWithDataFrame, ColumnProfiler
|
|
21
19
|
from data_designer.engine.analysis.column_statistics import get_column_statistics_calculator
|
|
22
20
|
from data_designer.engine.analysis.errors import DatasetProfilerConfigurationError
|
|
@@ -24,6 +22,11 @@ from data_designer.engine.analysis.utils.column_statistics_calculations import h
|
|
|
24
22
|
from data_designer.engine.dataset_builders.multi_column_configs import DatasetBuilderColumnConfigT, MultiColumnConfig
|
|
25
23
|
from data_designer.engine.registry.data_designer_registry import DataDesignerRegistry
|
|
26
24
|
from data_designer.engine.resources.resource_provider import ResourceProvider
|
|
25
|
+
from data_designer.lazy_heavy_imports import pa, pd
|
|
26
|
+
|
|
27
|
+
if TYPE_CHECKING:
|
|
28
|
+
import pandas as pd
|
|
29
|
+
import pyarrow as pa
|
|
27
30
|
|
|
28
31
|
logger = logging.getLogger(__name__)
|
|
29
32
|
|
|
@@ -71,7 +74,7 @@ class DataDesignerDatasetProfiler:
|
|
|
71
74
|
|
|
72
75
|
column_statistics = []
|
|
73
76
|
for c in self.config.column_configs:
|
|
74
|
-
logger.info(f" |-- {
|
|
77
|
+
logger.info(f" |-- {c.get_column_emoji()} column: '{c.name}'")
|
|
75
78
|
column_statistics.append(
|
|
76
79
|
get_column_statistics_calculator(c.column_type)(
|
|
77
80
|
column_config_with_df=ColumnConfigWithDataFrame(column_config=c, df=dataset)
|
|
@@ -81,14 +84,14 @@ class DataDesignerDatasetProfiler:
|
|
|
81
84
|
column_profiles = []
|
|
82
85
|
for profiler_config in self.config.column_profiler_configs or []:
|
|
83
86
|
profiler = self._create_column_profiler(profiler_config)
|
|
84
|
-
applicable_column_types = profiler.metadata().applicable_column_types
|
|
87
|
+
applicable_column_types = profiler.get_applicable_column_types()
|
|
85
88
|
for c in self.config.column_configs:
|
|
86
89
|
if c.column_type in applicable_column_types:
|
|
87
90
|
params = ColumnConfigWithDataFrame(column_config=c, df=dataset)
|
|
88
91
|
column_profiles.append(profiler.profile(params))
|
|
89
92
|
if len(column_profiles) == 0:
|
|
90
93
|
logger.warning(
|
|
91
|
-
f"⚠️ No applicable column types found for the '{profiler.
|
|
94
|
+
f"⚠️ No applicable column types found for the '{profiler.name}' profiler. "
|
|
92
95
|
f"This profiler is applicable to the following column types: {applicable_column_types}"
|
|
93
96
|
)
|
|
94
97
|
|
|
@@ -5,11 +5,8 @@ from __future__ import annotations
|
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
7
|
from numbers import Number
|
|
8
|
-
from typing import Any
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
9
|
|
|
10
|
-
import numpy as np
|
|
11
|
-
import pandas as pd
|
|
12
|
-
import pyarrow as pa
|
|
13
10
|
import tiktoken
|
|
14
11
|
|
|
15
12
|
from data_designer.config.analysis.column_statistics import (
|
|
@@ -26,6 +23,12 @@ from data_designer.engine.column_generators.utils.prompt_renderer import (
|
|
|
26
23
|
RecordBasedPromptRenderer,
|
|
27
24
|
create_response_recipe,
|
|
28
25
|
)
|
|
26
|
+
from data_designer.lazy_heavy_imports import np, pa, pd
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
import numpy as np
|
|
30
|
+
import pandas as pd
|
|
31
|
+
import pyarrow as pa
|
|
29
32
|
|
|
30
33
|
RANDOM_SEED = 42
|
|
31
34
|
MAX_PROMPT_SAMPLE_SIZE = 1000
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
import logging
|
|
5
7
|
from collections import defaultdict
|
|
6
|
-
from typing import Any
|
|
7
|
-
|
|
8
|
-
import pandas as pd
|
|
8
|
+
from typing import TYPE_CHECKING, Any
|
|
9
9
|
|
|
10
10
|
from data_designer.config.analysis.column_profilers import JudgeScoreDistributions, JudgeScoreSample
|
|
11
11
|
from data_designer.config.analysis.column_statistics import (
|
|
@@ -15,6 +15,10 @@ from data_designer.config.analysis.column_statistics import (
|
|
|
15
15
|
NumericalDistribution,
|
|
16
16
|
)
|
|
17
17
|
from data_designer.config.column_configs import LLMJudgeColumnConfig
|
|
18
|
+
from data_designer.lazy_heavy_imports import pd
|
|
19
|
+
|
|
20
|
+
if TYPE_CHECKING:
|
|
21
|
+
import pandas as pd
|
|
18
22
|
|
|
19
23
|
logger = logging.getLogger(__name__)
|
|
20
24
|
|
|
@@ -9,16 +9,16 @@ from abc import ABC, abstractmethod
|
|
|
9
9
|
from enum import Enum
|
|
10
10
|
from typing import TYPE_CHECKING, overload
|
|
11
11
|
|
|
12
|
-
import pandas as pd
|
|
13
|
-
|
|
14
|
-
from data_designer.engine.configurable_task import ConfigurableTask, ConfigurableTaskMetadata, DataT, TaskConfigT
|
|
12
|
+
from data_designer.engine.configurable_task import ConfigurableTask, DataT, TaskConfigT
|
|
13
|
+
from data_designer.lazy_heavy_imports import pd
|
|
15
14
|
|
|
16
15
|
if TYPE_CHECKING:
|
|
16
|
+
import pandas as pd
|
|
17
|
+
|
|
17
18
|
from data_designer.config.models import BaseInferenceParams, ModelConfig
|
|
18
19
|
from data_designer.engine.models.facade import ModelFacade
|
|
19
20
|
from data_designer.engine.models.registry import ModelRegistry
|
|
20
21
|
|
|
21
|
-
|
|
22
22
|
logger = logging.getLogger(__name__)
|
|
23
23
|
|
|
24
24
|
|
|
@@ -27,22 +27,14 @@ class GenerationStrategy(str, Enum):
|
|
|
27
27
|
FULL_COLUMN = "full_column"
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
class GeneratorMetadata(ConfigurableTaskMetadata):
|
|
31
|
-
generation_strategy: GenerationStrategy
|
|
32
|
-
|
|
33
|
-
|
|
34
30
|
class ColumnGenerator(ConfigurableTask[TaskConfigT], ABC):
|
|
35
31
|
@property
|
|
36
32
|
def can_generate_from_scratch(self) -> bool:
|
|
37
33
|
return False
|
|
38
34
|
|
|
39
|
-
@property
|
|
40
|
-
def generation_strategy(self) -> GenerationStrategy:
|
|
41
|
-
return self.metadata().generation_strategy
|
|
42
|
-
|
|
43
35
|
@staticmethod
|
|
44
36
|
@abstractmethod
|
|
45
|
-
def
|
|
37
|
+
def get_generation_strategy() -> GenerationStrategy: ...
|
|
46
38
|
|
|
47
39
|
@overload
|
|
48
40
|
@abstractmethod
|
|
@@ -103,8 +95,28 @@ class ColumnGeneratorWithModel(ColumnGeneratorWithModelRegistry[TaskConfigT], AB
|
|
|
103
95
|
return self.model_config.inference_parameters
|
|
104
96
|
|
|
105
97
|
def log_pre_generation(self) -> None:
|
|
106
|
-
logger.info(
|
|
98
|
+
logger.info(
|
|
99
|
+
f"{self.config.get_column_emoji()} {self.config.column_type} model config for column '{self.config.name}'"
|
|
100
|
+
)
|
|
107
101
|
logger.info(f" |-- model: {self.model_config.model!r}")
|
|
108
102
|
logger.info(f" |-- model alias: {self.config.model_alias!r}")
|
|
109
103
|
logger.info(f" |-- model provider: {self.get_model_provider_name(model_alias=self.config.model_alias)!r}")
|
|
110
104
|
logger.info(f" |-- inference parameters: {self.inference_parameters.format_for_display()}")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class ColumnGeneratorCellByCell(ColumnGenerator[TaskConfigT], ABC):
|
|
108
|
+
@staticmethod
|
|
109
|
+
def get_generation_strategy() -> GenerationStrategy:
|
|
110
|
+
return GenerationStrategy.CELL_BY_CELL
|
|
111
|
+
|
|
112
|
+
@abstractmethod
|
|
113
|
+
def generate(self, data: dict) -> dict: ...
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class ColumnGeneratorFullColumn(ColumnGenerator[TaskConfigT], ABC):
|
|
117
|
+
@staticmethod
|
|
118
|
+
def get_generation_strategy() -> GenerationStrategy:
|
|
119
|
+
return GenerationStrategy.FULL_COLUMN
|
|
120
|
+
|
|
121
|
+
@abstractmethod
|
|
122
|
+
def generate(self, data: pd.DataFrame) -> pd.DataFrame: ...
|
|
@@ -1,15 +1,12 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
4
5
|
|
|
5
6
|
from pydantic import BaseModel, computed_field
|
|
6
7
|
|
|
7
8
|
from data_designer.config.column_configs import EmbeddingColumnConfig
|
|
8
|
-
from data_designer.engine.column_generators.generators.base import (
|
|
9
|
-
ColumnGeneratorWithModel,
|
|
10
|
-
GenerationStrategy,
|
|
11
|
-
GeneratorMetadata,
|
|
12
|
-
)
|
|
9
|
+
from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModel, GenerationStrategy
|
|
13
10
|
from data_designer.engine.processing.utils import deserialize_json_values, parse_list_string
|
|
14
11
|
|
|
15
12
|
|
|
@@ -27,12 +24,8 @@ class EmbeddingGenerationResult(BaseModel):
|
|
|
27
24
|
|
|
28
25
|
class EmbeddingCellGenerator(ColumnGeneratorWithModel[EmbeddingColumnConfig]):
|
|
29
26
|
@staticmethod
|
|
30
|
-
def metadata() -> GeneratorMetadata:
|
|
31
|
-
return GeneratorMetadata(
|
|
32
|
-
name="embedding_cell_generator",
|
|
33
|
-
description="Generate embeddings for a text column.",
|
|
34
|
-
generation_strategy=GenerationStrategy.CELL_BY_CELL,
|
|
35
|
-
)
|
|
27
|
+
def get_generation_strategy() -> GenerationStrategy:
|
|
28
|
+
return GenerationStrategy.CELL_BY_CELL
|
|
36
29
|
|
|
37
30
|
def generate(self, data: dict) -> dict:
|
|
38
31
|
deserialized_record = deserialize_json_values(data)
|
|
@@ -4,31 +4,22 @@
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
|
-
|
|
8
|
-
import pandas as pd
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
9
8
|
|
|
10
9
|
from data_designer.config.column_configs import ExpressionColumnConfig
|
|
11
|
-
from data_designer.engine.column_generators.generators.base import (
|
|
12
|
-
ColumnGenerator,
|
|
13
|
-
GenerationStrategy,
|
|
14
|
-
GeneratorMetadata,
|
|
15
|
-
)
|
|
10
|
+
from data_designer.engine.column_generators.generators.base import ColumnGeneratorFullColumn
|
|
16
11
|
from data_designer.engine.column_generators.utils.errors import ExpressionTemplateRenderError
|
|
17
12
|
from data_designer.engine.processing.ginja.environment import WithJinja2UserTemplateRendering
|
|
18
13
|
from data_designer.engine.processing.utils import deserialize_json_values
|
|
14
|
+
from data_designer.lazy_heavy_imports import pd
|
|
19
15
|
|
|
20
|
-
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
import pandas as pd
|
|
21
18
|
|
|
19
|
+
logger = logging.getLogger(__name__)
|
|
22
20
|
|
|
23
|
-
class ExpressionColumnGenerator(WithJinja2UserTemplateRendering, ColumnGenerator[ExpressionColumnConfig]):
|
|
24
|
-
@staticmethod
|
|
25
|
-
def metadata() -> GeneratorMetadata:
|
|
26
|
-
return GeneratorMetadata(
|
|
27
|
-
name="expression_generator",
|
|
28
|
-
description="Generate a column from a jinja2 expression.",
|
|
29
|
-
generation_strategy=GenerationStrategy.FULL_COLUMN,
|
|
30
|
-
)
|
|
31
21
|
|
|
22
|
+
class ExpressionColumnGenerator(WithJinja2UserTemplateRendering, ColumnGeneratorFullColumn[ExpressionColumnConfig]):
|
|
32
23
|
def generate(self, data: pd.DataFrame) -> pd.DataFrame:
|
|
33
24
|
logger.info(f"🧩 Generating column `{self.config.name}` from expression")
|
|
34
25
|
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
import functools
|
|
5
7
|
import logging
|
|
6
8
|
|
|
@@ -11,11 +13,7 @@ from data_designer.config.column_configs import (
|
|
|
11
13
|
LLMTextColumnConfig,
|
|
12
14
|
)
|
|
13
15
|
from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX
|
|
14
|
-
from data_designer.engine.column_generators.generators.base import (
|
|
15
|
-
ColumnGeneratorWithModel,
|
|
16
|
-
GenerationStrategy,
|
|
17
|
-
GeneratorMetadata,
|
|
18
|
-
)
|
|
16
|
+
from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModel, GenerationStrategy
|
|
19
17
|
from data_designer.engine.column_generators.utils.prompt_renderer import (
|
|
20
18
|
PromptType,
|
|
21
19
|
RecordBasedPromptRenderer,
|
|
@@ -28,22 +26,22 @@ from data_designer.engine.processing.utils import deserialize_json_values
|
|
|
28
26
|
logger = logging.getLogger(__name__)
|
|
29
27
|
|
|
30
28
|
|
|
31
|
-
DEFAULT_MAX_CONVERSATION_RESTARTS = 5
|
|
32
|
-
DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS = 0
|
|
33
|
-
|
|
34
|
-
|
|
35
29
|
class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfigT]):
|
|
30
|
+
@staticmethod
|
|
31
|
+
def get_generation_strategy() -> GenerationStrategy:
|
|
32
|
+
return GenerationStrategy.CELL_BY_CELL
|
|
33
|
+
|
|
36
34
|
@functools.cached_property
|
|
37
35
|
def response_recipe(self) -> ResponseRecipe:
|
|
38
36
|
return create_response_recipe(self.config, self.model_config)
|
|
39
37
|
|
|
40
38
|
@property
|
|
41
39
|
def max_conversation_correction_steps(self) -> int:
|
|
42
|
-
return DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS
|
|
40
|
+
return self.resource_provider.run_config.max_conversation_correction_steps
|
|
43
41
|
|
|
44
42
|
@property
|
|
45
43
|
def max_conversation_restarts(self) -> int:
|
|
46
|
-
return DEFAULT_MAX_CONVERSATION_RESTARTS
|
|
44
|
+
return self.resource_provider.run_config.max_conversation_restarts
|
|
47
45
|
|
|
48
46
|
@functools.cached_property
|
|
49
47
|
def prompt_renderer(self) -> RecordBasedPromptRenderer:
|
|
@@ -91,45 +89,13 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
|
|
|
91
89
|
return data
|
|
92
90
|
|
|
93
91
|
|
|
94
|
-
class LLMTextCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMTextColumnConfig]):
|
|
95
|
-
@staticmethod
|
|
96
|
-
def metadata() -> GeneratorMetadata:
|
|
97
|
-
return GeneratorMetadata(
|
|
98
|
-
name="llm_text_generator",
|
|
99
|
-
description="Generate a new dataset cell from a prompt template",
|
|
100
|
-
generation_strategy=GenerationStrategy.CELL_BY_CELL,
|
|
101
|
-
)
|
|
92
|
+
class LLMTextCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMTextColumnConfig]): ...
|
|
102
93
|
|
|
103
94
|
|
|
104
|
-
class LLMCodeCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMCodeColumnConfig]):
|
|
105
|
-
@staticmethod
|
|
106
|
-
def metadata() -> GeneratorMetadata:
|
|
107
|
-
return GeneratorMetadata(
|
|
108
|
-
name="llm_code_generator",
|
|
109
|
-
description="Generate a new dataset cell from a prompt template",
|
|
110
|
-
generation_strategy=GenerationStrategy.CELL_BY_CELL,
|
|
111
|
-
)
|
|
112
|
-
|
|
95
|
+
class LLMCodeCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMCodeColumnConfig]): ...
|
|
113
96
|
|
|
114
|
-
class LLMStructuredCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMStructuredColumnConfig]):
|
|
115
|
-
@staticmethod
|
|
116
|
-
def metadata() -> GeneratorMetadata:
|
|
117
|
-
return GeneratorMetadata(
|
|
118
|
-
name="llm_structured_generator",
|
|
119
|
-
description="Generate a new dataset cell from a prompt template",
|
|
120
|
-
generation_strategy=GenerationStrategy.CELL_BY_CELL,
|
|
121
|
-
)
|
|
122
97
|
|
|
98
|
+
class LLMStructuredCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMStructuredColumnConfig]): ...
|
|
123
99
|
|
|
124
|
-
class LLMJudgeCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMJudgeColumnConfig]):
|
|
125
|
-
@staticmethod
|
|
126
|
-
def metadata() -> GeneratorMetadata:
|
|
127
|
-
return GeneratorMetadata(
|
|
128
|
-
name="llm_judge_generator",
|
|
129
|
-
description="Judge a new dataset cell based on a set of rubrics",
|
|
130
|
-
generation_strategy=GenerationStrategy.CELL_BY_CELL,
|
|
131
|
-
)
|
|
132
100
|
|
|
133
|
-
|
|
134
|
-
def max_conversation_restarts(self) -> int:
|
|
135
|
-
return 2 * DEFAULT_MAX_CONVERSATION_RESTARTS
|
|
101
|
+
class LLMJudgeCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMJudgeColumnConfig]): ...
|
|
@@ -6,34 +6,28 @@ from __future__ import annotations
|
|
|
6
6
|
import logging
|
|
7
7
|
import random
|
|
8
8
|
from functools import partial
|
|
9
|
-
from typing import Callable
|
|
10
|
-
|
|
11
|
-
import pandas as pd
|
|
9
|
+
from typing import TYPE_CHECKING, Callable
|
|
12
10
|
|
|
13
11
|
from data_designer.config.utils.constants import LOCALES_WITH_MANAGED_DATASETS
|
|
14
|
-
from data_designer.engine.column_generators.generators.base import (
|
|
15
|
-
FromScratchColumnGenerator,
|
|
16
|
-
GenerationStrategy,
|
|
17
|
-
GeneratorMetadata,
|
|
18
|
-
)
|
|
12
|
+
from data_designer.engine.column_generators.generators.base import FromScratchColumnGenerator, GenerationStrategy
|
|
19
13
|
from data_designer.engine.dataset_builders.multi_column_configs import SamplerMultiColumnConfig
|
|
20
14
|
from data_designer.engine.processing.utils import concat_datasets
|
|
21
15
|
from data_designer.engine.resources.managed_dataset_generator import ManagedDatasetGenerator
|
|
22
16
|
from data_designer.engine.sampling_gen.data_sources.sources import SamplerType
|
|
23
17
|
from data_designer.engine.sampling_gen.entities.person import load_person_data_sampler
|
|
24
18
|
from data_designer.engine.sampling_gen.generator import DatasetGenerator as SamplingDatasetGenerator
|
|
19
|
+
from data_designer.lazy_heavy_imports import pd
|
|
20
|
+
|
|
21
|
+
if TYPE_CHECKING:
|
|
22
|
+
import pandas as pd
|
|
25
23
|
|
|
26
24
|
logger = logging.getLogger(__name__)
|
|
27
25
|
|
|
28
26
|
|
|
29
27
|
class SamplerColumnGenerator(FromScratchColumnGenerator[SamplerMultiColumnConfig]):
|
|
30
28
|
@staticmethod
|
|
31
|
-
def
|
|
32
|
-
return
|
|
33
|
-
name="sampler_column_generator",
|
|
34
|
-
description="Generate columns using sampling-based method.",
|
|
35
|
-
generation_strategy=GenerationStrategy.FULL_COLUMN,
|
|
36
|
-
)
|
|
29
|
+
def get_generation_strategy() -> GenerationStrategy:
|
|
30
|
+
return GenerationStrategy.FULL_COLUMN
|
|
37
31
|
|
|
38
32
|
def generate(self, data: pd.DataFrame) -> pd.DataFrame:
|
|
39
33
|
df_samplers = self.generate_from_scratch(len(data))
|
|
@@ -1,24 +1,22 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
-
|
|
5
4
|
from __future__ import annotations
|
|
6
5
|
|
|
7
6
|
import functools
|
|
8
7
|
import logging
|
|
9
|
-
|
|
10
|
-
import duckdb
|
|
11
|
-
import pandas as pd
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
12
9
|
|
|
13
10
|
from data_designer.config.seed import IndexRange, PartitionBlock, SamplingStrategy
|
|
14
|
-
from data_designer.engine.column_generators.generators.base import
|
|
15
|
-
FromScratchColumnGenerator,
|
|
16
|
-
GenerationStrategy,
|
|
17
|
-
GeneratorMetadata,
|
|
18
|
-
)
|
|
11
|
+
from data_designer.engine.column_generators.generators.base import FromScratchColumnGenerator, GenerationStrategy
|
|
19
12
|
from data_designer.engine.column_generators.utils.errors import SeedDatasetError
|
|
20
13
|
from data_designer.engine.dataset_builders.multi_column_configs import SeedDatasetMultiColumnConfig
|
|
21
14
|
from data_designer.engine.processing.utils import concat_datasets
|
|
15
|
+
from data_designer.lazy_heavy_imports import duckdb, pd
|
|
16
|
+
|
|
17
|
+
if TYPE_CHECKING:
|
|
18
|
+
import duckdb
|
|
19
|
+
import pandas as pd
|
|
22
20
|
|
|
23
21
|
MAX_ZERO_RECORD_RESPONSE_FACTOR = 2
|
|
24
22
|
|
|
@@ -27,12 +25,8 @@ logger = logging.getLogger(__name__)
|
|
|
27
25
|
|
|
28
26
|
class SeedDatasetColumnGenerator(FromScratchColumnGenerator[SeedDatasetMultiColumnConfig]):
|
|
29
27
|
@staticmethod
|
|
30
|
-
def
|
|
31
|
-
return
|
|
32
|
-
name="seed_dataset_column_generator",
|
|
33
|
-
description="Sample columns from a seed dataset.",
|
|
34
|
-
generation_strategy=GenerationStrategy.FULL_COLUMN,
|
|
35
|
-
)
|
|
28
|
+
def get_generation_strategy() -> GenerationStrategy:
|
|
29
|
+
return GenerationStrategy.FULL_COLUMN
|
|
36
30
|
|
|
37
31
|
@property
|
|
38
32
|
def num_records_sampled(self) -> int:
|
|
@@ -4,21 +4,13 @@
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
import logging
|
|
7
|
-
|
|
8
|
-
import pandas as pd
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
9
8
|
|
|
10
9
|
from data_designer.config.column_configs import ValidationColumnConfig
|
|
11
10
|
from data_designer.config.errors import InvalidConfigError
|
|
12
11
|
from data_designer.config.utils.code_lang import SQL_DIALECTS, CodeLang
|
|
13
|
-
from data_designer.config.validator_params import
|
|
14
|
-
|
|
15
|
-
ValidatorType,
|
|
16
|
-
)
|
|
17
|
-
from data_designer.engine.column_generators.generators.base import (
|
|
18
|
-
ColumnGenerator,
|
|
19
|
-
GenerationStrategy,
|
|
20
|
-
GeneratorMetadata,
|
|
21
|
-
)
|
|
12
|
+
from data_designer.config.validator_params import ValidatorParamsT, ValidatorType
|
|
13
|
+
from data_designer.engine.column_generators.generators.base import ColumnGeneratorFullColumn
|
|
22
14
|
from data_designer.engine.dataset_builders.utils.concurrency import ConcurrentThreadExecutor
|
|
23
15
|
from data_designer.engine.errors import DataDesignerRuntimeError
|
|
24
16
|
from data_designer.engine.validators import (
|
|
@@ -29,6 +21,10 @@ from data_designer.engine.validators import (
|
|
|
29
21
|
SQLValidator,
|
|
30
22
|
ValidationResult,
|
|
31
23
|
)
|
|
24
|
+
from data_designer.lazy_heavy_imports import pd
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
import pandas as pd
|
|
32
28
|
|
|
33
29
|
logger = logging.getLogger(__name__)
|
|
34
30
|
|
|
@@ -45,15 +41,7 @@ def get_validator_from_params(validator_type: ValidatorType, validator_params: V
|
|
|
45
41
|
return LocalCallableValidator(validator_params)
|
|
46
42
|
|
|
47
43
|
|
|
48
|
-
class ValidationColumnGenerator(
|
|
49
|
-
@staticmethod
|
|
50
|
-
def metadata() -> GeneratorMetadata:
|
|
51
|
-
return GeneratorMetadata(
|
|
52
|
-
name="validate",
|
|
53
|
-
description="Validate data.",
|
|
54
|
-
generation_strategy=GenerationStrategy.FULL_COLUMN,
|
|
55
|
-
)
|
|
56
|
-
|
|
44
|
+
class ValidationColumnGenerator(ColumnGeneratorFullColumn[ValidationColumnConfig]):
|
|
57
45
|
def generate(self, data: pd.DataFrame) -> pd.DataFrame:
|
|
58
46
|
logger.info(f"🔍 Validating column {self.config.name!r} with {len(data)} records")
|
|
59
47
|
logger.info(f" |-- target columns: {self.config.target_columns}")
|
|
@@ -132,6 +120,7 @@ class ValidationColumnGenerator(ColumnGenerator[ValidationColumnConfig]):
|
|
|
132
120
|
error_callback=error_callback,
|
|
133
121
|
shutdown_error_rate=settings.shutdown_error_rate,
|
|
134
122
|
shutdown_error_window=settings.shutdown_error_window,
|
|
123
|
+
disable_early_shutdown=settings.disable_early_shutdown,
|
|
135
124
|
) as executor:
|
|
136
125
|
for i, batch in enumerate(batched_records):
|
|
137
126
|
executor.submit(lambda batch: self._validate_batch(validator, batch), batch, context={"index": i})
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
from data_designer.config.base import ConfigBase
|
|
5
7
|
from data_designer.config.column_configs import (
|
|
6
8
|
EmbeddingColumnConfig,
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
from data_designer.config.column_types import DataDesignerColumnType
|
|
5
7
|
from data_designer.config.utils.type_helpers import resolve_string_enum
|
|
6
8
|
from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModelRegistry
|
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
from enum import Enum
|
|
5
7
|
|
|
6
8
|
from pydantic import BaseModel, ConfigDict, Field, create_model
|