data-designer 0.3.8rc2__py3-none-any.whl → 0.4.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/cli/commands/__init__.py +1 -1
- data_designer/interface/__init__.py +21 -1
- data_designer/{_version.py → interface/_version.py} +2 -2
- data_designer/interface/data_designer.py +1 -7
- {data_designer-0.3.8rc2.dist-info → data_designer-0.4.0rc1.dist-info}/METADATA +10 -42
- data_designer-0.4.0rc1.dist-info/RECORD +39 -0
- data_designer/__init__.py +0 -17
- data_designer/config/__init__.py +0 -2
- data_designer/config/analysis/__init__.py +0 -2
- data_designer/config/analysis/column_profilers.py +0 -159
- data_designer/config/analysis/column_statistics.py +0 -421
- data_designer/config/analysis/dataset_profiler.py +0 -84
- data_designer/config/analysis/utils/errors.py +0 -10
- data_designer/config/analysis/utils/reporting.py +0 -192
- data_designer/config/base.py +0 -69
- data_designer/config/column_configs.py +0 -470
- data_designer/config/column_types.py +0 -141
- data_designer/config/config_builder.py +0 -595
- data_designer/config/data_designer_config.py +0 -40
- data_designer/config/dataset_builders.py +0 -13
- data_designer/config/dataset_metadata.py +0 -18
- data_designer/config/default_model_settings.py +0 -129
- data_designer/config/errors.py +0 -24
- data_designer/config/exports.py +0 -145
- data_designer/config/interface.py +0 -55
- data_designer/config/models.py +0 -455
- data_designer/config/preview_results.py +0 -41
- data_designer/config/processors.py +0 -148
- data_designer/config/run_config.py +0 -51
- data_designer/config/sampler_constraints.py +0 -52
- data_designer/config/sampler_params.py +0 -639
- data_designer/config/seed.py +0 -116
- data_designer/config/seed_source.py +0 -84
- data_designer/config/seed_source_types.py +0 -19
- data_designer/config/utils/code_lang.py +0 -82
- data_designer/config/utils/constants.py +0 -363
- data_designer/config/utils/errors.py +0 -21
- data_designer/config/utils/info.py +0 -94
- data_designer/config/utils/io_helpers.py +0 -258
- data_designer/config/utils/misc.py +0 -78
- data_designer/config/utils/numerical_helpers.py +0 -30
- data_designer/config/utils/type_helpers.py +0 -106
- data_designer/config/utils/visualization.py +0 -482
- data_designer/config/validator_params.py +0 -94
- data_designer/engine/__init__.py +0 -2
- data_designer/engine/analysis/column_profilers/base.py +0 -49
- data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -153
- data_designer/engine/analysis/column_profilers/registry.py +0 -22
- data_designer/engine/analysis/column_statistics.py +0 -145
- data_designer/engine/analysis/dataset_profiler.py +0 -149
- data_designer/engine/analysis/errors.py +0 -9
- data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -234
- data_designer/engine/analysis/utils/judge_score_processing.py +0 -132
- data_designer/engine/column_generators/__init__.py +0 -2
- data_designer/engine/column_generators/generators/__init__.py +0 -2
- data_designer/engine/column_generators/generators/base.py +0 -122
- data_designer/engine/column_generators/generators/embedding.py +0 -35
- data_designer/engine/column_generators/generators/expression.py +0 -55
- data_designer/engine/column_generators/generators/llm_completion.py +0 -113
- data_designer/engine/column_generators/generators/samplers.py +0 -69
- data_designer/engine/column_generators/generators/seed_dataset.py +0 -144
- data_designer/engine/column_generators/generators/validation.py +0 -140
- data_designer/engine/column_generators/registry.py +0 -60
- data_designer/engine/column_generators/utils/errors.py +0 -15
- data_designer/engine/column_generators/utils/generator_classification.py +0 -43
- data_designer/engine/column_generators/utils/judge_score_factory.py +0 -58
- data_designer/engine/column_generators/utils/prompt_renderer.py +0 -100
- data_designer/engine/compiler.py +0 -97
- data_designer/engine/configurable_task.py +0 -71
- data_designer/engine/dataset_builders/artifact_storage.py +0 -283
- data_designer/engine/dataset_builders/column_wise_builder.py +0 -335
- data_designer/engine/dataset_builders/errors.py +0 -15
- data_designer/engine/dataset_builders/multi_column_configs.py +0 -46
- data_designer/engine/dataset_builders/utils/__init__.py +0 -2
- data_designer/engine/dataset_builders/utils/concurrency.py +0 -212
- data_designer/engine/dataset_builders/utils/config_compiler.py +0 -62
- data_designer/engine/dataset_builders/utils/dag.py +0 -62
- data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -200
- data_designer/engine/dataset_builders/utils/errors.py +0 -15
- data_designer/engine/errors.py +0 -51
- data_designer/engine/model_provider.py +0 -77
- data_designer/engine/models/__init__.py +0 -2
- data_designer/engine/models/errors.py +0 -300
- data_designer/engine/models/facade.py +0 -287
- data_designer/engine/models/factory.py +0 -42
- data_designer/engine/models/litellm_overrides.py +0 -179
- data_designer/engine/models/parsers/__init__.py +0 -2
- data_designer/engine/models/parsers/errors.py +0 -34
- data_designer/engine/models/parsers/parser.py +0 -235
- data_designer/engine/models/parsers/postprocessors.py +0 -93
- data_designer/engine/models/parsers/tag_parsers.py +0 -62
- data_designer/engine/models/parsers/types.py +0 -84
- data_designer/engine/models/recipes/base.py +0 -81
- data_designer/engine/models/recipes/response_recipes.py +0 -293
- data_designer/engine/models/registry.py +0 -146
- data_designer/engine/models/telemetry.py +0 -359
- data_designer/engine/models/usage.py +0 -73
- data_designer/engine/models/utils.py +0 -38
- data_designer/engine/processing/ginja/__init__.py +0 -2
- data_designer/engine/processing/ginja/ast.py +0 -65
- data_designer/engine/processing/ginja/environment.py +0 -463
- data_designer/engine/processing/ginja/exceptions.py +0 -56
- data_designer/engine/processing/ginja/record.py +0 -32
- data_designer/engine/processing/gsonschema/__init__.py +0 -2
- data_designer/engine/processing/gsonschema/exceptions.py +0 -15
- data_designer/engine/processing/gsonschema/schema_transformers.py +0 -83
- data_designer/engine/processing/gsonschema/types.py +0 -10
- data_designer/engine/processing/gsonschema/validators.py +0 -202
- data_designer/engine/processing/processors/base.py +0 -13
- data_designer/engine/processing/processors/drop_columns.py +0 -42
- data_designer/engine/processing/processors/registry.py +0 -25
- data_designer/engine/processing/processors/schema_transform.py +0 -49
- data_designer/engine/processing/utils.py +0 -169
- data_designer/engine/registry/base.py +0 -99
- data_designer/engine/registry/data_designer_registry.py +0 -39
- data_designer/engine/registry/errors.py +0 -12
- data_designer/engine/resources/managed_dataset_generator.py +0 -39
- data_designer/engine/resources/managed_dataset_repository.py +0 -197
- data_designer/engine/resources/managed_storage.py +0 -65
- data_designer/engine/resources/resource_provider.py +0 -77
- data_designer/engine/resources/seed_reader.py +0 -154
- data_designer/engine/sampling_gen/column.py +0 -91
- data_designer/engine/sampling_gen/constraints.py +0 -100
- data_designer/engine/sampling_gen/data_sources/base.py +0 -217
- data_designer/engine/sampling_gen/data_sources/errors.py +0 -12
- data_designer/engine/sampling_gen/data_sources/sources.py +0 -347
- data_designer/engine/sampling_gen/entities/__init__.py +0 -2
- data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
- data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -86
- data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -171
- data_designer/engine/sampling_gen/entities/errors.py +0 -10
- data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -102
- data_designer/engine/sampling_gen/entities/person.py +0 -144
- data_designer/engine/sampling_gen/entities/phone_number.py +0 -128
- data_designer/engine/sampling_gen/errors.py +0 -26
- data_designer/engine/sampling_gen/generator.py +0 -122
- data_designer/engine/sampling_gen/jinja_utils.py +0 -64
- data_designer/engine/sampling_gen/people_gen.py +0 -199
- data_designer/engine/sampling_gen/person_constants.py +0 -56
- data_designer/engine/sampling_gen/schema.py +0 -147
- data_designer/engine/sampling_gen/schema_builder.py +0 -61
- data_designer/engine/sampling_gen/utils.py +0 -46
- data_designer/engine/secret_resolver.py +0 -82
- data_designer/engine/validation.py +0 -367
- data_designer/engine/validators/__init__.py +0 -19
- data_designer/engine/validators/base.py +0 -38
- data_designer/engine/validators/local_callable.py +0 -39
- data_designer/engine/validators/python.py +0 -254
- data_designer/engine/validators/remote.py +0 -89
- data_designer/engine/validators/sql.py +0 -65
- data_designer/errors.py +0 -7
- data_designer/essentials/__init__.py +0 -33
- data_designer/lazy_heavy_imports.py +0 -54
- data_designer/logging.py +0 -163
- data_designer/plugin_manager.py +0 -78
- data_designer/plugins/__init__.py +0 -8
- data_designer/plugins/errors.py +0 -15
- data_designer/plugins/plugin.py +0 -141
- data_designer/plugins/registry.py +0 -88
- data_designer/plugins/testing/__init__.py +0 -10
- data_designer/plugins/testing/stubs.py +0 -116
- data_designer/plugins/testing/utils.py +0 -20
- data_designer-0.3.8rc2.dist-info/RECORD +0 -196
- data_designer-0.3.8rc2.dist-info/licenses/LICENSE +0 -201
- {data_designer-0.3.8rc2.dist-info → data_designer-0.4.0rc1.dist-info}/WHEEL +0 -0
- {data_designer-0.3.8rc2.dist-info → data_designer-0.4.0rc1.dist-info}/entry_points.txt +0 -0
data_designer/plugin_manager.py
DELETED
|
@@ -1,78 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
from enum import Enum
|
|
7
|
-
from typing import TYPE_CHECKING, TypeAlias
|
|
8
|
-
|
|
9
|
-
from data_designer.plugins.plugin import PluginType
|
|
10
|
-
from data_designer.plugins.registry import PluginRegistry
|
|
11
|
-
|
|
12
|
-
if TYPE_CHECKING:
|
|
13
|
-
from data_designer.plugins.plugin import Plugin
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class PluginManager:
|
|
17
|
-
def __init__(self):
|
|
18
|
-
self._plugin_registry = PluginRegistry()
|
|
19
|
-
|
|
20
|
-
def get_column_generator_plugins(self) -> list[Plugin]:
|
|
21
|
-
"""Get all column generator plugins.
|
|
22
|
-
|
|
23
|
-
Returns:
|
|
24
|
-
A list of all column generator plugins.
|
|
25
|
-
"""
|
|
26
|
-
return self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR)
|
|
27
|
-
|
|
28
|
-
def get_column_generator_plugin_if_exists(self, plugin_name: str) -> Plugin | None:
|
|
29
|
-
"""Get a column generator plugin by name if it exists.
|
|
30
|
-
|
|
31
|
-
Args:
|
|
32
|
-
plugin_name: The name of the plugin to retrieve.
|
|
33
|
-
|
|
34
|
-
Returns:
|
|
35
|
-
The plugin if found, otherwise None.
|
|
36
|
-
"""
|
|
37
|
-
if self._plugin_registry.plugin_exists(plugin_name):
|
|
38
|
-
return self._plugin_registry.get_plugin(plugin_name)
|
|
39
|
-
|
|
40
|
-
def get_plugin_column_types(self, enum_type: type[Enum]) -> list[Enum]:
|
|
41
|
-
"""Get a list of plugin column types.
|
|
42
|
-
|
|
43
|
-
Args:
|
|
44
|
-
enum_type: The enum type to use for plugin entries.
|
|
45
|
-
|
|
46
|
-
Returns:
|
|
47
|
-
A list of plugin column types.
|
|
48
|
-
"""
|
|
49
|
-
type_list = []
|
|
50
|
-
for plugin in self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR):
|
|
51
|
-
type_list.append(enum_type(plugin.name))
|
|
52
|
-
return type_list
|
|
53
|
-
|
|
54
|
-
def inject_into_column_config_type_union(self, column_config_type: type[TypeAlias]) -> type[TypeAlias]:
|
|
55
|
-
"""Inject plugins into the column config type.
|
|
56
|
-
|
|
57
|
-
Args:
|
|
58
|
-
column_config_type: The column config type to inject plugins into.
|
|
59
|
-
|
|
60
|
-
Returns:
|
|
61
|
-
The column config type with plugins injected.
|
|
62
|
-
"""
|
|
63
|
-
column_config_type = self._plugin_registry.add_plugin_types_to_union(
|
|
64
|
-
column_config_type, PluginType.COLUMN_GENERATOR
|
|
65
|
-
)
|
|
66
|
-
return column_config_type
|
|
67
|
-
|
|
68
|
-
def inject_into_seed_source_type_union(self, seed_source_type: type[TypeAlias]) -> type[TypeAlias]:
|
|
69
|
-
"""Inject plugins into the seed source type.
|
|
70
|
-
|
|
71
|
-
Args:
|
|
72
|
-
seed_source_type: The seed source type to inject plugins into.
|
|
73
|
-
|
|
74
|
-
Returns:
|
|
75
|
-
The seed source type with plugins injected.
|
|
76
|
-
"""
|
|
77
|
-
seed_source_type = self._plugin_registry.add_plugin_types_to_union(seed_source_type, PluginType.SEED_READER)
|
|
78
|
-
return seed_source_type
|
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
from data_designer.plugins.plugin import Plugin, PluginType
|
|
7
|
-
|
|
8
|
-
# Names exported by a star-import of this module.
__all__ = ["Plugin", "PluginType"]
|
data_designer/plugins/errors.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
from data_designer.errors import DataDesignerError
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
class PluginLoadError(DataDesignerError):
    """Raised when a plugin's module, source file, or class cannot be found or loaded."""
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
class PluginRegistrationError(DataDesignerError):
    """Error type for plugin registration failures."""

    # NOTE(review): no raise site is visible next to this definition -- confirm where it is used.
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class PluginNotFoundError(DataDesignerError):
    """Raised when a plugin is requested under a name that is not registered."""
|
data_designer/plugins/plugin.py
DELETED
|
@@ -1,141 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
import ast
|
|
7
|
-
import importlib
|
|
8
|
-
import importlib.util
|
|
9
|
-
from enum import Enum
|
|
10
|
-
from functools import cached_property
|
|
11
|
-
from typing import Literal, get_origin
|
|
12
|
-
|
|
13
|
-
from pydantic import BaseModel, Field, field_validator, model_validator
|
|
14
|
-
from typing_extensions import Self
|
|
15
|
-
|
|
16
|
-
from data_designer.config.base import ConfigBase
|
|
17
|
-
from data_designer.plugins.errors import PluginLoadError
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class PluginType(str, Enum):
    """Kinds of plugin; each member's value is its hyphenated identifier."""

    COLUMN_GENERATOR = "column-generator"
    SEED_READER = "seed-reader"

    @property
    def discriminator_field(self) -> str:
        """Name of the config field that discriminates plugins of this type.

        Raises:
            ValueError: If the member has no discriminator field assigned.
        """
        if self is PluginType.COLUMN_GENERATOR:
            return "column_type"
        if self is PluginType.SEED_READER:
            return "seed_type"
        raise ValueError(f"Invalid plugin type: {self.value}")

    @property
    def display_name(self) -> str:
        """The member's value with hyphens replaced by spaces."""
        hyphenated = self.value
        return hyphenated.replace("-", " ")
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def _get_module_and_object_names(fully_qualified_object: str) -> tuple[str, str]:
|
|
39
|
-
try:
|
|
40
|
-
module_name, object_name = fully_qualified_object.rsplit(".", 1)
|
|
41
|
-
except ValueError:
|
|
42
|
-
# If fully_qualified_object does not have any periods, the rsplit call will return
|
|
43
|
-
# a list of length 1 and the variable assignment above will raise ValueError
|
|
44
|
-
raise PluginLoadError("Expected a fully-qualified object name, e.g. 'my_plugin.config.MyConfig'")
|
|
45
|
-
|
|
46
|
-
return module_name, object_name
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def _check_class_exists_in_file(filepath: str, class_name: str) -> None:
|
|
50
|
-
try:
|
|
51
|
-
with open(filepath, "r") as file:
|
|
52
|
-
source = file.read()
|
|
53
|
-
except FileNotFoundError:
|
|
54
|
-
raise PluginLoadError(f"Could not read source code at {filepath!r}")
|
|
55
|
-
|
|
56
|
-
tree = ast.parse(source)
|
|
57
|
-
for node in ast.walk(tree):
|
|
58
|
-
if isinstance(node, ast.ClassDef) and node.name == class_name:
|
|
59
|
-
return None
|
|
60
|
-
|
|
61
|
-
raise PluginLoadError(f"Could not find class named {class_name!r} in {filepath!r}")
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
class Plugin(BaseModel):
    """Describes one plugin: its implementation class, its config class, and its type.

    Both classes are referenced by fully-qualified dotted name. The config class is
    imported while the model is validated (the discriminator check needs it); the
    implementation class is imported on first access to ``impl_cls``.
    """

    impl_qualified_name: str = Field(
        ...,
        description="The fully-qualified name of the implementation class object, e.g. 'my_plugin.generator.MyColumnGenerator'",
    )
    config_qualified_name: str = Field(
        ..., description="The fully-qualified name of the config class object, e.g. 'my_plugin.config.MyConfig'"
    )
    plugin_type: PluginType = Field(..., description="The type of plugin")

    @property
    def config_type_as_class_name(self) -> str:
        """The enum key in CamelCase form, e.g. ``MY_PLUGIN`` becomes ``MyPlugin``."""
        return self.enum_key_name.title().replace("_", "")

    @property
    def enum_key_name(self) -> str:
        """The plugin name upper-cased with hyphens turned into underscores."""
        return self.name.replace("-", "_").upper()

    @property
    def name(self) -> str:
        """The plugin name: the default value of the config class's discriminator field."""
        return self.config_cls.model_fields[self.discriminator_field].default

    @property
    def discriminator_field(self) -> str:
        """Name of the discriminator field for this plugin's type."""
        return self.plugin_type.discriminator_field

    @field_validator("impl_qualified_name", "config_qualified_name", mode="after")
    @classmethod
    def validate_class_name(cls, value: str) -> str:
        """Check that the named module can be located and that its source defines the class.

        The target module is not imported here; its source file is parsed instead.

        Raises:
            PluginLoadError: If the name is not dotted, the module cannot be found,
                the module has no source origin, or the class is not in that source.
        """
        module_name, object_name = _get_module_and_object_names(value)
        try:
            spec = importlib.util.find_spec(module_name)
        except Exception as err:
            # find_spec can raise (e.g. a missing parent package or an empty module
            # name). Catch Exception rather than using a bare except so that
            # KeyboardInterrupt and SystemExit still propagate.
            raise PluginLoadError(f"Could not find module {module_name!r}") from err

        if spec is None or spec.origin is None:
            raise PluginLoadError(f"Error finding source for module {module_name!r}")

        _check_class_exists_in_file(spec.origin, object_name)

        return value

    @model_validator(mode="after")
    def validate_discriminator_field(self) -> Self:
        """Check that the config class's discriminator field can serve as a plugin name.

        The field must exist, be annotated as a ``Literal``, have a string default,
        and that default must convert to a valid Python identifier.

        Raises:
            ValueError: If any of those conditions does not hold.
        """
        _, cfg = _get_module_and_object_names(self.config_qualified_name)
        field = self.plugin_type.discriminator_field
        if field not in self.config_cls.model_fields:
            raise ValueError(f"Discriminator field {field!r} not found in config class {cfg!r}")
        field_info = self.config_cls.model_fields[field]
        if get_origin(field_info.annotation) is not Literal:
            raise ValueError(f"Field {field!r} of {cfg!r} must be a Literal type, not {field_info.annotation!r}.")
        if not isinstance(field_info.default, str):
            raise ValueError(f"The default of {field!r} must be a string, not {type(field_info.default)!r}.")
        # Same conversion as enum_key_name, applied before `name` can be trusted.
        enum_key = field_info.default.replace("-", "_").upper()
        if not enum_key.isidentifier():
            raise ValueError(
                f"The default value {field_info.default!r} for discriminator field {field!r} "
                f"cannot be converted to a valid enum key. The converted key {enum_key!r} "
                f"must be a valid Python identifier."
            )
        return self

    @cached_property
    def config_cls(self) -> type[ConfigBase]:
        """The config class, imported on first access and cached afterwards."""
        return self._load(self.config_qualified_name)

    @cached_property
    def impl_cls(self) -> type:
        """The implementation class, imported on first access and cached afterwards."""
        return self._load(self.impl_qualified_name)

    @staticmethod
    def _load(fully_qualified_object: str) -> type:
        """Import the module part of a dotted path and return the named attribute.

        Raises:
            PluginLoadError: If the path is not dotted or the module lacks the attribute.
        """
        module_name, object_name = _get_module_and_object_names(fully_qualified_object)
        module = importlib.import_module(module_name)
        try:
            return getattr(module, object_name)
        except AttributeError as err:
            raise PluginLoadError(f"Could not find class {object_name!r} in module {module_name!r}") from err
|
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
import logging
|
|
7
|
-
import os
|
|
8
|
-
import threading
|
|
9
|
-
from importlib.metadata import entry_points
|
|
10
|
-
from typing import TypeAlias
|
|
11
|
-
|
|
12
|
-
from typing_extensions import Self
|
|
13
|
-
|
|
14
|
-
from data_designer.plugins.errors import PluginNotFoundError
|
|
15
|
-
from data_designer.plugins.plugin import Plugin, PluginType
|
|
16
|
-
|
|
17
|
-
# Module-level logger.
logger = logging.getLogger(__name__)


# True when the DISABLE_DATA_DESIGNER_PLUGINS environment variable equals "true"
# (case-insensitive). The variable is read once, when this module is imported.
PLUGINS_DISABLED = os.getenv("DISABLE_DATA_DESIGNER_PLUGINS", "false").lower() == "true"
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class PluginRegistry:
|
|
24
|
-
_instance = None
|
|
25
|
-
_plugins_discovered = False
|
|
26
|
-
_lock = threading.Lock()
|
|
27
|
-
|
|
28
|
-
_plugins: dict[str, Plugin] = {}
|
|
29
|
-
|
|
30
|
-
def __init__(self):
|
|
31
|
-
with self._lock:
|
|
32
|
-
if not self._plugins_discovered:
|
|
33
|
-
self._discover()
|
|
34
|
-
|
|
35
|
-
@classmethod
|
|
36
|
-
def reset(cls) -> None:
|
|
37
|
-
with cls._lock:
|
|
38
|
-
cls._instance = None
|
|
39
|
-
cls._plugins_discovered = False
|
|
40
|
-
cls._plugins = {}
|
|
41
|
-
|
|
42
|
-
def add_plugin_types_to_union(self, type_union: type[TypeAlias], plugin_type: PluginType) -> type[TypeAlias]:
|
|
43
|
-
for plugin in self.get_plugins(plugin_type):
|
|
44
|
-
if plugin.config_cls not in type_union.__args__:
|
|
45
|
-
type_union |= plugin.config_cls
|
|
46
|
-
return type_union
|
|
47
|
-
|
|
48
|
-
def get_plugin(self, plugin_name: str) -> Plugin:
|
|
49
|
-
if plugin_name not in self._plugins:
|
|
50
|
-
raise PluginNotFoundError(f"Plugin {plugin_name!r} not found.")
|
|
51
|
-
return self._plugins[plugin_name]
|
|
52
|
-
|
|
53
|
-
def get_plugins(self, plugin_type: PluginType) -> list[Plugin]:
|
|
54
|
-
return [plugin for plugin in self._plugins.values() if plugin.plugin_type == plugin_type]
|
|
55
|
-
|
|
56
|
-
def get_plugin_names(self, plugin_type: PluginType) -> list[str]:
|
|
57
|
-
return [plugin.name for plugin in self.get_plugins(plugin_type)]
|
|
58
|
-
|
|
59
|
-
def num_plugins(self, plugin_type: PluginType) -> int:
|
|
60
|
-
return len(self.get_plugins(plugin_type))
|
|
61
|
-
|
|
62
|
-
def plugin_exists(self, plugin_name: str) -> bool:
|
|
63
|
-
return plugin_name in self._plugins
|
|
64
|
-
|
|
65
|
-
def _discover(self) -> Self:
|
|
66
|
-
if PLUGINS_DISABLED:
|
|
67
|
-
return self
|
|
68
|
-
for ep in entry_points(group="data_designer.plugins"):
|
|
69
|
-
try:
|
|
70
|
-
plugin = ep.load()
|
|
71
|
-
if isinstance(plugin, Plugin):
|
|
72
|
-
logger.info(
|
|
73
|
-
f"🔌 Plugin discovered ➜ {plugin.plugin_type.display_name} "
|
|
74
|
-
f"{plugin.enum_key_name} is now available ⚡️"
|
|
75
|
-
)
|
|
76
|
-
self._plugins[plugin.name] = plugin
|
|
77
|
-
except Exception as e:
|
|
78
|
-
logger.warning(f"🛑 Failed to load plugin from entry point {ep.name!r}: {e}")
|
|
79
|
-
self._plugins_discovered = True
|
|
80
|
-
return self
|
|
81
|
-
|
|
82
|
-
def __new__(cls, *args, **kwargs):
|
|
83
|
-
"""Plugin manager is a singleton."""
|
|
84
|
-
if not cls._instance:
|
|
85
|
-
with cls._lock:
|
|
86
|
-
if not cls._instance:
|
|
87
|
-
cls._instance = super().__new__(cls)
|
|
88
|
-
return cls._instance
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
from data_designer.plugins.testing.utils import assert_valid_plugin
|
|
7
|
-
|
|
8
|
-
# Names exported by a star-import of this module. The entry is read from the
# function object itself rather than written out as a string literal.
__all__ = [
    assert_valid_plugin.__name__,
]
|
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
from typing import Literal
|
|
7
|
-
|
|
8
|
-
from data_designer.config.base import ConfigBase
|
|
9
|
-
from data_designer.config.column_configs import SingleColumnConfig
|
|
10
|
-
from data_designer.engine.column_generators.generators.base import ColumnGeneratorCellByCell
|
|
11
|
-
from data_designer.plugins.plugin import Plugin, PluginType
|
|
12
|
-
|
|
13
|
-
# Import name of this module; used below to build fully-qualified class names.
MODULE_NAME = __name__
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class ValidTestConfig(SingleColumnConfig):
    """Valid config for testing plugin creation."""

    # Discriminator field: Literal-typed, with a string default.
    column_type: Literal["test-generator"] = "test-generator"
    name: str
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
class ValidTestTask(ColumnGeneratorCellByCell[ValidTestConfig]):
    """Valid task for testing plugin creation."""

    def generate(self, data: dict) -> dict:
        """Return ``data`` unchanged."""
        return data
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
# Config that declares no ``column_type`` field.
# NOTE(review): presumably a negative fixture for the missing-discriminator check -- confirm against the tests.
class ConfigWithoutDiscriminator(ConfigBase):
    some_field: str
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
# Config whose ``column_type`` is annotated as plain ``str`` rather than a ``Literal``.
# NOTE(review): presumably a negative fixture for the Literal-annotation check -- confirm against the tests.
class ConfigWithStringField(ConfigBase):
    column_type: str = "test-generator"
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
# Config whose ``column_type`` default is an int although the annotation is a string ``Literal``.
# NOTE(review): presumably a negative fixture for the string-default check -- confirm against the tests.
class ConfigWithNonStringDefault(ConfigBase):
    column_type: Literal["test-generator"] = 123  # type: ignore
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
# Config whose ``column_type`` default contains characters ("!@#") that cannot appear in a Python identifier.
# NOTE(review): presumably a negative fixture for the enum-key check -- confirm against the tests.
class ConfigWithInvalidKey(ConfigBase):
    column_type: Literal["invalid-key-!@#"] = "invalid-key-!@#"
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
# Stub column config with the discriminator value "test-plugin-a".
class StubPluginConfigA(SingleColumnConfig):
    column_type: Literal["test-plugin-a"] = "test-plugin-a"
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
# Stub column config with the discriminator value "test-plugin-b".
class StubPluginConfigB(SingleColumnConfig):
    column_type: Literal["test-plugin-b"] = "test-plugin-b"
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
# Stub generator parameterized with StubPluginConfigA.
class StubPluginTaskA(ColumnGeneratorCellByCell[StubPluginConfigA]):
    def generate(self, data: dict) -> dict:
        """Return ``data`` unchanged."""
        return data
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
# Stub generator parameterized with StubPluginConfigB.
class StubPluginTaskB(ColumnGeneratorCellByCell[StubPluginConfigB]):
    def generate(self, data: dict) -> dict:
        """Return ``data`` unchanged."""
        return data
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
# Stub plugins requiring different combinations of resources
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
# Stub column config with the discriminator value "test-plugin-models".
class StubPluginConfigModels(SingleColumnConfig):
    column_type: Literal["test-plugin-models"] = "test-plugin-models"
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
# Stub column config with the discriminator value "test-plugin-models-and-blobs".
class StubPluginConfigModelsAndBlobs(SingleColumnConfig):
    column_type: Literal["test-plugin-models-and-blobs"] = "test-plugin-models-and-blobs"
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
# Stub column config with the discriminator value "test-plugin-blobs-and-seeds".
class StubPluginConfigBlobsAndSeeds(SingleColumnConfig):
    column_type: Literal["test-plugin-blobs-and-seeds"] = "test-plugin-blobs-and-seeds"
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
# Stub generator parameterized with StubPluginConfigModels.
class StubPluginTaskModels(ColumnGeneratorCellByCell[StubPluginConfigModels]):
    def generate(self, data: dict) -> dict:
        """Return ``data`` unchanged."""
        return data
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
# Stub generator parameterized with StubPluginConfigModelsAndBlobs.
class StubPluginTaskModelsAndBlobs(ColumnGeneratorCellByCell[StubPluginConfigModelsAndBlobs]):
    def generate(self, data: dict) -> dict:
        """Return ``data`` unchanged."""
        return data
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
# Stub generator parameterized with StubPluginConfigBlobsAndSeeds.
class StubPluginTaskBlobsAndSeeds(ColumnGeneratorCellByCell[StubPluginConfigBlobsAndSeeds]):
    def generate(self, data: dict) -> dict:
        """Return ``data`` unchanged."""
        return data
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
# Ready-made column-generator Plugin instances. Each one pairs a stub config class
# with its stub task class from this module, referenced by fully-qualified name.

# StubPluginConfigA / StubPluginTaskA.
plugin_none = Plugin(
    config_qualified_name=f"{MODULE_NAME}.StubPluginConfigA",
    impl_qualified_name=f"{MODULE_NAME}.StubPluginTaskA",
    plugin_type=PluginType.COLUMN_GENERATOR,
)

# StubPluginConfigModels / StubPluginTaskModels.
plugin_models = Plugin(
    config_qualified_name=f"{MODULE_NAME}.StubPluginConfigModels",
    impl_qualified_name=f"{MODULE_NAME}.StubPluginTaskModels",
    plugin_type=PluginType.COLUMN_GENERATOR,
)

# StubPluginConfigModelsAndBlobs / StubPluginTaskModelsAndBlobs.
plugin_models_and_blobs = Plugin(
    config_qualified_name=f"{MODULE_NAME}.StubPluginConfigModelsAndBlobs",
    impl_qualified_name=f"{MODULE_NAME}.StubPluginTaskModelsAndBlobs",
    plugin_type=PluginType.COLUMN_GENERATOR,
)

# StubPluginConfigBlobsAndSeeds / StubPluginTaskBlobsAndSeeds.
plugin_blobs_and_seeds = Plugin(
    config_qualified_name=f"{MODULE_NAME}.StubPluginConfigBlobsAndSeeds",
    impl_qualified_name=f"{MODULE_NAME}.StubPluginTaskBlobsAndSeeds",
    plugin_type=PluginType.COLUMN_GENERATOR,
)
|
|
@@ -1,20 +0,0 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
-
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
4
|
-
from __future__ import annotations
|
|
5
|
-
|
|
6
|
-
from data_designer.config.base import ConfigBase
|
|
7
|
-
from data_designer.engine.configurable_task import ConfigurableTask
|
|
8
|
-
from data_designer.engine.resources.seed_reader import SeedReader
|
|
9
|
-
from data_designer.plugins.plugin import Plugin, PluginType
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
def assert_valid_plugin(plugin: Plugin) -> None:
    """Assert that a plugin's config and implementation classes have the expected bases.

    Args:
        plugin: The plugin to check.

    Raises:
        AssertionError: If the config class is not a ``ConfigBase`` subclass, or the
            implementation class is not a ``ConfigurableTask`` (column generator) or
            ``SeedReader`` (seed reader) subclass.
    """
    config_cls = plugin.config_cls
    assert issubclass(config_cls, ConfigBase), "Plugin config class is not a subclass of ConfigBase"

    # The two branches are mutually exclusive; other plugin types get no impl check.
    kind = plugin.plugin_type
    if kind == PluginType.SEED_READER:
        assert issubclass(plugin.impl_cls, SeedReader), "Seed reader plugin impl class must be a subclass of SeedReader"
    elif kind == PluginType.COLUMN_GENERATOR:
        assert issubclass(plugin.impl_cls, ConfigurableTask), (
            "Column generator plugin impl class must be a subclass of ConfigurableTask"
        )
|