data-designer 0.3.8rc2__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (166) hide show
  1. data_designer/cli/commands/__init__.py +1 -1
  2. data_designer/interface/__init__.py +21 -1
  3. data_designer/{_version.py → interface/_version.py} +2 -2
  4. data_designer/interface/data_designer.py +1 -7
  5. {data_designer-0.3.8rc2.dist-info → data_designer-0.4.0.dist-info}/METADATA +10 -42
  6. data_designer-0.4.0.dist-info/RECORD +39 -0
  7. data_designer/__init__.py +0 -17
  8. data_designer/config/__init__.py +0 -2
  9. data_designer/config/analysis/__init__.py +0 -2
  10. data_designer/config/analysis/column_profilers.py +0 -159
  11. data_designer/config/analysis/column_statistics.py +0 -421
  12. data_designer/config/analysis/dataset_profiler.py +0 -84
  13. data_designer/config/analysis/utils/errors.py +0 -10
  14. data_designer/config/analysis/utils/reporting.py +0 -192
  15. data_designer/config/base.py +0 -69
  16. data_designer/config/column_configs.py +0 -470
  17. data_designer/config/column_types.py +0 -141
  18. data_designer/config/config_builder.py +0 -595
  19. data_designer/config/data_designer_config.py +0 -40
  20. data_designer/config/dataset_builders.py +0 -13
  21. data_designer/config/dataset_metadata.py +0 -18
  22. data_designer/config/default_model_settings.py +0 -129
  23. data_designer/config/errors.py +0 -24
  24. data_designer/config/exports.py +0 -145
  25. data_designer/config/interface.py +0 -55
  26. data_designer/config/models.py +0 -455
  27. data_designer/config/preview_results.py +0 -41
  28. data_designer/config/processors.py +0 -148
  29. data_designer/config/run_config.py +0 -51
  30. data_designer/config/sampler_constraints.py +0 -52
  31. data_designer/config/sampler_params.py +0 -639
  32. data_designer/config/seed.py +0 -116
  33. data_designer/config/seed_source.py +0 -84
  34. data_designer/config/seed_source_types.py +0 -19
  35. data_designer/config/utils/code_lang.py +0 -82
  36. data_designer/config/utils/constants.py +0 -363
  37. data_designer/config/utils/errors.py +0 -21
  38. data_designer/config/utils/info.py +0 -94
  39. data_designer/config/utils/io_helpers.py +0 -258
  40. data_designer/config/utils/misc.py +0 -78
  41. data_designer/config/utils/numerical_helpers.py +0 -30
  42. data_designer/config/utils/type_helpers.py +0 -106
  43. data_designer/config/utils/visualization.py +0 -482
  44. data_designer/config/validator_params.py +0 -94
  45. data_designer/engine/__init__.py +0 -2
  46. data_designer/engine/analysis/column_profilers/base.py +0 -49
  47. data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -153
  48. data_designer/engine/analysis/column_profilers/registry.py +0 -22
  49. data_designer/engine/analysis/column_statistics.py +0 -145
  50. data_designer/engine/analysis/dataset_profiler.py +0 -149
  51. data_designer/engine/analysis/errors.py +0 -9
  52. data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -234
  53. data_designer/engine/analysis/utils/judge_score_processing.py +0 -132
  54. data_designer/engine/column_generators/__init__.py +0 -2
  55. data_designer/engine/column_generators/generators/__init__.py +0 -2
  56. data_designer/engine/column_generators/generators/base.py +0 -122
  57. data_designer/engine/column_generators/generators/embedding.py +0 -35
  58. data_designer/engine/column_generators/generators/expression.py +0 -55
  59. data_designer/engine/column_generators/generators/llm_completion.py +0 -113
  60. data_designer/engine/column_generators/generators/samplers.py +0 -69
  61. data_designer/engine/column_generators/generators/seed_dataset.py +0 -144
  62. data_designer/engine/column_generators/generators/validation.py +0 -140
  63. data_designer/engine/column_generators/registry.py +0 -60
  64. data_designer/engine/column_generators/utils/errors.py +0 -15
  65. data_designer/engine/column_generators/utils/generator_classification.py +0 -43
  66. data_designer/engine/column_generators/utils/judge_score_factory.py +0 -58
  67. data_designer/engine/column_generators/utils/prompt_renderer.py +0 -100
  68. data_designer/engine/compiler.py +0 -97
  69. data_designer/engine/configurable_task.py +0 -71
  70. data_designer/engine/dataset_builders/artifact_storage.py +0 -283
  71. data_designer/engine/dataset_builders/column_wise_builder.py +0 -335
  72. data_designer/engine/dataset_builders/errors.py +0 -15
  73. data_designer/engine/dataset_builders/multi_column_configs.py +0 -46
  74. data_designer/engine/dataset_builders/utils/__init__.py +0 -2
  75. data_designer/engine/dataset_builders/utils/concurrency.py +0 -212
  76. data_designer/engine/dataset_builders/utils/config_compiler.py +0 -62
  77. data_designer/engine/dataset_builders/utils/dag.py +0 -62
  78. data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -200
  79. data_designer/engine/dataset_builders/utils/errors.py +0 -15
  80. data_designer/engine/errors.py +0 -51
  81. data_designer/engine/model_provider.py +0 -77
  82. data_designer/engine/models/__init__.py +0 -2
  83. data_designer/engine/models/errors.py +0 -300
  84. data_designer/engine/models/facade.py +0 -287
  85. data_designer/engine/models/factory.py +0 -42
  86. data_designer/engine/models/litellm_overrides.py +0 -179
  87. data_designer/engine/models/parsers/__init__.py +0 -2
  88. data_designer/engine/models/parsers/errors.py +0 -34
  89. data_designer/engine/models/parsers/parser.py +0 -235
  90. data_designer/engine/models/parsers/postprocessors.py +0 -93
  91. data_designer/engine/models/parsers/tag_parsers.py +0 -62
  92. data_designer/engine/models/parsers/types.py +0 -84
  93. data_designer/engine/models/recipes/base.py +0 -81
  94. data_designer/engine/models/recipes/response_recipes.py +0 -293
  95. data_designer/engine/models/registry.py +0 -146
  96. data_designer/engine/models/telemetry.py +0 -359
  97. data_designer/engine/models/usage.py +0 -73
  98. data_designer/engine/models/utils.py +0 -38
  99. data_designer/engine/processing/ginja/__init__.py +0 -2
  100. data_designer/engine/processing/ginja/ast.py +0 -65
  101. data_designer/engine/processing/ginja/environment.py +0 -463
  102. data_designer/engine/processing/ginja/exceptions.py +0 -56
  103. data_designer/engine/processing/ginja/record.py +0 -32
  104. data_designer/engine/processing/gsonschema/__init__.py +0 -2
  105. data_designer/engine/processing/gsonschema/exceptions.py +0 -15
  106. data_designer/engine/processing/gsonschema/schema_transformers.py +0 -83
  107. data_designer/engine/processing/gsonschema/types.py +0 -10
  108. data_designer/engine/processing/gsonschema/validators.py +0 -202
  109. data_designer/engine/processing/processors/base.py +0 -13
  110. data_designer/engine/processing/processors/drop_columns.py +0 -42
  111. data_designer/engine/processing/processors/registry.py +0 -25
  112. data_designer/engine/processing/processors/schema_transform.py +0 -49
  113. data_designer/engine/processing/utils.py +0 -169
  114. data_designer/engine/registry/base.py +0 -99
  115. data_designer/engine/registry/data_designer_registry.py +0 -39
  116. data_designer/engine/registry/errors.py +0 -12
  117. data_designer/engine/resources/managed_dataset_generator.py +0 -39
  118. data_designer/engine/resources/managed_dataset_repository.py +0 -197
  119. data_designer/engine/resources/managed_storage.py +0 -65
  120. data_designer/engine/resources/resource_provider.py +0 -77
  121. data_designer/engine/resources/seed_reader.py +0 -154
  122. data_designer/engine/sampling_gen/column.py +0 -91
  123. data_designer/engine/sampling_gen/constraints.py +0 -100
  124. data_designer/engine/sampling_gen/data_sources/base.py +0 -217
  125. data_designer/engine/sampling_gen/data_sources/errors.py +0 -12
  126. data_designer/engine/sampling_gen/data_sources/sources.py +0 -347
  127. data_designer/engine/sampling_gen/entities/__init__.py +0 -2
  128. data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
  129. data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -86
  130. data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -171
  131. data_designer/engine/sampling_gen/entities/errors.py +0 -10
  132. data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -102
  133. data_designer/engine/sampling_gen/entities/person.py +0 -144
  134. data_designer/engine/sampling_gen/entities/phone_number.py +0 -128
  135. data_designer/engine/sampling_gen/errors.py +0 -26
  136. data_designer/engine/sampling_gen/generator.py +0 -122
  137. data_designer/engine/sampling_gen/jinja_utils.py +0 -64
  138. data_designer/engine/sampling_gen/people_gen.py +0 -199
  139. data_designer/engine/sampling_gen/person_constants.py +0 -56
  140. data_designer/engine/sampling_gen/schema.py +0 -147
  141. data_designer/engine/sampling_gen/schema_builder.py +0 -61
  142. data_designer/engine/sampling_gen/utils.py +0 -46
  143. data_designer/engine/secret_resolver.py +0 -82
  144. data_designer/engine/validation.py +0 -367
  145. data_designer/engine/validators/__init__.py +0 -19
  146. data_designer/engine/validators/base.py +0 -38
  147. data_designer/engine/validators/local_callable.py +0 -39
  148. data_designer/engine/validators/python.py +0 -254
  149. data_designer/engine/validators/remote.py +0 -89
  150. data_designer/engine/validators/sql.py +0 -65
  151. data_designer/errors.py +0 -7
  152. data_designer/essentials/__init__.py +0 -33
  153. data_designer/lazy_heavy_imports.py +0 -54
  154. data_designer/logging.py +0 -163
  155. data_designer/plugin_manager.py +0 -78
  156. data_designer/plugins/__init__.py +0 -8
  157. data_designer/plugins/errors.py +0 -15
  158. data_designer/plugins/plugin.py +0 -141
  159. data_designer/plugins/registry.py +0 -88
  160. data_designer/plugins/testing/__init__.py +0 -10
  161. data_designer/plugins/testing/stubs.py +0 -116
  162. data_designer/plugins/testing/utils.py +0 -20
  163. data_designer-0.3.8rc2.dist-info/RECORD +0 -196
  164. data_designer-0.3.8rc2.dist-info/licenses/LICENSE +0 -201
  165. {data_designer-0.3.8rc2.dist-info → data_designer-0.4.0.dist-info}/WHEEL +0 -0
  166. {data_designer-0.3.8rc2.dist-info → data_designer-0.4.0.dist-info}/entry_points.txt +0 -0
@@ -1,78 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
-
4
- from __future__ import annotations
5
-
6
- from enum import Enum
7
- from typing import TYPE_CHECKING, TypeAlias
8
-
9
- from data_designer.plugins.plugin import PluginType
10
- from data_designer.plugins.registry import PluginRegistry
11
-
12
- if TYPE_CHECKING:
13
- from data_designer.plugins.plugin import Plugin
14
-
15
-
16
- class PluginManager:
17
- def __init__(self):
18
- self._plugin_registry = PluginRegistry()
19
-
20
- def get_column_generator_plugins(self) -> list[Plugin]:
21
- """Get all column generator plugins.
22
-
23
- Returns:
24
- A list of all column generator plugins.
25
- """
26
- return self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR)
27
-
28
- def get_column_generator_plugin_if_exists(self, plugin_name: str) -> Plugin | None:
29
- """Get a column generator plugin by name if it exists.
30
-
31
- Args:
32
- plugin_name: The name of the plugin to retrieve.
33
-
34
- Returns:
35
- The plugin if found, otherwise None.
36
- """
37
- if self._plugin_registry.plugin_exists(plugin_name):
38
- return self._plugin_registry.get_plugin(plugin_name)
39
-
40
- def get_plugin_column_types(self, enum_type: type[Enum]) -> list[Enum]:
41
- """Get a list of plugin column types.
42
-
43
- Args:
44
- enum_type: The enum type to use for plugin entries.
45
-
46
- Returns:
47
- A list of plugin column types.
48
- """
49
- type_list = []
50
- for plugin in self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR):
51
- type_list.append(enum_type(plugin.name))
52
- return type_list
53
-
54
- def inject_into_column_config_type_union(self, column_config_type: type[TypeAlias]) -> type[TypeAlias]:
55
- """Inject plugins into the column config type.
56
-
57
- Args:
58
- column_config_type: The column config type to inject plugins into.
59
-
60
- Returns:
61
- The column config type with plugins injected.
62
- """
63
- column_config_type = self._plugin_registry.add_plugin_types_to_union(
64
- column_config_type, PluginType.COLUMN_GENERATOR
65
- )
66
- return column_config_type
67
-
68
- def inject_into_seed_source_type_union(self, seed_source_type: type[TypeAlias]) -> type[TypeAlias]:
69
- """Inject plugins into the seed source type.
70
-
71
- Args:
72
- seed_source_type: The seed source type to inject plugins into.
73
-
74
- Returns:
75
- The seed source type with plugins injected.
76
- """
77
- seed_source_type = self._plugin_registry.add_plugin_types_to_union(seed_source_type, PluginType.SEED_READER)
78
- return seed_source_type
@@ -1,8 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
-
4
- from __future__ import annotations
5
-
6
- from data_designer.plugins.plugin import Plugin, PluginType
7
-
8
- __all__ = ["Plugin", "PluginType"]
@@ -1,15 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
-
4
- from __future__ import annotations
5
-
6
- from data_designer.errors import DataDesignerError
7
-
8
-
9
class PluginLoadError(DataDesignerError):
    """Raised when a plugin's module or class cannot be located or loaded."""
10
-
11
-
12
class PluginRegistrationError(DataDesignerError):
    """Error type for plugin registration failures."""
13
-
14
-
15
class PluginNotFoundError(DataDesignerError):
    """Raised when a plugin is requested by a name that is not registered."""
@@ -1,141 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
-
4
- from __future__ import annotations
5
-
6
- import ast
7
- import importlib
8
- import importlib.util
9
- from enum import Enum
10
- from functools import cached_property
11
- from typing import Literal, get_origin
12
-
13
- from pydantic import BaseModel, Field, field_validator, model_validator
14
- from typing_extensions import Self
15
-
16
- from data_designer.config.base import ConfigBase
17
- from data_designer.plugins.errors import PluginLoadError
18
-
19
-
20
class PluginType(str, Enum):
    """The kinds of plugin that can be declared, keyed by their hyphenated string value."""

    COLUMN_GENERATOR = "column-generator"
    SEED_READER = "seed-reader"

    @property
    def discriminator_field(self) -> str:
        """Name of the config field that discriminates configs of this plugin type.

        Raises:
            ValueError: If the member is neither of the known plugin types.
        """
        if self == PluginType.SEED_READER:
            return "seed_type"
        if self == PluginType.COLUMN_GENERATOR:
            return "column_type"
        raise ValueError(f"Invalid plugin type: {self.value}")

    @property
    def display_name(self) -> str:
        """The enum value with hyphens turned into spaces, e.g. ``"seed reader"``."""
        words = self.value.split("-")
        return " ".join(words)
36
-
37
-
38
- def _get_module_and_object_names(fully_qualified_object: str) -> tuple[str, str]:
39
- try:
40
- module_name, object_name = fully_qualified_object.rsplit(".", 1)
41
- except ValueError:
42
- # If fully_qualified_object does not have any periods, the rsplit call will return
43
- # a list of length 1 and the variable assignment above will raise ValueError
44
- raise PluginLoadError("Expected a fully-qualified object name, e.g. 'my_plugin.config.MyConfig'")
45
-
46
- return module_name, object_name
47
-
48
-
49
- def _check_class_exists_in_file(filepath: str, class_name: str) -> None:
50
- try:
51
- with open(filepath, "r") as file:
52
- source = file.read()
53
- except FileNotFoundError:
54
- raise PluginLoadError(f"Could not read source code at {filepath!r}")
55
-
56
- tree = ast.parse(source)
57
- for node in ast.walk(tree):
58
- if isinstance(node, ast.ClassDef) and node.name == class_name:
59
- return None
60
-
61
- raise PluginLoadError(f"Could not find class named {class_name!r} in {filepath!r}")
62
-
63
-
64
class Plugin(BaseModel):
    """Declares a plugin through the dotted paths of its config and implementation classes.

    Both qualified names are validated when the model is created: the module is
    located with ``importlib.util.find_spec`` and its source file is scanned for
    the class definition. The config class must expose the discriminator field
    of the plugin type as a ``Literal`` with a string default; that default is
    the plugin's name.
    """

    impl_qualified_name: str = Field(
        ...,
        description="The fully-qualified name of the implementation class object, e.g. 'my_plugin.generator.MyColumnGenerator'",
    )
    config_qualified_name: str = Field(
        ..., description="The fully-qualified name of the config class object, e.g. 'my_plugin.config.MyConfig'"
    )
    plugin_type: PluginType = Field(..., description="The type of plugin")

    @property
    def config_type_as_class_name(self) -> str:
        """The enum key in CamelCase, e.g. ``TEST_PLUGIN_A`` becomes ``TestPluginA``."""
        return self.enum_key_name.title().replace("_", "")

    @property
    def enum_key_name(self) -> str:
        """The plugin name upper-cased with hyphens replaced by underscores."""
        return self.name.replace("-", "_").upper()

    @property
    def name(self) -> str:
        """The default value of the discriminator field on the config class."""
        return self.config_cls.model_fields[self.discriminator_field].default

    @property
    def discriminator_field(self) -> str:
        """Name of the discriminator field for this plugin's type."""
        return self.plugin_type.discriminator_field

    @field_validator("impl_qualified_name", "config_qualified_name", mode="after")
    @classmethod
    def validate_class_name(cls, value: str) -> str:
        """Check that the dotted path names a class defined in a locatable module.

        Raises:
            PluginLoadError: If the value is not a dotted path, the module cannot
                be found, it has no source origin, or the class is not defined there.
        """
        module_name, object_name = _get_module_and_object_names(value)
        try:
            spec = importlib.util.find_spec(module_name)
        except Exception as err:
            # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
            # must keep propagating instead of being reported as a missing module.
            raise PluginLoadError(f"Could not find module {module_name!r}") from err

        if spec is None or spec.origin is None:
            raise PluginLoadError(f"Error finding source for module {module_name!r}")

        _check_class_exists_in_file(spec.origin, object_name)

        return value

    @model_validator(mode="after")
    def validate_discriminator_field(self) -> Self:
        """Ensure the config class has a usable discriminator field.

        The field must exist, be annotated as a ``Literal``, and have a string
        default that converts to a valid Python identifier once hyphens are
        replaced by underscores and the result is upper-cased.

        Raises:
            ValueError: If any of those conditions is not met.
        """
        _, cfg = _get_module_and_object_names(self.config_qualified_name)
        field = self.plugin_type.discriminator_field
        if field not in self.config_cls.model_fields:
            raise ValueError(f"Discriminator field {field!r} not found in config class {cfg!r}")
        field_info = self.config_cls.model_fields[field]
        if get_origin(field_info.annotation) is not Literal:
            raise ValueError(f"Field {field!r} of {cfg!r} must be a Literal type, not {field_info.annotation!r}.")
        if not isinstance(field_info.default, str):
            raise ValueError(f"The default of {field!r} must be a string, not {type(field_info.default)!r}.")
        enum_key = field_info.default.replace("-", "_").upper()
        if not enum_key.isidentifier():
            raise ValueError(
                f"The default value {field_info.default!r} for discriminator field {field!r} "
                f"cannot be converted to a valid enum key. The converted key {enum_key!r} "
                f"must be a valid Python identifier."
            )
        return self

    @cached_property
    def config_cls(self) -> type[ConfigBase]:
        """The config class, imported on first access and cached afterwards."""
        return self._load(self.config_qualified_name)

    @cached_property
    def impl_cls(self) -> type:
        """The implementation class, imported on first access and cached afterwards."""
        return self._load(self.impl_qualified_name)

    @staticmethod
    def _load(fully_qualified_object: str) -> type:
        """Import the module part of a dotted path and return the named attribute.

        Raises:
            PluginLoadError: If the path has no period or the module lacks the attribute.
        """
        module_name, object_name = _get_module_and_object_names(fully_qualified_object)
        module = importlib.import_module(module_name)
        try:
            return getattr(module, object_name)
        except AttributeError as err:
            raise PluginLoadError(f"Could not find class {object_name!r} in module {module_name!r}") from err
@@ -1,88 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
-
4
- from __future__ import annotations
5
-
6
- import logging
7
- import os
8
- import threading
9
- from importlib.metadata import entry_points
10
- from typing import TypeAlias
11
-
12
- from typing_extensions import Self
13
-
14
- from data_designer.plugins.errors import PluginNotFoundError
15
- from data_designer.plugins.plugin import Plugin, PluginType
16
-
17
logger = logging.getLogger(__name__)


# True only when the DISABLE_DATA_DESIGNER_PLUGINS environment variable is set to
# "true" (compared case-insensitively); unset or any other value leaves plugins enabled.
PLUGINS_DISABLED = os.environ.get("DISABLE_DATA_DESIGNER_PLUGINS", "false").lower() == "true"
21
-
22
-
23
- class PluginRegistry:
24
- _instance = None
25
- _plugins_discovered = False
26
- _lock = threading.Lock()
27
-
28
- _plugins: dict[str, Plugin] = {}
29
-
30
- def __init__(self):
31
- with self._lock:
32
- if not self._plugins_discovered:
33
- self._discover()
34
-
35
- @classmethod
36
- def reset(cls) -> None:
37
- with cls._lock:
38
- cls._instance = None
39
- cls._plugins_discovered = False
40
- cls._plugins = {}
41
-
42
- def add_plugin_types_to_union(self, type_union: type[TypeAlias], plugin_type: PluginType) -> type[TypeAlias]:
43
- for plugin in self.get_plugins(plugin_type):
44
- if plugin.config_cls not in type_union.__args__:
45
- type_union |= plugin.config_cls
46
- return type_union
47
-
48
- def get_plugin(self, plugin_name: str) -> Plugin:
49
- if plugin_name not in self._plugins:
50
- raise PluginNotFoundError(f"Plugin {plugin_name!r} not found.")
51
- return self._plugins[plugin_name]
52
-
53
- def get_plugins(self, plugin_type: PluginType) -> list[Plugin]:
54
- return [plugin for plugin in self._plugins.values() if plugin.plugin_type == plugin_type]
55
-
56
- def get_plugin_names(self, plugin_type: PluginType) -> list[str]:
57
- return [plugin.name for plugin in self.get_plugins(plugin_type)]
58
-
59
- def num_plugins(self, plugin_type: PluginType) -> int:
60
- return len(self.get_plugins(plugin_type))
61
-
62
- def plugin_exists(self, plugin_name: str) -> bool:
63
- return plugin_name in self._plugins
64
-
65
- def _discover(self) -> Self:
66
- if PLUGINS_DISABLED:
67
- return self
68
- for ep in entry_points(group="data_designer.plugins"):
69
- try:
70
- plugin = ep.load()
71
- if isinstance(plugin, Plugin):
72
- logger.info(
73
- f"🔌 Plugin discovered ➜ {plugin.plugin_type.display_name} "
74
- f"{plugin.enum_key_name} is now available ⚡️"
75
- )
76
- self._plugins[plugin.name] = plugin
77
- except Exception as e:
78
- logger.warning(f"🛑 Failed to load plugin from entry point {ep.name!r}: {e}")
79
- self._plugins_discovered = True
80
- return self
81
-
82
- def __new__(cls, *args, **kwargs):
83
- """Plugin manager is a singleton."""
84
- if not cls._instance:
85
- with cls._lock:
86
- if not cls._instance:
87
- cls._instance = super().__new__(cls)
88
- return cls._instance
@@ -1,10 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
-
4
- from __future__ import annotations
5
-
6
- from data_designer.plugins.testing.utils import assert_valid_plugin
7
-
8
# Public API of the testing helpers package.
__all__ = ["assert_valid_plugin"]
@@ -1,116 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
-
4
- from __future__ import annotations
5
-
6
- from typing import Literal
7
-
8
- from data_designer.config.base import ConfigBase
9
- from data_designer.config.column_configs import SingleColumnConfig
10
- from data_designer.engine.column_generators.generators.base import ColumnGeneratorCellByCell
11
- from data_designer.plugins.plugin import Plugin, PluginType
12
-
13
- MODULE_NAME = __name__
14
-
15
-
16
class ValidTestConfig(SingleColumnConfig):
    """Well-formed column config (column type ``"test-generator"``) for testing plugin creation."""

    column_type: Literal["test-generator"] = "test-generator"
    name: str
21
-
22
-
23
class ValidTestTask(ColumnGeneratorCellByCell[ValidTestConfig]):
    """Well-formed generator task for testing plugin creation."""

    def generate(self, data: dict) -> dict:
        """Return the given record unchanged."""
        return data
28
-
29
-
30
class ConfigWithoutDiscriminator(ConfigBase):
    """Config that declares only ``some_field`` and no discriminator field of its own."""

    some_field: str
32
-
33
-
34
class ConfigWithStringField(ConfigBase):
    """Config whose ``column_type`` is annotated as plain ``str`` rather than a ``Literal``."""

    column_type: str = "test-generator"
36
-
37
-
38
class ConfigWithNonStringDefault(ConfigBase):
    """Config whose ``Literal`` discriminator carries an integer default instead of a string."""

    column_type: Literal["test-generator"] = 123  # type: ignore
40
-
41
-
42
class ConfigWithInvalidKey(ConfigBase):
    """Config whose discriminator default contains characters that cannot form a Python identifier."""

    column_type: Literal["invalid-key-!@#"] = "invalid-key-!@#"
44
-
45
-
46
class StubPluginConfigA(SingleColumnConfig):
    """Stub column config with column type ``"test-plugin-a"``."""

    column_type: Literal["test-plugin-a"] = "test-plugin-a"
48
-
49
-
50
class StubPluginConfigB(SingleColumnConfig):
    """Stub column config with column type ``"test-plugin-b"``."""

    column_type: Literal["test-plugin-b"] = "test-plugin-b"
52
-
53
-
54
class StubPluginTaskA(ColumnGeneratorCellByCell[StubPluginConfigA]):
    """Stub generator task paired with ``StubPluginConfigA``."""

    def generate(self, data: dict) -> dict:
        """Return the given record unchanged."""
        return data
57
-
58
-
59
class StubPluginTaskB(ColumnGeneratorCellByCell[StubPluginConfigB]):
    """Stub generator task paired with ``StubPluginConfigB``."""

    def generate(self, data: dict) -> dict:
        """Return the given record unchanged."""
        return data
62
-
63
-
64
- # Stub plugins requiring different combinations of resources
65
-
66
-
67
class StubPluginConfigModels(SingleColumnConfig):
    """Stub column config with column type ``"test-plugin-models"``."""

    column_type: Literal["test-plugin-models"] = "test-plugin-models"
69
-
70
-
71
class StubPluginConfigModelsAndBlobs(SingleColumnConfig):
    """Stub column config with column type ``"test-plugin-models-and-blobs"``."""

    column_type: Literal["test-plugin-models-and-blobs"] = "test-plugin-models-and-blobs"
73
-
74
-
75
class StubPluginConfigBlobsAndSeeds(SingleColumnConfig):
    """Stub column config with column type ``"test-plugin-blobs-and-seeds"``."""

    column_type: Literal["test-plugin-blobs-and-seeds"] = "test-plugin-blobs-and-seeds"
77
-
78
-
79
class StubPluginTaskModels(ColumnGeneratorCellByCell[StubPluginConfigModels]):
    """Stub generator task paired with ``StubPluginConfigModels``."""

    def generate(self, data: dict) -> dict:
        """Return the given record unchanged."""
        return data
82
-
83
-
84
class StubPluginTaskModelsAndBlobs(ColumnGeneratorCellByCell[StubPluginConfigModelsAndBlobs]):
    """Stub generator task paired with ``StubPluginConfigModelsAndBlobs``."""

    def generate(self, data: dict) -> dict:
        """Return the given record unchanged."""
        return data
87
-
88
-
89
class StubPluginTaskBlobsAndSeeds(ColumnGeneratorCellByCell[StubPluginConfigBlobsAndSeeds]):
    """Stub generator task paired with ``StubPluginConfigBlobsAndSeeds``."""

    def generate(self, data: dict) -> dict:
        """Return the given record unchanged."""
        return data
92
-
93
-
94
def _column_generator_plugin(config: str, impl: str) -> Plugin:
    """Build a column generator ``Plugin`` from two fully-qualified class names."""
    return Plugin(
        config_qualified_name=config,
        impl_qualified_name=impl,
        plugin_type=PluginType.COLUMN_GENERATOR,
    )


# Ready-made column generator plugins built from the stub classes defined in this module.
plugin_none = _column_generator_plugin(
    config=f"{MODULE_NAME}.StubPluginConfigA",
    impl=f"{MODULE_NAME}.StubPluginTaskA",
)

plugin_models = _column_generator_plugin(
    config=f"{MODULE_NAME}.StubPluginConfigModels",
    impl=f"{MODULE_NAME}.StubPluginTaskModels",
)

plugin_models_and_blobs = _column_generator_plugin(
    config=f"{MODULE_NAME}.StubPluginConfigModelsAndBlobs",
    impl=f"{MODULE_NAME}.StubPluginTaskModelsAndBlobs",
)

plugin_blobs_and_seeds = _column_generator_plugin(
    config=f"{MODULE_NAME}.StubPluginConfigBlobsAndSeeds",
    impl=f"{MODULE_NAME}.StubPluginTaskBlobsAndSeeds",
)
@@ -1,20 +0,0 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
- # SPDX-License-Identifier: Apache-2.0
3
-
4
- from __future__ import annotations
5
-
6
- from data_designer.config.base import ConfigBase
7
- from data_designer.engine.configurable_task import ConfigurableTask
8
- from data_designer.engine.resources.seed_reader import SeedReader
9
- from data_designer.plugins.plugin import Plugin, PluginType
10
-
11
-
12
def assert_valid_plugin(plugin: Plugin) -> None:
    """Assert that a plugin's config and implementation classes have the expected base classes.

    The config class must subclass ``ConfigBase``. The implementation class must
    subclass ``ConfigurableTask`` for column generator plugins and ``SeedReader``
    for seed reader plugins.

    Raises:
        AssertionError: If any of those subclass relationships does not hold.
    """
    config_is_valid = issubclass(plugin.config_cls, ConfigBase)
    assert config_is_valid, "Plugin config class is not a subclass of ConfigBase"

    kind = plugin.plugin_type
    if kind == PluginType.SEED_READER:
        assert issubclass(plugin.impl_cls, SeedReader), "Seed reader plugin impl class must be a subclass of SeedReader"
        return
    if kind == PluginType.COLUMN_GENERATOR:
        assert issubclass(plugin.impl_cls, ConfigurableTask), (
            "Column generator plugin impl class must be a subclass of ConfigurableTask"
        )