data-designer 0.3.3__py3-none-any.whl → 0.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/_version.py +2 -2
- data_designer/config/base.py +1 -0
- data_designer/config/dataset_metadata.py +18 -0
- data_designer/config/preview_results.py +4 -0
- data_designer/config/run_config.py +17 -5
- data_designer/config/seed_source.py +2 -1
- data_designer/config/utils/visualization.py +13 -9
- data_designer/engine/column_generators/generators/llm_completion.py +2 -10
- data_designer/engine/column_generators/generators/validation.py +1 -0
- data_designer/engine/dataset_builders/column_wise_builder.py +2 -1
- data_designer/engine/dataset_builders/utils/concurrency.py +13 -4
- data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +28 -21
- data_designer/engine/resources/resource_provider.py +12 -0
- data_designer/interface/data_designer.py +14 -23
- data_designer/interface/results.py +4 -0
- {data_designer-0.3.3.dist-info → data_designer-0.3.4.dist-info}/METADATA +9 -1
- {data_designer-0.3.3.dist-info → data_designer-0.3.4.dist-info}/RECORD +20 -19
- {data_designer-0.3.3.dist-info → data_designer-0.3.4.dist-info}/WHEEL +0 -0
- {data_designer-0.3.3.dist-info → data_designer-0.3.4.dist-info}/entry_points.txt +0 -0
- {data_designer-0.3.3.dist-info → data_designer-0.3.4.dist-info}/licenses/LICENSE +0 -0
data_designer/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.3.3'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 3, 3)
|
|
31
|
+
__version__ = version = '0.3.4'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 4)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
data_designer/config/base.py
CHANGED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
class DatasetMetadata(BaseModel):
|
|
8
|
+
"""Metadata about a generated dataset.
|
|
9
|
+
|
|
10
|
+
This object is created by the engine and passed to results objects for use
|
|
11
|
+
in visualization and other client-side utilities. It is designed to be
|
|
12
|
+
serializable so it can be sent over the wire in a client-server architecture.
|
|
13
|
+
|
|
14
|
+
Attributes:
|
|
15
|
+
seed_column_names: Names of columns from the seed dataset. Empty list if no seed dataset.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
seed_column_names: list[str] = []
|
|
@@ -7,6 +7,7 @@ import pandas as pd
|
|
|
7
7
|
|
|
8
8
|
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
|
|
9
9
|
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
10
|
+
from data_designer.config.dataset_metadata import DatasetMetadata
|
|
10
11
|
from data_designer.config.utils.visualization import WithRecordSamplerMixin
|
|
11
12
|
|
|
12
13
|
|
|
@@ -15,6 +16,7 @@ class PreviewResults(WithRecordSamplerMixin):
|
|
|
15
16
|
self,
|
|
16
17
|
*,
|
|
17
18
|
config_builder: DataDesignerConfigBuilder,
|
|
19
|
+
dataset_metadata: DatasetMetadata,
|
|
18
20
|
dataset: pd.DataFrame | None = None,
|
|
19
21
|
analysis: DatasetProfilerResults | None = None,
|
|
20
22
|
processor_artifacts: dict[str, list[str] | str] | None = None,
|
|
@@ -23,6 +25,7 @@ class PreviewResults(WithRecordSamplerMixin):
|
|
|
23
25
|
|
|
24
26
|
Args:
|
|
25
27
|
config_builder: Data Designer configuration builder.
|
|
28
|
+
dataset_metadata: Metadata about the generated dataset (e.g., seed column names).
|
|
26
29
|
dataset: Dataset of the preview run.
|
|
27
30
|
analysis: Analysis of the preview run.
|
|
28
31
|
processor_artifacts: Artifacts generated by the processors.
|
|
@@ -30,4 +33,5 @@ class PreviewResults(WithRecordSamplerMixin):
|
|
|
30
33
|
self.dataset: pd.DataFrame | None = dataset
|
|
31
34
|
self.analysis: DatasetProfilerResults | None = analysis
|
|
32
35
|
self.processor_artifacts: dict[str, list[str] | str] | None = processor_artifacts
|
|
36
|
+
self.dataset_metadata = dataset_metadata
|
|
33
37
|
self._config_builder = config_builder
|
|
@@ -14,21 +14,33 @@ class RunConfig(ConfigBase):
|
|
|
14
14
|
part of the dataset configuration itself.
|
|
15
15
|
|
|
16
16
|
Attributes:
|
|
17
|
-
disable_early_shutdown: If True, disables early
|
|
18
|
-
will continue regardless of error rate
|
|
19
|
-
|
|
20
|
-
|
|
17
|
+
disable_early_shutdown: If True, disables the executor's early-shutdown behavior entirely.
|
|
18
|
+
Generation will continue regardless of error rate, and the early-shutdown exception
|
|
19
|
+
will never be raised. Error counts and summaries are still collected. Default is False.
|
|
20
|
+
shutdown_error_rate: Error rate threshold (0.0-1.0) that triggers early shutdown when
|
|
21
|
+
early shutdown is enabled. Default is 0.5.
|
|
21
22
|
shutdown_error_window: Minimum number of completed tasks before error rate
|
|
22
23
|
monitoring begins. Must be >= 0. Default is 10.
|
|
24
|
+
buffer_size: Number of records to process in each batch during dataset generation.
|
|
25
|
+
A batch is processed end-to-end (column generation, post-batch processors, and writing the batch
|
|
26
|
+
to artifact storage) before moving on to the next batch. Must be > 0. Default is 1000.
|
|
27
|
+
max_conversation_restarts: Maximum number of full conversation restarts permitted when
|
|
28
|
+
generation tasks call `ModelFacade.generate(...)`. Must be >= 0. Default is 5.
|
|
29
|
+
max_conversation_correction_steps: Maximum number of correction rounds permitted within a
|
|
30
|
+
single conversation when generation tasks call `ModelFacade.generate(...)`. Must be >= 0.
|
|
31
|
+
Default is 0.
|
|
23
32
|
"""
|
|
24
33
|
|
|
25
34
|
disable_early_shutdown: bool = False
|
|
26
35
|
shutdown_error_rate: float = Field(default=0.5, ge=0.0, le=1.0)
|
|
27
36
|
shutdown_error_window: int = Field(default=10, ge=0)
|
|
37
|
+
buffer_size: int = Field(default=1000, gt=0)
|
|
38
|
+
max_conversation_restarts: int = Field(default=5, ge=0)
|
|
39
|
+
max_conversation_correction_steps: int = Field(default=0, ge=0)
|
|
28
40
|
|
|
29
41
|
@model_validator(mode="after")
|
|
30
42
|
def normalize_shutdown_settings(self) -> Self:
|
|
31
|
-
"""
|
|
43
|
+
"""Normalize shutdown settings for compatibility."""
|
|
32
44
|
if self.disable_early_shutdown:
|
|
33
45
|
self.shutdown_error_rate = 1.0
|
|
34
46
|
return self
|
|
@@ -6,6 +6,7 @@ from typing import Literal
|
|
|
6
6
|
|
|
7
7
|
import pandas as pd
|
|
8
8
|
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
9
|
+
from pydantic.json_schema import SkipJsonSchema
|
|
9
10
|
from typing_extensions import Self
|
|
10
11
|
|
|
11
12
|
from data_designer.config.utils.io_helpers import (
|
|
@@ -68,7 +69,7 @@ class DataFrameSeedSource(SeedSource):
|
|
|
68
69
|
|
|
69
70
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
|
70
71
|
|
|
71
|
-
df: pd.DataFrame = Field(
|
|
72
|
+
df: SkipJsonSchema[pd.DataFrame] = Field(
|
|
72
73
|
...,
|
|
73
74
|
exclude=True,
|
|
74
75
|
description=(
|
|
@@ -31,6 +31,7 @@ from data_designer.config.utils.errors import DatasetSampleDisplayError
|
|
|
31
31
|
|
|
32
32
|
if TYPE_CHECKING:
|
|
33
33
|
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
34
|
+
from data_designer.config.dataset_metadata import DatasetMetadata
|
|
34
35
|
|
|
35
36
|
|
|
36
37
|
console = Console()
|
|
@@ -57,6 +58,7 @@ class ColorPalette(str, Enum):
|
|
|
57
58
|
|
|
58
59
|
class WithRecordSamplerMixin:
|
|
59
60
|
_display_cycle_index: int = 0
|
|
61
|
+
dataset_metadata: DatasetMetadata
|
|
60
62
|
|
|
61
63
|
@cached_property
|
|
62
64
|
def _record_sampler_dataset(self) -> pd.DataFrame:
|
|
@@ -79,22 +81,22 @@ class WithRecordSamplerMixin:
|
|
|
79
81
|
self,
|
|
80
82
|
index: int | None = None,
|
|
81
83
|
*,
|
|
82
|
-
hide_seed_columns: bool = False,
|
|
83
84
|
syntax_highlighting_theme: str = "dracula",
|
|
84
85
|
background_color: str | None = None,
|
|
85
86
|
processors_to_display: list[str] | None = None,
|
|
87
|
+
hide_seed_columns: bool = False,
|
|
86
88
|
) -> None:
|
|
87
89
|
"""Display a sample record from the Data Designer dataset preview.
|
|
88
90
|
|
|
89
91
|
Args:
|
|
90
92
|
index: Index of the record to display. If None, the next record will be displayed.
|
|
91
93
|
This is useful for running the cell in a notebook multiple times.
|
|
92
|
-
hide_seed_columns: If True, the columns from the seed dataset (if any) will not be displayed.
|
|
93
94
|
syntax_highlighting_theme: Theme to use for syntax highlighting. See the `Syntax`
|
|
94
95
|
documentation from `rich` for information about available themes.
|
|
95
96
|
background_color: Background color to use for the record. See the `Syntax`
|
|
96
97
|
documentation from `rich` for information about available background colors.
|
|
97
98
|
processors_to_display: List of processors to display the artifacts for. If None, all processors will be displayed.
|
|
99
|
+
hide_seed_columns: If True, seed columns will not be displayed separately.
|
|
98
100
|
"""
|
|
99
101
|
i = index or self._display_cycle_index
|
|
100
102
|
|
|
@@ -120,14 +122,16 @@ class WithRecordSamplerMixin:
|
|
|
120
122
|
else:
|
|
121
123
|
processor_data_to_display[processor] = self.processor_artifacts[processor]
|
|
122
124
|
|
|
125
|
+
seed_column_names = None if hide_seed_columns else self.dataset_metadata.seed_column_names
|
|
126
|
+
|
|
123
127
|
display_sample_record(
|
|
124
128
|
record=record,
|
|
125
129
|
processor_data_to_display=processor_data_to_display,
|
|
126
130
|
config_builder=self._config_builder,
|
|
127
131
|
background_color=background_color,
|
|
128
132
|
syntax_highlighting_theme=syntax_highlighting_theme,
|
|
129
|
-
hide_seed_columns=hide_seed_columns,
|
|
130
133
|
record_index=i,
|
|
134
|
+
seed_column_names=seed_column_names,
|
|
131
135
|
)
|
|
132
136
|
if index is None:
|
|
133
137
|
self._display_cycle_index = (self._display_cycle_index + 1) % num_records
|
|
@@ -160,7 +164,7 @@ def display_sample_record(
|
|
|
160
164
|
background_color: str | None = None,
|
|
161
165
|
syntax_highlighting_theme: str = "dracula",
|
|
162
166
|
record_index: int | None = None,
|
|
163
|
-
|
|
167
|
+
seed_column_names: list[str] | None = None,
|
|
164
168
|
):
|
|
165
169
|
if isinstance(record, (dict, pd.Series)):
|
|
166
170
|
record = pd.DataFrame([record]).iloc[0]
|
|
@@ -179,14 +183,14 @@ def display_sample_record(
|
|
|
179
183
|
render_list = []
|
|
180
184
|
table_kws = dict(show_lines=True, expand=True)
|
|
181
185
|
|
|
182
|
-
|
|
183
|
-
if
|
|
186
|
+
# Display seed columns if seed_column_names is provided and not empty
|
|
187
|
+
if seed_column_names:
|
|
184
188
|
table = Table(title="Seed Columns", **table_kws)
|
|
185
189
|
table.add_column("Name")
|
|
186
190
|
table.add_column("Value")
|
|
187
|
-
for
|
|
188
|
-
if
|
|
189
|
-
table.add_row(
|
|
191
|
+
for col_name in seed_column_names:
|
|
192
|
+
if col_name in record.index:
|
|
193
|
+
table.add_row(col_name, convert_to_row_element(record[col_name]))
|
|
190
194
|
render_list.append(pad_console_element(table))
|
|
191
195
|
|
|
192
196
|
non_code_columns = (
|
|
@@ -28,10 +28,6 @@ from data_designer.engine.processing.utils import deserialize_json_values
|
|
|
28
28
|
logger = logging.getLogger(__name__)
|
|
29
29
|
|
|
30
30
|
|
|
31
|
-
DEFAULT_MAX_CONVERSATION_RESTARTS = 5
|
|
32
|
-
DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS = 0
|
|
33
|
-
|
|
34
|
-
|
|
35
31
|
class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfigT]):
|
|
36
32
|
@functools.cached_property
|
|
37
33
|
def response_recipe(self) -> ResponseRecipe:
|
|
@@ -39,11 +35,11 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
|
|
|
39
35
|
|
|
40
36
|
@property
|
|
41
37
|
def max_conversation_correction_steps(self) -> int:
|
|
42
|
-
return DEFAULT_MAX_CONVERSATION_CORRECTION_STEPS
|
|
38
|
+
return self.resource_provider.run_config.max_conversation_correction_steps
|
|
43
39
|
|
|
44
40
|
@property
|
|
45
41
|
def max_conversation_restarts(self) -> int:
|
|
46
|
-
return DEFAULT_MAX_CONVERSATION_RESTARTS
|
|
42
|
+
return self.resource_provider.run_config.max_conversation_restarts
|
|
47
43
|
|
|
48
44
|
@functools.cached_property
|
|
49
45
|
def prompt_renderer(self) -> RecordBasedPromptRenderer:
|
|
@@ -129,7 +125,3 @@ class LLMJudgeCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMJudgeColum
|
|
|
129
125
|
description="Judge a new dataset cell based on a set of rubrics",
|
|
130
126
|
generation_strategy=GenerationStrategy.CELL_BY_CELL,
|
|
131
127
|
)
|
|
132
|
-
|
|
133
|
-
@property
|
|
134
|
-
def max_conversation_restarts(self) -> int:
|
|
135
|
-
return 2 * DEFAULT_MAX_CONVERSATION_RESTARTS
|
|
@@ -132,6 +132,7 @@ class ValidationColumnGenerator(ColumnGenerator[ValidationColumnConfig]):
|
|
|
132
132
|
error_callback=error_callback,
|
|
133
133
|
shutdown_error_rate=settings.shutdown_error_rate,
|
|
134
134
|
shutdown_error_window=settings.shutdown_error_window,
|
|
135
|
+
disable_early_shutdown=settings.disable_early_shutdown,
|
|
135
136
|
) as executor:
|
|
136
137
|
for i, batch in enumerate(batched_records):
|
|
137
138
|
executor.submit(lambda batch: self._validate_batch(validator, batch), batch, context={"index": i})
|
|
@@ -94,7 +94,6 @@ class ColumnWiseDatasetBuilder:
|
|
|
94
94
|
self,
|
|
95
95
|
*,
|
|
96
96
|
num_records: int,
|
|
97
|
-
buffer_size: int,
|
|
98
97
|
on_batch_complete: Callable[[Path], None] | None = None,
|
|
99
98
|
) -> Path:
|
|
100
99
|
self._write_configs()
|
|
@@ -104,6 +103,7 @@ class ColumnWiseDatasetBuilder:
|
|
|
104
103
|
start_time = time.perf_counter()
|
|
105
104
|
group_id = uuid.uuid4().hex
|
|
106
105
|
|
|
106
|
+
buffer_size = self._resource_provider.run_config.buffer_size
|
|
107
107
|
self.batch_manager.start(num_records=num_records, buffer_size=buffer_size)
|
|
108
108
|
for batch_idx in range(self.batch_manager.num_batches):
|
|
109
109
|
logger.info(f"⏳ Processing batch {batch_idx + 1} of {self.batch_manager.num_batches}")
|
|
@@ -228,6 +228,7 @@ class ColumnWiseDatasetBuilder:
|
|
|
228
228
|
error_callback=self._worker_error_callback,
|
|
229
229
|
shutdown_error_rate=settings.shutdown_error_rate,
|
|
230
230
|
shutdown_error_window=settings.shutdown_error_window,
|
|
231
|
+
disable_early_shutdown=settings.disable_early_shutdown,
|
|
231
232
|
) as executor:
|
|
232
233
|
for i, record in self.batch_manager.iter_current_batch():
|
|
233
234
|
executor.submit(lambda record: generator.generate(record), record, context={"index": i})
|
|
@@ -96,6 +96,7 @@ class ConcurrentThreadExecutor:
|
|
|
96
96
|
error_callback: ErrorCallbackWithContext | None = None,
|
|
97
97
|
shutdown_error_rate: float = 0.50,
|
|
98
98
|
shutdown_error_window: int = 10,
|
|
99
|
+
disable_early_shutdown: bool = False,
|
|
99
100
|
):
|
|
100
101
|
self._executor = None
|
|
101
102
|
self._column_name = column_name
|
|
@@ -106,6 +107,7 @@ class ConcurrentThreadExecutor:
|
|
|
106
107
|
self._error_callback = error_callback
|
|
107
108
|
self._shutdown_error_rate = shutdown_error_rate
|
|
108
109
|
self._shutdown_window_size = shutdown_error_window
|
|
110
|
+
self._disable_early_shutdown = disable_early_shutdown
|
|
109
111
|
self._results = ExecutorResults(failure_threshold=shutdown_error_rate)
|
|
110
112
|
|
|
111
113
|
@property
|
|
@@ -139,7 +141,7 @@ class ConcurrentThreadExecutor:
|
|
|
139
141
|
|
|
140
142
|
def __exit__(self, exc_type, exc_value, traceback):
|
|
141
143
|
self._shutdown_executor()
|
|
142
|
-
if self._results.early_shutdown is True:
|
|
144
|
+
if not self._disable_early_shutdown and self._results.early_shutdown is True:
|
|
143
145
|
self._raise_task_error()
|
|
144
146
|
|
|
145
147
|
def _shutdown_executor(self) -> None:
|
|
@@ -160,7 +162,7 @@ class ConcurrentThreadExecutor:
|
|
|
160
162
|
if self._executor is None:
|
|
161
163
|
raise RuntimeError("Executor is not initialized, this class should be used as a context manager.")
|
|
162
164
|
|
|
163
|
-
if self._results.early_shutdown:
|
|
165
|
+
if not self._disable_early_shutdown and self._results.early_shutdown:
|
|
164
166
|
self._shutdown_executor()
|
|
165
167
|
self._raise_task_error()
|
|
166
168
|
|
|
@@ -176,7 +178,9 @@ class ConcurrentThreadExecutor:
|
|
|
176
178
|
with self._lock:
|
|
177
179
|
self._results.completed_count += 1
|
|
178
180
|
self._results.error_trap.handle_error(err)
|
|
179
|
-
if self._results.is_error_rate_exceeded(
|
|
181
|
+
if not self._disable_early_shutdown and self._results.is_error_rate_exceeded(
|
|
182
|
+
self._shutdown_window_size
|
|
183
|
+
):
|
|
180
184
|
# Signal to shutdown early on the next submission (if received).
|
|
181
185
|
# We cannot trigger shutdown from within this thread as it can
|
|
182
186
|
# cause a deadlock.
|
|
@@ -196,7 +200,12 @@ class ConcurrentThreadExecutor:
|
|
|
196
200
|
# We'll re-raise a custom error that can be handled at the call-site and the summary
|
|
197
201
|
# can also be inspected.
|
|
198
202
|
self._semaphore.release()
|
|
199
|
-
|
|
203
|
+
is_shutdown_error = isinstance(err, RuntimeError) and (
|
|
204
|
+
"after shutdown" in str(err) or "Pool shutdown" in str(err)
|
|
205
|
+
)
|
|
206
|
+
if not is_shutdown_error:
|
|
207
|
+
raise err
|
|
208
|
+
if self._disable_early_shutdown:
|
|
200
209
|
raise err
|
|
201
210
|
self._raise_task_error()
|
|
202
211
|
|
|
@@ -69,7 +69,7 @@ class DatasetBatchManager:
|
|
|
69
69
|
def drop_records(self, index: Container[int]) -> None:
|
|
70
70
|
self._buffer = [record for i, record in enumerate(self._buffer) if i not in index]
|
|
71
71
|
|
|
72
|
-
def finish_batch(self, on_complete: Callable[[Path], None] | None = None) -> Path:
|
|
72
|
+
def finish_batch(self, on_complete: Callable[[Path], None] | None = None) -> Path | None:
|
|
73
73
|
"""Finish the batch by moving the results from the partial results path to the final parquet folder.
|
|
74
74
|
|
|
75
75
|
Returns:
|
|
@@ -78,29 +78,36 @@ class DatasetBatchManager:
|
|
|
78
78
|
if self._current_batch_number >= self.num_batches:
|
|
79
79
|
raise DatasetBatchManagementError("🛑 All batches have been processed.")
|
|
80
80
|
|
|
81
|
-
if
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
81
|
+
if self.write() is not None:
|
|
82
|
+
final_file_path = self.artifact_storage.move_partial_result_to_final_file_path(self._current_batch_number)
|
|
83
|
+
|
|
84
|
+
self.artifact_storage.write_metadata(
|
|
85
|
+
{
|
|
86
|
+
"target_num_records": sum(self.num_records_list),
|
|
87
|
+
"total_num_batches": self.num_batches,
|
|
88
|
+
"buffer_size": self._buffer_size,
|
|
89
|
+
"schema": {field.name: str(field.type) for field in pq.read_schema(final_file_path)},
|
|
90
|
+
"file_paths": [str(f) for f in sorted(self.artifact_storage.final_dataset_path.glob("*.parquet"))],
|
|
91
|
+
"num_records": self.num_records_list[: self._current_batch_number + 1],
|
|
92
|
+
"num_completed_batches": self._current_batch_number + 1,
|
|
93
|
+
"dataset_name": self.artifact_storage.dataset_name,
|
|
94
|
+
}
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
if on_complete:
|
|
98
|
+
on_complete(final_file_path)
|
|
99
|
+
else:
|
|
100
|
+
final_file_path = None
|
|
101
|
+
|
|
102
|
+
logger.warning(
|
|
103
|
+
f"⚠️ Batch {self._current_batch_number + 1} finished without any results to write. "
|
|
104
|
+
"A partial dataset containing the currently available columns has been written to the partial results "
|
|
105
|
+
f"directory: {self.artifact_storage.partial_results_path}"
|
|
106
|
+
)
|
|
107
|
+
|
|
98
108
|
self._current_batch_number += 1
|
|
99
109
|
self._buffer: list[dict] = []
|
|
100
110
|
|
|
101
|
-
if on_complete:
|
|
102
|
-
on_complete(final_file_path)
|
|
103
|
-
|
|
104
111
|
return final_file_path
|
|
105
112
|
|
|
106
113
|
def finish(self) -> None:
|
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from data_designer.config.base import ConfigBase
|
|
5
|
+
from data_designer.config.dataset_metadata import DatasetMetadata
|
|
5
6
|
from data_designer.config.models import ModelConfig
|
|
6
7
|
from data_designer.config.run_config import RunConfig
|
|
7
8
|
from data_designer.config.seed_source import SeedSource
|
|
@@ -27,6 +28,17 @@ class ResourceProvider(ConfigBase):
|
|
|
27
28
|
run_config: RunConfig = RunConfig()
|
|
28
29
|
seed_reader: SeedReader | None = None
|
|
29
30
|
|
|
31
|
+
def get_dataset_metadata(self) -> DatasetMetadata:
|
|
32
|
+
"""Get metadata about the dataset being generated.
|
|
33
|
+
|
|
34
|
+
Returns:
|
|
35
|
+
DatasetMetadata with seed column names and other metadata.
|
|
36
|
+
"""
|
|
37
|
+
seed_column_names = []
|
|
38
|
+
if self.seed_reader is not None:
|
|
39
|
+
seed_column_names = self.seed_reader.get_column_names()
|
|
40
|
+
return DatasetMetadata(seed_column_names=seed_column_names)
|
|
41
|
+
|
|
30
42
|
|
|
31
43
|
def create_resource_provider(
|
|
32
44
|
*,
|
|
@@ -56,15 +56,12 @@ from data_designer.engine.secret_resolver import (
|
|
|
56
56
|
from data_designer.interface.errors import (
|
|
57
57
|
DataDesignerGenerationError,
|
|
58
58
|
DataDesignerProfilingError,
|
|
59
|
-
InvalidBufferValueError,
|
|
60
59
|
)
|
|
61
60
|
from data_designer.interface.results import DatasetCreationResults
|
|
62
61
|
from data_designer.logging import RandomEmoji
|
|
63
62
|
from data_designer.plugins.plugin import PluginType
|
|
64
63
|
from data_designer.plugins.registry import PluginRegistry
|
|
65
64
|
|
|
66
|
-
DEFAULT_BUFFER_SIZE = 1000
|
|
67
|
-
|
|
68
65
|
DEFAULT_SECRET_RESOLVER = CompositeResolver([EnvironmentResolver(), PlaintextResolver()])
|
|
69
66
|
|
|
70
67
|
DEFAULT_SEED_READERS = [
|
|
@@ -112,7 +109,6 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
112
109
|
):
|
|
113
110
|
self._secret_resolver = secret_resolver or DEFAULT_SECRET_RESOLVER
|
|
114
111
|
self._artifact_path = Path(artifact_path) if artifact_path is not None else Path.cwd() / "artifacts"
|
|
115
|
-
self._buffer_size = DEFAULT_BUFFER_SIZE
|
|
116
112
|
self._run_config = RunConfig()
|
|
117
113
|
self._managed_assets_path = Path(managed_assets_path or MANAGED_ASSETS_PATH)
|
|
118
114
|
self._model_providers = self._resolve_model_providers(model_providers)
|
|
@@ -169,7 +165,7 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
169
165
|
builder = self._create_dataset_builder(config_builder, resource_provider)
|
|
170
166
|
|
|
171
167
|
try:
|
|
172
|
-
builder.build(num_records=num_records, buffer_size=self._buffer_size)
|
|
168
|
+
builder.build(num_records=num_records)
|
|
173
169
|
except Exception as e:
|
|
174
170
|
raise DataDesignerGenerationError(f"🛑 Error generating dataset: {e}")
|
|
175
171
|
|
|
@@ -182,10 +178,13 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
182
178
|
except Exception as e:
|
|
183
179
|
raise DataDesignerProfilingError(f"🛑 Error profiling dataset: {e}")
|
|
184
180
|
|
|
181
|
+
dataset_metadata = resource_provider.get_dataset_metadata()
|
|
182
|
+
|
|
185
183
|
return DatasetCreationResults(
|
|
186
184
|
artifact_storage=builder.artifact_storage,
|
|
187
185
|
analysis=analysis,
|
|
188
186
|
config_builder=config_builder,
|
|
187
|
+
dataset_metadata=dataset_metadata,
|
|
189
188
|
)
|
|
190
189
|
|
|
191
190
|
def preview(
|
|
@@ -249,11 +248,15 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
249
248
|
):
|
|
250
249
|
logger.info(f"{RandomEmoji.success()} Preview complete!")
|
|
251
250
|
|
|
251
|
+
# Create dataset metadata from the resource provider
|
|
252
|
+
dataset_metadata = resource_provider.get_dataset_metadata()
|
|
253
|
+
|
|
252
254
|
return PreviewResults(
|
|
253
255
|
dataset=processed_dataset,
|
|
254
256
|
analysis=analysis,
|
|
255
257
|
processor_artifacts=processor_artifacts,
|
|
256
258
|
config_builder=config_builder,
|
|
259
|
+
dataset_metadata=dataset_metadata,
|
|
257
260
|
)
|
|
258
261
|
|
|
259
262
|
def validate(self, config_builder: DataDesignerConfigBuilder) -> None:
|
|
@@ -300,34 +303,22 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
300
303
|
"""
|
|
301
304
|
return self._secret_resolver
|
|
302
305
|
|
|
303
|
-
def set_buffer_size(self, buffer_size: int) -> None:
|
|
304
|
-
"""Set the buffer size for dataset generation.
|
|
305
|
-
|
|
306
|
-
The buffer size controls how many records are processed in memory at once
|
|
307
|
-
during dataset generation using the `create` method. The default value is
|
|
308
|
-
set to the constant `DEFAULT_BUFFER_SIZE` defined in the data_designer module.
|
|
309
|
-
|
|
310
|
-
Args:
|
|
311
|
-
buffer_size: Number of records to process in each buffer.
|
|
312
|
-
|
|
313
|
-
Raises:
|
|
314
|
-
InvalidBufferValueError: If buffer size is less than or equal to 0.
|
|
315
|
-
"""
|
|
316
|
-
if buffer_size <= 0:
|
|
317
|
-
raise InvalidBufferValueError("Buffer size must be greater than 0.")
|
|
318
|
-
self._buffer_size = buffer_size
|
|
319
|
-
|
|
320
306
|
def set_run_config(self, run_config: RunConfig) -> None:
|
|
321
307
|
"""Set the runtime configuration for dataset generation.
|
|
322
308
|
|
|
323
309
|
Args:
|
|
324
310
|
run_config: A RunConfig instance containing runtime settings such as
|
|
325
|
-
early shutdown behavior
|
|
311
|
+
early shutdown behavior and batch sizing via `buffer_size`. Import RunConfig from
|
|
312
|
+
data_designer.essentials.
|
|
326
313
|
|
|
327
314
|
Example:
|
|
328
315
|
>>> from data_designer.essentials import DataDesigner, RunConfig
|
|
329
316
|
>>> dd = DataDesigner()
|
|
330
317
|
>>> dd.set_run_config(RunConfig(disable_early_shutdown=True))
|
|
318
|
+
|
|
319
|
+
Notes:
|
|
320
|
+
When `disable_early_shutdown=True`, DataDesigner will never terminate generation early
|
|
321
|
+
due to error-rate thresholds. Errors are still tracked for reporting.
|
|
331
322
|
"""
|
|
332
323
|
self._run_config = run_config
|
|
333
324
|
|
|
@@ -9,6 +9,7 @@ import pandas as pd
|
|
|
9
9
|
|
|
10
10
|
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
|
|
11
11
|
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
12
|
+
from data_designer.config.dataset_metadata import DatasetMetadata
|
|
12
13
|
from data_designer.config.utils.visualization import WithRecordSamplerMixin
|
|
13
14
|
from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage
|
|
14
15
|
from data_designer.engine.dataset_builders.errors import ArtifactStorageError
|
|
@@ -28,6 +29,7 @@ class DatasetCreationResults(WithRecordSamplerMixin):
|
|
|
28
29
|
artifact_storage: ArtifactStorage,
|
|
29
30
|
analysis: DatasetProfilerResults,
|
|
30
31
|
config_builder: DataDesignerConfigBuilder,
|
|
32
|
+
dataset_metadata: DatasetMetadata,
|
|
31
33
|
):
|
|
32
34
|
"""Creates a new instance with results based on a dataset creation run.
|
|
33
35
|
|
|
@@ -35,10 +37,12 @@ class DatasetCreationResults(WithRecordSamplerMixin):
|
|
|
35
37
|
artifact_storage: Storage manager for accessing generated artifacts.
|
|
36
38
|
analysis: Profiling results for the generated dataset.
|
|
37
39
|
config_builder: Configuration builder used to create the dataset.
|
|
40
|
+
dataset_metadata: Metadata about the generated dataset (e.g., seed column names).
|
|
38
41
|
"""
|
|
39
42
|
self.artifact_storage = artifact_storage
|
|
40
43
|
self._analysis = analysis
|
|
41
44
|
self._config_builder = config_builder
|
|
45
|
+
self.dataset_metadata = dataset_metadata
|
|
42
46
|
|
|
43
47
|
def load_analysis(self) -> DatasetProfilerResults:
|
|
44
48
|
"""Load the profiling analysis results for the generated dataset.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: data-designer
|
|
3
|
-
Version: 0.3.3
|
|
3
|
+
Version: 0.3.4
|
|
4
4
|
Summary: General framework for synthetic data generation
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -193,6 +193,14 @@ The value `openai/gpt-oss-20b` would be collected.
|
|
|
193
193
|
|
|
194
194
|
To disable telemetry capture, set `NEMO_TELEMETRY_ENABLED=false`.
|
|
195
195
|
|
|
196
|
+
### Top Models
|
|
197
|
+
|
|
198
|
+
This chart represents the breakdown of models used for Data Designer across all synthetic data generation jobs from 12/18/2025 to 1/14/2026.
|
|
199
|
+
|
|
200
|
+

|
|
201
|
+
|
|
202
|
+
_Last updated on 1/14/2026_
|
|
203
|
+
|
|
196
204
|
---
|
|
197
205
|
|
|
198
206
|
## License
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
data_designer/__init__.py,sha256=xYZCBRleSswcNCARfHQzFy_Edag3Nmmwfa6A5C1d5B8,466
|
|
2
|
-
data_designer/_version.py,sha256=
|
|
2
|
+
data_designer/_version.py,sha256=3nDaC5e0d_scBB1bUEKPlItbvbY0PmXNNyyOTNFNWNI,704
|
|
3
3
|
data_designer/errors.py,sha256=BjnxDxwtTGscY3lZbi1RQ666j5PseoxNJRobeMXAJiI,184
|
|
4
4
|
data_designer/logging.py,sha256=2ToJzPPb6zF-QR-yQnrLVcBiBGL_QVNsnCHSQC0nPpQ,5346
|
|
5
5
|
data_designer/plugin_manager.py,sha256=C2ZkZiXlcMRiaxfrrho5Shz6DKdExVeBha7ch-d4CnU,2695
|
|
@@ -34,24 +34,25 @@ data_designer/cli/services/download_service.py,sha256=9ocQdHZW6VYHqM3nyHn_7dP1z1
|
|
|
34
34
|
data_designer/cli/services/model_service.py,sha256=rcsozdIRXMElskUHtmWGj6pZbVFf2JzHHv8rTnXv8xw,3890
|
|
35
35
|
data_designer/cli/services/provider_service.py,sha256=ru0-AJVZLr8wk5kOV2z2FgPpF-iK8vWQs0qtrMKp-dg,3921
|
|
36
36
|
data_designer/config/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
37
|
-
data_designer/config/base.py,sha256=
|
|
37
|
+
data_designer/config/base.py,sha256=IGj6sy_GnKzC94uu2rdxe12EqR_AmGJ6O3rl2MxOv6g,2449
|
|
38
38
|
data_designer/config/column_configs.py,sha256=Z3V8BKMkgse91MGlEcyJBa-lV25_j81SZ-vZNOoUKXc,18966
|
|
39
39
|
data_designer/config/column_types.py,sha256=qsy04QslsnPEOoStN3sJIoSBeSSTxpNg5VLVmew_YLQ,5753
|
|
40
40
|
data_designer/config/config_builder.py,sha256=vuPibkodbJxbCXdaI1tt1Uyo1SVCnAOfLBAW1AmhajI,24707
|
|
41
41
|
data_designer/config/data_designer_config.py,sha256=qOojviug05vHR2S4800sjd4OmxhSVi6kB8SAFXLlPog,1891
|
|
42
42
|
data_designer/config/dataset_builders.py,sha256=4NSEEqXzgSK8IDXoUSVRSUTcEe-ocKS-iEzyzKkNSJ0,332
|
|
43
|
+
data_designer/config/dataset_metadata.py,sha256=UTlEgnHWgjwPuc7bP95T7gaKmcr7pIhFMy9vvbUwMV4,647
|
|
43
44
|
data_designer/config/default_model_settings.py,sha256=3iUr10JvSTMDHwlEdSbLE_y90czbGOs_21La3V9fXoM,4462
|
|
44
45
|
data_designer/config/errors.py,sha256=g64yn9l7lTbcXI3DPdC_3utvm994IXGDhoQRiNON6T0,524
|
|
45
46
|
data_designer/config/exports.py,sha256=y23KqhwAf4DIarfvqgiuqyK2Fs1zv9cTDcuQ9SBX54o,4720
|
|
46
47
|
data_designer/config/interface.py,sha256=RRC5JHl6wQ9Icg1IQWOM6t8VkBk6c1BREkka3cEAi8c,1624
|
|
47
48
|
data_designer/config/models.py,sha256=A8D0qj6L9ndYvScvNWGuoOLe0zjYtoAm0JkwaG3qjRg,15335
|
|
48
|
-
data_designer/config/preview_results.py,sha256=
|
|
49
|
+
data_designer/config/preview_results.py,sha256=y-zEnRxZt_72XuAFs_e_peK4bVJ_fDSi9b-jLxsQ-S0,1602
|
|
49
50
|
data_designer/config/processors.py,sha256=CoLUN1AuEBUPigmDT8vCWotvOcFgnC8CFk8VLvNSjxw,5992
|
|
50
|
-
data_designer/config/run_config.py,sha256=
|
|
51
|
+
data_designer/config/run_config.py,sha256=yakCcWyT78kLXGZnFVK35pdrkPpWT7AVEp_sHyq67fg,2393
|
|
51
52
|
data_designer/config/sampler_constraints.py,sha256=XBPxm81J0u2q3WVsRaP2CoIE_0ssY14xc4Nt5r0oyuM,1161
|
|
52
53
|
data_designer/config/sampler_params.py,sha256=FSMvmdj-9Hiyap1_n-AM_ZDT-sTovox7LK4TO4L2UUc,27843
|
|
53
54
|
data_designer/config/seed.py,sha256=iUFByadE6GTG9HCcp0nEWm7wTDNaAOuYtGm2Ov0tiaE,4632
|
|
54
|
-
data_designer/config/seed_source.py,sha256=
|
|
55
|
+
data_designer/config/seed_source.py,sha256=GYgK9f0_a0FuOtvXlrLEw0MmAELucTB1Qrc_sGGoziw,2513
|
|
55
56
|
data_designer/config/seed_source_types.py,sha256=4EJ4IsYkfkicYqh0CuEvyI3H541XbtV_ffClJV2-zCQ,679
|
|
56
57
|
data_designer/config/validator_params.py,sha256=9w9M7Z1rcZOpw-BUBhjaVfA8ykNP5iPvdyJOHmdugEg,3911
|
|
57
58
|
data_designer/config/analysis/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
@@ -68,7 +69,7 @@ data_designer/config/utils/io_helpers.py,sha256=_14cfoMUIFqiUPBTu5u-BomarqmP6_VB
|
|
|
68
69
|
data_designer/config/utils/misc.py,sha256=cRofbhUulLKjV7j_7M5aNNNyIewyyhXLkj00GaZf9uo,2472
|
|
69
70
|
data_designer/config/utils/numerical_helpers.py,sha256=BadOPY1AR2ZVKFAKORpKT_Corc1SZR7W1of6FXYIMY4,802
|
|
70
71
|
data_designer/config/utils/type_helpers.py,sha256=2WSGYWTDktOCa9FGNv2IPXIHQHJc7-fimAB1JqIJVx0,4023
|
|
71
|
-
data_designer/config/utils/visualization.py,sha256=
|
|
72
|
+
data_designer/config/utils/visualization.py,sha256=2ZloKN1UimKvIXyKJBSHZ56-JItE5MzMXY31J9x4hbY,18366
|
|
72
73
|
data_designer/engine/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
73
74
|
data_designer/engine/compiler.py,sha256=tzZF5rk_AAid0yRwwsR86-xvW-5GdJskN1RPfvuFq-k,2853
|
|
74
75
|
data_designer/engine/configurable_task.py,sha256=hdXyfekQ9dHSc_NQG6nZIxKCqWSWKnqA5nje7Uobf6k,2525
|
|
@@ -90,23 +91,23 @@ data_designer/engine/column_generators/generators/__init__.py,sha256=XLO09Ei8g0l
|
|
|
90
91
|
data_designer/engine/column_generators/generators/base.py,sha256=T3ccXIyKXwYN4Kes6HcTBdELK-wOs9cFCoaTH3C3dyQ,3814
|
|
91
92
|
data_designer/engine/column_generators/generators/embedding.py,sha256=pdRMzb95CKmBpOiTQrAEiKngBFvAlt5g8HwI8EwWBIY,1565
|
|
92
93
|
data_designer/engine/column_generators/generators/expression.py,sha256=irBDhTsFyZaWU2EIqy5xOKRBxX-x8W8q5pQ69P7NKTs,2543
|
|
93
|
-
data_designer/engine/column_generators/generators/llm_completion.py,sha256=
|
|
94
|
+
data_designer/engine/column_generators/generators/llm_completion.py,sha256=fwL6Xbfg8d9SnZgv2-j3g_6S38xzuwnVY4R4UabvdIw,4881
|
|
94
95
|
data_designer/engine/column_generators/generators/samplers.py,sha256=0bvJhVK2LfH9aRY1BxqWCjA7LJxy1B63gGmZuWK8auU,3486
|
|
95
96
|
data_designer/engine/column_generators/generators/seed_dataset.py,sha256=IskfOQkRMRTfu8tiYb426LMchBYKNQs0uSo9E5y0bwg,6905
|
|
96
|
-
data_designer/engine/column_generators/generators/validation.py,sha256=
|
|
97
|
+
data_designer/engine/column_generators/generators/validation.py,sha256=0gw0Wzq4yVqejWewt2uznQ4UJI7_0MMEdvMenH-KwYc,6844
|
|
97
98
|
data_designer/engine/column_generators/utils/errors.py,sha256=Nemo7fxg9BpTOf0kdlxxdtXZMfe_ksrRfzWg2E5sFX0,370
|
|
98
99
|
data_designer/engine/column_generators/utils/generator_classification.py,sha256=1fvMX7lQzY3A1s-V3CtS-W6-zwmby553_Oe9K-tdZKE,1922
|
|
99
100
|
data_designer/engine/column_generators/utils/judge_score_factory.py,sha256=8l0g-L_O6esmAEf1rJh7o2IASZnLqZ_KDlGaLCMYMK0,2105
|
|
100
101
|
data_designer/engine/column_generators/utils/prompt_renderer.py,sha256=xXnzJiR60zTmMDorR_wfTleNMJsLKA5gbL4WOjQ-wYg,4765
|
|
101
102
|
data_designer/engine/dataset_builders/artifact_storage.py,sha256=fYBC569tXVpn7UURcuXfHPhEvvwOHnMxAkA1iQAB-j4,8425
|
|
102
|
-
data_designer/engine/dataset_builders/column_wise_builder.py,sha256=
|
|
103
|
+
data_designer/engine/dataset_builders/column_wise_builder.py,sha256=DUAMEmy7xCSFDYy-WU-ZXflKzy5oJ6yqMESQSb7CoUc,15308
|
|
103
104
|
data_designer/engine/dataset_builders/errors.py,sha256=ov9cTRvLtLieIFkUGZdk1n_iabdc904ZATwgT5u9uzY,364
|
|
104
105
|
data_designer/engine/dataset_builders/multi_column_configs.py,sha256=bxRildX3SfzSFKv_rqwwsUeFn_RoyhfFneSj4qBi-Q4,1624
|
|
105
106
|
data_designer/engine/dataset_builders/utils/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
106
|
-
data_designer/engine/dataset_builders/utils/concurrency.py,sha256=
|
|
107
|
+
data_designer/engine/dataset_builders/utils/concurrency.py,sha256=Q0ro9UY-3-FFzfi3MZ29nMTSiDZgg1Um6y_HQFztDhk,8338
|
|
107
108
|
data_designer/engine/dataset_builders/utils/config_compiler.py,sha256=iAbaLiDNBPyjZwSVK1a83KFKln9LACjKdVSpIiZRemw,2405
|
|
108
109
|
data_designer/engine/dataset_builders/utils/dag.py,sha256=L3-sla2s8oqcy3V-WSxXAWZVakkmb3cvmTh5L6phC1M,2474
|
|
109
|
-
data_designer/engine/dataset_builders/utils/dataset_batch_manager.py,sha256=
|
|
110
|
+
data_designer/engine/dataset_builders/utils/dataset_batch_manager.py,sha256=DpEW_zrHv7CPiVFw07r_0Q9V6dFaTqOFJ1uWnuicxHo,8142
|
|
110
111
|
data_designer/engine/dataset_builders/utils/errors.py,sha256=6hstnyjYZ8pU69qK3OrUvlyeifqRJC2m399GNVcih3I,375
|
|
111
112
|
data_designer/engine/models/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
112
113
|
data_designer/engine/models/errors.py,sha256=HjPF3ERKU7veS7s189_0VBBMpePl6pSmOqaYhOO8xv4,12179
|
|
@@ -145,7 +146,7 @@ data_designer/engine/registry/errors.py,sha256=PNWsfU2baNmzshcHah1-P7YT9OqxfxYf1
|
|
|
145
146
|
data_designer/engine/resources/managed_dataset_generator.py,sha256=mJAR-dc0vEfiw3xN7ABIOalxGIbQmfZ7P23U-vUut40,1374
|
|
146
147
|
data_designer/engine/resources/managed_dataset_repository.py,sha256=zxxP9SDdu0PrP28ozJmIwdlbHHuGkcPRmb1Ua5VOsG0,7552
|
|
147
148
|
data_designer/engine/resources/managed_storage.py,sha256=nTfp14o-BbtDP6XUxD3564g0cXB6B1og-1rZI9dx1H0,2084
|
|
148
|
-
data_designer/engine/resources/resource_provider.py,sha256=
|
|
149
|
+
data_designer/engine/resources/resource_provider.py,sha256=pMh0_j8JCZ54x4zCPtPIb9s5AGwSBswQsYFddxsgM_U,2813
|
|
149
150
|
data_designer/engine/resources/seed_reader.py,sha256=Gh524gMWh6Lz43bielQG3nmMz5MrZy1BTGs792AhMpc,5591
|
|
150
151
|
data_designer/engine/sampling_gen/column.py,sha256=lqdMrUebIFJ_C_Laye0TuIkEd_fVVPP5b-zoNGez25Q,3951
|
|
151
152
|
data_designer/engine/sampling_gen/constraints.py,sha256=HtLxW_VYXlo_A8IMlZkgTb1fBwyrRHuB3LZnF3AgV-I,3009
|
|
@@ -176,9 +177,9 @@ data_designer/engine/validators/remote.py,sha256=Qviad8vF7SBtHRvNqKxZyiLJuGJp0rm
|
|
|
176
177
|
data_designer/engine/validators/sql.py,sha256=Y2FkM_JmcWzW98qjZruq73vt8RgjkLXuTbyLPGrA2l8,2255
|
|
177
178
|
data_designer/essentials/__init__.py,sha256=wt84P4qu1-OdIfrZmdOtRtiBoGcVSdTEdkIiUaztPWs,1099
|
|
178
179
|
data_designer/interface/__init__.py,sha256=XLO09Ei8g0lU7hYlzKCvhvQhLFBe5CBwE4v2PqK9xWY,142
|
|
179
|
-
data_designer/interface/data_designer.py,sha256=
|
|
180
|
+
data_designer/interface/data_designer.py,sha256=OpjWdfjwhp1wzeArdicpCzmaaVCZydSGarQDpvh3egU,17105
|
|
180
181
|
data_designer/interface/errors.py,sha256=CoH-6V95RDNHNONxKs8MWina-cD6NtaerMW_cyH8WRU,570
|
|
181
|
-
data_designer/interface/results.py,sha256=
|
|
182
|
+
data_designer/interface/results.py,sha256=aiRWSK2QVAKTGboqn_Fods7dc5lbG5cmDs6O_INbH-0,3768
|
|
182
183
|
data_designer/plugins/__init__.py,sha256=GmeQ6bzFMNkLthrmLKQ5bcUMPw_W2K4d6oWKvYA8eNw,239
|
|
183
184
|
data_designer/plugins/errors.py,sha256=EzKPo0rEiTe872sIAvF0_cDjFfl_P2MyDtESZL1P3ug,350
|
|
184
185
|
data_designer/plugins/plugin.py,sha256=TmdllvWXOKZZNwJTW_rUMD1M68pAh_IEIILZei7fHqU,5468
|
|
@@ -186,8 +187,8 @@ data_designer/plugins/registry.py,sha256=1vNlmDyFMCIY8D_z1RbnWMfKnHZ02g8sAvJg1nE
|
|
|
186
187
|
data_designer/plugins/testing/__init__.py,sha256=lSuWzt1AVTJg4gV9wh6BUU1Az7IjQ-9FgADdUAtm_qQ,260
|
|
187
188
|
data_designer/plugins/testing/stubs.py,sha256=E8bovwU4zpNaJM4b1i4biCDXyqvthe8vmsrZ9M0M9vM,4277
|
|
188
189
|
data_designer/plugins/testing/utils.py,sha256=OLoQzW8-qbA-91wTUWtwNHrMDyGz1-ma0f6a-3NBUNI,937
|
|
189
|
-
data_designer-0.3.
|
|
190
|
-
data_designer-0.3.
|
|
191
|
-
data_designer-0.3.
|
|
192
|
-
data_designer-0.3.
|
|
193
|
-
data_designer-0.3.
|
|
190
|
+
data_designer-0.3.4.dist-info/METADATA,sha256=10B6euF52J8L-MFg1_I_3lRQ3rK3Q5x_2V0iZ_vTtBQ,7914
|
|
191
|
+
data_designer-0.3.4.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
192
|
+
data_designer-0.3.4.dist-info/entry_points.txt,sha256=NWWWidyDxN6CYX6y664PhBYMhbaYTQTyprqfYAgkyCg,57
|
|
193
|
+
data_designer-0.3.4.dist-info/licenses/LICENSE,sha256=cSWJDwVqHyQgly8Zmt3pqXJ2eQbZVYwN9qd0NMssxXY,11336
|
|
194
|
+
data_designer-0.3.4.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|