data-designer 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/_version.py +2 -2
- data_designer/config/exports.py +6 -1
- data_designer/config/preview_results.py +6 -3
- data_designer/config/processors.py +46 -2
- data_designer/config/utils/validation.py +33 -2
- data_designer/config/utils/visualization.py +32 -0
- data_designer/engine/dataset_builders/artifact_storage.py +13 -3
- data_designer/engine/dataset_builders/column_wise_builder.py +3 -0
- data_designer/engine/processing/processors/drop_columns.py +1 -1
- data_designer/engine/processing/processors/registry.py +3 -0
- data_designer/engine/processing/processors/schema_transform.py +53 -0
- data_designer/interface/data_designer.py +12 -0
- data_designer/interface/results.py +36 -0
- {data_designer-0.1.4.dist-info → data_designer-0.1.5.dist-info}/METADATA +9 -9
- {data_designer-0.1.4.dist-info → data_designer-0.1.5.dist-info}/RECORD +18 -17
- {data_designer-0.1.4.dist-info → data_designer-0.1.5.dist-info}/WHEEL +0 -0
- {data_designer-0.1.4.dist-info → data_designer-0.1.5.dist-info}/entry_points.txt +0 -0
- {data_designer-0.1.4.dist-info → data_designer-0.1.5.dist-info}/licenses/LICENSE +0 -0
data_designer/_version.py
CHANGED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1,
|
|
31
|
+
__version__ = version = '0.1.5'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 5)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
data_designer/config/exports.py
CHANGED
|
@@ -32,7 +32,11 @@ from data_designer.config.models import (
|
|
|
32
32
|
UniformDistribution,
|
|
33
33
|
UniformDistributionParams,
|
|
34
34
|
)
|
|
35
|
-
from data_designer.config.processors import
|
|
35
|
+
from data_designer.config.processors import (
|
|
36
|
+
DropColumnsProcessorConfig,
|
|
37
|
+
ProcessorType,
|
|
38
|
+
SchemaTransformProcessorConfig,
|
|
39
|
+
)
|
|
36
40
|
from data_designer.config.sampler_constraints import ColumnInequalityConstraint, ScalarInequalityConstraint
|
|
37
41
|
from data_designer.config.sampler_params import (
|
|
38
42
|
BernoulliMixtureSamplerParams,
|
|
@@ -69,6 +73,7 @@ from data_designer.config.validator_params import (
|
|
|
69
73
|
|
|
70
74
|
def get_config_exports() -> list[str]:
|
|
71
75
|
return [
|
|
76
|
+
SchemaTransformProcessorConfig.__name__,
|
|
72
77
|
BernoulliMixtureSamplerParams.__name__,
|
|
73
78
|
BernoulliSamplerParams.__name__,
|
|
74
79
|
BinomialSamplerParams.__name__,
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
|
-
from typing import Optional
|
|
6
|
+
from typing import Optional, Union
|
|
7
7
|
|
|
8
8
|
import pandas as pd
|
|
9
9
|
|
|
@@ -19,6 +19,7 @@ class PreviewResults(WithRecordSamplerMixin):
|
|
|
19
19
|
config_builder: DataDesignerConfigBuilder,
|
|
20
20
|
dataset: Optional[pd.DataFrame] = None,
|
|
21
21
|
analysis: Optional[DatasetProfilerResults] = None,
|
|
22
|
+
processor_artifacts: Optional[dict[str, Union[list[str], str]]] = None,
|
|
22
23
|
):
|
|
23
24
|
"""Creates a new instance with results from a Data Designer preview run.
|
|
24
25
|
|
|
@@ -26,7 +27,9 @@ class PreviewResults(WithRecordSamplerMixin):
|
|
|
26
27
|
config_builder: Data Designer configuration builder.
|
|
27
28
|
dataset: Dataset of the preview run.
|
|
28
29
|
analysis: Analysis of the preview run.
|
|
30
|
+
processor_artifacts: Artifacts generated by the processors.
|
|
29
31
|
"""
|
|
30
|
-
self.dataset: pd.DataFrame
|
|
31
|
-
self.analysis: DatasetProfilerResults
|
|
32
|
+
self.dataset: Optional[pd.DataFrame] = dataset
|
|
33
|
+
self.analysis: Optional[DatasetProfilerResults] = analysis
|
|
34
|
+
self.processor_artifacts: Optional[dict[str, Union[list[str], str]]] = processor_artifacts
|
|
32
35
|
self._config_builder = config_builder
|
|
@@ -1,25 +1,32 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
import json
|
|
4
5
|
from abc import ABC
|
|
5
6
|
from enum import Enum
|
|
6
|
-
from typing import Literal
|
|
7
|
+
from typing import Any, Literal
|
|
7
8
|
|
|
8
9
|
from pydantic import Field, field_validator
|
|
9
10
|
|
|
10
11
|
from data_designer.config.base import ConfigBase
|
|
11
12
|
from data_designer.config.dataset_builders import BuildStage
|
|
13
|
+
from data_designer.config.errors import InvalidConfigError
|
|
12
14
|
|
|
13
15
|
SUPPORTED_STAGES = [BuildStage.POST_BATCH]
|
|
14
16
|
|
|
15
17
|
|
|
16
18
|
class ProcessorType(str, Enum):
|
|
17
19
|
DROP_COLUMNS = "drop_columns"
|
|
20
|
+
SCHEMA_TRANSFORM = "schema_transform"
|
|
18
21
|
|
|
19
22
|
|
|
20
23
|
class ProcessorConfig(ConfigBase, ABC):
|
|
24
|
+
name: str = Field(
|
|
25
|
+
description="The name of the processor, used to identify the processor in the results and to write the artifacts to disk.",
|
|
26
|
+
)
|
|
21
27
|
build_stage: BuildStage = Field(
|
|
22
|
-
|
|
28
|
+
default=BuildStage.POST_BATCH,
|
|
29
|
+
description=f"The stage at which the processor will run. Supported stages: {', '.join(SUPPORTED_STAGES)}",
|
|
23
30
|
)
|
|
24
31
|
|
|
25
32
|
@field_validator("build_stage")
|
|
@@ -34,8 +41,45 @@ class ProcessorConfig(ConfigBase, ABC):
|
|
|
34
41
|
def get_processor_config_from_kwargs(processor_type: ProcessorType, **kwargs) -> ProcessorConfig:
|
|
35
42
|
if processor_type == ProcessorType.DROP_COLUMNS:
|
|
36
43
|
return DropColumnsProcessorConfig(**kwargs)
|
|
44
|
+
elif processor_type == ProcessorType.SCHEMA_TRANSFORM:
|
|
45
|
+
return SchemaTransformProcessorConfig(**kwargs)
|
|
37
46
|
|
|
38
47
|
|
|
39
48
|
class DropColumnsProcessorConfig(ProcessorConfig):
|
|
40
49
|
column_names: list[str]
|
|
41
50
|
processor_type: Literal[ProcessorType.DROP_COLUMNS] = ProcessorType.DROP_COLUMNS
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class SchemaTransformProcessorConfig(ProcessorConfig):
|
|
54
|
+
template: dict[str, Any] = Field(
|
|
55
|
+
...,
|
|
56
|
+
description="""
|
|
57
|
+
Dictionary specifying columns and templates to use in the new dataset with transformed schema.
|
|
58
|
+
|
|
59
|
+
Each key is a new column name, and each value is an object containing Jinja2 templates - for instance, a string or a list of strings.
|
|
60
|
+
Values must be JSON-serializable.
|
|
61
|
+
|
|
62
|
+
Example:
|
|
63
|
+
|
|
64
|
+
```python
|
|
65
|
+
template = {
|
|
66
|
+
"list_of_strings": ["{{ col1 }}", "{{ col2 }}"],
|
|
67
|
+
"uppercase_string": "{{ col1 | upper }}",
|
|
68
|
+
"lowercase_string": "{{ col2 | lower }}",
|
|
69
|
+
}
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
The above templates will create a new dataset with three columns: "list_of_strings", "uppercase_string", and "lowercase_string".
|
|
73
|
+
References to columns "col1" and "col2" in the templates will be replaced with the actual values of the columns in the dataset.
|
|
74
|
+
""",
|
|
75
|
+
)
|
|
76
|
+
processor_type: Literal[ProcessorType.SCHEMA_TRANSFORM] = ProcessorType.SCHEMA_TRANSFORM
|
|
77
|
+
|
|
78
|
+
@field_validator("template")
|
|
79
|
+
def validate_template(cls, v: dict[str, Any]) -> dict[str, Any]:
|
|
80
|
+
try:
|
|
81
|
+
json.dumps(v)
|
|
82
|
+
except TypeError as e:
|
|
83
|
+
if "not JSON serializable" in str(e):
|
|
84
|
+
raise InvalidConfigError("Template must be JSON serializable")
|
|
85
|
+
return v
|
|
@@ -18,7 +18,10 @@ from rich.panel import Panel
|
|
|
18
18
|
from data_designer.config.column_types import ColumnConfigT, DataDesignerColumnType, column_type_is_llm_generated
|
|
19
19
|
from data_designer.config.processors import ProcessorConfig, ProcessorType
|
|
20
20
|
from data_designer.config.utils.constants import RICH_CONSOLE_THEME
|
|
21
|
-
from data_designer.config.utils.misc import
|
|
21
|
+
from data_designer.config.utils.misc import (
|
|
22
|
+
can_run_data_designer_locally,
|
|
23
|
+
get_prompt_template_keywords,
|
|
24
|
+
)
|
|
22
25
|
from data_designer.config.validator_params import ValidatorType
|
|
23
26
|
|
|
24
27
|
|
|
@@ -63,6 +66,7 @@ def validate_data_designer_config(
|
|
|
63
66
|
violations.extend(validate_expression_references(columns=columns, allowed_references=allowed_references))
|
|
64
67
|
violations.extend(validate_columns_not_all_dropped(columns=columns))
|
|
65
68
|
violations.extend(validate_drop_columns_processor(columns=columns, processor_configs=processor_configs))
|
|
69
|
+
violations.extend(validate_schema_transform_processor(columns=columns, processor_configs=processor_configs))
|
|
66
70
|
if not can_run_data_designer_locally():
|
|
67
71
|
violations.extend(validate_local_only_columns(columns=columns))
|
|
68
72
|
return violations
|
|
@@ -271,7 +275,7 @@ def validate_drop_columns_processor(
|
|
|
271
275
|
columns: list[ColumnConfigT],
|
|
272
276
|
processor_configs: list[ProcessorConfig],
|
|
273
277
|
) -> list[Violation]:
|
|
274
|
-
all_column_names =
|
|
278
|
+
all_column_names = {c.name for c in columns}
|
|
275
279
|
for processor_config in processor_configs:
|
|
276
280
|
if processor_config.processor_type == ProcessorType.DROP_COLUMNS:
|
|
277
281
|
invalid_columns = set(processor_config.column_names) - all_column_names
|
|
@@ -288,6 +292,33 @@ def validate_drop_columns_processor(
|
|
|
288
292
|
return []
|
|
289
293
|
|
|
290
294
|
|
|
295
|
+
def validate_schema_transform_processor(
|
|
296
|
+
columns: list[ColumnConfigT],
|
|
297
|
+
processor_configs: list[ProcessorConfig],
|
|
298
|
+
) -> list[Violation]:
|
|
299
|
+
violations = []
|
|
300
|
+
|
|
301
|
+
all_column_names = {c.name for c in columns}
|
|
302
|
+
for processor_config in processor_configs:
|
|
303
|
+
if processor_config.processor_type == ProcessorType.SCHEMA_TRANSFORM:
|
|
304
|
+
for col, template in processor_config.template.items():
|
|
305
|
+
template_keywords = get_prompt_template_keywords(template)
|
|
306
|
+
invalid_keywords = set(template_keywords) - all_column_names
|
|
307
|
+
if len(invalid_keywords) > 0:
|
|
308
|
+
invalid_keywords = ", ".join([f"'{k}'" for k in invalid_keywords])
|
|
309
|
+
message = f"Ancillary dataset processor attempts to reference columns {invalid_keywords} in the template for '{col}', but the columns are not defined in the dataset."
|
|
310
|
+
violations.append(
|
|
311
|
+
Violation(
|
|
312
|
+
column=None,
|
|
313
|
+
type=ViolationType.INVALID_REFERENCE,
|
|
314
|
+
message=message,
|
|
315
|
+
level=ViolationLevel.ERROR,
|
|
316
|
+
)
|
|
317
|
+
)
|
|
318
|
+
|
|
319
|
+
return violations
|
|
320
|
+
|
|
321
|
+
|
|
291
322
|
def validate_expression_references(
|
|
292
323
|
columns: list[ColumnConfigT],
|
|
293
324
|
allowed_references: list[str],
|
|
@@ -72,6 +72,9 @@ class WithRecordSamplerMixin:
|
|
|
72
72
|
else:
|
|
73
73
|
raise DatasetSampleDisplayError("No valid dataset found in results object.")
|
|
74
74
|
|
|
75
|
+
def _has_processor_artifacts(self) -> bool:
|
|
76
|
+
return hasattr(self, "processor_artifacts") and self.processor_artifacts is not None
|
|
77
|
+
|
|
75
78
|
def display_sample_record(
|
|
76
79
|
self,
|
|
77
80
|
index: Optional[int] = None,
|
|
@@ -79,6 +82,7 @@ class WithRecordSamplerMixin:
|
|
|
79
82
|
hide_seed_columns: bool = False,
|
|
80
83
|
syntax_highlighting_theme: str = "dracula",
|
|
81
84
|
background_color: Optional[str] = None,
|
|
85
|
+
processors_to_display: Optional[list[str]] = None,
|
|
82
86
|
) -> None:
|
|
83
87
|
"""Display a sample record from the Data Designer dataset preview.
|
|
84
88
|
|
|
@@ -90,6 +94,7 @@ class WithRecordSamplerMixin:
|
|
|
90
94
|
documentation from `rich` for information about available themes.
|
|
91
95
|
background_color: Background color to use for the record. See the `Syntax`
|
|
92
96
|
documentation from `rich` for information about available background colors.
|
|
97
|
+
processors_to_display: List of processors to display the artifacts for. If None, all processors will be displayed.
|
|
93
98
|
"""
|
|
94
99
|
i = index or self._display_cycle_index
|
|
95
100
|
|
|
@@ -99,8 +104,25 @@ class WithRecordSamplerMixin:
|
|
|
99
104
|
except IndexError:
|
|
100
105
|
raise DatasetSampleDisplayError(f"Index {i} is out of bounds for dataset of length {num_records}.")
|
|
101
106
|
|
|
107
|
+
processor_data_to_display = None
|
|
108
|
+
if self._has_processor_artifacts() and len(self.processor_artifacts) > 0:
|
|
109
|
+
if processors_to_display is None:
|
|
110
|
+
processors_to_display = list(self.processor_artifacts.keys())
|
|
111
|
+
|
|
112
|
+
if len(processors_to_display) > 0:
|
|
113
|
+
processor_data_to_display = {}
|
|
114
|
+
for processor in processors_to_display:
|
|
115
|
+
if (
|
|
116
|
+
isinstance(self.processor_artifacts[processor], list)
|
|
117
|
+
and len(self.processor_artifacts[processor]) == num_records
|
|
118
|
+
):
|
|
119
|
+
processor_data_to_display[processor] = self.processor_artifacts[processor][i]
|
|
120
|
+
else:
|
|
121
|
+
processor_data_to_display[processor] = self.processor_artifacts[processor]
|
|
122
|
+
|
|
102
123
|
display_sample_record(
|
|
103
124
|
record=record,
|
|
125
|
+
processor_data_to_display=processor_data_to_display,
|
|
104
126
|
config_builder=self._config_builder,
|
|
105
127
|
background_color=background_color,
|
|
106
128
|
syntax_highlighting_theme=syntax_highlighting_theme,
|
|
@@ -134,6 +156,7 @@ def create_rich_histogram_table(
|
|
|
134
156
|
def display_sample_record(
|
|
135
157
|
record: Union[dict, pd.Series, pd.DataFrame],
|
|
136
158
|
config_builder: DataDesignerConfigBuilder,
|
|
159
|
+
processor_data_to_display: Optional[dict[str, Union[list[str], str]]] = None,
|
|
137
160
|
background_color: Optional[str] = None,
|
|
138
161
|
syntax_highlighting_theme: str = "dracula",
|
|
139
162
|
record_index: Optional[int] = None,
|
|
@@ -230,6 +253,15 @@ def display_sample_record(
|
|
|
230
253
|
table.add_row(*row)
|
|
231
254
|
render_list.append(pad_console_element(table, (1, 0, 1, 0)))
|
|
232
255
|
|
|
256
|
+
if processor_data_to_display and len(processor_data_to_display) > 0:
|
|
257
|
+
for processor_name, processor_data in processor_data_to_display.items():
|
|
258
|
+
table = Table(title=f"Processor Outputs: {processor_name}", **table_kws)
|
|
259
|
+
table.add_column("Name")
|
|
260
|
+
table.add_column("Value")
|
|
261
|
+
for col, value in processor_data.items():
|
|
262
|
+
table.add_row(col, convert_to_row_element(value))
|
|
263
|
+
render_list.append(pad_console_element(table, (1, 0, 1, 0)))
|
|
264
|
+
|
|
233
265
|
if record_index is not None:
|
|
234
266
|
index_label = Text(f"[index: {record_index}]", justify="center")
|
|
235
267
|
render_list.append(index_label)
|
|
@@ -25,6 +25,7 @@ class BatchStage(StrEnum):
|
|
|
25
25
|
PARTIAL_RESULT = "partial_results_path"
|
|
26
26
|
FINAL_RESULT = "final_dataset_path"
|
|
27
27
|
DROPPED_COLUMNS = "dropped_columns_dataset_path"
|
|
28
|
+
PROCESSORS_OUTPUTS = "processors_outputs_path"
|
|
28
29
|
|
|
29
30
|
|
|
30
31
|
class ArtifactStorage(BaseModel):
|
|
@@ -33,6 +34,7 @@ class ArtifactStorage(BaseModel):
|
|
|
33
34
|
final_dataset_folder_name: str = "parquet-files"
|
|
34
35
|
partial_results_folder_name: str = "tmp-partial-parquet-files"
|
|
35
36
|
dropped_columns_folder_name: str = "dropped-columns-parquet-files"
|
|
37
|
+
processors_outputs_folder_name: str = "processors-files"
|
|
36
38
|
|
|
37
39
|
@property
|
|
38
40
|
def artifact_path_exists(self) -> bool:
|
|
@@ -70,6 +72,10 @@ class ArtifactStorage(BaseModel):
|
|
|
70
72
|
def partial_results_path(self) -> Path:
|
|
71
73
|
return self.base_dataset_path / self.partial_results_folder_name
|
|
72
74
|
|
|
75
|
+
@property
|
|
76
|
+
def processors_outputs_path(self) -> Path:
|
|
77
|
+
return self.base_dataset_path / self.processors_outputs_folder_name
|
|
78
|
+
|
|
73
79
|
@field_validator("artifact_path")
|
|
74
80
|
def validate_artifact_path(cls, v: Union[Path, str]) -> Path:
|
|
75
81
|
v = Path(v)
|
|
@@ -84,6 +90,7 @@ class ArtifactStorage(BaseModel):
|
|
|
84
90
|
self.final_dataset_folder_name,
|
|
85
91
|
self.partial_results_folder_name,
|
|
86
92
|
self.dropped_columns_folder_name,
|
|
93
|
+
self.processors_outputs_folder_name,
|
|
87
94
|
]
|
|
88
95
|
|
|
89
96
|
for name in folder_names:
|
|
@@ -169,9 +176,10 @@ class ArtifactStorage(BaseModel):
|
|
|
169
176
|
batch_number: int,
|
|
170
177
|
dataframe: pd.DataFrame,
|
|
171
178
|
batch_stage: BatchStage,
|
|
179
|
+
subfolder: str | None = None,
|
|
172
180
|
) -> Path:
|
|
173
181
|
file_path = self.create_batch_file_path(batch_number, batch_stage=batch_stage)
|
|
174
|
-
self.write_parquet_file(file_path.name, dataframe, batch_stage)
|
|
182
|
+
self.write_parquet_file(file_path.name, dataframe, batch_stage, subfolder=subfolder)
|
|
175
183
|
return file_path
|
|
176
184
|
|
|
177
185
|
def write_parquet_file(
|
|
@@ -179,9 +187,11 @@ class ArtifactStorage(BaseModel):
|
|
|
179
187
|
parquet_file_name: str,
|
|
180
188
|
dataframe: pd.DataFrame,
|
|
181
189
|
batch_stage: BatchStage,
|
|
190
|
+
subfolder: str | None = None,
|
|
182
191
|
) -> Path:
|
|
183
|
-
|
|
184
|
-
|
|
192
|
+
subfolder = subfolder or ""
|
|
193
|
+
self.mkdir_if_needed(self._get_stage_path(batch_stage) / subfolder)
|
|
194
|
+
file_path = self._get_stage_path(batch_stage) / subfolder / parquet_file_name
|
|
185
195
|
dataframe.to_parquet(file_path, index=False)
|
|
186
196
|
return file_path
|
|
187
197
|
|
|
@@ -171,6 +171,8 @@ class ColumnWiseDatasetBuilder:
|
|
|
171
171
|
max_workers = MAX_CONCURRENCY_PER_NON_LLM_GENERATOR
|
|
172
172
|
if isinstance(generator, WithLLMGeneration):
|
|
173
173
|
max_workers = generator.inference_parameters.max_parallel_requests
|
|
174
|
+
elif hasattr(generator.config, "max_parallel_requests"):
|
|
175
|
+
max_workers = generator.config.max_parallel_requests
|
|
174
176
|
self._fan_out_with_threads(generator, max_workers=max_workers)
|
|
175
177
|
|
|
176
178
|
def _run_full_column_generator(self, generator: ColumnGenerator) -> None:
|
|
@@ -244,6 +246,7 @@ class ColumnWiseDatasetBuilder:
|
|
|
244
246
|
processors[BuildStage.POST_BATCH].append( # as post-batch by default
|
|
245
247
|
DropColumnsProcessor(
|
|
246
248
|
config=DropColumnsProcessorConfig(
|
|
249
|
+
name="default_drop_columns_processor",
|
|
247
250
|
column_names=columns_to_drop,
|
|
248
251
|
build_stage=BuildStage.POST_BATCH,
|
|
249
252
|
),
|
|
@@ -17,7 +17,7 @@ class DropColumnsProcessor(Processor[DropColumnsProcessorConfig]):
|
|
|
17
17
|
@staticmethod
|
|
18
18
|
def metadata() -> ConfigurableTaskMetadata:
|
|
19
19
|
return ConfigurableTaskMetadata(
|
|
20
|
-
name="
|
|
20
|
+
name="drop_columns_processor",
|
|
21
21
|
description="Drop columns from the input dataset.",
|
|
22
22
|
required_resources=None,
|
|
23
23
|
)
|
|
@@ -5,9 +5,11 @@ from data_designer.config.base import ConfigBase
|
|
|
5
5
|
from data_designer.config.processors import (
|
|
6
6
|
DropColumnsProcessorConfig,
|
|
7
7
|
ProcessorType,
|
|
8
|
+
SchemaTransformProcessorConfig,
|
|
8
9
|
)
|
|
9
10
|
from data_designer.engine.processing.processors.base import Processor
|
|
10
11
|
from data_designer.engine.processing.processors.drop_columns import DropColumnsProcessor
|
|
12
|
+
from data_designer.engine.processing.processors.schema_transform import SchemaTransformProcessor
|
|
11
13
|
from data_designer.engine.registry.base import TaskRegistry
|
|
12
14
|
|
|
13
15
|
|
|
@@ -16,5 +18,6 @@ class ProcessorRegistry(TaskRegistry[str, Processor, ConfigBase]): ...
|
|
|
16
18
|
|
|
17
19
|
def create_default_processor_registry() -> ProcessorRegistry:
|
|
18
20
|
registry = ProcessorRegistry()
|
|
21
|
+
registry.register(ProcessorType.SCHEMA_TRANSFORM, SchemaTransformProcessor, SchemaTransformProcessorConfig, False)
|
|
19
22
|
registry.register(ProcessorType.DROP_COLUMNS, DropColumnsProcessor, DropColumnsProcessorConfig, False)
|
|
20
23
|
return registry
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import logging
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
from data_designer.config.processors import SchemaTransformProcessorConfig
|
|
10
|
+
from data_designer.engine.configurable_task import ConfigurableTaskMetadata
|
|
11
|
+
from data_designer.engine.dataset_builders.artifact_storage import BatchStage
|
|
12
|
+
from data_designer.engine.processing.ginja.environment import WithJinja2UserTemplateRendering
|
|
13
|
+
from data_designer.engine.processing.processors.base import Processor
|
|
14
|
+
from data_designer.engine.processing.utils import deserialize_json_values
|
|
15
|
+
|
|
16
|
+
logger = logging.getLogger(__name__)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class SchemaTransformProcessor(WithJinja2UserTemplateRendering, Processor[SchemaTransformProcessorConfig]):
|
|
20
|
+
@staticmethod
|
|
21
|
+
def metadata() -> ConfigurableTaskMetadata:
|
|
22
|
+
return ConfigurableTaskMetadata(
|
|
23
|
+
name="schema_transform_processor",
|
|
24
|
+
description="Generate dataset with transformed schema using a Jinja2 template.",
|
|
25
|
+
required_resources=None,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
@property
|
|
29
|
+
def template_as_str(self) -> str:
|
|
30
|
+
return json.dumps(self.config.template)
|
|
31
|
+
|
|
32
|
+
def process(self, data: pd.DataFrame, *, current_batch_number: int | None = None) -> pd.DataFrame:
|
|
33
|
+
self.prepare_jinja2_template_renderer(self.template_as_str, data.columns.to_list())
|
|
34
|
+
formatted_records = [
|
|
35
|
+
json.loads(self.render_template(deserialize_json_values(record)).replace("\n", "\\n"))
|
|
36
|
+
for record in data.to_dict(orient="records")
|
|
37
|
+
]
|
|
38
|
+
formatted_data = pd.DataFrame(formatted_records)
|
|
39
|
+
if current_batch_number is not None:
|
|
40
|
+
self.artifact_storage.write_batch_to_parquet_file(
|
|
41
|
+
batch_number=current_batch_number,
|
|
42
|
+
dataframe=formatted_data,
|
|
43
|
+
batch_stage=BatchStage.PROCESSORS_OUTPUTS,
|
|
44
|
+
subfolder=self.config.name,
|
|
45
|
+
)
|
|
46
|
+
else:
|
|
47
|
+
self.artifact_storage.write_parquet_file(
|
|
48
|
+
parquet_file_name=f"{self.config.name}.parquet",
|
|
49
|
+
dataframe=formatted_data,
|
|
50
|
+
batch_stage=BatchStage.PROCESSORS_OUTPUTS,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
return data
|
|
@@ -249,6 +249,17 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
249
249
|
except Exception as e:
|
|
250
250
|
raise DataDesignerProfilingError(f"🛑 Error profiling preview dataset: {e}")
|
|
251
251
|
|
|
252
|
+
if builder.artifact_storage.processors_outputs_path.exists():
|
|
253
|
+
processor_artifacts = {
|
|
254
|
+
processor_config.name: pd.read_parquet(
|
|
255
|
+
builder.artifact_storage.processors_outputs_path / f"{processor_config.name}.parquet",
|
|
256
|
+
dtype_backend="pyarrow",
|
|
257
|
+
).to_dict(orient="records")
|
|
258
|
+
for processor_config in config_builder.get_processor_configs()
|
|
259
|
+
}
|
|
260
|
+
else:
|
|
261
|
+
processor_artifacts = {}
|
|
262
|
+
|
|
252
263
|
if (
|
|
253
264
|
len(processed_dataset) > 0
|
|
254
265
|
and isinstance(analysis, DatasetProfilerResults)
|
|
@@ -259,6 +270,7 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
259
270
|
return PreviewResults(
|
|
260
271
|
dataset=processed_dataset,
|
|
261
272
|
analysis=analysis,
|
|
273
|
+
processor_artifacts=processor_artifacts,
|
|
262
274
|
config_builder=config_builder,
|
|
263
275
|
)
|
|
264
276
|
|
|
@@ -3,12 +3,15 @@
|
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
6
8
|
import pandas as pd
|
|
7
9
|
|
|
8
10
|
from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
|
|
9
11
|
from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
10
12
|
from data_designer.config.utils.visualization import WithRecordSamplerMixin
|
|
11
13
|
from data_designer.engine.dataset_builders.artifact_storage import ArtifactStorage
|
|
14
|
+
from data_designer.engine.dataset_builders.errors import ArtifactStorageError
|
|
12
15
|
|
|
13
16
|
|
|
14
17
|
class DatasetCreationResults(WithRecordSamplerMixin):
|
|
@@ -53,3 +56,36 @@ class DatasetCreationResults(WithRecordSamplerMixin):
|
|
|
53
56
|
A pandas DataFrame containing the full generated dataset.
|
|
54
57
|
"""
|
|
55
58
|
return self.artifact_storage.load_dataset()
|
|
59
|
+
|
|
60
|
+
def load_processor_dataset(self, processor_name: str) -> pd.DataFrame:
|
|
61
|
+
"""Load the dataset generated by a processor.
|
|
62
|
+
|
|
63
|
+
This only works for processors that write their artifacts in Parquet format.
|
|
64
|
+
|
|
65
|
+
Args:
|
|
66
|
+
processor_name: The name of the processor to load the dataset from.
|
|
67
|
+
|
|
68
|
+
Returns:
|
|
69
|
+
A pandas DataFrame containing the dataset generated by the processor.
|
|
70
|
+
"""
|
|
71
|
+
try:
|
|
72
|
+
dataset = self.artifact_storage.read_parquet_files(
|
|
73
|
+
self.artifact_storage.processors_outputs_path / processor_name
|
|
74
|
+
)
|
|
75
|
+
except Exception as e:
|
|
76
|
+
raise ArtifactStorageError(f"Failed to load dataset for processor {processor_name}: {e}")
|
|
77
|
+
|
|
78
|
+
return dataset
|
|
79
|
+
|
|
80
|
+
def get_path_to_processor_artifacts(self, processor_name: str) -> Path:
|
|
81
|
+
"""Get the path to the artifacts generated by a processor.
|
|
82
|
+
|
|
83
|
+
Args:
|
|
84
|
+
processor_name: The name of the processor to load the artifact from.
|
|
85
|
+
|
|
86
|
+
Returns:
|
|
87
|
+
The path to the artifacts.
|
|
88
|
+
"""
|
|
89
|
+
if not self.artifact_storage.processors_outputs_path.exists():
|
|
90
|
+
raise ArtifactStorageError(f"Processor {processor_name} has no artifacts.")
|
|
91
|
+
return self.artifact_storage.processors_outputs_path / processor_name
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: data-designer
|
|
3
|
-
Version: 0.1.
|
|
3
|
+
Version: 0.1.5
|
|
4
4
|
Summary: General framework for synthetic data generation
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -144,12 +144,12 @@ preview.display_sample_record()
|
|
|
144
144
|
|
|
145
145
|
### 📚 Learn more
|
|
146
146
|
|
|
147
|
-
- **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/quick-start/)** – Detailed walkthrough with more examples
|
|
148
|
-
- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/)** – Step-by-step interactive tutorials
|
|
149
|
-
- **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
|
|
150
|
-
- **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
|
|
151
|
-
- **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/concepts/models/model-configs/)** – Configure custom models and providers
|
|
152
|
-
- **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
|
|
147
|
+
- **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/latest/quick-start/)** – Detailed walkthrough with more examples
|
|
148
|
+
- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/)** – Step-by-step interactive tutorials
|
|
149
|
+
- **[Column Types](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
|
|
150
|
+
- **[Validators](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
|
|
151
|
+
- **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/models/model-configs/)** – Configure custom models and providers
|
|
152
|
+
- **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
|
|
153
153
|
|
|
154
154
|
### 🔧 Configure models via CLI
|
|
155
155
|
|
|
@@ -161,7 +161,7 @@ data-designer config list # View current settings
|
|
|
161
161
|
|
|
162
162
|
### 🤝 Get involved
|
|
163
163
|
|
|
164
|
-
- **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING)** – Help improve Data Designer
|
|
164
|
+
- **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/latest/CONTRIBUTING)** – Help improve Data Designer
|
|
165
165
|
- **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or make a feature request
|
|
166
166
|
|
|
167
167
|
---
|
|
@@ -178,7 +178,7 @@ If you use NeMo Data Designer in your research, please cite it using the followi
|
|
|
178
178
|
|
|
179
179
|
```bibtex
|
|
180
180
|
@misc{nemo-data-designer,
|
|
181
|
-
author = {The NeMo Data Designer Team},
|
|
181
|
+
author = {The NeMo Data Designer Team, NVIDIA},
|
|
182
182
|
title = {NeMo Data Designer: A framework for generating synthetic data from scratch or based on your own seed data},
|
|
183
183
|
howpublished = {\url{https://github.com/NVIDIA-NeMo/DataDesigner}},
|
|
184
184
|
year = {2025},
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
data_designer/__init__.py,sha256=iCeqRnb640RrL2QpA630GY5Ng7JiDt83Vq0DwLnNugU,461
|
|
2
|
-
data_designer/_version.py,sha256=
|
|
2
|
+
data_designer/_version.py,sha256=rdxBMYpwzYxiWk08QbPLHSAxHoDfeKWwyaJIAM0lSic,704
|
|
3
3
|
data_designer/errors.py,sha256=Z4eN9XwzZvGRdBluSNoSqQYkPPzNQIDf0ET_OqWRZh8,179
|
|
4
4
|
data_designer/logging.py,sha256=OqRGvWNlGA3ebRFts7e5k-5GFwoAPaGXYQS4oEzVG0o,5354
|
|
5
5
|
data_designer/plugin_manager.py,sha256=eXtmmqyyoVHWO1zvlLvKQ-rTrONJxf9jhr4ZMzsXWSE,2610
|
|
@@ -39,11 +39,11 @@ data_designer/config/dataset_builders.py,sha256=1pNFy_pkQ5lJ6AVZ43AeTuSbz6yC_l7N
|
|
|
39
39
|
data_designer/config/datastore.py,sha256=brMylPuBsT7uDKSy7G59M7Zdx91RTYWMOVcdRVe5Wjs,7632
|
|
40
40
|
data_designer/config/default_model_settings.py,sha256=HAGyfYzT1fdWMpMSLeJuZZZQHKku2T9KJTOhpwS_5Ek,4577
|
|
41
41
|
data_designer/config/errors.py,sha256=MNMnqh8G1XzXAMeJ5ju6zkBiIH2aVgyITnzYJbGEwFY,461
|
|
42
|
-
data_designer/config/exports.py,sha256=
|
|
42
|
+
data_designer/config/exports.py,sha256=vDokNLxoBlaII_-TBIS4w65t-g-MX8ADV85arpOPBRA,4440
|
|
43
43
|
data_designer/config/interface.py,sha256=ery8a93pnCW1JPbgtiaRsMKSR8Q2o7rDmsZfVYbfkeE,1619
|
|
44
44
|
data_designer/config/models.py,sha256=kB9Ut9Y00V6nG9zKK2c4xIVZewn3vPPIU6deug_Rttc,11362
|
|
45
|
-
data_designer/config/preview_results.py,sha256=
|
|
46
|
-
data_designer/config/processors.py,sha256=
|
|
45
|
+
data_designer/config/preview_results.py,sha256=6FHBUJAxYEoLq8raCCkQYPUSJTQLvhXFMKciOBU_mVw,1411
|
|
46
|
+
data_designer/config/processors.py,sha256=Q1fCRoL7YSWAnLwJ6sGERwQXdJNx4By8WVyHhjwtd_8,3172
|
|
47
47
|
data_designer/config/sampler_constraints.py,sha256=Dxbjt5PNNmvm5CMp-Z5CYrfd6oeDeXOUnODR6FgvCDk,1187
|
|
48
48
|
data_designer/config/sampler_params.py,sha256=50OEhC1AF3EPMoMlpJGGZ72kXej5wsqcZiyt7J7Kx08,26614
|
|
49
49
|
data_designer/config/seed.py,sha256=tKzNUvHx-9JV8uPDUbQqx44tG88CAeCss_T8xFEPh5g,5547
|
|
@@ -61,8 +61,8 @@ data_designer/config/utils/io_helpers.py,sha256=Jl1ihaQM0K_SL86UfP0N1-y4KVph4z3S
|
|
|
61
61
|
data_designer/config/utils/misc.py,sha256=HVRvrbpdO5c_oPI-e_3hrS7cBJA1SaG8iHMLtWKVv8A,2526
|
|
62
62
|
data_designer/config/utils/numerical_helpers.py,sha256=tcm5x5qSURoZZHjN9Bm1-Jkct3G67QefXm10QQXDtlM,803
|
|
63
63
|
data_designer/config/utils/type_helpers.py,sha256=RvhDk4rxQKDOMBLqJiMM4IJXdLoNUf3uzW52vB5cqrg,4024
|
|
64
|
-
data_designer/config/utils/validation.py,sha256=
|
|
65
|
-
data_designer/config/utils/visualization.py,sha256=
|
|
64
|
+
data_designer/config/utils/validation.py,sha256=1MoVqrS_DofT0LDIrGpWTPi02chntZT1p2K0FIyUOzs,14463
|
|
65
|
+
data_designer/config/utils/visualization.py,sha256=X0R-EDW-yzIaYtK1ttLsCXEp6a6ubejvm_9xpO2UrMg,17599
|
|
66
66
|
data_designer/engine/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
67
67
|
data_designer/engine/configurable_task.py,sha256=GnaBG6xVBQ1ELpzumNctwKYZJvKKjh2LMKhws4W2GS4,3124
|
|
68
68
|
data_designer/engine/errors.py,sha256=DUoKhQCSwIBoLSQGv7dstzO3DFGDRqW3MBoWnRPcm1I,1262
|
|
@@ -88,8 +88,8 @@ data_designer/engine/column_generators/generators/validation.py,sha256=MbDFXzief
|
|
|
88
88
|
data_designer/engine/column_generators/utils/errors.py,sha256=ugNwaqnPdrPZI7YnKLbYwFjYUSm0WAzgaVu_u6i5Rc8,365
|
|
89
89
|
data_designer/engine/column_generators/utils/judge_score_factory.py,sha256=JRoaZgRGK24dH0zx7MNGSccK196tQK_l0sbwNkurg7c,2132
|
|
90
90
|
data_designer/engine/column_generators/utils/prompt_renderer.py,sha256=d4tbyPsgmFDikW3nxL5is9RNaajMkoPDCrfkQkxw7rc,4760
|
|
91
|
-
data_designer/engine/dataset_builders/artifact_storage.py,sha256=
|
|
92
|
-
data_designer/engine/dataset_builders/column_wise_builder.py,sha256=
|
|
91
|
+
data_designer/engine/dataset_builders/artifact_storage.py,sha256=GCHuKuQ6Y_ePG515rsqc3NzQtN1v4pEV2L1I2H2_tx4,8451
|
|
92
|
+
data_designer/engine/dataset_builders/column_wise_builder.py,sha256=ljf-2fAKdry1UCVubhkhRWhoVlKZfK77ytwgkjuQ5VY,13267
|
|
93
93
|
data_designer/engine/dataset_builders/errors.py,sha256=1kChleChG4rASWIiL4Bel6Ox6aFZjQUrh5ogPt1CDWo,359
|
|
94
94
|
data_designer/engine/dataset_builders/multi_column_configs.py,sha256=t28fhI-WRIBohFnAJ80l5EAETEDB5rJ5RSWInMiRfyE,1619
|
|
95
95
|
data_designer/engine/dataset_builders/utils/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
@@ -125,8 +125,9 @@ data_designer/engine/processing/gsonschema/schema_transformers.py,sha256=__-dfrC
|
|
|
125
125
|
data_designer/engine/processing/gsonschema/types.py,sha256=-x_K2HrVnZ_Z7fzYl4T2Gd7QHf6B6ADvn7E7iYvw5Kc,313
|
|
126
126
|
data_designer/engine/processing/gsonschema/validators.py,sha256=5Jh864KnA5gWBeLbpz1cE5Kk_GMxI6kPWvunAbLI3vI,4704
|
|
127
127
|
data_designer/engine/processing/processors/base.py,sha256=WJl7_0dtiUppjfY-lrQ3lDiIgYqRDSEYUwSAQNN7nFE,548
|
|
128
|
-
data_designer/engine/processing/processors/drop_columns.py,sha256
|
|
129
|
-
data_designer/engine/processing/processors/registry.py,sha256=
|
|
128
|
+
data_designer/engine/processing/processors/drop_columns.py,sha256=MIb_CVrpoM3kyN5-8dHZrdFAAUiCCWgDEyQjAk8nZqE,2060
|
|
129
|
+
data_designer/engine/processing/processors/registry.py,sha256=nhB1O4b0wSUkWQeleV9l1MykwZD-dSvY0ydqmSscEY8,1056
|
|
130
|
+
data_designer/engine/processing/processors/schema_transform.py,sha256=amRIw69F5Mn6ZrJvov3ZCRXk-Vil1_adQ1_rC6VKELg,2233
|
|
130
131
|
data_designer/engine/registry/base.py,sha256=8h5MRPccLGSGcss3qFoQ-i7XGzvn8gdiRR0tYr7mDgk,3544
|
|
131
132
|
data_designer/engine/registry/data_designer_registry.py,sha256=0nO7JEezwc2wnnDRKAX5BZz6RhBI3-kNU3Eb1WAdCFI,1487
|
|
132
133
|
data_designer/engine/registry/errors.py,sha256=nO794QVy4DovKGKWEjycVDN9cdDlH-skbZLTb354M3Y,309
|
|
@@ -164,15 +165,15 @@ data_designer/engine/validators/remote.py,sha256=jtDIvWzfHh17m2ac_Fp93p49Th8RlkB
|
|
|
164
165
|
data_designer/engine/validators/sql.py,sha256=bxbyxPxDT9yuwjhABVEY40iR1pzWRFi65WU4tPgG2bE,2250
|
|
165
166
|
data_designer/essentials/__init__.py,sha256=eHuZFJTmeRf_b6KQZ2vZeqy1afJ7y7RMTm7q4Jrg58s,1012
|
|
166
167
|
data_designer/interface/__init__.py,sha256=9eG4WHKyrJcNoK4GEz6BCw_E0Ewo9elQoDN4TLMbAog,137
|
|
167
|
-
data_designer/interface/data_designer.py,sha256=
|
|
168
|
+
data_designer/interface/data_designer.py,sha256=O6PehBIdL4_2d9rFW86J9b3jfJ_CJmFId8T2AviM2zM,16844
|
|
168
169
|
data_designer/interface/errors.py,sha256=jagKT3tPUnYq4e3e6AkTnBkcayHyEfxjPMBzx-GEKe4,565
|
|
169
|
-
data_designer/interface/results.py,sha256=
|
|
170
|
+
data_designer/interface/results.py,sha256=zYVX589OUyFuB-8XLmjjdKk3hCDNKu189sH-gOOFreQ,3511
|
|
170
171
|
data_designer/plugins/__init__.py,sha256=c_V7q4QhfVoNf_uc9UwmXCsWqwtyWogI7YoN_0PzzE4,234
|
|
171
172
|
data_designer/plugins/errors.py,sha256=yPIHpSddEr-o9ZcNVibb2hI-73O15Kg_Od8SlmQlnRs,297
|
|
172
173
|
data_designer/plugins/plugin.py,sha256=7ErdUyrTdOb5PCBE3msdhTOrvQpldjOQw90-Bu4Bosc,2522
|
|
173
174
|
data_designer/plugins/registry.py,sha256=w0o7I3A5UpIaCiqSJIj3kv_dLlh7m_WHznP_O-X13-s,3018
|
|
174
|
-
data_designer-0.1.
|
|
175
|
-
data_designer-0.1.
|
|
176
|
-
data_designer-0.1.
|
|
177
|
-
data_designer-0.1.
|
|
178
|
-
data_designer-0.1.
|
|
175
|
+
data_designer-0.1.5.dist-info/METADATA,sha256=s4j9BlO8RDnExQPVbFCYZhY5FNI539DanL-sLEmwzGk,6710
|
|
176
|
+
data_designer-0.1.5.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
177
|
+
data_designer-0.1.5.dist-info/entry_points.txt,sha256=NWWWidyDxN6CYX6y664PhBYMhbaYTQTyprqfYAgkyCg,57
|
|
178
|
+
data_designer-0.1.5.dist-info/licenses/LICENSE,sha256=cSWJDwVqHyQgly8Zmt3pqXJ2eQbZVYwN9qd0NMssxXY,11336
|
|
179
|
+
data_designer-0.1.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|