data-designer 0.1.3__py3-none-any.whl → 0.1.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/_version.py +2 -2
- data_designer/config/analysis/column_profilers.py +4 -4
- data_designer/config/analysis/column_statistics.py +5 -5
- data_designer/config/analysis/dataset_profiler.py +6 -6
- data_designer/config/analysis/utils/errors.py +1 -1
- data_designer/config/analysis/utils/reporting.py +5 -5
- data_designer/config/base.py +2 -2
- data_designer/config/column_configs.py +8 -8
- data_designer/config/column_types.py +9 -5
- data_designer/config/config_builder.py +32 -27
- data_designer/config/data_designer_config.py +7 -7
- data_designer/config/datastore.py +4 -4
- data_designer/config/default_model_settings.py +4 -4
- data_designer/config/errors.py +1 -1
- data_designer/config/exports.py +128 -0
- data_designer/config/interface.py +6 -6
- data_designer/config/models.py +109 -5
- data_designer/config/preview_results.py +3 -3
- data_designer/config/processors.py +2 -2
- data_designer/config/sampler_constraints.py +1 -1
- data_designer/config/sampler_params.py +2 -2
- data_designer/config/seed.py +3 -3
- data_designer/config/utils/constants.py +1 -1
- data_designer/config/utils/errors.py +1 -1
- data_designer/config/utils/info.py +8 -4
- data_designer/config/utils/io_helpers.py +5 -5
- data_designer/config/utils/misc.py +3 -3
- data_designer/config/utils/numerical_helpers.py +1 -1
- data_designer/config/utils/type_helpers.py +7 -3
- data_designer/config/utils/validation.py +5 -5
- data_designer/config/utils/visualization.py +10 -10
- data_designer/config/validator_params.py +2 -2
- data_designer/engine/analysis/column_profilers/base.py +1 -1
- data_designer/engine/analysis/dataset_profiler.py +1 -1
- data_designer/engine/analysis/utils/judge_score_processing.py +1 -1
- data_designer/engine/column_generators/generators/samplers.py +1 -1
- data_designer/engine/dataset_builders/artifact_storage.py +3 -3
- data_designer/engine/dataset_builders/column_wise_builder.py +1 -1
- data_designer/engine/dataset_builders/utils/concurrency.py +1 -1
- data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +1 -1
- data_designer/engine/errors.py +1 -1
- data_designer/engine/models/errors.py +1 -1
- data_designer/engine/models/facade.py +1 -1
- data_designer/engine/models/parsers/parser.py +2 -2
- data_designer/engine/models/recipes/response_recipes.py +1 -1
- data_designer/engine/processing/ginja/environment.py +1 -1
- data_designer/engine/processing/gsonschema/validators.py +1 -1
- data_designer/engine/resources/managed_dataset_repository.py +4 -4
- data_designer/engine/resources/managed_storage.py +1 -1
- data_designer/engine/sampling_gen/constraints.py +1 -1
- data_designer/engine/sampling_gen/data_sources/base.py +1 -1
- data_designer/engine/sampling_gen/entities/email_address_utils.py +1 -1
- data_designer/engine/sampling_gen/entities/national_id_utils.py +1 -1
- data_designer/engine/sampling_gen/entities/person.py +1 -1
- data_designer/engine/sampling_gen/entities/phone_number.py +1 -1
- data_designer/engine/sampling_gen/people_gen.py +3 -3
- data_designer/engine/secret_resolver.py +1 -1
- data_designer/engine/validators/python.py +2 -2
- data_designer/essentials/__init__.py +20 -128
- data_designer/interface/data_designer.py +11 -19
- data_designer/logging.py +2 -2
- data_designer/plugin_manager.py +14 -26
- data_designer/plugins/registry.py +1 -1
- {data_designer-0.1.3.dist-info → data_designer-0.1.4.dist-info}/METADATA +2 -2
- {data_designer-0.1.3.dist-info → data_designer-0.1.4.dist-info}/RECORD +68 -67
- {data_designer-0.1.3.dist-info → data_designer-0.1.4.dist-info}/WHEEL +0 -0
- {data_designer-0.1.3.dist-info → data_designer-0.1.4.dist-info}/entry_points.txt +0 -0
- {data_designer-0.1.3.dist-info → data_designer-0.1.4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
-
from abc import ABC, abstractmethod
|
|
5
|
-
from dataclasses import dataclass
|
|
6
|
-
from functools import cached_property
|
|
7
4
|
import logging
|
|
8
|
-
from pathlib import Path
|
|
9
5
|
import tempfile
|
|
10
6
|
import threading
|
|
11
7
|
import time
|
|
8
|
+
from abc import ABC, abstractmethod
|
|
9
|
+
from dataclasses import dataclass
|
|
10
|
+
from functools import cached_property
|
|
11
|
+
from pathlib import Path
|
|
12
12
|
from typing import Any
|
|
13
13
|
|
|
14
14
|
import duckdb
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
import logging
|
|
4
5
|
from abc import ABC, abstractmethod
|
|
5
6
|
from collections.abc import Iterator
|
|
6
7
|
from contextlib import contextmanager
|
|
7
|
-
import logging
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
from typing import IO
|
|
10
10
|
|
|
@@ -5,8 +5,8 @@ from abc import ABC, abstractmethod
|
|
|
5
5
|
from typing import Type
|
|
6
6
|
|
|
7
7
|
import numpy as np
|
|
8
|
-
from numpy.typing import NDArray
|
|
9
8
|
import pandas as pd
|
|
9
|
+
from numpy.typing import NDArray
|
|
10
10
|
|
|
11
11
|
from data_designer.config.base import ConfigBase
|
|
12
12
|
from data_designer.config.sampler_constraints import (
|
|
@@ -5,8 +5,8 @@ from abc import ABC, abstractmethod
|
|
|
5
5
|
from typing import Any, Generic, Optional, Type, TypeVar, Union
|
|
6
6
|
|
|
7
7
|
import numpy as np
|
|
8
|
-
from numpy.typing import NDArray
|
|
9
8
|
import pandas as pd
|
|
9
|
+
from numpy.typing import NDArray
|
|
10
10
|
from scipy import stats
|
|
11
11
|
|
|
12
12
|
from data_designer.config.sampler_params import SamplerParamsT
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
-
from datetime import date, timedelta
|
|
5
4
|
import random
|
|
5
|
+
from datetime import date, timedelta
|
|
6
6
|
from typing import Any, Literal, TypeAlias
|
|
7
7
|
|
|
8
8
|
from data_designer.config.utils.constants import LOCALES_WITH_MANAGED_DATASETS
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
-
from pathlib import Path
|
|
5
4
|
import random
|
|
5
|
+
from pathlib import Path
|
|
6
6
|
from typing import Optional
|
|
7
7
|
|
|
8
8
|
import pandas as pd
|
|
@@ -3,15 +3,15 @@
|
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
|
+
import random
|
|
7
|
+
import uuid
|
|
6
8
|
from abc import ABC, abstractmethod
|
|
7
9
|
from collections.abc import Callable
|
|
8
10
|
from copy import deepcopy
|
|
9
|
-
import random
|
|
10
11
|
from typing import TYPE_CHECKING, Any, Union
|
|
11
|
-
import uuid
|
|
12
12
|
|
|
13
|
-
from faker import Faker
|
|
14
13
|
import pandas as pd
|
|
14
|
+
from faker import Faker
|
|
15
15
|
|
|
16
16
|
from data_designer.config.utils.constants import AVAILABLE_LOCALES, DEFAULT_AGE_RANGE
|
|
17
17
|
from data_designer.engine.resources.managed_dataset_generator import ManagedDatasetGenerator
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
-
from collections.abc import Sequence
|
|
5
4
|
import json
|
|
6
5
|
import logging
|
|
7
6
|
import os
|
|
7
|
+
from collections.abc import Sequence
|
|
8
8
|
from pathlib import Path
|
|
9
9
|
from typing import Protocol
|
|
10
10
|
|
|
@@ -2,12 +2,12 @@
|
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
import ast
|
|
5
|
-
from collections import defaultdict
|
|
6
5
|
import logging
|
|
7
|
-
from pathlib import Path
|
|
8
6
|
import re
|
|
9
7
|
import subprocess
|
|
10
8
|
import tempfile
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
from pathlib import Path
|
|
11
11
|
from uuid import uuid4
|
|
12
12
|
|
|
13
13
|
import pandas as pd
|
|
@@ -1,137 +1,29 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
-
|
|
3
|
+
|
|
4
|
+
from data_designer.config.default_model_settings import resolve_seed_default_model_settings
|
|
5
|
+
from data_designer.config.exports import * # noqa: F403
|
|
6
|
+
from data_designer.config.validator_params import LocalCallableValidatorParams
|
|
7
|
+
from data_designer.interface.data_designer import DataDesigner
|
|
8
|
+
from data_designer.logging import LoggingConfig, configure_logging
|
|
4
9
|
|
|
5
10
|
configure_logging(LoggingConfig.default())
|
|
6
11
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
LLMCodeColumnConfig,
|
|
11
|
-
LLMJudgeColumnConfig,
|
|
12
|
-
LLMStructuredColumnConfig,
|
|
13
|
-
LLMTextColumnConfig,
|
|
14
|
-
SamplerColumnConfig,
|
|
15
|
-
Score,
|
|
16
|
-
SeedDatasetColumnConfig,
|
|
17
|
-
ValidationColumnConfig,
|
|
18
|
-
)
|
|
19
|
-
from ..config.column_types import DataDesignerColumnType
|
|
20
|
-
from ..config.config_builder import DataDesignerConfigBuilder
|
|
21
|
-
from ..config.data_designer_config import DataDesignerConfig
|
|
22
|
-
from ..config.dataset_builders import BuildStage
|
|
23
|
-
from ..config.datastore import DatastoreSettings
|
|
24
|
-
from ..config.models import (
|
|
25
|
-
ImageContext,
|
|
26
|
-
ImageFormat,
|
|
27
|
-
InferenceParameters,
|
|
28
|
-
ManualDistribution,
|
|
29
|
-
ManualDistributionParams,
|
|
30
|
-
Modality,
|
|
31
|
-
ModalityContext,
|
|
32
|
-
ModalityDataType,
|
|
33
|
-
ModelConfig,
|
|
34
|
-
UniformDistribution,
|
|
35
|
-
UniformDistributionParams,
|
|
36
|
-
)
|
|
37
|
-
from ..config.processors import DropColumnsProcessorConfig, ProcessorType
|
|
38
|
-
from ..config.sampler_constraints import ColumnInequalityConstraint, ScalarInequalityConstraint
|
|
39
|
-
from ..config.sampler_params import (
|
|
40
|
-
BernoulliMixtureSamplerParams,
|
|
41
|
-
BernoulliSamplerParams,
|
|
42
|
-
BinomialSamplerParams,
|
|
43
|
-
CategorySamplerParams,
|
|
44
|
-
DatetimeSamplerParams,
|
|
45
|
-
GaussianSamplerParams,
|
|
46
|
-
PersonFromFakerSamplerParams,
|
|
47
|
-
PersonSamplerParams,
|
|
48
|
-
PoissonSamplerParams,
|
|
49
|
-
SamplerType,
|
|
50
|
-
ScipySamplerParams,
|
|
51
|
-
SubcategorySamplerParams,
|
|
52
|
-
TimeDeltaSamplerParams,
|
|
53
|
-
UniformSamplerParams,
|
|
54
|
-
UUIDSamplerParams,
|
|
55
|
-
)
|
|
56
|
-
from ..config.seed import DatastoreSeedDatasetReference, IndexRange, PartitionBlock, SamplingStrategy, SeedConfig
|
|
57
|
-
from ..config.utils.code_lang import CodeLang
|
|
58
|
-
from ..config.utils.info import InfoType
|
|
59
|
-
from ..config.utils.misc import can_run_data_designer_locally
|
|
60
|
-
from ..config.validator_params import (
|
|
61
|
-
CodeValidatorParams,
|
|
62
|
-
RemoteValidatorParams,
|
|
63
|
-
ValidatorType,
|
|
64
|
-
)
|
|
12
|
+
# Resolve default model settings on import to ensure they are available when the library is used.
|
|
13
|
+
resolve_seed_default_model_settings()
|
|
14
|
+
|
|
65
15
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
16
|
+
def get_essentials_exports() -> list[str]:
|
|
17
|
+
logging = [
|
|
18
|
+
configure_logging.__name__,
|
|
19
|
+
LoggingConfig.__name__,
|
|
20
|
+
]
|
|
21
|
+
local = [
|
|
22
|
+
DataDesigner.__name__,
|
|
23
|
+
LocalCallableValidatorParams.__name__,
|
|
24
|
+
]
|
|
72
25
|
|
|
73
|
-
|
|
74
|
-
except ModuleNotFoundError:
|
|
75
|
-
pass
|
|
26
|
+
return logging + local + get_config_exports() # noqa: F405
|
|
76
27
|
|
|
77
|
-
__all__ = [
|
|
78
|
-
"BernoulliMixtureSamplerParams",
|
|
79
|
-
"BernoulliSamplerParams",
|
|
80
|
-
"BinomialSamplerParams",
|
|
81
|
-
"CategorySamplerParams",
|
|
82
|
-
"CodeLang",
|
|
83
|
-
"CodeValidatorParams",
|
|
84
|
-
"ColumnInequalityConstraint",
|
|
85
|
-
"configure_logging",
|
|
86
|
-
"DataDesignerColumnType",
|
|
87
|
-
"DataDesignerConfig",
|
|
88
|
-
"DataDesignerConfigBuilder",
|
|
89
|
-
"BuildStage",
|
|
90
|
-
"DatastoreSeedDatasetReference",
|
|
91
|
-
"DatastoreSettings",
|
|
92
|
-
"DatetimeSamplerParams",
|
|
93
|
-
"DropColumnsProcessorConfig",
|
|
94
|
-
"ExpressionColumnConfig",
|
|
95
|
-
"GaussianSamplerParams",
|
|
96
|
-
"IndexRange",
|
|
97
|
-
"InfoType",
|
|
98
|
-
"ImageContext",
|
|
99
|
-
"ImageFormat",
|
|
100
|
-
"InferenceParameters",
|
|
101
|
-
"JudgeScoreProfilerConfig",
|
|
102
|
-
"LLMCodeColumnConfig",
|
|
103
|
-
"LLMJudgeColumnConfig",
|
|
104
|
-
"LLMStructuredColumnConfig",
|
|
105
|
-
"LLMTextColumnConfig",
|
|
106
|
-
"LoggingConfig",
|
|
107
|
-
"ManualDistribution",
|
|
108
|
-
"ManualDistributionParams",
|
|
109
|
-
"Modality",
|
|
110
|
-
"ModalityContext",
|
|
111
|
-
"ModalityDataType",
|
|
112
|
-
"ModelConfig",
|
|
113
|
-
"PartitionBlock",
|
|
114
|
-
"PersonSamplerParams",
|
|
115
|
-
"PersonFromFakerSamplerParams",
|
|
116
|
-
"PoissonSamplerParams",
|
|
117
|
-
"ProcessorType",
|
|
118
|
-
"RemoteValidatorParams",
|
|
119
|
-
"SamplerColumnConfig",
|
|
120
|
-
"SamplerType",
|
|
121
|
-
"SamplingStrategy",
|
|
122
|
-
"ScalarInequalityConstraint",
|
|
123
|
-
"ScipySamplerParams",
|
|
124
|
-
"Score",
|
|
125
|
-
"SeedConfig",
|
|
126
|
-
"SeedDatasetColumnConfig",
|
|
127
|
-
"SubcategorySamplerParams",
|
|
128
|
-
"TimeDeltaSamplerParams",
|
|
129
|
-
"UniformDistribution",
|
|
130
|
-
"UniformDistributionParams",
|
|
131
|
-
"UniformSamplerParams",
|
|
132
|
-
"UUIDSamplerParams",
|
|
133
|
-
"ValidationColumnConfig",
|
|
134
|
-
"ValidatorType",
|
|
135
|
-
]
|
|
136
28
|
|
|
137
|
-
__all__
|
|
29
|
+
__all__ = get_essentials_exports()
|
|
@@ -13,7 +13,6 @@ from data_designer.config.default_model_settings import (
|
|
|
13
13
|
get_default_model_providers_missing_api_keys,
|
|
14
14
|
get_default_provider_name,
|
|
15
15
|
get_default_providers,
|
|
16
|
-
resolve_seed_default_model_settings,
|
|
17
16
|
)
|
|
18
17
|
from data_designer.config.interface import DataDesignerInterface
|
|
19
18
|
from data_designer.config.models import (
|
|
@@ -31,7 +30,6 @@ from data_designer.config.utils.constants import (
|
|
|
31
30
|
)
|
|
32
31
|
from data_designer.config.utils.info import InfoType, InterfaceInfo
|
|
33
32
|
from data_designer.config.utils.io_helpers import write_seed_dataset
|
|
34
|
-
from data_designer.config.utils.misc import can_run_data_designer_locally
|
|
35
33
|
from data_designer.engine.analysis.dataset_profiler import (
|
|
36
34
|
DataDesignerDatasetProfiler,
|
|
37
35
|
DatasetProfilerConfig,
|
|
@@ -66,11 +64,6 @@ DEFAULT_BUFFER_SIZE = 1000
|
|
|
66
64
|
logger = logging.getLogger(__name__)
|
|
67
65
|
|
|
68
66
|
|
|
69
|
-
# Resolve default model settings on import to ensure they are available when the library is used.
|
|
70
|
-
if can_run_data_designer_locally():
|
|
71
|
-
resolve_seed_default_model_settings()
|
|
72
|
-
|
|
73
|
-
|
|
74
67
|
class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
75
68
|
"""Main interface for creating datasets with Data Designer.
|
|
76
69
|
|
|
@@ -315,18 +308,17 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
315
308
|
|
|
316
309
|
def _resolve_model_providers(self, model_providers: list[ModelProvider] | None) -> list[ModelProvider]:
|
|
317
310
|
if model_providers is None:
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
return model_providers
|
|
311
|
+
model_providers = get_default_providers()
|
|
312
|
+
missing_api_keys = get_default_model_providers_missing_api_keys()
|
|
313
|
+
if len(missing_api_keys) == len(PREDEFINED_PROVIDERS):
|
|
314
|
+
logger.warning(
|
|
315
|
+
"🚨 You are trying to use a default model provider but your API keys are missing."
|
|
316
|
+
"\n\t\t\tSet the API key for the default providers you intend to use and re-initialize the Data Designer object."
|
|
317
|
+
"\n\t\t\tAlternatively, you can provide your own model providers during Data Designer object initialization."
|
|
318
|
+
"\n\t\t\tSee https://nvidia-nemo.github.io/DataDesigner/concepts/models/model-providers/ for more information."
|
|
319
|
+
)
|
|
320
|
+
self._get_interface_info(model_providers).display(InfoType.MODEL_PROVIDERS)
|
|
321
|
+
return model_providers
|
|
330
322
|
return model_providers or []
|
|
331
323
|
|
|
332
324
|
def _create_dataset_builder(
|
data_designer/logging.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
-
from dataclasses import dataclass, field
|
|
5
4
|
import logging
|
|
6
|
-
from pathlib import Path
|
|
7
5
|
import random
|
|
8
6
|
import sys
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from pathlib import Path
|
|
9
9
|
from typing import TextIO, Union
|
|
10
10
|
|
|
11
11
|
from pythonjsonlogger import jsonlogger
|
data_designer/plugin_manager.py
CHANGED
|
@@ -6,25 +6,16 @@ from __future__ import annotations
|
|
|
6
6
|
from enum import Enum
|
|
7
7
|
from typing import TYPE_CHECKING, Type, TypeAlias
|
|
8
8
|
|
|
9
|
-
from .
|
|
9
|
+
from data_designer.plugins.plugin import PluginType
|
|
10
|
+
from data_designer.plugins.registry import PluginRegistry
|
|
10
11
|
|
|
11
12
|
if TYPE_CHECKING:
|
|
12
13
|
from data_designer.plugins.plugin import Plugin
|
|
13
14
|
|
|
14
15
|
|
|
15
|
-
if can_run_data_designer_locally():
|
|
16
|
-
from data_designer.plugins.plugin import PluginType
|
|
17
|
-
from data_designer.plugins.registry import PluginRegistry
|
|
18
|
-
|
|
19
|
-
|
|
20
16
|
class PluginManager:
|
|
21
17
|
def __init__(self):
|
|
22
|
-
|
|
23
|
-
self._plugins_supported = True
|
|
24
|
-
self._plugin_registry = PluginRegistry()
|
|
25
|
-
else:
|
|
26
|
-
self._plugins_supported = False
|
|
27
|
-
self._plugin_registry = None
|
|
18
|
+
self._plugin_registry = PluginRegistry()
|
|
28
19
|
|
|
29
20
|
def get_column_generator_plugins(self) -> list[Plugin]:
|
|
30
21
|
"""Get all column generator plugins.
|
|
@@ -32,7 +23,7 @@ class PluginManager:
|
|
|
32
23
|
Returns:
|
|
33
24
|
A list of all column generator plugins.
|
|
34
25
|
"""
|
|
35
|
-
return self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR)
|
|
26
|
+
return self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR)
|
|
36
27
|
|
|
37
28
|
def get_column_generator_plugin_if_exists(self, plugin_name: str) -> Plugin | None:
|
|
38
29
|
"""Get a column generator plugin by name if it exists.
|
|
@@ -43,9 +34,8 @@ class PluginManager:
|
|
|
43
34
|
Returns:
|
|
44
35
|
The plugin if found, otherwise None.
|
|
45
36
|
"""
|
|
46
|
-
if self.
|
|
37
|
+
if self._plugin_registry.plugin_exists(plugin_name):
|
|
47
38
|
return self._plugin_registry.get_plugin(plugin_name)
|
|
48
|
-
return None
|
|
49
39
|
|
|
50
40
|
def get_plugin_column_types(self, enum_type: Type[Enum], required_resources: list[str] | None = None) -> list[Enum]:
|
|
51
41
|
"""Get a list of plugin column types.
|
|
@@ -58,13 +48,12 @@ class PluginManager:
|
|
|
58
48
|
A list of plugin column types.
|
|
59
49
|
"""
|
|
60
50
|
type_list = []
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
type_list.append(enum_type(plugin.name))
|
|
51
|
+
for plugin in self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR):
|
|
52
|
+
if required_resources:
|
|
53
|
+
task_required_resources = plugin.task_cls.metadata().required_resources or []
|
|
54
|
+
if not all(resource in task_required_resources for resource in required_resources):
|
|
55
|
+
continue
|
|
56
|
+
type_list.append(enum_type(plugin.name))
|
|
68
57
|
return type_list
|
|
69
58
|
|
|
70
59
|
def inject_into_column_config_type_union(self, column_config_type: Type[TypeAlias]) -> Type[TypeAlias]:
|
|
@@ -76,8 +65,7 @@ class PluginManager:
|
|
|
76
65
|
Returns:
|
|
77
66
|
The column config type with plugins injected.
|
|
78
67
|
"""
|
|
79
|
-
|
|
80
|
-
column_config_type
|
|
81
|
-
|
|
82
|
-
)
|
|
68
|
+
column_config_type = self._plugin_registry.add_plugin_types_to_union(
|
|
69
|
+
column_config_type, PluginType.COLUMN_GENERATOR
|
|
70
|
+
)
|
|
83
71
|
return column_config_type
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
-
from importlib.metadata import entry_points
|
|
5
4
|
import logging
|
|
6
5
|
import os
|
|
7
6
|
import threading
|
|
7
|
+
from importlib.metadata import entry_points
|
|
8
8
|
from typing import Type, TypeAlias
|
|
9
9
|
|
|
10
10
|
from typing_extensions import Self
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: data-designer
|
|
3
|
-
Version: 0.1.3
|
|
3
|
+
Version: 0.1.4
|
|
4
4
|
Summary: General framework for synthetic data generation
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -148,7 +148,7 @@ preview.display_sample_record()
|
|
|
148
148
|
- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/)** – Step-by-step interactive tutorials
|
|
149
149
|
- **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
|
|
150
150
|
- **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
|
|
151
|
-
- **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/models/model-configs/)** – Configure custom models and providers
|
|
151
|
+
- **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/concepts/models/model-configs/)** – Configure custom models and providers
|
|
152
152
|
- **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
|
|
153
153
|
|
|
154
154
|
### 🔧 Configure models via CLI
|