data-designer 0.1.2__py3-none-any.whl → 0.1.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69)
  1. data_designer/_version.py +2 -2
  2. data_designer/config/analysis/column_profilers.py +4 -4
  3. data_designer/config/analysis/column_statistics.py +5 -5
  4. data_designer/config/analysis/dataset_profiler.py +6 -6
  5. data_designer/config/analysis/utils/errors.py +1 -1
  6. data_designer/config/analysis/utils/reporting.py +5 -5
  7. data_designer/config/base.py +2 -2
  8. data_designer/config/column_configs.py +8 -8
  9. data_designer/config/column_types.py +9 -5
  10. data_designer/config/config_builder.py +32 -27
  11. data_designer/config/data_designer_config.py +7 -7
  12. data_designer/config/datastore.py +4 -4
  13. data_designer/config/default_model_settings.py +4 -4
  14. data_designer/config/errors.py +1 -1
  15. data_designer/config/exports.py +128 -0
  16. data_designer/config/interface.py +6 -6
  17. data_designer/config/models.py +109 -5
  18. data_designer/config/preview_results.py +3 -3
  19. data_designer/config/processors.py +2 -2
  20. data_designer/config/sampler_constraints.py +1 -1
  21. data_designer/config/sampler_params.py +2 -5
  22. data_designer/config/seed.py +3 -3
  23. data_designer/config/utils/constants.py +1 -1
  24. data_designer/config/utils/errors.py +1 -1
  25. data_designer/config/utils/info.py +8 -4
  26. data_designer/config/utils/io_helpers.py +5 -5
  27. data_designer/config/utils/misc.py +3 -3
  28. data_designer/config/utils/numerical_helpers.py +1 -1
  29. data_designer/config/utils/type_helpers.py +7 -3
  30. data_designer/config/utils/validation.py +5 -5
  31. data_designer/config/utils/visualization.py +10 -10
  32. data_designer/config/validator_params.py +2 -2
  33. data_designer/engine/analysis/column_profilers/base.py +1 -1
  34. data_designer/engine/analysis/dataset_profiler.py +1 -1
  35. data_designer/engine/analysis/utils/judge_score_processing.py +1 -1
  36. data_designer/engine/column_generators/generators/samplers.py +1 -1
  37. data_designer/engine/dataset_builders/artifact_storage.py +16 -2
  38. data_designer/engine/dataset_builders/column_wise_builder.py +3 -3
  39. data_designer/engine/dataset_builders/utils/concurrency.py +1 -1
  40. data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +1 -1
  41. data_designer/engine/errors.py +1 -1
  42. data_designer/engine/models/errors.py +1 -1
  43. data_designer/engine/models/facade.py +1 -1
  44. data_designer/engine/models/parsers/parser.py +2 -2
  45. data_designer/engine/models/recipes/response_recipes.py +1 -1
  46. data_designer/engine/processing/ginja/environment.py +1 -1
  47. data_designer/engine/processing/gsonschema/validators.py +1 -1
  48. data_designer/engine/resources/managed_dataset_repository.py +4 -4
  49. data_designer/engine/resources/managed_storage.py +1 -1
  50. data_designer/engine/sampling_gen/constraints.py +1 -1
  51. data_designer/engine/sampling_gen/data_sources/base.py +1 -1
  52. data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +31 -9
  53. data_designer/engine/sampling_gen/entities/email_address_utils.py +1 -1
  54. data_designer/engine/sampling_gen/entities/national_id_utils.py +1 -1
  55. data_designer/engine/sampling_gen/entities/person.py +1 -1
  56. data_designer/engine/sampling_gen/entities/phone_number.py +1 -1
  57. data_designer/engine/sampling_gen/people_gen.py +3 -3
  58. data_designer/engine/secret_resolver.py +1 -1
  59. data_designer/engine/validators/python.py +2 -2
  60. data_designer/essentials/__init__.py +20 -128
  61. data_designer/interface/data_designer.py +16 -20
  62. data_designer/logging.py +2 -2
  63. data_designer/plugin_manager.py +14 -26
  64. data_designer/plugins/registry.py +1 -1
  65. {data_designer-0.1.2.dist-info → data_designer-0.1.4.dist-info}/METADATA +2 -2
  66. {data_designer-0.1.2.dist-info → data_designer-0.1.4.dist-info}/RECORD +69 -68
  67. {data_designer-0.1.2.dist-info → data_designer-0.1.4.dist-info}/WHEEL +1 -1
  68. {data_designer-0.1.2.dist-info → data_designer-0.1.4.dist-info}/entry_points.txt +0 -0
  69. {data_designer-0.1.2.dist-info → data_designer-0.1.4.dist-info}/licenses/LICENSE +0 -0
@@ -4,12 +4,12 @@
4
4
  from functools import reduce
5
5
  from typing import Optional
6
6
 
7
+ import marko
7
8
  from lxml import etree
8
9
  from lxml.etree import _Element
9
- import marko
10
10
 
11
- from data_designer.engine.models.parsers.postprocessors import merge_text_blocks
12
11
  import data_designer.engine.models.parsers.tag_parsers as tp
12
+ from data_designer.engine.models.parsers.postprocessors import merge_text_blocks
13
13
  from data_designer.engine.models.parsers.types import (
14
14
  LLMStructuredResponse,
15
15
  PostProcessor,
@@ -1,8 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from collections.abc import Callable
5
4
  import json
5
+ from collections.abc import Callable
6
6
 
7
7
  from pydantic import BaseModel
8
8
 
@@ -1,9 +1,9 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ import re
4
5
  from collections.abc import Callable
5
6
  from functools import partial, wraps
6
- import re
7
7
  from typing import Any
8
8
 
9
9
  from jinja2 import meta
@@ -1,8 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from copy import deepcopy
5
4
  import logging
5
+ from copy import deepcopy
6
6
  from typing import Any, overload
7
7
 
8
8
  from jsonschema import Draft202012Validator, ValidationError, validators
@@ -1,14 +1,14 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from abc import ABC, abstractmethod
5
- from dataclasses import dataclass
6
- from functools import cached_property
7
4
  import logging
8
- from pathlib import Path
9
5
  import tempfile
10
6
  import threading
11
7
  import time
8
+ from abc import ABC, abstractmethod
9
+ from dataclasses import dataclass
10
+ from functools import cached_property
11
+ from pathlib import Path
12
12
  from typing import Any
13
13
 
14
14
  import duckdb
@@ -1,10 +1,10 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ import logging
4
5
  from abc import ABC, abstractmethod
5
6
  from collections.abc import Iterator
6
7
  from contextlib import contextmanager
7
- import logging
8
8
  from pathlib import Path
9
9
  from typing import IO
10
10
 
@@ -5,8 +5,8 @@ from abc import ABC, abstractmethod
5
5
  from typing import Type
6
6
 
7
7
  import numpy as np
8
- from numpy.typing import NDArray
9
8
  import pandas as pd
9
+ from numpy.typing import NDArray
10
10
 
11
11
  from data_designer.config.base import ConfigBase
12
12
  from data_designer.config.sampler_constraints import (
@@ -5,8 +5,8 @@ from abc import ABC, abstractmethod
5
5
  from typing import Any, Generic, Optional, Type, TypeVar, Union
6
6
 
7
7
  import numpy as np
8
- from numpy.typing import NDArray
9
8
  import pandas as pd
9
+ from numpy.typing import NDArray
10
10
  from scipy import stats
11
11
 
12
12
  from data_designer.config.sampler_params import SamplerParamsT
@@ -14,6 +14,7 @@ REQUIRED_FIELDS = {"first_name", "last_name", "age", "locale"}
14
14
 
15
15
 
16
16
  PII_FIELDS = [
17
+ # Core demographic fields
17
18
  "uuid",
18
19
  "first_name",
19
20
  "middle_name",
@@ -22,25 +23,38 @@ PII_FIELDS = [
22
23
  "age",
23
24
  "birth_date",
24
25
  "marital_status",
25
- "street_name",
26
- "street_number",
27
- "unit",
28
26
  "postcode",
29
- "region",
30
27
  "city",
31
- "district",
28
+ "region",
32
29
  "country",
33
- "area",
34
- "zone",
30
+ "locale",
35
31
  "bachelors_field",
36
- "education_degree",
37
32
  "education_level",
38
33
  "occupation",
39
- "locale",
34
+ "national_id",
35
+ # US-specific fields
36
+ "street_name",
37
+ "street_number",
38
+ "unit",
39
+ "state",
40
+ "email_address",
41
+ "phone_number",
42
+ # Japan-specific fields
43
+ "area",
44
+ "prefecture",
45
+ "zone",
46
+ # India-specific fields
47
+ "district",
48
+ "religion",
49
+ "education_degree",
50
+ "first_language",
51
+ "second_language",
52
+ "third_language",
40
53
  ]
41
54
 
42
55
 
43
56
  PERSONA_FIELDS = [
57
+ # Core persona fields
44
58
  "persona",
45
59
  "career_goals_and_ambitions",
46
60
  "arts_persona",
@@ -61,4 +75,12 @@ PERSONA_FIELDS = [
61
75
  "extraversion",
62
76
  "agreeableness",
63
77
  "neuroticism",
78
+ # Japan-specific persona fields
79
+ "aspects",
80
+ "digital_skills",
81
+ # India-specific persona fields
82
+ "linguistic_persona",
83
+ "religious_persona",
84
+ "linguistic_background",
85
+ "religious_background",
64
86
  ]
@@ -1,9 +1,9 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from datetime import date
5
4
  import random
6
5
  import re
6
+ from datetime import date
7
7
 
8
8
  import anyascii
9
9
 
@@ -1,8 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from datetime import date
5
4
  import random
5
+ from datetime import date
6
6
 
7
7
  SSN_RANDOMIZATION_DATE = date(2011, 6, 25)
8
8
 
@@ -1,8 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from datetime import date, timedelta
5
4
  import random
5
+ from datetime import date, timedelta
6
6
  from typing import Any, Literal, TypeAlias
7
7
 
8
8
  from data_designer.config.utils.constants import LOCALES_WITH_MANAGED_DATASETS
@@ -1,8 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from pathlib import Path
5
4
  import random
5
+ from pathlib import Path
6
6
  from typing import Optional
7
7
 
8
8
  import pandas as pd
@@ -3,15 +3,15 @@
3
3
 
4
4
  from __future__ import annotations
5
5
 
6
+ import random
7
+ import uuid
6
8
  from abc import ABC, abstractmethod
7
9
  from collections.abc import Callable
8
10
  from copy import deepcopy
9
- import random
10
11
  from typing import TYPE_CHECKING, Any, Union
11
- import uuid
12
12
 
13
- from faker import Faker
14
13
  import pandas as pd
14
+ from faker import Faker
15
15
 
16
16
  from data_designer.config.utils.constants import AVAILABLE_LOCALES, DEFAULT_AGE_RANGE
17
17
  from data_designer.engine.resources.managed_dataset_generator import ManagedDatasetGenerator
@@ -1,10 +1,10 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from collections.abc import Sequence
5
4
  import json
6
5
  import logging
7
6
  import os
7
+ from collections.abc import Sequence
8
8
  from pathlib import Path
9
9
  from typing import Protocol
10
10
 
@@ -2,12 +2,12 @@
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  import ast
5
- from collections import defaultdict
6
5
  import logging
7
- from pathlib import Path
8
6
  import re
9
7
  import subprocess
10
8
  import tempfile
9
+ from collections import defaultdict
10
+ from pathlib import Path
11
11
  from uuid import uuid4
12
12
 
13
13
  import pandas as pd
@@ -1,137 +1,29 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
- from ..logging import LoggingConfig, configure_logging
3
+
4
+ from data_designer.config.default_model_settings import resolve_seed_default_model_settings
5
+ from data_designer.config.exports import * # noqa: F403
6
+ from data_designer.config.validator_params import LocalCallableValidatorParams
7
+ from data_designer.interface.data_designer import DataDesigner
8
+ from data_designer.logging import LoggingConfig, configure_logging
4
9
 
5
10
  configure_logging(LoggingConfig.default())
6
11
 
7
- from ..config.analysis.column_profilers import JudgeScoreProfilerConfig
8
- from ..config.column_configs import (
9
- ExpressionColumnConfig,
10
- LLMCodeColumnConfig,
11
- LLMJudgeColumnConfig,
12
- LLMStructuredColumnConfig,
13
- LLMTextColumnConfig,
14
- SamplerColumnConfig,
15
- Score,
16
- SeedDatasetColumnConfig,
17
- ValidationColumnConfig,
18
- )
19
- from ..config.column_types import DataDesignerColumnType
20
- from ..config.config_builder import DataDesignerConfigBuilder
21
- from ..config.data_designer_config import DataDesignerConfig
22
- from ..config.dataset_builders import BuildStage
23
- from ..config.datastore import DatastoreSettings
24
- from ..config.models import (
25
- ImageContext,
26
- ImageFormat,
27
- InferenceParameters,
28
- ManualDistribution,
29
- ManualDistributionParams,
30
- Modality,
31
- ModalityContext,
32
- ModalityDataType,
33
- ModelConfig,
34
- UniformDistribution,
35
- UniformDistributionParams,
36
- )
37
- from ..config.processors import DropColumnsProcessorConfig, ProcessorType
38
- from ..config.sampler_constraints import ColumnInequalityConstraint, ScalarInequalityConstraint
39
- from ..config.sampler_params import (
40
- BernoulliMixtureSamplerParams,
41
- BernoulliSamplerParams,
42
- BinomialSamplerParams,
43
- CategorySamplerParams,
44
- DatetimeSamplerParams,
45
- GaussianSamplerParams,
46
- PersonFromFakerSamplerParams,
47
- PersonSamplerParams,
48
- PoissonSamplerParams,
49
- SamplerType,
50
- ScipySamplerParams,
51
- SubcategorySamplerParams,
52
- TimeDeltaSamplerParams,
53
- UniformSamplerParams,
54
- UUIDSamplerParams,
55
- )
56
- from ..config.seed import DatastoreSeedDatasetReference, IndexRange, PartitionBlock, SamplingStrategy, SeedConfig
57
- from ..config.utils.code_lang import CodeLang
58
- from ..config.utils.info import InfoType
59
- from ..config.utils.misc import can_run_data_designer_locally
60
- from ..config.validator_params import (
61
- CodeValidatorParams,
62
- RemoteValidatorParams,
63
- ValidatorType,
64
- )
12
+ # Resolve default model settings on import to ensure they are available when the library is used.
13
+ resolve_seed_default_model_settings()
14
+
65
15
 
66
- local_library_imports = []
67
- try:
68
- if can_run_data_designer_locally():
69
- from ..config.validator_params import LocalCallableValidatorParams # noqa: F401
70
- from ..engine.model_provider import ModelProvider # noqa: F401
71
- from ..interface.data_designer import DataDesigner # noqa: F401
16
+ def get_essentials_exports() -> list[str]:
17
+ logging = [
18
+ configure_logging.__name__,
19
+ LoggingConfig.__name__,
20
+ ]
21
+ local = [
22
+ DataDesigner.__name__,
23
+ LocalCallableValidatorParams.__name__,
24
+ ]
72
25
 
73
- local_library_imports = ["DataDesigner", "LocalCallableValidatorParams", "ModelProvider"]
74
- except ModuleNotFoundError:
75
- pass
26
+ return logging + local + get_config_exports() # noqa: F405
76
27
 
77
- __all__ = [
78
- "BernoulliMixtureSamplerParams",
79
- "BernoulliSamplerParams",
80
- "BinomialSamplerParams",
81
- "CategorySamplerParams",
82
- "CodeLang",
83
- "CodeValidatorParams",
84
- "ColumnInequalityConstraint",
85
- "configure_logging",
86
- "DataDesignerColumnType",
87
- "DataDesignerConfig",
88
- "DataDesignerConfigBuilder",
89
- "BuildStage",
90
- "DatastoreSeedDatasetReference",
91
- "DatastoreSettings",
92
- "DatetimeSamplerParams",
93
- "DropColumnsProcessorConfig",
94
- "ExpressionColumnConfig",
95
- "GaussianSamplerParams",
96
- "IndexRange",
97
- "InfoType",
98
- "ImageContext",
99
- "ImageFormat",
100
- "InferenceParameters",
101
- "JudgeScoreProfilerConfig",
102
- "LLMCodeColumnConfig",
103
- "LLMJudgeColumnConfig",
104
- "LLMStructuredColumnConfig",
105
- "LLMTextColumnConfig",
106
- "LoggingConfig",
107
- "ManualDistribution",
108
- "ManualDistributionParams",
109
- "Modality",
110
- "ModalityContext",
111
- "ModalityDataType",
112
- "ModelConfig",
113
- "PartitionBlock",
114
- "PersonSamplerParams",
115
- "PersonFromFakerSamplerParams",
116
- "PoissonSamplerParams",
117
- "ProcessorType",
118
- "RemoteValidatorParams",
119
- "SamplerColumnConfig",
120
- "SamplerType",
121
- "SamplingStrategy",
122
- "ScalarInequalityConstraint",
123
- "ScipySamplerParams",
124
- "Score",
125
- "SeedConfig",
126
- "SeedDatasetColumnConfig",
127
- "SubcategorySamplerParams",
128
- "TimeDeltaSamplerParams",
129
- "UniformDistribution",
130
- "UniformDistributionParams",
131
- "UniformSamplerParams",
132
- "UUIDSamplerParams",
133
- "ValidationColumnConfig",
134
- "ValidatorType",
135
- ]
136
28
 
137
- __all__.extend(local_library_imports)
29
+ __all__ = get_essentials_exports()
@@ -13,7 +13,6 @@ from data_designer.config.default_model_settings import (
13
13
  get_default_model_providers_missing_api_keys,
14
14
  get_default_provider_name,
15
15
  get_default_providers,
16
- resolve_seed_default_model_settings,
17
16
  )
18
17
  from data_designer.config.interface import DataDesignerInterface
19
18
  from data_designer.config.models import (
@@ -31,7 +30,6 @@ from data_designer.config.utils.constants import (
31
30
  )
32
31
  from data_designer.config.utils.info import InfoType, InterfaceInfo
33
32
  from data_designer.config.utils.io_helpers import write_seed_dataset
34
- from data_designer.config.utils.misc import can_run_data_designer_locally
35
33
  from data_designer.engine.analysis.dataset_profiler import (
36
34
  DataDesignerDatasetProfiler,
37
35
  DatasetProfilerConfig,
@@ -66,11 +64,6 @@ DEFAULT_BUFFER_SIZE = 1000
66
64
  logger = logging.getLogger(__name__)
67
65
 
68
66
 
69
- # Resolve default model settings on import to ensure they are available when the library is used.
70
- if can_run_data_designer_locally():
71
- resolve_seed_default_model_settings()
72
-
73
-
74
67
  class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
75
68
  """Main interface for creating datasets with Data Designer.
76
69
 
@@ -173,7 +166,11 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
173
166
  configuration (columns, constraints, seed data, etc.).
174
167
  num_records: Number of records to generate.
175
168
  dataset_name: Name of the dataset. This name will be used as the dataset
176
- folder name in the artifact path directory.
169
+ folder name in the artifact path directory. If a non-empty directory with the
170
+ same name already exists, dataset will be saved to a new directory with
171
+ a datetime stamp. For example, if the dataset name is "awesome_dataset" and a directory
172
+ with the same name already exists, the dataset will be saved to a new directory
173
+ with the name "awesome_dataset_2025-01-01_12-00-00".
177
174
 
178
175
  Returns:
179
176
  DatasetCreationResults object with methods for loading the generated dataset,
@@ -311,18 +308,17 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
311
308
 
312
309
  def _resolve_model_providers(self, model_providers: list[ModelProvider] | None) -> list[ModelProvider]:
313
310
  if model_providers is None:
314
- if can_run_data_designer_locally():
315
- model_providers = get_default_providers()
316
- missing_api_keys = get_default_model_providers_missing_api_keys()
317
- if len(missing_api_keys) == len(PREDEFINED_PROVIDERS):
318
- logger.warning(
319
- "🚨 You are trying to use a default model provider but your API keys are missing."
320
- "\n\t\t\tSet the API key for the default providers you intend to use and re-initialize the Data Designer object."
321
- "\n\t\t\tAlternatively, you can provide your own model providers during Data Designer object initialization."
322
- "\n\t\t\tSee https://nvidia-nemo.github.io/DataDesigner/models/model-providers/ for more information."
323
- )
324
- self._get_interface_info(model_providers).display(InfoType.MODEL_PROVIDERS)
325
- return model_providers
311
+ model_providers = get_default_providers()
312
+ missing_api_keys = get_default_model_providers_missing_api_keys()
313
+ if len(missing_api_keys) == len(PREDEFINED_PROVIDERS):
314
+ logger.warning(
315
+ "🚨 You are trying to use a default model provider but your API keys are missing."
316
+ "\n\t\t\tSet the API key for the default providers you intend to use and re-initialize the Data Designer object."
317
+ "\n\t\t\tAlternatively, you can provide your own model providers during Data Designer object initialization."
318
+ "\n\t\t\tSee https://nvidia-nemo.github.io/DataDesigner/concepts/models/model-providers/ for more information."
319
+ )
320
+ self._get_interface_info(model_providers).display(InfoType.MODEL_PROVIDERS)
321
+ return model_providers
326
322
  return model_providers or []
327
323
 
328
324
  def _create_dataset_builder(
data_designer/logging.py CHANGED
@@ -1,11 +1,11 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from dataclasses import dataclass, field
5
4
  import logging
6
- from pathlib import Path
7
5
  import random
8
6
  import sys
7
+ from dataclasses import dataclass, field
8
+ from pathlib import Path
9
9
  from typing import TextIO, Union
10
10
 
11
11
  from pythonjsonlogger import jsonlogger
@@ -6,25 +6,16 @@ from __future__ import annotations
6
6
  from enum import Enum
7
7
  from typing import TYPE_CHECKING, Type, TypeAlias
8
8
 
9
- from .config.utils.misc import can_run_data_designer_locally
9
+ from data_designer.plugins.plugin import PluginType
10
+ from data_designer.plugins.registry import PluginRegistry
10
11
 
11
12
  if TYPE_CHECKING:
12
13
  from data_designer.plugins.plugin import Plugin
13
14
 
14
15
 
15
- if can_run_data_designer_locally():
16
- from data_designer.plugins.plugin import PluginType
17
- from data_designer.plugins.registry import PluginRegistry
18
-
19
-
20
16
  class PluginManager:
21
17
  def __init__(self):
22
- if can_run_data_designer_locally():
23
- self._plugins_supported = True
24
- self._plugin_registry = PluginRegistry()
25
- else:
26
- self._plugins_supported = False
27
- self._plugin_registry = None
18
+ self._plugin_registry = PluginRegistry()
28
19
 
29
20
  def get_column_generator_plugins(self) -> list[Plugin]:
30
21
  """Get all column generator plugins.
@@ -32,7 +23,7 @@ class PluginManager:
32
23
  Returns:
33
24
  A list of all column generator plugins.
34
25
  """
35
- return self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR) if self._plugins_supported else []
26
+ return self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR)
36
27
 
37
28
  def get_column_generator_plugin_if_exists(self, plugin_name: str) -> Plugin | None:
38
29
  """Get a column generator plugin by name if it exists.
@@ -43,9 +34,8 @@ class PluginManager:
43
34
  Returns:
44
35
  The plugin if found, otherwise None.
45
36
  """
46
- if self._plugins_supported and self._plugin_registry.plugin_exists(plugin_name):
37
+ if self._plugin_registry.plugin_exists(plugin_name):
47
38
  return self._plugin_registry.get_plugin(plugin_name)
48
- return None
49
39
 
50
40
  def get_plugin_column_types(self, enum_type: Type[Enum], required_resources: list[str] | None = None) -> list[Enum]:
51
41
  """Get a list of plugin column types.
@@ -58,13 +48,12 @@ class PluginManager:
58
48
  A list of plugin column types.
59
49
  """
60
50
  type_list = []
61
- if self._plugins_supported:
62
- for plugin in self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR):
63
- if required_resources:
64
- task_required_resources = plugin.task_cls.metadata().required_resources or []
65
- if not all(resource in task_required_resources for resource in required_resources):
66
- continue
67
- type_list.append(enum_type(plugin.name))
51
+ for plugin in self._plugin_registry.get_plugins(PluginType.COLUMN_GENERATOR):
52
+ if required_resources:
53
+ task_required_resources = plugin.task_cls.metadata().required_resources or []
54
+ if not all(resource in task_required_resources for resource in required_resources):
55
+ continue
56
+ type_list.append(enum_type(plugin.name))
68
57
  return type_list
69
58
 
70
59
  def inject_into_column_config_type_union(self, column_config_type: Type[TypeAlias]) -> Type[TypeAlias]:
@@ -76,8 +65,7 @@ class PluginManager:
76
65
  Returns:
77
66
  The column config type with plugins injected.
78
67
  """
79
- if self._plugins_supported:
80
- column_config_type = self._plugin_registry.add_plugin_types_to_union(
81
- column_config_type, PluginType.COLUMN_GENERATOR
82
- )
68
+ column_config_type = self._plugin_registry.add_plugin_types_to_union(
69
+ column_config_type, PluginType.COLUMN_GENERATOR
70
+ )
83
71
  return column_config_type
@@ -1,10 +1,10 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from importlib.metadata import entry_points
5
4
  import logging
6
5
  import os
7
6
  import threading
7
+ from importlib.metadata import entry_points
8
8
  from typing import Type, TypeAlias
9
9
 
10
10
  from typing_extensions import Self
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer
3
- Version: 0.1.2
3
+ Version: 0.1.4
4
4
  Summary: General framework for synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -148,7 +148,7 @@ preview.display_sample_record()
148
148
  - **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/)** – Step-by-step interactive tutorials
149
149
  - **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
150
150
  - **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
151
- - **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/models/model-configs/)** – Configure custom models and providers
151
+ - **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/concepts/models/model-configs/)** – Configure custom models and providers
152
152
  - **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
153
153
 
154
154
  ### 🔧 Configure models via CLI