data-designer 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173)
  1. data_designer/__init__.py +2 -0
  2. data_designer/_version.py +2 -2
  3. data_designer/cli/__init__.py +2 -0
  4. data_designer/cli/commands/download.py +2 -0
  5. data_designer/cli/commands/list.py +2 -0
  6. data_designer/cli/commands/models.py +2 -0
  7. data_designer/cli/commands/providers.py +2 -0
  8. data_designer/cli/commands/reset.py +2 -0
  9. data_designer/cli/controllers/__init__.py +2 -0
  10. data_designer/cli/controllers/download_controller.py +2 -0
  11. data_designer/cli/controllers/model_controller.py +6 -1
  12. data_designer/cli/controllers/provider_controller.py +6 -1
  13. data_designer/cli/forms/__init__.py +2 -0
  14. data_designer/cli/forms/builder.py +2 -0
  15. data_designer/cli/forms/field.py +2 -0
  16. data_designer/cli/forms/form.py +2 -0
  17. data_designer/cli/forms/model_builder.py +2 -0
  18. data_designer/cli/forms/provider_builder.py +2 -0
  19. data_designer/cli/main.py +2 -0
  20. data_designer/cli/repositories/__init__.py +2 -0
  21. data_designer/cli/repositories/base.py +2 -0
  22. data_designer/cli/repositories/model_repository.py +2 -0
  23. data_designer/cli/repositories/persona_repository.py +2 -0
  24. data_designer/cli/repositories/provider_repository.py +2 -0
  25. data_designer/cli/services/__init__.py +2 -0
  26. data_designer/cli/services/download_service.py +2 -0
  27. data_designer/cli/services/model_service.py +2 -0
  28. data_designer/cli/services/provider_service.py +2 -0
  29. data_designer/cli/ui.py +2 -0
  30. data_designer/cli/utils.py +2 -0
  31. data_designer/config/analysis/column_profilers.py +2 -0
  32. data_designer/config/analysis/column_statistics.py +8 -5
  33. data_designer/config/analysis/dataset_profiler.py +9 -3
  34. data_designer/config/analysis/utils/errors.py +2 -0
  35. data_designer/config/analysis/utils/reporting.py +7 -3
  36. data_designer/config/column_configs.py +77 -7
  37. data_designer/config/column_types.py +33 -36
  38. data_designer/config/dataset_builders.py +2 -0
  39. data_designer/config/default_model_settings.py +1 -0
  40. data_designer/config/errors.py +2 -0
  41. data_designer/config/exports.py +2 -0
  42. data_designer/config/interface.py +3 -2
  43. data_designer/config/models.py +7 -2
  44. data_designer/config/preview_results.py +7 -3
  45. data_designer/config/processors.py +2 -0
  46. data_designer/config/run_config.py +2 -0
  47. data_designer/config/sampler_constraints.py +2 -0
  48. data_designer/config/sampler_params.py +7 -2
  49. data_designer/config/seed.py +2 -0
  50. data_designer/config/seed_source.py +7 -2
  51. data_designer/config/seed_source_types.py +2 -0
  52. data_designer/config/utils/constants.py +2 -0
  53. data_designer/config/utils/errors.py +2 -0
  54. data_designer/config/utils/info.py +2 -0
  55. data_designer/config/utils/io_helpers.py +8 -3
  56. data_designer/config/utils/misc.py +2 -2
  57. data_designer/config/utils/numerical_helpers.py +2 -0
  58. data_designer/config/utils/type_helpers.py +2 -0
  59. data_designer/config/utils/visualization.py +8 -4
  60. data_designer/config/validator_params.py +2 -0
  61. data_designer/engine/analysis/column_profilers/base.py +9 -8
  62. data_designer/engine/analysis/column_profilers/judge_score_profiler.py +15 -19
  63. data_designer/engine/analysis/column_profilers/registry.py +2 -0
  64. data_designer/engine/analysis/column_statistics.py +5 -2
  65. data_designer/engine/analysis/dataset_profiler.py +12 -9
  66. data_designer/engine/analysis/errors.py +2 -0
  67. data_designer/engine/analysis/utils/column_statistics_calculations.py +7 -4
  68. data_designer/engine/analysis/utils/judge_score_processing.py +7 -3
  69. data_designer/engine/column_generators/generators/base.py +26 -14
  70. data_designer/engine/column_generators/generators/embedding.py +4 -11
  71. data_designer/engine/column_generators/generators/expression.py +7 -16
  72. data_designer/engine/column_generators/generators/llm_completion.py +11 -37
  73. data_designer/engine/column_generators/generators/samplers.py +8 -14
  74. data_designer/engine/column_generators/generators/seed_dataset.py +9 -15
  75. data_designer/engine/column_generators/generators/validation.py +8 -20
  76. data_designer/engine/column_generators/registry.py +2 -0
  77. data_designer/engine/column_generators/utils/errors.py +2 -0
  78. data_designer/engine/column_generators/utils/generator_classification.py +2 -0
  79. data_designer/engine/column_generators/utils/judge_score_factory.py +2 -0
  80. data_designer/engine/column_generators/utils/prompt_renderer.py +4 -2
  81. data_designer/engine/compiler.py +3 -6
  82. data_designer/engine/configurable_task.py +12 -13
  83. data_designer/engine/dataset_builders/artifact_storage.py +87 -8
  84. data_designer/engine/dataset_builders/column_wise_builder.py +32 -34
  85. data_designer/engine/dataset_builders/errors.py +2 -0
  86. data_designer/engine/dataset_builders/multi_column_configs.py +2 -0
  87. data_designer/engine/dataset_builders/utils/config_compiler.py +2 -0
  88. data_designer/engine/dataset_builders/utils/dag.py +7 -2
  89. data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +9 -6
  90. data_designer/engine/dataset_builders/utils/errors.py +2 -0
  91. data_designer/engine/errors.py +2 -0
  92. data_designer/engine/model_provider.py +2 -0
  93. data_designer/engine/models/errors.py +23 -31
  94. data_designer/engine/models/facade.py +12 -9
  95. data_designer/engine/models/factory.py +42 -0
  96. data_designer/engine/models/litellm_overrides.py +22 -11
  97. data_designer/engine/models/parsers/errors.py +2 -0
  98. data_designer/engine/models/parsers/parser.py +2 -2
  99. data_designer/engine/models/parsers/postprocessors.py +1 -0
  100. data_designer/engine/models/parsers/tag_parsers.py +2 -0
  101. data_designer/engine/models/parsers/types.py +2 -0
  102. data_designer/engine/models/recipes/base.py +2 -0
  103. data_designer/engine/models/recipes/response_recipes.py +2 -0
  104. data_designer/engine/models/registry.py +11 -18
  105. data_designer/engine/models/telemetry.py +6 -2
  106. data_designer/engine/processing/ginja/ast.py +2 -0
  107. data_designer/engine/processing/ginja/environment.py +2 -0
  108. data_designer/engine/processing/ginja/exceptions.py +2 -0
  109. data_designer/engine/processing/ginja/record.py +2 -0
  110. data_designer/engine/processing/gsonschema/exceptions.py +9 -2
  111. data_designer/engine/processing/gsonschema/schema_transformers.py +2 -0
  112. data_designer/engine/processing/gsonschema/types.py +2 -0
  113. data_designer/engine/processing/gsonschema/validators.py +10 -6
  114. data_designer/engine/processing/processors/base.py +1 -5
  115. data_designer/engine/processing/processors/drop_columns.py +7 -10
  116. data_designer/engine/processing/processors/registry.py +2 -0
  117. data_designer/engine/processing/processors/schema_transform.py +7 -10
  118. data_designer/engine/processing/utils.py +7 -3
  119. data_designer/engine/registry/base.py +2 -0
  120. data_designer/engine/registry/data_designer_registry.py +2 -0
  121. data_designer/engine/registry/errors.py +2 -0
  122. data_designer/engine/resources/managed_dataset_generator.py +6 -2
  123. data_designer/engine/resources/managed_dataset_repository.py +8 -5
  124. data_designer/engine/resources/managed_storage.py +2 -0
  125. data_designer/engine/resources/resource_provider.py +8 -1
  126. data_designer/engine/resources/seed_reader.py +7 -2
  127. data_designer/engine/sampling_gen/column.py +2 -0
  128. data_designer/engine/sampling_gen/constraints.py +8 -2
  129. data_designer/engine/sampling_gen/data_sources/base.py +10 -7
  130. data_designer/engine/sampling_gen/data_sources/errors.py +2 -0
  131. data_designer/engine/sampling_gen/data_sources/sources.py +27 -22
  132. data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +2 -2
  133. data_designer/engine/sampling_gen/entities/email_address_utils.py +2 -0
  134. data_designer/engine/sampling_gen/entities/errors.py +2 -0
  135. data_designer/engine/sampling_gen/entities/national_id_utils.py +2 -0
  136. data_designer/engine/sampling_gen/entities/person.py +2 -0
  137. data_designer/engine/sampling_gen/entities/phone_number.py +8 -1
  138. data_designer/engine/sampling_gen/errors.py +2 -0
  139. data_designer/engine/sampling_gen/generator.py +5 -4
  140. data_designer/engine/sampling_gen/jinja_utils.py +7 -3
  141. data_designer/engine/sampling_gen/people_gen.py +7 -7
  142. data_designer/engine/sampling_gen/person_constants.py +2 -0
  143. data_designer/engine/sampling_gen/schema.py +5 -1
  144. data_designer/engine/sampling_gen/schema_builder.py +2 -0
  145. data_designer/engine/sampling_gen/utils.py +7 -1
  146. data_designer/engine/secret_resolver.py +2 -0
  147. data_designer/engine/validation.py +2 -2
  148. data_designer/engine/validators/__init__.py +2 -0
  149. data_designer/engine/validators/base.py +2 -0
  150. data_designer/engine/validators/local_callable.py +7 -2
  151. data_designer/engine/validators/python.py +7 -1
  152. data_designer/engine/validators/remote.py +7 -1
  153. data_designer/engine/validators/sql.py +8 -3
  154. data_designer/errors.py +2 -0
  155. data_designer/essentials/__init__.py +2 -0
  156. data_designer/interface/data_designer.py +23 -17
  157. data_designer/interface/errors.py +2 -0
  158. data_designer/interface/results.py +5 -2
  159. data_designer/lazy_heavy_imports.py +54 -0
  160. data_designer/logging.py +2 -0
  161. data_designer/plugins/__init__.py +2 -0
  162. data_designer/plugins/errors.py +2 -0
  163. data_designer/plugins/plugin.py +0 -1
  164. data_designer/plugins/registry.py +2 -0
  165. data_designer/plugins/testing/__init__.py +2 -0
  166. data_designer/plugins/testing/stubs.py +21 -43
  167. data_designer/plugins/testing/utils.py +2 -0
  168. {data_designer-0.3.4.dist-info → data_designer-0.3.6.dist-info}/METADATA +12 -5
  169. data_designer-0.3.6.dist-info/RECORD +196 -0
  170. data_designer-0.3.4.dist-info/RECORD +0 -194
  171. {data_designer-0.3.4.dist-info → data_designer-0.3.6.dist-info}/WHEEL +0 -0
  172. {data_designer-0.3.4.dist-info → data_designer-0.3.6.dist-info}/entry_points.txt +0 -0
  173. {data_designer-0.3.4.dist-info → data_designer-0.3.6.dist-info}/licenses/LICENSE +0 -0
@@ -1,10 +1,11 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from abc import ABC, abstractmethod
7
+ from typing import TYPE_CHECKING
5
8
 
6
- import numpy as np
7
- import pandas as pd
8
9
  from numpy.typing import NDArray
9
10
 
10
11
  from data_designer.config.base import ConfigBase
@@ -15,6 +16,11 @@ from data_designer.config.sampler_constraints import (
15
16
  InequalityOperator,
16
17
  ScalarInequalityConstraint,
17
18
  )
19
+ from data_designer.lazy_heavy_imports import np, pd
20
+
21
+ if TYPE_CHECKING:
22
+ import numpy as np
23
+ import pandas as pd
18
24
 
19
25
 
20
26
  class ConstraintChecker(ConfigBase, ABC):
@@ -1,24 +1,27 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from abc import ABC, abstractmethod
5
- from typing import Any, Generic, TypeVar
7
+ from typing import TYPE_CHECKING, Any, Generic, TypeVar
6
8
 
7
- import numpy as np
8
- import pandas as pd
9
9
  from numpy.typing import NDArray
10
- from scipy import stats
11
10
 
12
11
  from data_designer.config.sampler_params import SamplerParamsT
13
12
  from data_designer.engine.sampling_gen.utils import check_random_state
13
+ from data_designer.lazy_heavy_imports import np, pd, scipy
14
+
15
+ if TYPE_CHECKING:
16
+ import numpy as np
17
+ import pandas as pd
18
+ import scipy
14
19
 
15
20
  NumpyArray1dT = NDArray[Any]
16
21
  RadomStateT = int | np.random.RandomState
17
22
 
18
-
19
23
  GenericParamsT = TypeVar("GenericParamsT", bound=SamplerParamsT)
20
24
 
21
-
22
25
  ###########################################################
23
26
  # Processing Mixins
24
27
  # -----------------
@@ -208,7 +211,7 @@ class Sampler(DataSource[GenericParamsT], ABC):
208
211
  class ScipyStatsSampler(Sampler[GenericParamsT], ABC):
209
212
  @property
210
213
  @abstractmethod
211
- def distribution(self) -> stats.rv_continuous | stats.rv_discrete: ...
214
+ def distribution(self) -> scipy.stats.rv_continuous | scipy.stats.rv_discrete: ...
212
215
 
213
216
  def sample(self, num_samples: int) -> NumpyArray1dT:
214
217
  return self.distribution.rvs(size=num_samples, random_state=self.rng)
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.engine.sampling_gen.errors import SamplingGenError
5
7
 
6
8
 
@@ -1,11 +1,10 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- import uuid
4
+ from __future__ import annotations
5
5
 
6
- import numpy as np
7
- import pandas as pd
8
- from scipy import stats
6
+ import uuid
7
+ from typing import TYPE_CHECKING
9
8
 
10
9
  from data_designer.config.sampler_params import (
11
10
  BernoulliMixtureSamplerParams,
@@ -40,6 +39,12 @@ from data_designer.engine.sampling_gen.data_sources.errors import (
40
39
  )
41
40
  from data_designer.engine.sampling_gen.entities.dataset_based_person_fields import PERSONA_FIELDS, PII_FIELDS
42
41
  from data_designer.engine.sampling_gen.people_gen import PeopleGen
42
+ from data_designer.lazy_heavy_imports import np, pd, scipy
43
+
44
+ if TYPE_CHECKING:
45
+ import numpy as np
46
+ import pandas as pd
47
+ import scipy
43
48
 
44
49
  ONE_BILLION = 10**9
45
50
 
@@ -264,8 +269,8 @@ class ScipySampler(TypeConversionMixin, ScipyStatsSampler[ScipySamplerParams]):
264
269
  """Escape hatch sampler to give users access to any scipy.stats distribution."""
265
270
 
266
271
  @property
267
- def distribution(self) -> stats.rv_continuous | stats.rv_discrete:
268
- return getattr(stats, self.params.dist_name)(**self.params.dist_params)
272
+ def distribution(self) -> scipy.stats.rv_continuous | scipy.stats.rv_discrete:
273
+ return getattr(scipy.stats, self.params.dist_name)(**self.params.dist_params)
269
274
 
270
275
  def _validate(self) -> None:
271
276
  _validate_scipy_distribution(self.params.dist_name, self.params.dist_params)
@@ -274,16 +279,16 @@ class ScipySampler(TypeConversionMixin, ScipyStatsSampler[ScipySamplerParams]):
274
279
  @SamplerRegistry.register(SamplerType.BERNOULLI)
275
280
  class BernoulliSampler(TypeConversionMixin, ScipyStatsSampler[BernoulliSamplerParams]):
276
281
  @property
277
- def distribution(self) -> stats.rv_discrete:
278
- return stats.bernoulli(p=self.params.p)
282
+ def distribution(self) -> scipy.stats.rv_discrete:
283
+ return scipy.stats.bernoulli(p=self.params.p)
279
284
 
280
285
 
281
286
  @SamplerRegistry.register(SamplerType.BERNOULLI_MIXTURE)
282
287
  class BernoulliMixtureSampler(TypeConversionMixin, Sampler[BernoulliMixtureSamplerParams]):
283
288
  def sample(self, num_samples: int) -> NumpyArray1dT:
284
- return stats.bernoulli(p=self.params.p).rvs(size=num_samples) * getattr(stats, self.params.dist_name)(
285
- **self.params.dist_params
286
- ).rvs(size=num_samples)
289
+ return scipy.stats.bernoulli(p=self.params.p).rvs(size=num_samples) * getattr(
290
+ scipy.stats, self.params.dist_name
291
+ )(**self.params.dist_params).rvs(size=num_samples)
287
292
 
288
293
  def _validate(self) -> None:
289
294
  _validate_scipy_distribution(self.params.dist_name, self.params.dist_params)
@@ -292,29 +297,29 @@ class BernoulliMixtureSampler(TypeConversionMixin, Sampler[BernoulliMixtureSampl
292
297
  @SamplerRegistry.register(SamplerType.BINOMIAL)
293
298
  class BinomialSampler(TypeConversionMixin, ScipyStatsSampler[BinomialSamplerParams]):
294
299
  @property
295
- def distribution(self) -> stats.rv_discrete:
296
- return stats.binom(n=self.params.n, p=self.params.p)
300
+ def distribution(self) -> scipy.stats.rv_discrete:
301
+ return scipy.stats.binom(n=self.params.n, p=self.params.p)
297
302
 
298
303
 
299
304
  @SamplerRegistry.register(SamplerType.GAUSSIAN)
300
305
  class GaussianSampler(TypeConversionMixin, ScipyStatsSampler[GaussianSamplerParams]):
301
306
  @property
302
- def distribution(self) -> stats.rv_continuous:
303
- return stats.norm(loc=self.params.mean, scale=self.params.stddev)
307
+ def distribution(self) -> scipy.stats.rv_continuous:
308
+ return scipy.stats.norm(loc=self.params.mean, scale=self.params.stddev)
304
309
 
305
310
 
306
311
  @SamplerRegistry.register(SamplerType.POISSON)
307
312
  class PoissonSampler(TypeConversionMixin, ScipyStatsSampler[PoissonSamplerParams]):
308
313
  @property
309
- def distribution(self) -> stats.rv_discrete:
310
- return stats.poisson(mu=self.params.mean)
314
+ def distribution(self) -> scipy.stats.rv_discrete:
315
+ return scipy.stats.poisson(mu=self.params.mean)
311
316
 
312
317
 
313
318
  @SamplerRegistry.register(SamplerType.UNIFORM)
314
319
  class UniformSampler(TypeConversionMixin, ScipyStatsSampler[UniformSamplerParams]):
315
320
  @property
316
- def distribution(self) -> stats.rv_continuous:
317
- return stats.uniform(loc=self.params.low, scale=self.params.high - self.params.low)
321
+ def distribution(self) -> scipy.stats.rv_continuous:
322
+ return scipy.stats.uniform(loc=self.params.low, scale=self.params.high - self.params.low)
318
323
 
319
324
 
320
325
  ###################################################
@@ -328,14 +333,14 @@ def load_sampler(sampler_type: SamplerType, **params) -> DataSource:
328
333
 
329
334
 
330
335
  def _validate_scipy_distribution(dist_name: str, dist_params: dict) -> None:
331
- if not hasattr(stats, dist_name):
336
+ if not hasattr(scipy.stats, dist_name):
332
337
  raise InvalidSamplerParamsError(f"Distribution {dist_name} not found in scipy.stats")
333
- if not hasattr(getattr(stats, dist_name), "rvs"):
338
+ if not hasattr(getattr(scipy.stats, dist_name), "rvs"):
334
339
  raise InvalidSamplerParamsError(
335
340
  f"Distribution {dist_name} does not have a `rvs` method, which is required for sampling."
336
341
  )
337
342
  try:
338
- getattr(stats, dist_name)(**dist_params)
343
+ getattr(scipy.stats, dist_name)(**dist_params)
339
344
  except Exception:
340
345
  raise InvalidSamplerParamsError(
341
346
  f"Distribution parameters {dist_params} are not a valid for distribution '{dist_name}'"
@@ -10,8 +10,9 @@ This file contains all possible fields that:
10
10
  Do not add any other code or logic in this file.
11
11
  """
12
12
 
13
- REQUIRED_FIELDS = {"first_name", "last_name", "age", "locale"}
13
+ from __future__ import annotations
14
14
 
15
+ REQUIRED_FIELDS = {"first_name", "last_name", "age", "locale"}
15
16
 
16
17
  PII_FIELDS = [
17
18
  # Core demographic fields
@@ -52,7 +53,6 @@ PII_FIELDS = [
52
53
  "third_language",
53
54
  ]
54
55
 
55
-
56
56
  PERSONA_FIELDS = [
57
57
  # Core persona fields
58
58
  "persona",
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import random
5
7
  import re
6
8
  from datetime import date
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.errors import DataDesignerError
5
7
 
6
8
 
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import random
5
7
  from datetime import date
6
8
 
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import random
5
7
  from datetime import date, timedelta
6
8
  from typing import Any, Literal, TypeAlias
@@ -1,12 +1,19 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import random
5
7
  from pathlib import Path
8
+ from typing import TYPE_CHECKING
6
9
 
7
- import pandas as pd
8
10
  from pydantic import BaseModel, Field, field_validator
9
11
 
12
+ from data_designer.lazy_heavy_imports import pd
13
+
14
+ if TYPE_CHECKING:
15
+ import pandas as pd
16
+
10
17
  ZIP_AREA_CODE_DATA = pd.read_parquet(Path(__file__).parent / "assets" / "zip_area_code_map.parquet")
11
18
  ZIPCODE_AREA_CODE_MAP = dict(zip(ZIP_AREA_CODE_DATA["zipcode"], ZIP_AREA_CODE_DATA["area_code"]))
12
19
  ZIPCODE_POPULATION_MAP = dict(zip(ZIP_AREA_CODE_DATA["zipcode"], ZIP_AREA_CODE_DATA["count"]))
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.engine.errors import DataDesignerError
5
7
 
6
8
 
@@ -6,18 +6,19 @@ from __future__ import annotations
6
6
  from collections.abc import Callable
7
7
  from typing import TYPE_CHECKING
8
8
 
9
- import networkx as nx
10
- import numpy as np
11
- import pandas as pd
12
-
13
9
  from data_designer.engine.sampling_gen.data_sources.base import RadomStateT
14
10
  from data_designer.engine.sampling_gen.errors import RejectionSamplingError
15
11
  from data_designer.engine.sampling_gen.jinja_utils import JinjaDataFrame
16
12
  from data_designer.engine.sampling_gen.people_gen import create_people_gen_resource
17
13
  from data_designer.engine.sampling_gen.schema import DataSchema
18
14
  from data_designer.engine.sampling_gen.utils import check_random_state
15
+ from data_designer.lazy_heavy_imports import np, nx, pd
19
16
 
20
17
  if TYPE_CHECKING:
18
+ import networkx as nx
19
+ import numpy as np
20
+ import pandas as pd
21
+
21
22
  from data_designer.engine.dataset_builders.multi_column_configs import SamplerMultiColumnConfig
22
23
  from data_designer.engine.resources.managed_dataset_generator import ManagedDatasetGenerator
23
24
  from data_designer.engine.sampling_gen.column import ConditionalDataColumn
@@ -1,15 +1,19 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- import ast
5
- from typing import Any
4
+ from __future__ import annotations
6
5
 
7
- import pandas as pd
6
+ import ast
7
+ from typing import TYPE_CHECKING, Any
8
8
 
9
9
  from data_designer.engine.processing.ginja.environment import (
10
10
  UserTemplateSandboxEnvironment,
11
11
  WithJinja2UserTemplateRendering,
12
12
  )
13
+ from data_designer.lazy_heavy_imports import pd
14
+
15
+ if TYPE_CHECKING:
16
+ import pandas as pd
13
17
 
14
18
 
15
19
  class JinjaDataFrame(WithJinja2UserTemplateRendering):
@@ -10,9 +10,6 @@ from collections.abc import Callable
10
10
  from copy import deepcopy
11
11
  from typing import TYPE_CHECKING, Any, TypeAlias
12
12
 
13
- import pandas as pd
14
- from faker import Faker
15
-
16
13
  from data_designer.config.utils.constants import DEFAULT_AGE_RANGE
17
14
  from data_designer.engine.resources.managed_dataset_generator import ManagedDatasetGenerator
18
15
  from data_designer.engine.sampling_gen.entities.dataset_based_person_fields import PERSONA_FIELDS, PII_FIELDS
@@ -22,12 +19,15 @@ from data_designer.engine.sampling_gen.entities.person import (
22
19
  )
23
20
  from data_designer.engine.sampling_gen.errors import ManagedDatasetGeneratorError
24
21
  from data_designer.engine.sampling_gen.person_constants import faker_constants
22
+ from data_designer.lazy_heavy_imports import faker, pd
25
23
 
26
24
  if TYPE_CHECKING:
27
- from data_designer.engine.sampling_gen.schema import DataSchema
25
+ import faker
26
+ import pandas as pd
28
27
 
28
+ from data_designer.engine.sampling_gen.schema import DataSchema
29
29
 
30
- EngineT: TypeAlias = Faker | ManagedDatasetGenerator
30
+ EngineT: TypeAlias = faker.Faker | ManagedDatasetGenerator
31
31
 
32
32
 
33
33
  class PeopleGen(ABC):
@@ -46,7 +46,7 @@ class PeopleGen(ABC):
46
46
 
47
47
  class PeopleGenFaker(PeopleGen):
48
48
  @property
49
- def _fake(self) -> Faker:
49
+ def _fake(self) -> faker.Faker:
50
50
  return self._engine
51
51
 
52
52
  def try_fake_else_none(self, attr_name: str, none_fill: Any | None = None) -> type:
@@ -193,7 +193,7 @@ def create_people_gen_resource(
193
193
  for params in [column.params, *list(column.conditional_params.values())]:
194
194
  if params.people_gen_key not in people_gen_resource:
195
195
  people_gen_resource[params.people_gen_key] = PeopleGenFaker(
196
- engine=Faker(params.locale), locale=params.locale
196
+ engine=faker.Faker(params.locale), locale=params.locale
197
197
  )
198
198
 
199
199
  return people_gen_resource
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from typing import NamedTuple
5
7
 
6
8
 
@@ -4,8 +4,8 @@
4
4
  from __future__ import annotations
5
5
 
6
6
  from functools import cached_property
7
+ from typing import TYPE_CHECKING
7
8
 
8
- import networkx as nx
9
9
  from pydantic import BaseModel, Field, field_validator, model_validator
10
10
  from typing_extensions import Self
11
11
 
@@ -14,6 +14,10 @@ from data_designer.config.sampler_constraints import ColumnConstraintT
14
14
  from data_designer.config.sampler_params import SamplerType
15
15
  from data_designer.engine.sampling_gen.column import ConditionalDataColumn
16
16
  from data_designer.engine.sampling_gen.constraints import ConstraintChecker, get_constraint_checker
17
+ from data_designer.lazy_heavy_imports import nx
18
+
19
+ if TYPE_CHECKING:
20
+ import networkx as nx
17
21
 
18
22
 
19
23
  class Dag(BaseModel):
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from copy import deepcopy
5
7
 
6
8
  from data_designer.config.column_configs import SamplerColumnConfig
@@ -1,9 +1,15 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import numbers
7
+ from typing import TYPE_CHECKING
8
+
9
+ from data_designer.lazy_heavy_imports import np
5
10
 
6
- import numpy as np
11
+ if TYPE_CHECKING:
12
+ import numpy as np
7
13
 
8
14
 
9
15
  def check_random_state(seed):
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import json
5
7
  import logging
6
8
  import os
@@ -19,7 +19,7 @@ from data_designer.config.processors import ProcessorConfigT, ProcessorType
19
19
  from data_designer.config.utils.constants import RICH_CONSOLE_THEME
20
20
  from data_designer.config.utils.misc import (
21
21
  can_run_data_designer_locally,
22
- get_prompt_template_keywords,
22
+ extract_keywords_from_jinja2_template,
23
23
  )
24
24
  from data_designer.config.validator_params import ValidatorType
25
25
  from data_designer.engine.column_generators.utils.generator_classification import column_type_is_model_generated
@@ -302,7 +302,7 @@ def validate_schema_transform_processor(
302
302
  for processor_config in processor_configs:
303
303
  if processor_config.processor_type == ProcessorType.SCHEMA_TRANSFORM:
304
304
  for col, template in processor_config.template.items():
305
- template_keywords = get_prompt_template_keywords(template)
305
+ template_keywords = extract_keywords_from_jinja2_template(template)
306
306
  invalid_keywords = set(template_keywords) - all_column_names
307
307
  if len(invalid_keywords) > 0:
308
308
  invalid_keywords = ", ".join([f"'{k}'" for k in invalid_keywords])
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.engine.validators.base import BaseValidator, ValidationResult
5
7
  from data_designer.engine.validators.local_callable import LocalCallableValidator
6
8
  from data_designer.engine.validators.python import PythonValidator
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from abc import ABC, abstractmethod
5
7
  from typing import Iterator
6
8
 
@@ -1,14 +1,19 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- import logging
4
+ from __future__ import annotations
5
5
 
6
- import pandas as pd
6
+ import logging
7
+ from typing import TYPE_CHECKING
7
8
 
8
9
  from data_designer.config.validator_params import LocalCallableValidatorParams
9
10
  from data_designer.engine.errors import LocalCallableValidationError
10
11
  from data_designer.engine.processing.gsonschema.validators import validate
11
12
  from data_designer.engine.validators.base import BaseValidator, ValidationOutput, ValidationResult
13
+ from data_designer.lazy_heavy_imports import pd
14
+
15
+ if TYPE_CHECKING:
16
+ import pandas as pd
12
17
 
13
18
  logger = logging.getLogger(__name__)
14
19
 
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import ast
5
7
  import json
6
8
  import logging
@@ -8,14 +10,18 @@ import subprocess
8
10
  import tempfile
9
11
  from collections import defaultdict
10
12
  from pathlib import Path
13
+ from typing import TYPE_CHECKING
11
14
  from uuid import uuid4
12
15
 
13
- import pandas as pd
14
16
  from pydantic import BaseModel
15
17
  from ruff.__main__ import find_ruff_bin
16
18
 
17
19
  from data_designer.config.validator_params import CodeValidatorParams
18
20
  from data_designer.engine.validators.base import BaseValidator, ValidationOutput, ValidationResult
21
+ from data_designer.lazy_heavy_imports import pd
22
+
23
+ if TYPE_CHECKING:
24
+ import pandas as pd
19
25
 
20
26
  logger = logging.getLogger(__name__)
21
27
 
@@ -1,9 +1,11 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import logging
7
+ from typing import TYPE_CHECKING
5
8
 
6
- import httpx
7
9
  from httpx_retries import Retry, RetryTransport
8
10
 
9
11
  from data_designer.config.validator_params import RemoteValidatorParams
@@ -11,6 +13,10 @@ from data_designer.engine.errors import RemoteValidationSchemaError
11
13
  from data_designer.engine.processing.gsonschema.exceptions import JSONSchemaValidationError
12
14
  from data_designer.engine.processing.gsonschema.validators import validate
13
15
  from data_designer.engine.validators.base import BaseValidator, ValidationResult
16
+ from data_designer.lazy_heavy_imports import httpx
17
+
18
+ if TYPE_CHECKING:
19
+ import httpx
14
20
 
15
21
  logger = logging.getLogger(__name__)
16
22
 
@@ -1,15 +1,20 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import logging
5
7
  import re
6
-
7
- import pandas as pd
8
- import sqlfluff
8
+ from typing import TYPE_CHECKING
9
9
 
10
10
  from data_designer.config.utils.code_lang import CodeLang
11
11
  from data_designer.config.validator_params import CodeValidatorParams
12
12
  from data_designer.engine.validators.base import BaseValidator, ValidationOutput, ValidationResult
13
+ from data_designer.lazy_heavy_imports import pd, sqlfluff
14
+
15
+ if TYPE_CHECKING:
16
+ import pandas as pd
17
+ import sqlfluff
13
18
 
14
19
  sqlfluff_logger = logging.getLogger("sqlfluff")
15
20
  sqlfluff_logger.setLevel(logging.WARNING)
data_designer/errors.py CHANGED
@@ -1,5 +1,7 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
 
5
7
  class DataDesignerError(Exception): ...
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.config.default_model_settings import resolve_seed_default_model_settings
5
7
  from data_designer.config.exports import * # noqa: F403
6
8
  from data_designer.config.run_config import RunConfig