data-designer 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (173) hide show
  1. data_designer/__init__.py +2 -0
  2. data_designer/_version.py +2 -2
  3. data_designer/cli/__init__.py +2 -0
  4. data_designer/cli/commands/download.py +2 -0
  5. data_designer/cli/commands/list.py +2 -0
  6. data_designer/cli/commands/models.py +2 -0
  7. data_designer/cli/commands/providers.py +2 -0
  8. data_designer/cli/commands/reset.py +2 -0
  9. data_designer/cli/controllers/__init__.py +2 -0
  10. data_designer/cli/controllers/download_controller.py +2 -0
  11. data_designer/cli/controllers/model_controller.py +6 -1
  12. data_designer/cli/controllers/provider_controller.py +6 -1
  13. data_designer/cli/forms/__init__.py +2 -0
  14. data_designer/cli/forms/builder.py +2 -0
  15. data_designer/cli/forms/field.py +2 -0
  16. data_designer/cli/forms/form.py +2 -0
  17. data_designer/cli/forms/model_builder.py +2 -0
  18. data_designer/cli/forms/provider_builder.py +2 -0
  19. data_designer/cli/main.py +2 -0
  20. data_designer/cli/repositories/__init__.py +2 -0
  21. data_designer/cli/repositories/base.py +2 -0
  22. data_designer/cli/repositories/model_repository.py +2 -0
  23. data_designer/cli/repositories/persona_repository.py +2 -0
  24. data_designer/cli/repositories/provider_repository.py +2 -0
  25. data_designer/cli/services/__init__.py +2 -0
  26. data_designer/cli/services/download_service.py +2 -0
  27. data_designer/cli/services/model_service.py +2 -0
  28. data_designer/cli/services/provider_service.py +2 -0
  29. data_designer/cli/ui.py +2 -0
  30. data_designer/cli/utils.py +2 -0
  31. data_designer/config/analysis/column_profilers.py +2 -0
  32. data_designer/config/analysis/column_statistics.py +8 -5
  33. data_designer/config/analysis/dataset_profiler.py +9 -3
  34. data_designer/config/analysis/utils/errors.py +2 -0
  35. data_designer/config/analysis/utils/reporting.py +7 -3
  36. data_designer/config/column_configs.py +77 -7
  37. data_designer/config/column_types.py +33 -36
  38. data_designer/config/dataset_builders.py +2 -0
  39. data_designer/config/default_model_settings.py +1 -0
  40. data_designer/config/errors.py +2 -0
  41. data_designer/config/exports.py +2 -0
  42. data_designer/config/interface.py +3 -2
  43. data_designer/config/models.py +7 -2
  44. data_designer/config/preview_results.py +7 -3
  45. data_designer/config/processors.py +2 -0
  46. data_designer/config/run_config.py +2 -0
  47. data_designer/config/sampler_constraints.py +2 -0
  48. data_designer/config/sampler_params.py +7 -2
  49. data_designer/config/seed.py +2 -0
  50. data_designer/config/seed_source.py +7 -2
  51. data_designer/config/seed_source_types.py +2 -0
  52. data_designer/config/utils/constants.py +2 -0
  53. data_designer/config/utils/errors.py +2 -0
  54. data_designer/config/utils/info.py +2 -0
  55. data_designer/config/utils/io_helpers.py +8 -3
  56. data_designer/config/utils/misc.py +2 -2
  57. data_designer/config/utils/numerical_helpers.py +2 -0
  58. data_designer/config/utils/type_helpers.py +2 -0
  59. data_designer/config/utils/visualization.py +8 -4
  60. data_designer/config/validator_params.py +2 -0
  61. data_designer/engine/analysis/column_profilers/base.py +9 -8
  62. data_designer/engine/analysis/column_profilers/judge_score_profiler.py +15 -19
  63. data_designer/engine/analysis/column_profilers/registry.py +2 -0
  64. data_designer/engine/analysis/column_statistics.py +5 -2
  65. data_designer/engine/analysis/dataset_profiler.py +12 -9
  66. data_designer/engine/analysis/errors.py +2 -0
  67. data_designer/engine/analysis/utils/column_statistics_calculations.py +7 -4
  68. data_designer/engine/analysis/utils/judge_score_processing.py +7 -3
  69. data_designer/engine/column_generators/generators/base.py +26 -14
  70. data_designer/engine/column_generators/generators/embedding.py +4 -11
  71. data_designer/engine/column_generators/generators/expression.py +7 -16
  72. data_designer/engine/column_generators/generators/llm_completion.py +11 -37
  73. data_designer/engine/column_generators/generators/samplers.py +8 -14
  74. data_designer/engine/column_generators/generators/seed_dataset.py +9 -15
  75. data_designer/engine/column_generators/generators/validation.py +8 -20
  76. data_designer/engine/column_generators/registry.py +2 -0
  77. data_designer/engine/column_generators/utils/errors.py +2 -0
  78. data_designer/engine/column_generators/utils/generator_classification.py +2 -0
  79. data_designer/engine/column_generators/utils/judge_score_factory.py +2 -0
  80. data_designer/engine/column_generators/utils/prompt_renderer.py +4 -2
  81. data_designer/engine/compiler.py +3 -6
  82. data_designer/engine/configurable_task.py +12 -13
  83. data_designer/engine/dataset_builders/artifact_storage.py +87 -8
  84. data_designer/engine/dataset_builders/column_wise_builder.py +32 -34
  85. data_designer/engine/dataset_builders/errors.py +2 -0
  86. data_designer/engine/dataset_builders/multi_column_configs.py +2 -0
  87. data_designer/engine/dataset_builders/utils/config_compiler.py +2 -0
  88. data_designer/engine/dataset_builders/utils/dag.py +7 -2
  89. data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +9 -6
  90. data_designer/engine/dataset_builders/utils/errors.py +2 -0
  91. data_designer/engine/errors.py +2 -0
  92. data_designer/engine/model_provider.py +2 -0
  93. data_designer/engine/models/errors.py +23 -31
  94. data_designer/engine/models/facade.py +12 -9
  95. data_designer/engine/models/factory.py +42 -0
  96. data_designer/engine/models/litellm_overrides.py +22 -11
  97. data_designer/engine/models/parsers/errors.py +2 -0
  98. data_designer/engine/models/parsers/parser.py +2 -2
  99. data_designer/engine/models/parsers/postprocessors.py +1 -0
  100. data_designer/engine/models/parsers/tag_parsers.py +2 -0
  101. data_designer/engine/models/parsers/types.py +2 -0
  102. data_designer/engine/models/recipes/base.py +2 -0
  103. data_designer/engine/models/recipes/response_recipes.py +2 -0
  104. data_designer/engine/models/registry.py +11 -18
  105. data_designer/engine/models/telemetry.py +6 -2
  106. data_designer/engine/processing/ginja/ast.py +2 -0
  107. data_designer/engine/processing/ginja/environment.py +2 -0
  108. data_designer/engine/processing/ginja/exceptions.py +2 -0
  109. data_designer/engine/processing/ginja/record.py +2 -0
  110. data_designer/engine/processing/gsonschema/exceptions.py +9 -2
  111. data_designer/engine/processing/gsonschema/schema_transformers.py +2 -0
  112. data_designer/engine/processing/gsonschema/types.py +2 -0
  113. data_designer/engine/processing/gsonschema/validators.py +10 -6
  114. data_designer/engine/processing/processors/base.py +1 -5
  115. data_designer/engine/processing/processors/drop_columns.py +7 -10
  116. data_designer/engine/processing/processors/registry.py +2 -0
  117. data_designer/engine/processing/processors/schema_transform.py +7 -10
  118. data_designer/engine/processing/utils.py +7 -3
  119. data_designer/engine/registry/base.py +2 -0
  120. data_designer/engine/registry/data_designer_registry.py +2 -0
  121. data_designer/engine/registry/errors.py +2 -0
  122. data_designer/engine/resources/managed_dataset_generator.py +6 -2
  123. data_designer/engine/resources/managed_dataset_repository.py +8 -5
  124. data_designer/engine/resources/managed_storage.py +2 -0
  125. data_designer/engine/resources/resource_provider.py +8 -1
  126. data_designer/engine/resources/seed_reader.py +7 -2
  127. data_designer/engine/sampling_gen/column.py +2 -0
  128. data_designer/engine/sampling_gen/constraints.py +8 -2
  129. data_designer/engine/sampling_gen/data_sources/base.py +10 -7
  130. data_designer/engine/sampling_gen/data_sources/errors.py +2 -0
  131. data_designer/engine/sampling_gen/data_sources/sources.py +27 -22
  132. data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +2 -2
  133. data_designer/engine/sampling_gen/entities/email_address_utils.py +2 -0
  134. data_designer/engine/sampling_gen/entities/errors.py +2 -0
  135. data_designer/engine/sampling_gen/entities/national_id_utils.py +2 -0
  136. data_designer/engine/sampling_gen/entities/person.py +2 -0
  137. data_designer/engine/sampling_gen/entities/phone_number.py +8 -1
  138. data_designer/engine/sampling_gen/errors.py +2 -0
  139. data_designer/engine/sampling_gen/generator.py +5 -4
  140. data_designer/engine/sampling_gen/jinja_utils.py +7 -3
  141. data_designer/engine/sampling_gen/people_gen.py +7 -7
  142. data_designer/engine/sampling_gen/person_constants.py +2 -0
  143. data_designer/engine/sampling_gen/schema.py +5 -1
  144. data_designer/engine/sampling_gen/schema_builder.py +2 -0
  145. data_designer/engine/sampling_gen/utils.py +7 -1
  146. data_designer/engine/secret_resolver.py +2 -0
  147. data_designer/engine/validation.py +2 -2
  148. data_designer/engine/validators/__init__.py +2 -0
  149. data_designer/engine/validators/base.py +2 -0
  150. data_designer/engine/validators/local_callable.py +7 -2
  151. data_designer/engine/validators/python.py +7 -1
  152. data_designer/engine/validators/remote.py +7 -1
  153. data_designer/engine/validators/sql.py +8 -3
  154. data_designer/errors.py +2 -0
  155. data_designer/essentials/__init__.py +2 -0
  156. data_designer/interface/data_designer.py +23 -17
  157. data_designer/interface/errors.py +2 -0
  158. data_designer/interface/results.py +5 -2
  159. data_designer/lazy_heavy_imports.py +54 -0
  160. data_designer/logging.py +2 -0
  161. data_designer/plugins/__init__.py +2 -0
  162. data_designer/plugins/errors.py +2 -0
  163. data_designer/plugins/plugin.py +0 -1
  164. data_designer/plugins/registry.py +2 -0
  165. data_designer/plugins/testing/__init__.py +2 -0
  166. data_designer/plugins/testing/stubs.py +21 -43
  167. data_designer/plugins/testing/utils.py +2 -0
  168. {data_designer-0.3.4.dist-info → data_designer-0.3.6.dist-info}/METADATA +12 -5
  169. data_designer-0.3.6.dist-info/RECORD +196 -0
  170. data_designer-0.3.4.dist-info/RECORD +0 -194
  171. {data_designer-0.3.4.dist-info → data_designer-0.3.6.dist-info}/WHEEL +0 -0
  172. {data_designer-0.3.4.dist-info → data_designer-0.3.6.dist-info}/entry_points.txt +0 -0
  173. {data_designer-0.3.4.dist-info → data_designer-0.3.6.dist-info}/licenses/LICENSE +0 -0
data_designer/__init__.py CHANGED
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  try:
5
7
  from data_designer._version import __version__
6
8
  except ImportError:
data_designer/_version.py CHANGED
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.3.4'
32
- __version_tuple__ = version_tuple = (0, 3, 4)
31
+ __version__ = version = '0.3.6'
32
+ __version_tuple__ = version_tuple = (0, 3, 6)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.cli.main import app, main
5
7
 
6
8
  __all__ = ["app", "main"]
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import typer
5
7
 
6
8
  from data_designer.cli.controllers.download_controller import DownloadController
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from rich.table import Table
5
7
 
6
8
  from data_designer.cli.repositories.model_repository import ModelRepository
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.cli.controllers.model_controller import ModelController
5
7
  from data_designer.config.utils.constants import DATA_DESIGNER_HOME
6
8
 
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.cli.controllers.provider_controller import ProviderController
5
7
  from data_designer.config.utils.constants import DATA_DESIGNER_HOME
6
8
 
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import typer
5
7
 
6
8
  from data_designer.cli.repositories.model_repository import ModelRepository
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.cli.controllers.download_controller import DownloadController
5
7
  from data_designer.cli.controllers.model_controller import ModelController
6
8
  from data_designer.cli.controllers.provider_controller import ProviderController
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import subprocess
5
7
  from pathlib import Path
6
8
 
@@ -1,7 +1,10 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from pathlib import Path
7
+ from typing import TYPE_CHECKING
5
8
 
6
9
  from data_designer.cli.forms.model_builder import ModelFormBuilder
7
10
  from data_designer.cli.repositories.model_repository import ModelRepository
@@ -20,7 +23,9 @@ from data_designer.cli.ui import (
20
23
  print_warning,
21
24
  select_with_arrows,
22
25
  )
23
- from data_designer.config.models import ModelConfig
26
+
27
+ if TYPE_CHECKING:
28
+ from data_designer.config.models import ModelConfig
24
29
 
25
30
 
26
31
  class ModelController:
@@ -1,8 +1,11 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import copy
5
7
  from pathlib import Path
8
+ from typing import TYPE_CHECKING
6
9
 
7
10
  from data_designer.cli.forms.provider_builder import ProviderFormBuilder
8
11
  from data_designer.cli.repositories.model_repository import ModelRepository
@@ -20,7 +23,9 @@ from data_designer.cli.ui import (
20
23
  print_warning,
21
24
  select_with_arrows,
22
25
  )
23
- from data_designer.engine.model_provider import ModelProvider
26
+
27
+ if TYPE_CHECKING:
28
+ from data_designer.engine.model_provider import ModelProvider
24
29
 
25
30
 
26
31
  class ProviderController:
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.cli.forms.builder import FormBuilder
5
7
  from data_designer.cli.forms.field import Field, NumericField, SelectField, TextField, ValidationError
6
8
  from data_designer.cli.forms.form import Form
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from abc import ABC, abstractmethod
5
7
  from typing import Any, Generic, TypeVar
6
8
 
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from abc import ABC, abstractmethod
5
7
  from collections.abc import Callable
6
8
  from typing import Any, Generic, TypeVar
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from typing import Any
5
7
 
6
8
  from data_designer.cli.forms.field import Field
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from typing import Any
5
7
 
6
8
  from data_designer.cli.forms.builder import FormBuilder
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from typing import Any
5
7
 
6
8
  from data_designer.cli.forms.builder import FormBuilder
data_designer/cli/main.py CHANGED
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import typer
5
7
 
6
8
  from data_designer.cli.commands import download, models, providers, reset
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.cli.repositories.base import ConfigRepository
5
7
  from data_designer.cli.repositories.model_repository import ModelRepository
6
8
  from data_designer.cli.repositories.provider_repository import ProviderRepository
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from abc import ABC, abstractmethod
5
7
  from pathlib import Path
6
8
  from typing import Generic, TypeVar
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from pathlib import Path
5
7
 
6
8
  from pydantic import BaseModel
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from pydantic import BaseModel
5
7
 
6
8
  from data_designer.config.utils.constants import (
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from pathlib import Path
5
7
 
6
8
  from pydantic import BaseModel
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.cli.services.download_service import DownloadService
5
7
  from data_designer.cli.services.model_service import ModelService
6
8
  from data_designer.cli.services.provider_service import ProviderService
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import glob
5
7
  import shutil
6
8
  import subprocess
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.cli.repositories.model_repository import ModelConfigRegistry, ModelRepository
5
7
  from data_designer.config.models import ModelConfig
6
8
 
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.cli.repositories.provider_repository import ModelProviderRegistry, ProviderRepository
5
7
  from data_designer.config.models import ModelProvider
6
8
 
data_designer/cli/ui.py CHANGED
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from collections.abc import Callable
5
7
 
6
8
  from prompt_toolkit import Application, prompt
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  import shutil
5
7
  import subprocess
6
8
 
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from abc import ABC
5
7
  from enum import Enum
6
8
 
@@ -5,9 +5,8 @@ from __future__ import annotations
5
5
 
6
6
  from abc import ABC, abstractmethod
7
7
  from enum import Enum
8
- from typing import Any, Literal
8
+ from typing import TYPE_CHECKING, Any, Literal
9
9
 
10
- from pandas import Series
11
10
  from pydantic import BaseModel, ConfigDict, create_model, field_validator, model_validator
12
11
  from typing_extensions import Self, TypeAlias
13
12
 
@@ -15,8 +14,12 @@ from data_designer.config.column_types import DataDesignerColumnType
15
14
  from data_designer.config.sampler_params import SamplerType
16
15
  from data_designer.config.utils.constants import EPSILON
17
16
  from data_designer.config.utils.numerical_helpers import is_float, is_int, prepare_number_for_reporting
17
+ from data_designer.lazy_heavy_imports import pd
18
18
  from data_designer.plugin_manager import PluginManager
19
19
 
20
+ if TYPE_CHECKING:
21
+ import pandas as pd
22
+
20
23
 
21
24
  class MissingValue(str, Enum):
22
25
  CALCULATION_FAILED = "--"
@@ -314,7 +317,7 @@ class CategoricalHistogramData(BaseModel):
314
317
  return self
315
318
 
316
319
  @classmethod
317
- def from_series(cls, series: Series) -> Self:
320
+ def from_series(cls, series: pd.Series) -> Self:
318
321
  counts = series.value_counts()
319
322
  return cls(categories=counts.index.tolist(), counts=counts.tolist())
320
323
 
@@ -337,7 +340,7 @@ class CategoricalDistribution(BaseModel):
337
340
  return str(v) if not is_int(v) else prepare_number_for_reporting(v, int)
338
341
 
339
342
  @classmethod
340
- def from_series(cls, series: Series) -> Self:
343
+ def from_series(cls, series: pd.Series) -> Self:
341
344
  counts = series.value_counts()
342
345
  return cls(
343
346
  most_common_value=counts.index[0],
@@ -368,7 +371,7 @@ class NumericalDistribution(BaseModel):
368
371
  return prepare_number_for_reporting(v, int if is_int(v) else float)
369
372
 
370
373
  @classmethod
371
- def from_series(cls, series: Series) -> Self:
374
+ def from_series(cls, series: pd.Series) -> Self:
372
375
  return cls(
373
376
  min=series.min(skipna=True),
374
377
  max=series.max(skipna=True),
@@ -1,19 +1,25 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from functools import cached_property
5
7
  from pathlib import Path
6
- from typing import Annotated
8
+ from typing import TYPE_CHECKING, Annotated
7
9
 
8
10
  from pydantic import BaseModel, Field, field_validator
9
11
 
10
12
  from data_designer.config.analysis.column_profilers import ColumnProfilerResultsT
11
13
  from data_designer.config.analysis.column_statistics import ColumnStatisticsT
12
- from data_designer.config.analysis.utils.reporting import ReportSection, generate_analysis_report
13
- from data_designer.config.column_types import DataDesignerColumnType, get_column_display_order
14
+ from data_designer.config.analysis.utils.reporting import generate_analysis_report
15
+ from data_designer.config.column_types import get_column_display_order
14
16
  from data_designer.config.utils.constants import EPSILON
15
17
  from data_designer.config.utils.numerical_helpers import prepare_number_for_reporting
16
18
 
19
+ if TYPE_CHECKING:
20
+ from data_designer.config.analysis.utils.reporting import ReportSection
21
+ from data_designer.config.column_types import DataDesignerColumnType
22
+
17
23
 
18
24
  class DatasetProfilerResults(BaseModel):
19
25
  """Container for complete dataset profiling and analysis results.
@@ -1,6 +1,8 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
+ from __future__ import annotations
5
+
4
6
  from data_designer.errors import DataDesignerError
5
7
 
6
8
 
@@ -14,9 +14,12 @@ from rich.rule import Rule
14
14
  from rich.table import Column, Table
15
15
  from rich.text import Text
16
16
 
17
- from data_designer.config.analysis.column_statistics import CategoricalHistogramData
18
17
  from data_designer.config.analysis.utils.errors import AnalysisReportError
19
- from data_designer.config.column_types import COLUMN_TYPE_EMOJI_MAP, DataDesignerColumnType, get_column_display_order
18
+ from data_designer.config.column_types import (
19
+ DataDesignerColumnType,
20
+ get_column_display_order,
21
+ get_column_emoji_from_type,
22
+ )
20
23
  from data_designer.config.utils.visualization import (
21
24
  ColorPalette,
22
25
  convert_to_row_element,
@@ -25,6 +28,7 @@ from data_designer.config.utils.visualization import (
25
28
  )
26
29
 
27
30
  if TYPE_CHECKING:
31
+ from data_designer.config.analysis.column_statistics import CategoricalHistogramData
28
32
  from data_designer.config.analysis.dataset_profiler import DatasetProfilerResults
29
33
 
30
34
  HEADER_STYLE = "dim"
@@ -101,7 +105,7 @@ def generate_analysis_report(
101
105
  displayed_column_types.add(column_type)
102
106
  column_label = column_type.replace("_", " ").title().replace("Llm", "LLM")
103
107
  table = Table(
104
- title=f"{COLUMN_TYPE_EMOJI_MAP[column_type]} {column_label} Columns",
108
+ title=f"{get_column_emoji_from_type(column_type)} {column_label} Columns",
105
109
  **table_kws,
106
110
  )
107
111
 
@@ -1,7 +1,9 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from abc import ABC
4
+ from __future__ import annotations
5
+
6
+ from abc import ABC, abstractmethod
5
7
  from typing import Annotated, Literal
6
8
 
7
9
  from pydantic import BaseModel, Discriminator, Field, model_validator
@@ -13,7 +15,7 @@ from data_designer.config.models import ImageContext
13
15
  from data_designer.config.sampler_params import SamplerParamsT, SamplerType
14
16
  from data_designer.config.utils.code_lang import CodeLang
15
17
  from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX
16
- from data_designer.config.utils.misc import assert_valid_jinja2_template, get_prompt_template_keywords
18
+ from data_designer.config.utils.misc import assert_valid_jinja2_template, extract_keywords_from_jinja2_template
17
19
  from data_designer.config.validator_params import ValidatorParamsT, ValidatorType
18
20
 
19
21
 
@@ -35,7 +37,12 @@ class SingleColumnConfig(ConfigBase, ABC):
35
37
  drop: bool = False
36
38
  column_type: str
37
39
 
40
+ @staticmethod
41
+ def get_column_emoji() -> str:
42
+ return "🎨"
43
+
38
44
  @property
45
+ @abstractmethod
39
46
  def required_columns(self) -> list[str]:
40
47
  """Returns a list of column names that must exist before this column can be generated.
41
48
 
@@ -43,9 +50,9 @@ class SingleColumnConfig(ConfigBase, ABC):
43
50
  List of column names that this column depends on. Empty list indicates
44
51
  no dependencies. Override in subclasses to specify dependencies.
45
52
  """
46
- return []
47
53
 
48
54
  @property
55
+ @abstractmethod
49
56
  def side_effect_columns(self) -> list[str]:
50
57
  """Returns a list of additional columns that this column will create as a side effect.
51
58
 
@@ -56,7 +63,6 @@ class SingleColumnConfig(ConfigBase, ABC):
56
63
  List of column names that this column will create as a side effect. Empty list
57
64
  indicates no side effect columns. Override in subclasses to specify side effects.
58
65
  """
59
- return []
60
66
 
61
67
 
62
68
  class SamplerColumnConfig(SingleColumnConfig):
@@ -94,6 +100,18 @@ class SamplerColumnConfig(SingleColumnConfig):
94
100
  convert_to: str | None = None
95
101
  column_type: Literal["sampler"] = "sampler"
96
102
 
103
+ @staticmethod
104
+ def get_column_emoji() -> str:
105
+ return "🎲"
106
+
107
+ @property
108
+ def required_columns(self) -> list[str]:
109
+ return []
110
+
111
+ @property
112
+ def side_effect_columns(self) -> list[str]:
113
+ return []
114
+
97
115
  @model_validator(mode="before")
98
116
  @classmethod
99
117
  def inject_sampler_type_into_params(cls, data: dict) -> dict:
@@ -150,6 +168,10 @@ class LLMTextColumnConfig(SingleColumnConfig):
150
168
  multi_modal_context: list[ImageContext] | None = None
151
169
  column_type: Literal["llm-text"] = "llm-text"
152
170
 
171
+ @staticmethod
172
+ def get_column_emoji() -> str:
173
+ return "📝"
174
+
153
175
  @property
154
176
  def required_columns(self) -> list[str]:
155
177
  """Get columns referenced in the prompt and system_prompt templates.
@@ -157,9 +179,9 @@ class LLMTextColumnConfig(SingleColumnConfig):
157
179
  Returns:
158
180
  List of unique column names referenced in Jinja2 templates.
159
181
  """
160
- required_cols = list(get_prompt_template_keywords(self.prompt))
182
+ required_cols = list(extract_keywords_from_jinja2_template(self.prompt))
161
183
  if self.system_prompt:
162
- required_cols.extend(list(get_prompt_template_keywords(self.system_prompt)))
184
+ required_cols.extend(list(extract_keywords_from_jinja2_template(self.system_prompt)))
163
185
  return list(set(required_cols))
164
186
 
165
187
  @property
@@ -207,6 +229,10 @@ class LLMCodeColumnConfig(LLMTextColumnConfig):
207
229
  code_lang: CodeLang
208
230
  column_type: Literal["llm-code"] = "llm-code"
209
231
 
232
+ @staticmethod
233
+ def get_column_emoji() -> str:
234
+ return "💻"
235
+
210
236
 
211
237
  class LLMStructuredColumnConfig(LLMTextColumnConfig):
212
238
  """Configuration for structured JSON generation columns using Large Language Models.
@@ -225,6 +251,10 @@ class LLMStructuredColumnConfig(LLMTextColumnConfig):
225
251
  output_format: dict | type[BaseModel]
226
252
  column_type: Literal["llm-structured"] = "llm-structured"
227
253
 
254
+ @staticmethod
255
+ def get_column_emoji() -> str:
256
+ return "🗂️"
257
+
228
258
  @model_validator(mode="after")
229
259
  def validate_output_format(self) -> Self:
230
260
  """Convert Pydantic model to JSON schema if needed.
@@ -275,6 +305,10 @@ class LLMJudgeColumnConfig(LLMTextColumnConfig):
275
305
  scores: list[Score] = Field(..., min_length=1)
276
306
  column_type: Literal["llm-judge"] = "llm-judge"
277
307
 
308
+ @staticmethod
309
+ def get_column_emoji() -> str:
310
+ return "⚖️"
311
+
278
312
 
279
313
  class ExpressionColumnConfig(SingleColumnConfig):
280
314
  """Configuration for derived columns using Jinja2 expressions.
@@ -297,10 +331,18 @@ class ExpressionColumnConfig(SingleColumnConfig):
297
331
  dtype: Literal["int", "float", "str", "bool"] = "str"
298
332
  column_type: Literal["expression"] = "expression"
299
333
 
334
+ @staticmethod
335
+ def get_column_emoji() -> str:
336
+ return "🧩"
337
+
300
338
  @property
301
339
  def required_columns(self) -> list[str]:
302
340
  """Returns the columns referenced in the expression template."""
303
- return list(get_prompt_template_keywords(self.expr))
341
+ return list(extract_keywords_from_jinja2_template(self.expr))
342
+
343
+ @property
344
+ def side_effect_columns(self) -> list[str]:
345
+ return []
304
346
 
305
347
  @model_validator(mode="after")
306
348
  def assert_expression_valid_jinja(self) -> Self:
@@ -359,11 +401,19 @@ class ValidationColumnConfig(SingleColumnConfig):
359
401
  batch_size: int = Field(default=10, ge=1, description="Number of records to process in each batch")
360
402
  column_type: Literal["validation"] = "validation"
361
403
 
404
+ @staticmethod
405
+ def get_column_emoji() -> str:
406
+ return "🔍"
407
+
362
408
  @property
363
409
  def required_columns(self) -> list[str]:
364
410
  """Returns the columns that need to be validated."""
365
411
  return self.target_columns
366
412
 
413
+ @property
414
+ def side_effect_columns(self) -> list[str]:
415
+ return []
416
+
367
417
 
368
418
  class SeedDatasetColumnConfig(SingleColumnConfig):
369
419
  """Configuration for columns sourced from seed datasets.
@@ -378,6 +428,18 @@ class SeedDatasetColumnConfig(SingleColumnConfig):
378
428
 
379
429
  column_type: Literal["seed-dataset"] = "seed-dataset"
380
430
 
431
+ @staticmethod
432
+ def get_column_emoji() -> str:
433
+ return "🌱"
434
+
435
+ @property
436
+ def required_columns(self) -> list[str]:
437
+ return []
438
+
439
+ @property
440
+ def side_effect_columns(self) -> list[str]:
441
+ return []
442
+
381
443
 
382
444
  class EmbeddingColumnConfig(SingleColumnConfig):
383
445
  """Configuration for embedding generation columns.
@@ -395,6 +457,14 @@ class EmbeddingColumnConfig(SingleColumnConfig):
395
457
  model_alias: str
396
458
  column_type: Literal["embedding"] = "embedding"
397
459
 
460
+ @staticmethod
461
+ def get_column_emoji() -> str:
462
+ return "🧬"
463
+
398
464
  @property
399
465
  def required_columns(self) -> list[str]:
400
466
  return [self.target_column]
467
+
468
+ @property
469
+ def side_effect_columns(self) -> list[str]:
470
+ return []