data-designer 0.3.6__tar.gz → 0.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (444)
  1. data_designer-0.3.8/.claude/agents/docs-searcher.md +74 -0
  2. data_designer-0.3.8/.claude/agents/github-searcher.md +81 -0
  3. data_designer-0.3.8/.claude/settings.json +1 -0
  4. data_designer-0.3.8/.claude/settings.local.json +22 -0
  5. data_designer-0.3.8/.claude/skills/new-sdg/SKILL.md +117 -0
  6. data_designer-0.3.8/.claude/skills/search-docs/SKILL.md +16 -0
  7. data_designer-0.3.8/.claude/skills/search-github/SKILL.md +16 -0
  8. {data_designer-0.3.6 → data_designer-0.3.8}/.gitignore +0 -2
  9. {data_designer-0.3.6 → data_designer-0.3.8}/PKG-INFO +1 -1
  10. data_designer-0.3.8/docs/code_reference/run_config.md +6 -0
  11. {data_designer-0.3.6 → data_designer-0.3.8}/docs/recipes/cards.md +3 -0
  12. {data_designer-0.3.6 → data_designer-0.3.8}/mkdocs.yml +1 -0
  13. data_designer-0.3.8/packages/data-designer/src/data_designer/interface/_version.py +34 -0
  14. data_designer-0.3.8/packages/data-designer-config/src/data_designer/config/_version.py +34 -0
  15. data_designer-0.3.8/packages/data-designer-engine/src/data_designer/engine/_version.py +34 -0
  16. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/_version.py +2 -2
  17. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/default_model_settings.py +14 -6
  18. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/run_config.py +3 -0
  19. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/utils/constants.py +2 -0
  20. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/generators/llm_completion.py +15 -3
  21. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/compiler.py +32 -1
  22. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/dataset_builders/column_wise_builder.py +2 -5
  23. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/dataset_builders/utils/concurrency.py +0 -3
  24. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/litellm_overrides.py +28 -22
  25. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/utils.py +15 -8
  26. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +7 -3
  27. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/interface/data_designer.py +8 -5
  28. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/controllers/test_download_controller.py +7 -3
  29. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/repositories/test_persona_repository.py +3 -3
  30. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/services/test_download_service.py +3 -1
  31. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/test_default_model_settings.py +76 -6
  32. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/generators/test_llm_completion_generators.py +79 -0
  33. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/dataset_builders/test_column_wise_builder.py +22 -16
  34. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/test_litellm_overrides.py +1 -5
  35. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/test_compiler.py +68 -2
  36. {data_designer-0.3.6 → data_designer-0.3.8}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
  37. {data_designer-0.3.6 → data_designer-0.3.8}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  38. {data_designer-0.3.6 → data_designer-0.3.8}/.github/ISSUE_TEMPLATE/development-task.yml +0 -0
  39. {data_designer-0.3.6 → data_designer-0.3.8}/.github/ISSUE_TEMPLATE/feature-request.yml +0 -0
  40. {data_designer-0.3.6 → data_designer-0.3.8}/.github/workflows/build-docs.yml +0 -0
  41. {data_designer-0.3.6 → data_designer-0.3.8}/.github/workflows/build-notebooks.yml +0 -0
  42. {data_designer-0.3.6 → data_designer-0.3.8}/.github/workflows/check-colab-notebooks.yml +0 -0
  43. {data_designer-0.3.6 → data_designer-0.3.8}/.github/workflows/ci.yml +0 -0
  44. {data_designer-0.3.6 → data_designer-0.3.8}/.github/workflows/dco-assistant.yml +0 -0
  45. {data_designer-0.3.6 → data_designer-0.3.8}/.github/workflows/pack-tutorials.yml +0 -0
  46. {data_designer-0.3.6 → data_designer-0.3.8}/.github/workflows/semantic-pull-requests.yml +0 -0
  47. {data_designer-0.3.6 → data_designer-0.3.8}/.pre-commit-config.yaml +0 -0
  48. {data_designer-0.3.6 → data_designer-0.3.8}/AGENTS.md +0 -0
  49. {data_designer-0.3.6 → data_designer-0.3.8}/CLAUDE.md +0 -0
  50. {data_designer-0.3.6 → data_designer-0.3.8}/CODE_OF_CONDUCT.md +0 -0
  51. {data_designer-0.3.6 → data_designer-0.3.8}/CONTRIBUTING.md +0 -0
  52. {data_designer-0.3.6 → data_designer-0.3.8}/DCO +0 -0
  53. {data_designer-0.3.6 → data_designer-0.3.8}/LICENSE +0 -0
  54. {data_designer-0.3.6 → data_designer-0.3.8}/Makefile +0 -0
  55. {data_designer-0.3.6 → data_designer-0.3.8}/README.md +0 -0
  56. {data_designer-0.3.6 → data_designer-0.3.8}/VERSIONING.md +0 -0
  57. {data_designer-0.3.6 → data_designer-0.3.8}/docs/CONTRIBUTING.md +0 -0
  58. {data_designer-0.3.6 → data_designer-0.3.8}/docs/assets/palette-favicon.png +0 -0
  59. {data_designer-0.3.6 → data_designer-0.3.8}/docs/assets/recipes/code_generation/text_to_python.py +0 -0
  60. {data_designer-0.3.6 → data_designer-0.3.8}/docs/assets/recipes/code_generation/text_to_sql.py +0 -0
  61. {data_designer-0.3.6 → data_designer-0.3.8}/docs/assets/recipes/qa_and_chat/multi_turn_chat.py +0 -0
  62. {data_designer-0.3.6 → data_designer-0.3.8}/docs/assets/recipes/qa_and_chat/product_info_qa.py +0 -0
  63. {data_designer-0.3.6 → data_designer-0.3.8}/docs/code_reference/analysis.md +0 -0
  64. {data_designer-0.3.6 → data_designer-0.3.8}/docs/code_reference/column_configs.md +0 -0
  65. {data_designer-0.3.6 → data_designer-0.3.8}/docs/code_reference/config_builder.md +0 -0
  66. {data_designer-0.3.6 → data_designer-0.3.8}/docs/code_reference/data_designer_config.md +0 -0
  67. {data_designer-0.3.6 → data_designer-0.3.8}/docs/code_reference/models.md +0 -0
  68. {data_designer-0.3.6 → data_designer-0.3.8}/docs/code_reference/processors.md +0 -0
  69. {data_designer-0.3.6 → data_designer-0.3.8}/docs/code_reference/sampler_params.md +0 -0
  70. {data_designer-0.3.6 → data_designer-0.3.8}/docs/code_reference/validator_params.md +0 -0
  71. {data_designer-0.3.6 → data_designer-0.3.8}/docs/colab_notebooks/1-the-basics.ipynb +0 -0
  72. {data_designer-0.3.6 → data_designer-0.3.8}/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +0 -0
  73. {data_designer-0.3.6 → data_designer-0.3.8}/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +0 -0
  74. {data_designer-0.3.6 → data_designer-0.3.8}/docs/colab_notebooks/4-providing-images-as-context.ipynb +0 -0
  75. {data_designer-0.3.6 → data_designer-0.3.8}/docs/concepts/columns.md +0 -0
  76. {data_designer-0.3.6 → data_designer-0.3.8}/docs/concepts/models/configure-model-settings-with-the-cli.md +0 -0
  77. {data_designer-0.3.6 → data_designer-0.3.8}/docs/concepts/models/custom-model-settings.md +0 -0
  78. {data_designer-0.3.6 → data_designer-0.3.8}/docs/concepts/models/default-model-settings.md +0 -0
  79. {data_designer-0.3.6 → data_designer-0.3.8}/docs/concepts/models/inference-parameters.md +0 -0
  80. {data_designer-0.3.6 → data_designer-0.3.8}/docs/concepts/models/model-configs.md +0 -0
  81. {data_designer-0.3.6 → data_designer-0.3.8}/docs/concepts/models/model-providers.md +0 -0
  82. {data_designer-0.3.6 → data_designer-0.3.8}/docs/concepts/person_sampling.md +0 -0
  83. {data_designer-0.3.6 → data_designer-0.3.8}/docs/concepts/processors.md +0 -0
  84. {data_designer-0.3.6 → data_designer-0.3.8}/docs/concepts/validators.md +0 -0
  85. {data_designer-0.3.6 → data_designer-0.3.8}/docs/css/mkdocstrings.css +0 -0
  86. {data_designer-0.3.6 → data_designer-0.3.8}/docs/css/style.css +0 -0
  87. {data_designer-0.3.6 → data_designer-0.3.8}/docs/images/top-models.png +0 -0
  88. {data_designer-0.3.6 → data_designer-0.3.8}/docs/index.md +0 -0
  89. {data_designer-0.3.6 → data_designer-0.3.8}/docs/installation.md +0 -0
  90. {data_designer-0.3.6 → data_designer-0.3.8}/docs/js/toc-toggle.js +0 -0
  91. {data_designer-0.3.6 → data_designer-0.3.8}/docs/notebook_source/1-the-basics.py +0 -0
  92. {data_designer-0.3.6 → data_designer-0.3.8}/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py +0 -0
  93. {data_designer-0.3.6 → data_designer-0.3.8}/docs/notebook_source/3-seeding-with-a-dataset.py +0 -0
  94. {data_designer-0.3.6 → data_designer-0.3.8}/docs/notebook_source/4-providing-images-as-context.py +0 -0
  95. {data_designer-0.3.6 → data_designer-0.3.8}/docs/notebook_source/README.md +0 -0
  96. {data_designer-0.3.6 → data_designer-0.3.8}/docs/notebook_source/_README.md +0 -0
  97. {data_designer-0.3.6 → data_designer-0.3.8}/docs/notebook_source/_pyproject.toml +0 -0
  98. {data_designer-0.3.6 → data_designer-0.3.8}/docs/overrides/main.html +0 -0
  99. {data_designer-0.3.6 → data_designer-0.3.8}/docs/plugins/available.md +0 -0
  100. {data_designer-0.3.6 → data_designer-0.3.8}/docs/plugins/example.md +0 -0
  101. {data_designer-0.3.6 → data_designer-0.3.8}/docs/plugins/overview.md +0 -0
  102. {data_designer-0.3.6 → data_designer-0.3.8}/docs/quick-start.md +0 -0
  103. {data_designer-0.3.6 → data_designer-0.3.8}/docs/recipes/code_generation/text_to_python.md +0 -0
  104. {data_designer-0.3.6 → data_designer-0.3.8}/docs/recipes/code_generation/text_to_sql.md +0 -0
  105. {data_designer-0.3.6 → data_designer-0.3.8}/docs/recipes/qa_and_chat/multi_turn_chat.md +0 -0
  106. {data_designer-0.3.6 → data_designer-0.3.8}/docs/recipes/qa_and_chat/product_info_qa.md +0 -0
  107. {data_designer-0.3.6 → data_designer-0.3.8}/docs/scripts/generate_colab_notebooks.py +0 -0
  108. {data_designer-0.3.6 → data_designer-0.3.8}/pyproject.toml +0 -0
  109. {data_designer-0.3.6 → data_designer-0.3.8}/scripts/test_license_headers.py +0 -0
  110. {data_designer-0.3.6 → data_designer-0.3.8}/scripts/update_license_headers.py +0 -0
  111. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/__init__.py +0 -0
  112. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/README.md +0 -0
  113. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/__init__.py +0 -0
  114. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/commands/__init__.py +0 -0
  115. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/commands/download.py +0 -0
  116. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/commands/list.py +0 -0
  117. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/commands/models.py +0 -0
  118. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/commands/providers.py +0 -0
  119. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/commands/reset.py +0 -0
  120. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/controllers/__init__.py +0 -0
  121. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/controllers/download_controller.py +0 -0
  122. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/controllers/model_controller.py +0 -0
  123. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/controllers/provider_controller.py +0 -0
  124. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/forms/__init__.py +0 -0
  125. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/forms/builder.py +0 -0
  126. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/forms/field.py +0 -0
  127. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/forms/form.py +0 -0
  128. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/forms/model_builder.py +0 -0
  129. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/forms/provider_builder.py +0 -0
  130. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/main.py +0 -0
  131. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/repositories/__init__.py +0 -0
  132. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/repositories/base.py +0 -0
  133. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/repositories/model_repository.py +0 -0
  134. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/repositories/persona_repository.py +0 -0
  135. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/repositories/provider_repository.py +0 -0
  136. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/services/__init__.py +0 -0
  137. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/services/download_service.py +0 -0
  138. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/services/model_service.py +0 -0
  139. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/services/provider_service.py +0 -0
  140. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/ui.py +0 -0
  141. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/cli/utils.py +0 -0
  142. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/__init__.py +0 -0
  143. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/analysis/__init__.py +0 -0
  144. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/analysis/column_profilers.py +0 -0
  145. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/analysis/column_statistics.py +0 -0
  146. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/analysis/dataset_profiler.py +0 -0
  147. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/analysis/utils/errors.py +0 -0
  148. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/analysis/utils/reporting.py +0 -0
  149. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/base.py +0 -0
  150. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/column_configs.py +0 -0
  151. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/column_types.py +0 -0
  152. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/config_builder.py +0 -0
  153. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/data_designer_config.py +0 -0
  154. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/dataset_builders.py +0 -0
  155. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/dataset_metadata.py +0 -0
  156. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/errors.py +0 -0
  157. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/exports.py +0 -0
  158. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/interface.py +0 -0
  159. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/models.py +0 -0
  160. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/preview_results.py +0 -0
  161. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/processors.py +0 -0
  162. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/sampler_constraints.py +0 -0
  163. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/sampler_params.py +0 -0
  164. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/seed.py +0 -0
  165. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/seed_source.py +0 -0
  166. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/seed_source_types.py +0 -0
  167. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/utils/code_lang.py +0 -0
  168. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/utils/errors.py +0 -0
  169. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/utils/info.py +0 -0
  170. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/utils/io_helpers.py +0 -0
  171. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/utils/misc.py +0 -0
  172. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/utils/numerical_helpers.py +0 -0
  173. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/utils/type_helpers.py +0 -0
  174. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/utils/visualization.py +0 -0
  175. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/config/validator_params.py +0 -0
  176. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/__init__.py +0 -0
  177. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/analysis/column_profilers/base.py +0 -0
  178. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -0
  179. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
  180. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/analysis/column_statistics.py +0 -0
  181. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/analysis/dataset_profiler.py +0 -0
  182. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/analysis/errors.py +0 -0
  183. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -0
  184. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -0
  185. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/__init__.py +0 -0
  186. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/generators/__init__.py +0 -0
  187. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/generators/base.py +0 -0
  188. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/generators/embedding.py +0 -0
  189. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
  190. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
  191. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
  192. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
  193. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/registry.py +0 -0
  194. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
  195. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/utils/generator_classification.py +0 -0
  196. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -0
  197. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
  198. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/configurable_task.py +0 -0
  199. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -0
  200. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/dataset_builders/errors.py +0 -0
  201. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
  202. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -0
  203. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
  204. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
  205. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
  206. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
  207. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/errors.py +0 -0
  208. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/model_provider.py +0 -0
  209. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/__init__.py +0 -0
  210. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/errors.py +0 -0
  211. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/facade.py +0 -0
  212. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/factory.py +0 -0
  213. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/parsers/__init__.py +0 -0
  214. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/parsers/errors.py +0 -0
  215. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/parsers/parser.py +0 -0
  216. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/parsers/postprocessors.py +0 -0
  217. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
  218. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/parsers/types.py +0 -0
  219. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/recipes/base.py +0 -0
  220. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
  221. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/registry.py +0 -0
  222. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/telemetry.py +0 -0
  223. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/usage.py +0 -0
  224. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/models/utils.py +0 -0
  225. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/ginja/__init__.py +0 -0
  226. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/ginja/ast.py +0 -0
  227. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/ginja/environment.py +0 -0
  228. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
  229. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/ginja/record.py +0 -0
  230. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/gsonschema/__init__.py +0 -0
  231. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
  232. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
  233. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
  234. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
  235. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/processors/base.py +0 -0
  236. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/processors/drop_columns.py +0 -0
  237. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/processors/registry.py +0 -0
  238. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/processing/processors/schema_transform.py +0 -0
  239. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/registry/base.py +0 -0
  240. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
  241. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/registry/errors.py +0 -0
  242. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
  243. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
  244. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/resources/managed_storage.py +0 -0
  245. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/resources/resource_provider.py +0 -0
  246. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/resources/seed_reader.py +0 -0
  247. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/column.py +0 -0
  248. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/constraints.py +0 -0
  249. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -0
  250. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
  251. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
  252. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -0
  253. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
  254. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
  255. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
  256. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
  257. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
  258. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -0
  259. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/errors.py +0 -0
  260. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/generator.py +0 -0
  261. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
  262. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/people_gen.py +0 -0
  263. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
  264. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/schema.py +0 -0
  265. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
  266. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/sampling_gen/utils.py +0 -0
  267. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/secret_resolver.py +0 -0
  268. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/validation.py +0 -0
  269. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/validators/__init__.py +0 -0
  270. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/validators/base.py +0 -0
  271. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/validators/local_callable.py +0 -0
  272. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/validators/python.py +0 -0
  273. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/validators/remote.py +0 -0
  274. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/engine/validators/sql.py +0 -0
  275. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/errors.py +0 -0
  276. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/essentials/__init__.py +0 -0
  277. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/interface/__init__.py +0 -0
  278. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/interface/errors.py +0 -0
  279. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/interface/results.py +0 -0
  280. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/lazy_heavy_imports.py +0 -0
  281. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/logging.py +0 -0
  282. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/plugin_manager.py +0 -0
  283. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/plugins/__init__.py +0 -0
  284. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/plugins/errors.py +0 -0
  285. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/plugins/plugin.py +0 -0
  286. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/plugins/registry.py +0 -0
  287. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/plugins/testing/__init__.py +0 -0
  288. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/plugins/testing/stubs.py +0 -0
  289. {data_designer-0.3.6 → data_designer-0.3.8}/src/data_designer/plugins/testing/utils.py +0 -0
  290. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/commands/test_download_command.py +0 -0
  291. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/commands/test_list_command.py +0 -0
  292. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/commands/test_models_command.py +0 -0
  293. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/commands/test_providers_command.py +0 -0
  294. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/commands/test_reset_command.py +0 -0
  295. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/conftest.py +0 -0
  296. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/controllers/test_model_controller.py +0 -0
  297. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/controllers/test_provider_controller.py +0 -0
  298. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/forms/test_field.py +0 -0
  299. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/forms/test_form.py +0 -0
  300. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/forms/test_model_builder.py +0 -0
  301. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/forms/test_provider_builder.py +0 -0
  302. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/repositories/test_model_repository.py +0 -0
  303. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/repositories/test_provider_repository.py +0 -0
  304. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/services/test_model_service.py +0 -0
  305. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/services/test_provider_service.py +0 -0
  306. {data_designer-0.3.6 → data_designer-0.3.8}/tests/cli/test_cli_utils.py +0 -0
  307. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/analysis/conftest.py +0 -0
  308. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/analysis/test_column_statistics.py +0 -0
  309. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/analysis/test_dataset_profiler_results.py +0 -0
  310. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/analysis/utils/test_reporting.py +0 -0
  311. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/test_columns.py +0 -0
  312. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/test_config_builder.py +0 -0
  313. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/test_data_designer_config.py +0 -0
  314. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/test_models.py +0 -0
  315. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/test_processors.py +0 -0
  316. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/test_sampler_constraints.py +0 -0
  317. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/test_sampler_params.py +0 -0
  318. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/test_seed.py +0 -0
  319. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/test_seed_source.py +0 -0
  320. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/test_validator_params.py +0 -0
  321. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/utils/__init__.py +0 -0
  322. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/utils/test_code_lang.py +0 -0
  323. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/utils/test_info.py +0 -0
  324. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/utils/test_io_helpers.py +0 -0
  325. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/utils/test_misc.py +0 -0
  326. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/utils/test_type_helpers.py +0 -0
  327. {data_designer-0.3.6 → data_designer-0.3.8}/tests/config/utils/test_visualization.py +0 -0
  328. {data_designer-0.3.6 → data_designer-0.3.8}/tests/conftest.py +0 -0
  329. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/analysis/column_profilers/test_base.py +0 -0
  330. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
  331. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/analysis/conftest.py +0 -0
  332. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
  333. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
  334. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
  335. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
  336. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/analysis/test_dataset_profiler.py +0 -0
  337. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/analysis/test_errors.py +0 -0
  338. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
  339. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
  340. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/generators/__init__.py +0 -0
  341. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
  342. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/generators/test_embedding.py +0 -0
  343. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/generators/test_expression.py +0 -0
  344. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/generators/test_samplers.py +0 -0
  345. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
  346. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/generators/test_validation.py +0 -0
  347. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/test_registry.py +0 -0
  348. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
  349. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/utils/test_generator_classification.py +0 -0
  350. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
  351. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
  352. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/conftest.py +0 -0
  353. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
  354. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
  355. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
  356. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
  357. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
  358. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
  359. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/conftest.py +0 -0
  360. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/parsers/test_parser.py +0 -0
  361. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/parsers/test_parsers_types.py +0 -0
  362. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/parsers/test_postprocessors.py +0 -0
  363. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
  364. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/recipes/test_recipe_base.py +0 -0
  365. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/recipes/test_response_recipes.py +0 -0
  366. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/stub_secrets.json +0 -0
  367. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/test_facade.py +0 -0
  368. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/test_model_errors.py +0 -0
  369. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/test_model_registry.py +0 -0
  370. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/test_model_utils.py +0 -0
  371. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/models/test_usage.py +0 -0
  372. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/__init__.py +0 -0
  373. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/ginja/__init__.py +0 -0
  374. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/ginja/test_ast.py +0 -0
  375. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/ginja/test_environment.py +0 -0
  376. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/ginja/test_exceptions.py +0 -0
  377. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/ginja/test_record.py +0 -0
  378. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/gsonschema/__init__.py +0 -0
  379. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
  380. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
  381. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/gsonschema/test_types.py +0 -0
  382. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/gsonschema/test_validators.py +0 -0
  383. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/processors/__init__.py +0 -0
  384. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/processors/test_drop_columns.py +0 -0
  385. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/processors/test_registry.py +0 -0
  386. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/processors/test_schema_transform.py +0 -0
  387. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/processing/test_utils.py +0 -0
  388. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/registry/__init__.py +0 -0
  389. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/registry/conftest.py +0 -0
  390. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/registry/test_base.py +0 -0
  391. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/registry/test_data_designer_registry.py +0 -0
  392. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/registry/test_errors.py +0 -0
  393. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/resources/__init__.py +0 -0
  394. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/resources/conftest.py +0 -0
  395. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
  396. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
  397. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/resources/test_managed_storage.py +0 -0
  398. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/resources/test_resource_provider.py +0 -0
  399. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/resources/test_seed_reader.py +0 -0
  400. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/conftest.py +0 -0
  401. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
  402. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
  403. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
  404. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
  405. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/entities/test_person.py +0 -0
  406. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
  407. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/test_column.py +0 -0
  408. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/test_constraints.py +0 -0
  409. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/test_generator.py +0 -0
  410. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
  411. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/test_people_gen.py +0 -0
  412. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/test_schema.py +0 -0
  413. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/sampling_gen/test_utils.py +0 -0
  414. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/test_configurable_task.py +0 -0
  415. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/test_dataset_metadata.py +0 -0
  416. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/test_engine_errors.py +0 -0
  417. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/test_model_provider.py +0 -0
  418. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/test_secret_resolver.py +0 -0
  419. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/test_validation.py +0 -0
  420. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/validators/test_local_callable.py +0 -0
  421. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/validators/test_python.py +0 -0
  422. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/validators/test_remote.py +0 -0
  423. {data_designer-0.3.6 → data_designer-0.3.8}/tests/engine/validators/test_sql.py +0 -0
  424. {data_designer-0.3.6 → data_designer-0.3.8}/tests/essentials/test_init.py +0 -0
  425. {data_designer-0.3.6 → data_designer-0.3.8}/tests/interface/test_data_designer.py +0 -0
  426. {data_designer-0.3.6 → data_designer-0.3.8}/tests/interface/test_results.py +0 -0
  427. {data_designer-0.3.6 → data_designer-0.3.8}/tests/plugins/test_plugin.py +0 -0
  428. {data_designer-0.3.6 → data_designer-0.3.8}/tests/plugins/test_plugin_registry.py +0 -0
  429. {data_designer-0.3.6 → data_designer-0.3.8}/tests/test_import_perf.py +0 -0
  430. {data_designer-0.3.6 → data_designer-0.3.8}/tests/test_logging.py +0 -0
  431. {data_designer-0.3.6 → data_designer-0.3.8}/tests/test_plugin_manager.py +0 -0
  432. {data_designer-0.3.6 → data_designer-0.3.8}/tests_e2e/pyproject.toml +0 -0
  433. {data_designer-0.3.6 → data_designer-0.3.8}/tests_e2e/src/data_designer_e2e_tests/plugins/__init__.py +0 -0
  434. {data_designer-0.3.6 → data_designer-0.3.8}/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/__init__.py +0 -0
  435. {data_designer-0.3.6 → data_designer-0.3.8}/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/config.py +0 -0
  436. {data_designer-0.3.6 → data_designer-0.3.8}/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/impl.py +0 -0
  437. {data_designer-0.3.6 → data_designer-0.3.8}/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/plugin.py +0 -0
  438. {data_designer-0.3.6 → data_designer-0.3.8}/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/__init__.py +0 -0
  439. {data_designer-0.3.6 → data_designer-0.3.8}/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/config.py +0 -0
  440. {data_designer-0.3.6 → data_designer-0.3.8}/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/impl.py +0 -0
  441. {data_designer-0.3.6 → data_designer-0.3.8}/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/plugin.py +0 -0
  442. {data_designer-0.3.6 → data_designer-0.3.8}/tests_e2e/tests/test_e2e.py +0 -0
  443. {data_designer-0.3.6 → data_designer-0.3.8}/tests_e2e/tests/test_seed.csv +0 -0
  444. {data_designer-0.3.6 → data_designer-0.3.8}/uv.lock +0 -0
@@ -0,0 +1,74 @@
1
+ ---
2
+ name: docs-searcher
3
+ description: Search local documentation in the docs/ folder for content related to a topic. Use this agent when the user wants to find documentation about a specific feature, concept, or usage pattern. Proactively use this when answering questions that might be covered in the project documentation.
4
+ tools: Glob, Grep, Read
5
+ model: haiku
6
+ permissionMode: bypassPermissions
7
+ ---
8
+
9
+ # Documentation Search Agent
10
+
11
+ You are a documentation search specialist. Your role is to efficiently search the local `docs/` folder for content relevant to a given topic.
12
+
13
+ ## Instructions
14
+
15
+ When given a search topic, perform the following searches:
16
+
17
+ 1. **Find all documentation files** in the docs/ folder:
18
+ ```
19
+ Glob pattern: "docs/**/*.md"
20
+ ```
21
+
22
+ 2. **Search for topic keywords** across all markdown files:
23
+ ```
24
+ Grep pattern: "<topic keywords>" in path: "docs/"
25
+ ```
26
+ - Try multiple variations of the search terms (singular/plural, related terms)
27
+ - Use case-insensitive search (`-i: true`)
28
+
29
+ 3. **Read relevant sections** from files with matches:
30
+ - Read the matched files to get full context
31
+ - Extract the most relevant sections around the matches
32
+
33
+ 4. **Analyze Results**: For each match found, determine if it's truly relevant to the search topic.
34
+
35
+ 5. **Output Format**: Return a structured markdown summary with:
36
+ - Links to relevant documentation files
37
+ - Brief excerpts showing the relevant content
38
+ - A sentence explaining why each result is pertinent
39
+
40
+ ## Output Template
41
+
42
+ ```markdown
43
+ ## Documentation Search Results for "<topic>"
44
+
45
+ ### Relevant Documentation
46
+
47
+ - **[docs/path/to/file.md](docs/path/to/file.md)**
48
+ > Brief excerpt showing relevant content...
49
+
50
+ Explanation of why this is relevant to the search topic.
51
+
52
+ - **[docs/another/file.md](docs/another/file.md)**
53
+ > Another relevant excerpt...
54
+
55
+ Explanation of relevance.
56
+
57
+ ### Summary
58
+ Brief summary of what was found and any recommendations for the user.
59
+ ```
60
+
61
+ ## Important Notes
62
+
63
+ - Only include results that are actually relevant to the search topic
64
+ - If no relevant documentation is found, clearly state that
65
+ - Keep excerpts concise but include enough context to be useful
66
+ - Prioritize user guides and examples over API reference when both exist
67
+ - If the docs/ folder doesn't exist or is empty, report that clearly
68
+
69
+ ## Search Strategy
70
+
71
+ 1. Start with exact keyword matches
72
+ 2. If few results, try related terms or partial matches
73
+ 3. Check file names for topic-related terms (e.g., searching "models" should check files named `models.md`, `model-config.md`, etc.)
74
+ 4. Look at section headings within files for topic mentions
@@ -0,0 +1,81 @@
1
+ ---
2
+ name: github-searcher
3
+ description: Search GitHub issues, discussions, and PRs for content related to a topic. Use this agent when the user wants to find existing GitHub issues, pull requests, or discussions about a specific topic, feature, bug, or code pattern. Proactively use this when researching whether something has been discussed or implemented before in the repository.
4
+ tools: Bash
5
+ model: haiku
6
+ permissionMode: bypassPermissions
7
+ ---
8
+
9
+ # GitHub Content Search Agent
10
+
11
+ You are a GitHub search specialist. Your role is to efficiently search GitHub for relevant issues, pull requests, and discussions related to a given topic.
12
+
13
+ ## Instructions
14
+
15
+ When given a search topic, perform the following searches:
16
+
17
+ 1. **Search Issues** using the `gh` CLI:
18
+ ```bash
19
+ gh issue list --search "<topic>" --limit 20 --json number,title,url,body,state
20
+ ```
21
+
22
+ 2. **Search Pull Requests** using the `gh` CLI:
23
+ ```bash
24
+ gh pr list --search "<topic>" --limit 20 --json number,title,url,body,state
25
+ ```
26
+
27
+ 3. **Search Discussions** using the `gh` CLI (if the repository has discussions enabled):
28
+ ```bash
29
+ gh api graphql -f query='
30
+ query($search: String!) {
31
+ search(query: $search, type: DISCUSSION, first: 20) {
32
+ nodes {
33
+ ... on Discussion {
34
+ title
35
+ url
36
+ body
37
+ category { name }
38
+ }
39
+ }
40
+ }
41
+ }
42
+ ' -f search="repo:{owner}/{repo} <topic>"
43
+ ```
44
+ Note: Get the owner/repo from `gh repo view --json nameWithOwner -q .nameWithOwner`
45
+
46
+ 4. **Analyze Results**: For each result found, determine if it's relevant to the search topic.
47
+
48
+ 5. **Output Format**: Return a markdown list with:
49
+ - A link to each relevant item (issue, PR, or discussion)
50
+ - A *single* sentence explaining why that link is pertinent to the search topic
51
+
52
+ ## Output Template
53
+
54
+ ```markdown
55
+ ## GitHub Search Results for "<topic>"
56
+
57
+ ### Issues
58
+ - [Issue #123: Title](url) - Brief explanation of relevance.
59
+ - [Issue #456: Title](url) - Brief explanation of relevance.
60
+
61
+ ### Pull Requests
62
+ - [PR #789: Title](url) - Brief explanation of relevance.
63
+
64
+ ### Discussions
65
+ - [Discussion: Title](url) - Brief explanation of relevance.
66
+ ```
67
+
68
+ ## Important Notes
69
+
70
+ - Only include results that are actually relevant to the search topic
71
+ - If a category (issues, PRs, discussions) has no relevant results, note "No relevant items found"
72
+ - Keep descriptions to a single sentence
73
+ - If discussions search fails (repository doesn't have discussions), skip that section
74
+ - Prioritize open items over closed ones, but include relevant closed items too
75
+
76
+ ## Command Guidelines
77
+
78
+ - **NEVER use pipes or shell fallbacks** like `|| echo "..."` or `| grep ...` in your commands
79
+ - Run each `gh` command directly without any error handling wrappers
80
+ - If a command returns an error or empty result, handle it in your analysis logic, not with shell constructs
81
+ - Run the three searches (issues, PRs, discussions) as separate Bash commands
@@ -0,0 +1 @@
1
+ {}
@@ -0,0 +1,22 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(uv run:*)",
5
+ "Bash(make install-dev-notebooks:*)",
6
+ "Bash(make lint:*)",
7
+ "Bash(curl:*)",
8
+ "Bash(uv pip:*)",
9
+ "Bash(make lint-fix:*)",
10
+ "Bash(mv:*)",
11
+ "Bash(make test:*)",
12
+ "Bash(make serve-docs-locally:*)",
13
+ "Bash(rm:*)",
14
+ "Bash(ls:*)",
15
+ "Bash(find:*)",
16
+ "Bash(git -C /Users/johnnygreco/projects/nvidia/DataDesigner diff --stat)",
17
+ "Bash(git cherry-pick:*)"
18
+ ],
19
+ "deny": [],
20
+ "ask": []
21
+ }
22
+ }
@@ -0,0 +1,117 @@
1
+ ---
2
+ name: new-sdg
3
+ description: Implement a new synthetic data generator using NeMo Data Designer by defining its configuration and executing a preview job.
4
+ argument-hint: <dataset-description>
5
+ ---
6
+
7
+ # Your Goal
8
+
9
+ Implement a new synthetic data generator using NeMo Data Designer to match the user's specifications below.
10
+
11
+ <dataset-description>
12
+ **$ARGUMENTS**
13
+ </dataset-description>
14
+
15
+ ## Getting Exact Specifications
16
+
17
+ The user will provide you with some description, but it is likely that you
18
+ do not have enough information to precisely define what they want. It is hard
19
+ for a user to define everything up front. Ask follow up questions to the user
20
+ using the AskUser tool to narrow down on precisely what they want.
21
+
22
+ Common things to make precise are:
23
+
24
+ - IMPORTANT: What the "axes of diversity" are -- e.g. what should be well represented and diverse in the resulting dataset.
25
+ - The kind and nature of any input data to the dataset.
26
+ - What variables should be randomized.
27
+ - The schema of the final dataset.
28
+ - The structure of any required structured output columns.
29
+ - What facets of the output dataset are important to capture.
30
+
31
+ ## Interactive, Iterative Design
32
+
33
+ > USER: Request
34
+ > YOU: Clarifying AskUser Questions
35
+ > YOU: Script Implementation (with preview)
36
+ > YOU: Script Execution
37
+ > YOU: Result Presentation
38
+ > YOU: Followup Questions
39
+ > USER: Respond
40
+ > YOU: ...repeat...
41
+
42
+ Very often, the initial implementation will not conform precisely to what the user wants. You are to engage in an **iterative design loop** with the user. As shown
43
+ in the example below, you will construct a configuration, then review its outputs,
44
+ present those outputs to the user, and ask follow up questions.
45
+
46
+ Depending on the user responses, you will then edit the script, re-run it, present the user with the results, ask follow-up questions, and so on. When showing results to the user, DO NOT SUMMARIZE content; it is *very important* that you show them the records as-is so they can make thoughtful decisions.
47
+
48
+ DO NOT disengage from this **iterative design loop** unless commanded by the user.
49
+
50
+
51
+ ## Implementing a NeMo Data Designer Synthetic Data Generator
52
+
53
+ - You will be writing a new python script for execution.
54
+ - The script should be made in the current working directory, so `$(pwd)/script-name.py`.
55
+ - Implement the script as a stand-alone, `uv`-executable script (https://docs.astral.sh/uv/guides/scripts/#creating-a-python-script).
56
+ - The script should depend on the latest version of `data-designer`.
57
+ - Include other third-party dependencies only if the job requires it.
58
+ - Model aliases are required when defining LLM generation columns.
59
+ - Before implementing, make sure to use the Explore tool to understand the src/ and docs/.
60
+ - Review available model aliases and providers.
61
+ - You will need to ask the user what Model Provider they want to use via AskUser tool.
62
+ - You may use Web Search to find any information you need to help you construct the SDG, since real-world grounding is key to a good dataset.
63
+ - If you need to use a large number of categories for a sampler, just build a pandas DataFrame and use it as a Seed dataset.
64
+
65
+ ### Model Aliases and Providers
66
+
67
+ View known model aliases and providers with the following command. You will need a longer timeout on first run (package first-time boot).
68
+
69
+ ```bash
70
+ uv run --with data-designer data-designer config list
71
+ ```
72
+
73
+ ### Real World Seed Data
74
+
75
+ Depending on user requirements, you may need to access real-world datasets to serve as Seed datasets for your Data Designer SDG.
76
+ In these cases, you may use Web Search tools to search for datasets available on HuggingFace, and use the `datasets` python library
77
+ to load them. You will have to convert them to Pandas DataFrames in these cases.
78
+
79
+ If you do use real-world data, pay attention to file sizes and avoid large file transfers. Only download small sections of datasets or use a streaming option.
80
+
81
+ ### Example
82
+
83
+ ```python
84
+ # /// script
85
+ # dependencies = [
86
+ # "data-designer",
87
+ # ]
88
+ # ///
89
+
90
+ # ... data designer config_builder implementation
91
+
92
+ def build_config() -> DataDesignerConfigBuilder:
93
+ """Implements the definition of the synthetic data generator.
94
+ """
95
+ config_builder = DataDesignerConfigBuilder()
96
+
97
+ ## Add whatever columns need to be added
98
+ # config_builder.add_column(...)
99
+ # config_builder.add_column(...)
100
+ # config_builder.add_column(...)
101
+
102
+ return config_builder
103
+
104
+ if __name__ == "__main__":
105
+ config_builder = build_config()
106
+ designer = DataDesigner()
107
+ preview = designer.preview(config_builder=config_builder)
108
+
109
+ # The following command will print a random sample record
110
+ # which you can present to the user
111
+ preview.display_sample_record()
112
+
113
+ # The raw data is located in this Pandas DataFrame object.
114
+ # You can implement code to display some or all of this
115
+ # to STDOUT so you can see the outputs and report to the user.
116
+ preview.dataset
117
+ ```
@@ -0,0 +1,16 @@
1
+ ---
2
+ name: search-docs
3
+ description: Search local documentation in the docs/ folder for content related to a topic
4
+ argument-hint: <search-topic>
5
+ ---
6
+
7
+ # Documentation Search
8
+
9
+ Use the `docs-searcher` subagent to search local documentation for content related to: **$ARGUMENTS**
10
+
11
+ Call the Task tool with:
12
+ - `subagent_type: "docs-searcher"`
13
+ - `mode: "bypassPermissions"`
14
+ - `prompt`: the search topic
15
+
16
+ Report the results back to the user exactly as returned by the agent.
@@ -0,0 +1,16 @@
1
+ ---
2
+ name: search-github
3
+ description: Search GitHub issues, discussions, and PRs for content related to a topic
4
+ argument-hint: <search-topic>
5
+ ---
6
+
7
+ # GitHub Search
8
+
9
+ Use the `github-searcher` subagent to search GitHub for content related to: **$ARGUMENTS**
10
+
11
+ Call the Task tool with:
12
+ - `subagent_type: "github-searcher"`
13
+ - `mode: "bypassPermissions"`
14
+ - `prompt`: the search topic
15
+
16
+ Report the results back to the user exactly as returned by the agent.
@@ -85,8 +85,6 @@ src/data_designer/_version.py
85
85
  # Local scratch space
86
86
  .scratch/
87
87
 
88
- .claude/
89
-
90
88
  docs/notebooks/
91
89
  docs/notebook_source/*.ipynb
92
90
  docs/notebook_source/*.csv
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer
3
- Version: 0.3.6
3
+ Version: 0.3.8
4
4
  Summary: General framework for synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -0,0 +1,6 @@
1
+ # Run Config
2
+
3
+ The `run_config` module defines runtime settings that control dataset generation behavior,
4
+ including early shutdown thresholds, batch sizing, and non-inference worker concurrency.
5
+
6
+ ::: data_designer.config.run_config
@@ -6,6 +6,9 @@ Each recipe is a self-contained example that can be run independently.
6
6
  !!! question "New to Data Designer?"
7
7
  Recipes provide working code for specific use cases without detailed explanations. If you're learning Data Designer for the first time, we recommend starting with our [tutorial notebooks](../../notebooks/), which offer step-by-step guidance and explain core concepts. Once you're familiar with the basics, return here for practical, ready-to-use implementations.
8
8
 
9
+ !!! tip "Prerequisite"
10
+ These recipes use the OpenAI model provider by default. Ensure your OpenAI model provider has been set up using the Data Designer CLI before running a recipe.
11
+
9
12
  <div class="grid cards" markdown>
10
13
 
11
14
  - :material-snake:{ .lg .middle } **Text to Python**
@@ -43,6 +43,7 @@ nav:
43
43
  - column_configs: code_reference/column_configs.md
44
44
  - config_builder: code_reference/config_builder.md
45
45
  - data_designer_config: code_reference/data_designer_config.md
46
+ - run_config: code_reference/run_config.md
46
47
  - sampler_params: code_reference/sampler_params.md
47
48
  - validator_params: code_reference/validator_params.md
48
49
  - processors: code_reference/processors.md
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '0.3.8rc3.dev39+gb6e375d29.d20260126'
32
+ __version_tuple__ = version_tuple = (0, 3, 8, 'rc3', 'dev39', 'gb6e375d29.d20260126')
33
+
34
+ __commit_id__ = commit_id = None
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '0.3.8rc3.dev39+gb6e375d29.d20260126'
32
+ __version_tuple__ = version_tuple = (0, 3, 8, 'rc3', 'dev39', 'gb6e375d29.d20260126')
33
+
34
+ __commit_id__ = commit_id = None
@@ -0,0 +1,34 @@
1
+ # file generated by setuptools-scm
2
+ # don't change, don't track in version control
3
+
4
+ __all__ = [
5
+ "__version__",
6
+ "__version_tuple__",
7
+ "version",
8
+ "version_tuple",
9
+ "__commit_id__",
10
+ "commit_id",
11
+ ]
12
+
13
+ TYPE_CHECKING = False
14
+ if TYPE_CHECKING:
15
+ from typing import Tuple
16
+ from typing import Union
17
+
18
+ VERSION_TUPLE = Tuple[Union[int, str], ...]
19
+ COMMIT_ID = Union[str, None]
20
+ else:
21
+ VERSION_TUPLE = object
22
+ COMMIT_ID = object
23
+
24
+ version: str
25
+ __version__: str
26
+ __version_tuple__: VERSION_TUPLE
27
+ version_tuple: VERSION_TUPLE
28
+ commit_id: COMMIT_ID
29
+ __commit_id__: COMMIT_ID
30
+
31
+ __version__ = version = '0.3.8rc3.dev39+gb6e375d29.d20260126'
32
+ __version_tuple__ = version_tuple = (0, 3, 8, 'rc3', 'dev39', 'gb6e375d29.d20260126')
33
+
34
+ __commit_id__ = commit_id = None
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.3.6'
32
- __version_tuple__ = version_tuple = (0, 3, 6)
31
+ __version__ = version = '0.3.8'
32
+ __version_tuple__ = version_tuple = (0, 3, 8)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -71,12 +71,20 @@ def get_default_model_configs() -> list[ModelConfig]:
71
71
  return []
72
72
 
73
73
 
74
- def get_default_model_providers_missing_api_keys() -> list[str]:
75
- missing_api_keys = []
76
- for predefined_provider in PREDEFINED_PROVIDERS:
77
- if os.environ.get(predefined_provider["api_key"]) is None:
78
- missing_api_keys.append(predefined_provider["api_key"])
79
- return missing_api_keys
74
def _is_api_key_missing(api_key: str | None) -> bool:
    """Return True when ``api_key`` cannot currently resolve to a usable key.

    Args:
        api_key: Either ``None``, the name of an environment variable that
            holds the key, or the literal key value itself.

    Returns:
        True if no key is configured, or if the value looks like an
        environment variable name that is not set; False otherwise.
    """
    if api_key is None:
        # No API key specified at all.
        return True
    # Heuristic: an all-uppercase value containing an underscore is treated as
    # the NAME of an environment variable. Anything else is assumed to be an
    # actual API key value and is therefore considered present.
    looks_like_env_var = api_key.isupper() and "_" in api_key
    # Only an unset variable counts as missing; a variable that is set to an
    # empty string is still reported as present.
    return looks_like_env_var and os.environ.get(api_key) is None


def get_providers_with_missing_api_keys(providers: list[ModelProvider]) -> list[ModelProvider]:
    """Return the providers whose API key is not available.

    A provider is reported when its ``api_key`` is ``None``, or when the
    ``api_key`` looks like an environment variable name (all uppercase and
    containing an underscore) and that variable is not set. Providers whose
    ``api_key`` is a literal key value are never reported.

    Args:
        providers: Providers to inspect; only their ``api_key`` attribute is read.

    Returns:
        A new list with the affected providers, in their original order.
    """
    return [provider for provider in providers if _is_api_key_missing(provider.api_key)]
80
88
 
81
89
 
82
90
  def get_default_providers() -> list[ModelProvider]:
@@ -26,6 +26,8 @@ class RunConfig(ConfigBase):
26
26
  buffer_size: Number of records to process in each batch during dataset generation.
27
27
  A batch is processed end-to-end (column generation, post-batch processors, and writing the batch
28
28
  to artifact storage) before moving on to the next batch. Must be > 0. Default is 1000.
29
+ non_inference_max_parallel_workers: Maximum number of worker threads used for non-inference
30
+ cell-by-cell generators. Must be >= 1. Default is 4.
29
31
  max_conversation_restarts: Maximum number of full conversation restarts permitted when
30
32
  generation tasks call `ModelFacade.generate(...)`. Must be >= 0. Default is 5.
31
33
  max_conversation_correction_steps: Maximum number of correction rounds permitted within a
@@ -37,6 +39,7 @@ class RunConfig(ConfigBase):
37
39
  shutdown_error_rate: float = Field(default=0.5, ge=0.0, le=1.0)
38
40
  shutdown_error_window: int = Field(default=10, ge=0)
39
41
  buffer_size: int = Field(default=1000, gt=0)
42
+ non_inference_max_parallel_workers: int = Field(default=4, ge=1)
40
43
  max_conversation_restarts: int = Field(default=5, ge=0)
41
44
  max_conversation_correction_steps: int = Field(default=0, ge=0)
42
45
 
@@ -353,9 +353,11 @@ PREDEFINED_PROVIDERS_MODEL_MAP = {
353
353
  NEMOTRON_PERSONAS_DATASET_SIZES = {
354
354
  "en_US": "1.24 GB",
355
355
  "en_IN": "2.39 GB",
356
+ "en_SG": "0.30 GB",
356
357
  "hi_Deva_IN": "4.14 GB",
357
358
  "hi_Latn_IN": "2.7 GB",
358
359
  "ja_JP": "1.69 GB",
360
+ "pt_BR": "2.33 GB",
359
361
  }
360
362
 
361
363
  LOCALES_WITH_MANAGED_DATASETS = list[str](NEMOTRON_PERSONAS_DATASET_SIZES.keys())
@@ -55,6 +55,9 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
55
55
  )
56
56
 
57
57
  def generate(self, data: dict) -> dict:
58
+ # Deserialize input data from previous columns so Jinja2 templates can access nested fields
59
+ # Example: If prev column stored '{"key": "value"}', templates can use {{ prev_column.key }}
60
+ # Note: This creates a new dict and doesn't mutate the original `data` argument
58
61
  deserialized_record = deserialize_json_values(data)
59
62
 
60
63
  multi_modal_context = None
@@ -81,13 +84,18 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
81
84
  purpose=f"running generation for column '{self.config.name}'",
82
85
  )
83
86
 
84
- data[self.config.name] = deserialize_json_values(self.response_recipe.serialize_output(response))
87
+ serialized_output = self.response_recipe.serialize_output(response)
88
+ data[self.config.name] = self._process_serialized_output(serialized_output)
85
89
 
86
90
  if reasoning_trace:
87
91
  data[self.config.name + REASONING_TRACE_COLUMN_POSTFIX] = reasoning_trace
88
92
 
89
93
  return data
90
94
 
95
def _process_serialized_output(self, serialized_output: str) -> str | dict | list:
    """Process the serialized output from the model before it is stored in the record.

    The base implementation returns the serialized string unchanged.
    Subclasses can override this hook to customize deserialization.

    Args:
        serialized_output: The serialized form of the model response.

    Returns:
        The value to store for this column; here, ``serialized_output`` as-is.
    """
    return serialized_output
98
+
91
99
 
92
100
  class LLMTextCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMTextColumnConfig]): ...
93
101
 
@@ -95,7 +103,11 @@ class LLMTextCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMTextColumnC
95
103
  class LLMCodeCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMCodeColumnConfig]): ...
96
104
 
97
105
 
98
- class LLMStructuredCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMStructuredColumnConfig]): ...
106
class LLMStructuredCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMStructuredColumnConfig]):
    """Chat-completion column generator parameterized with ``LLMStructuredColumnConfig``.

    Overrides the output-processing hook so the serialized model output is
    passed through ``deserialize_json_values`` instead of being kept as a string.
    """

    def _process_serialized_output(self, serialized_output: str) -> dict | list:
        """Return ``serialized_output`` after passing it through ``deserialize_json_values``."""
        return deserialize_json_values(serialized_output)
99
109
 
100
110
 
101
- class LLMJudgeCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMJudgeColumnConfig]): ...
111
class LLMJudgeCellGenerator(ColumnGeneratorWithModelChatCompletion[LLMJudgeColumnConfig]):
    """Chat-completion column generator parameterized with ``LLMJudgeColumnConfig``.

    Overrides the output-processing hook so the serialized model output is
    passed through ``deserialize_json_values`` instead of being kept as a string.
    """

    def _process_serialized_output(self, serialized_output: str) -> dict | list:
        """Return ``serialized_output`` after passing it through ``deserialize_json_values``."""
        return deserialize_json_values(serialized_output)
@@ -5,9 +5,10 @@ from __future__ import annotations
5
5
 
6
6
  import logging
7
7
 
8
- from data_designer.config.column_configs import SeedDatasetColumnConfig
8
+ from data_designer.config.column_configs import SamplerColumnConfig, SeedDatasetColumnConfig
9
9
  from data_designer.config.data_designer_config import DataDesignerConfig
10
10
  from data_designer.config.errors import InvalidConfigError
11
+ from data_designer.config.sampler_params import UUIDSamplerParams
11
12
  from data_designer.engine.resources.resource_provider import ResourceProvider
12
13
  from data_designer.engine.resources.seed_reader import SeedReader
13
14
  from data_designer.engine.validation import ViolationLevel, rich_print_violations, validate_data_designer_config
@@ -17,6 +18,7 @@ logger = logging.getLogger(__name__)
17
18
 
18
19
def compile_data_designer_config(config: DataDesignerConfig, resource_provider: ResourceProvider) -> DataDesignerConfig:
    """Prepare a DataDesigner config for generation, modifying it in place.

    Runs three steps in order:
      1. Resolve seed columns via ``resource_provider.seed_reader`` and add them to the config.
      2. Add the internal row-id sampler column if the config has neither a
         sampler column nor a seed dataset column.
      3. Validate the resulting config.

    Args:
        config: The configuration to compile; its ``columns`` may be extended.
        resource_provider: Supplies the ``seed_reader`` used to resolve seed columns.

    Returns:
        The same ``config`` object that was passed in.
    """
    # Seed columns must be added first: the row-id step decides whether to act
    # by looking for seed dataset columns already present in config.columns.
    _resolve_and_add_seed_columns(config, resource_provider.seed_reader)
    _add_internal_row_id_column_if_needed(config)
    # Validation runs last so it sees every automatically added column.
    _validate(config)
    return config
22
24
 
@@ -41,6 +43,35 @@ def _resolve_and_add_seed_columns(config: DataDesignerConfig, seed_reader: SeedR
41
43
  config.columns.extend([SeedDatasetColumnConfig(name=col_name) for col_name in seed_col_names])
42
44
 
43
45
 
46
def _add_internal_row_id_column_if_needed(config: DataDesignerConfig) -> None:
    """Ensure the config contains a column that lets generation start.

    Generation needs a starting point, which is either:
        - a sampler column (able to generate data from scratch), or
        - a seed dataset column (providing initial data rows).

    When the config has neither, a UUID sampler column named
    '_internal_row_id' is inserted at the front of ``config.columns`` with
    ``drop=True``.

    Args:
        config: The DataDesigner configuration; modified in place when the
            column has to be added.
    """
    starting_point_types = (SamplerColumnConfig, SeedDatasetColumnConfig)
    if any(isinstance(column, starting_point_types) for column in config.columns):
        # A sampler or seed dataset column already exists; nothing to add.
        return

    logger.warning(
        "🔔 No sampler column or seed dataset detected. Adding UUID column '_internal_row_id' (marked for drop) to enable generation."
    )
    config.columns.insert(
        0,
        SamplerColumnConfig(
            name="_internal_row_id",
            sampler_type="uuid",
            params=UUIDSamplerParams(),
            drop=True,
        ),
    )
73
+
74
+
44
75
  def _validate(config: DataDesignerConfig) -> None:
45
76
  allowed_references = _get_allowed_references(config)
46
77
  violations = validate_data_designer_config(