data-designer 0.3.8rc2__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (449)
  1. {data_designer-0.3.8rc2 → data_designer-0.4.0}/.gitignore +6 -2
  2. {data_designer-0.3.8rc2 → data_designer-0.4.0}/PKG-INFO +10 -42
  3. {data_designer-0.3.8rc2 → data_designer-0.4.0}/README.md +7 -14
  4. data_designer-0.4.0/dev-tools/hatch_build.py +31 -0
  5. data_designer-0.4.0/pyproject.toml +58 -0
  6. data_designer-0.4.0/src/data_designer/cli/commands/__init__.py +2 -0
  7. data_designer-0.4.0/src/data_designer/interface/__init__.py +22 -0
  8. {data_designer-0.3.8rc2/src/data_designer → data_designer-0.4.0/src/data_designer/interface}/_version.py +2 -2
  9. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/interface/data_designer.py +1 -7
  10. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/controllers/test_download_controller.py +7 -3
  11. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/repositories/test_persona_repository.py +3 -3
  12. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/services/test_download_service.py +3 -1
  13. data_designer-0.4.0/tests/conftest.py +4 -0
  14. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/interface/test_data_designer.py +7 -4
  15. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/test_import_perf.py +2 -1
  16. data_designer-0.3.8rc2/.claude/agents/docs-searcher.md +0 -74
  17. data_designer-0.3.8rc2/.claude/agents/github-searcher.md +0 -81
  18. data_designer-0.3.8rc2/.claude/settings.json +0 -1
  19. data_designer-0.3.8rc2/.claude/settings.local.json +0 -22
  20. data_designer-0.3.8rc2/.claude/skills/new-sdg/SKILL.md +0 -117
  21. data_designer-0.3.8rc2/.claude/skills/search-docs/SKILL.md +0 -16
  22. data_designer-0.3.8rc2/.claude/skills/search-github/SKILL.md +0 -16
  23. data_designer-0.3.8rc2/.github/ISSUE_TEMPLATE/bug-report.yml +0 -51
  24. data_designer-0.3.8rc2/.github/ISSUE_TEMPLATE/config.yml +0 -5
  25. data_designer-0.3.8rc2/.github/ISSUE_TEMPLATE/development-task.yml +0 -32
  26. data_designer-0.3.8rc2/.github/ISSUE_TEMPLATE/feature-request.yml +0 -45
  27. data_designer-0.3.8rc2/.github/workflows/build-docs.yml +0 -68
  28. data_designer-0.3.8rc2/.github/workflows/build-notebooks.yml +0 -30
  29. data_designer-0.3.8rc2/.github/workflows/check-colab-notebooks.yml +0 -55
  30. data_designer-0.3.8rc2/.github/workflows/ci.yml +0 -113
  31. data_designer-0.3.8rc2/.github/workflows/dco-assistant.yml +0 -44
  32. data_designer-0.3.8rc2/.github/workflows/pack-tutorials.yml +0 -77
  33. data_designer-0.3.8rc2/.github/workflows/semantic-pull-requests.yml +0 -26
  34. data_designer-0.3.8rc2/.pre-commit-config.yaml +0 -25
  35. data_designer-0.3.8rc2/AGENTS.md +0 -558
  36. data_designer-0.3.8rc2/CLAUDE.md +0 -3
  37. data_designer-0.3.8rc2/CODE_OF_CONDUCT.md +0 -76
  38. data_designer-0.3.8rc2/CONTRIBUTING.md +0 -236
  39. data_designer-0.3.8rc2/DCO +0 -34
  40. data_designer-0.3.8rc2/LICENSE +0 -201
  41. data_designer-0.3.8rc2/Makefile +0 -212
  42. data_designer-0.3.8rc2/VERSIONING.md +0 -90
  43. data_designer-0.3.8rc2/docs/CONTRIBUTING.md +0 -1
  44. data_designer-0.3.8rc2/docs/assets/palette-favicon.png +0 -0
  45. data_designer-0.3.8rc2/docs/assets/recipes/code_generation/text_to_python.py +0 -318
  46. data_designer-0.3.8rc2/docs/assets/recipes/code_generation/text_to_sql.py +0 -323
  47. data_designer-0.3.8rc2/docs/assets/recipes/qa_and_chat/multi_turn_chat.py +0 -204
  48. data_designer-0.3.8rc2/docs/assets/recipes/qa_and_chat/product_info_qa.py +0 -224
  49. data_designer-0.3.8rc2/docs/code_reference/analysis.md +0 -31
  50. data_designer-0.3.8rc2/docs/code_reference/column_configs.md +0 -8
  51. data_designer-0.3.8rc2/docs/code_reference/config_builder.md +0 -10
  52. data_designer-0.3.8rc2/docs/code_reference/data_designer_config.md +0 -7
  53. data_designer-0.3.8rc2/docs/code_reference/models.md +0 -11
  54. data_designer-0.3.8rc2/docs/code_reference/processors.md +0 -6
  55. data_designer-0.3.8rc2/docs/code_reference/run_config.md +0 -6
  56. data_designer-0.3.8rc2/docs/code_reference/sampler_params.md +0 -12
  57. data_designer-0.3.8rc2/docs/code_reference/validator_params.md +0 -6
  58. data_designer-0.3.8rc2/docs/colab_notebooks/1-the-basics.ipynb +0 -537
  59. data_designer-0.3.8rc2/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +0 -567
  60. data_designer-0.3.8rc2/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +0 -466
  61. data_designer-0.3.8rc2/docs/colab_notebooks/4-providing-images-as-context.ipynb +0 -530
  62. data_designer-0.3.8rc2/docs/concepts/columns.md +0 -152
  63. data_designer-0.3.8rc2/docs/concepts/models/configure-model-settings-with-the-cli.md +0 -136
  64. data_designer-0.3.8rc2/docs/concepts/models/custom-model-settings.md +0 -229
  65. data_designer-0.3.8rc2/docs/concepts/models/default-model-settings.md +0 -124
  66. data_designer-0.3.8rc2/docs/concepts/models/inference-parameters.md +0 -145
  67. data_designer-0.3.8rc2/docs/concepts/models/model-configs.md +0 -123
  68. data_designer-0.3.8rc2/docs/concepts/models/model-providers.md +0 -55
  69. data_designer-0.3.8rc2/docs/concepts/person_sampling.md +0 -217
  70. data_designer-0.3.8rc2/docs/concepts/processors.md +0 -153
  71. data_designer-0.3.8rc2/docs/concepts/validators.md +0 -340
  72. data_designer-0.3.8rc2/docs/css/mkdocstrings.css +0 -80
  73. data_designer-0.3.8rc2/docs/css/style.css +0 -179
  74. data_designer-0.3.8rc2/docs/images/top-models.png +0 -0
  75. data_designer-0.3.8rc2/docs/index.md +0 -48
  76. data_designer-0.3.8rc2/docs/installation.md +0 -29
  77. data_designer-0.3.8rc2/docs/js/toc-toggle.js +0 -25
  78. data_designer-0.3.8rc2/docs/notebook_source/1-the-basics.py +0 -341
  79. data_designer-0.3.8rc2/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py +0 -383
  80. data_designer-0.3.8rc2/docs/notebook_source/3-seeding-with-a-dataset.py +0 -292
  81. data_designer-0.3.8rc2/docs/notebook_source/4-providing-images-as-context.py +0 -311
  82. data_designer-0.3.8rc2/docs/notebook_source/README.md +0 -23
  83. data_designer-0.3.8rc2/docs/notebook_source/_README.md +0 -124
  84. data_designer-0.3.8rc2/docs/notebook_source/_pyproject.toml +0 -9
  85. data_designer-0.3.8rc2/docs/overrides/main.html +0 -31
  86. data_designer-0.3.8rc2/docs/plugins/available.md +0 -3
  87. data_designer-0.3.8rc2/docs/plugins/example.md +0 -280
  88. data_designer-0.3.8rc2/docs/plugins/overview.md +0 -45
  89. data_designer-0.3.8rc2/docs/quick-start.md +0 -88
  90. data_designer-0.3.8rc2/docs/recipes/cards.md +0 -84
  91. data_designer-0.3.8rc2/docs/recipes/code_generation/text_to_python.md +0 -5
  92. data_designer-0.3.8rc2/docs/recipes/code_generation/text_to_sql.md +0 -7
  93. data_designer-0.3.8rc2/docs/recipes/qa_and_chat/multi_turn_chat.md +0 -5
  94. data_designer-0.3.8rc2/docs/recipes/qa_and_chat/product_info_qa.md +0 -5
  95. data_designer-0.3.8rc2/docs/scripts/generate_colab_notebooks.py +0 -186
  96. data_designer-0.3.8rc2/mkdocs.yml +0 -145
  97. data_designer-0.3.8rc2/packages/data-designer/src/data_designer/interface/_version.py +0 -34
  98. data_designer-0.3.8rc2/packages/data-designer-config/src/data_designer/config/_version.py +0 -34
  99. data_designer-0.3.8rc2/packages/data-designer-engine/src/data_designer/engine/_version.py +0 -34
  100. data_designer-0.3.8rc2/pyproject.toml +0 -140
  101. data_designer-0.3.8rc2/scripts/test_license_headers.py +0 -899
  102. data_designer-0.3.8rc2/scripts/update_license_headers.py +0 -373
  103. data_designer-0.3.8rc2/src/data_designer/__init__.py +0 -17
  104. data_designer-0.3.8rc2/src/data_designer/cli/commands/__init__.py +0 -2
  105. data_designer-0.3.8rc2/src/data_designer/config/__init__.py +0 -2
  106. data_designer-0.3.8rc2/src/data_designer/config/analysis/__init__.py +0 -2
  107. data_designer-0.3.8rc2/src/data_designer/config/analysis/column_profilers.py +0 -159
  108. data_designer-0.3.8rc2/src/data_designer/config/analysis/column_statistics.py +0 -421
  109. data_designer-0.3.8rc2/src/data_designer/config/analysis/dataset_profiler.py +0 -84
  110. data_designer-0.3.8rc2/src/data_designer/config/analysis/utils/errors.py +0 -10
  111. data_designer-0.3.8rc2/src/data_designer/config/analysis/utils/reporting.py +0 -192
  112. data_designer-0.3.8rc2/src/data_designer/config/base.py +0 -69
  113. data_designer-0.3.8rc2/src/data_designer/config/column_configs.py +0 -470
  114. data_designer-0.3.8rc2/src/data_designer/config/column_types.py +0 -141
  115. data_designer-0.3.8rc2/src/data_designer/config/config_builder.py +0 -595
  116. data_designer-0.3.8rc2/src/data_designer/config/data_designer_config.py +0 -40
  117. data_designer-0.3.8rc2/src/data_designer/config/dataset_builders.py +0 -13
  118. data_designer-0.3.8rc2/src/data_designer/config/dataset_metadata.py +0 -18
  119. data_designer-0.3.8rc2/src/data_designer/config/default_model_settings.py +0 -129
  120. data_designer-0.3.8rc2/src/data_designer/config/errors.py +0 -24
  121. data_designer-0.3.8rc2/src/data_designer/config/exports.py +0 -145
  122. data_designer-0.3.8rc2/src/data_designer/config/interface.py +0 -55
  123. data_designer-0.3.8rc2/src/data_designer/config/models.py +0 -455
  124. data_designer-0.3.8rc2/src/data_designer/config/preview_results.py +0 -41
  125. data_designer-0.3.8rc2/src/data_designer/config/processors.py +0 -148
  126. data_designer-0.3.8rc2/src/data_designer/config/run_config.py +0 -51
  127. data_designer-0.3.8rc2/src/data_designer/config/sampler_constraints.py +0 -52
  128. data_designer-0.3.8rc2/src/data_designer/config/sampler_params.py +0 -639
  129. data_designer-0.3.8rc2/src/data_designer/config/seed.py +0 -116
  130. data_designer-0.3.8rc2/src/data_designer/config/seed_source.py +0 -84
  131. data_designer-0.3.8rc2/src/data_designer/config/seed_source_types.py +0 -19
  132. data_designer-0.3.8rc2/src/data_designer/config/utils/code_lang.py +0 -82
  133. data_designer-0.3.8rc2/src/data_designer/config/utils/constants.py +0 -363
  134. data_designer-0.3.8rc2/src/data_designer/config/utils/errors.py +0 -21
  135. data_designer-0.3.8rc2/src/data_designer/config/utils/info.py +0 -94
  136. data_designer-0.3.8rc2/src/data_designer/config/utils/io_helpers.py +0 -258
  137. data_designer-0.3.8rc2/src/data_designer/config/utils/misc.py +0 -78
  138. data_designer-0.3.8rc2/src/data_designer/config/utils/numerical_helpers.py +0 -30
  139. data_designer-0.3.8rc2/src/data_designer/config/utils/type_helpers.py +0 -106
  140. data_designer-0.3.8rc2/src/data_designer/config/utils/visualization.py +0 -482
  141. data_designer-0.3.8rc2/src/data_designer/config/validator_params.py +0 -94
  142. data_designer-0.3.8rc2/src/data_designer/engine/__init__.py +0 -2
  143. data_designer-0.3.8rc2/src/data_designer/engine/analysis/column_profilers/base.py +0 -49
  144. data_designer-0.3.8rc2/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -153
  145. data_designer-0.3.8rc2/src/data_designer/engine/analysis/column_profilers/registry.py +0 -22
  146. data_designer-0.3.8rc2/src/data_designer/engine/analysis/column_statistics.py +0 -145
  147. data_designer-0.3.8rc2/src/data_designer/engine/analysis/dataset_profiler.py +0 -149
  148. data_designer-0.3.8rc2/src/data_designer/engine/analysis/errors.py +0 -9
  149. data_designer-0.3.8rc2/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -234
  150. data_designer-0.3.8rc2/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -132
  151. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/__init__.py +0 -2
  152. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/generators/__init__.py +0 -2
  153. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/generators/base.py +0 -122
  154. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/generators/embedding.py +0 -35
  155. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/generators/expression.py +0 -55
  156. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/generators/llm_completion.py +0 -113
  157. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/generators/samplers.py +0 -69
  158. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -144
  159. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/generators/validation.py +0 -140
  160. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/registry.py +0 -60
  161. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/utils/errors.py +0 -15
  162. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/utils/generator_classification.py +0 -43
  163. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -58
  164. data_designer-0.3.8rc2/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -100
  165. data_designer-0.3.8rc2/src/data_designer/engine/compiler.py +0 -97
  166. data_designer-0.3.8rc2/src/data_designer/engine/configurable_task.py +0 -71
  167. data_designer-0.3.8rc2/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -283
  168. data_designer-0.3.8rc2/src/data_designer/engine/dataset_builders/column_wise_builder.py +0 -335
  169. data_designer-0.3.8rc2/src/data_designer/engine/dataset_builders/errors.py +0 -15
  170. data_designer-0.3.8rc2/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -46
  171. data_designer-0.3.8rc2/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -2
  172. data_designer-0.3.8rc2/src/data_designer/engine/dataset_builders/utils/concurrency.py +0 -212
  173. data_designer-0.3.8rc2/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -62
  174. data_designer-0.3.8rc2/src/data_designer/engine/dataset_builders/utils/dag.py +0 -62
  175. data_designer-0.3.8rc2/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -200
  176. data_designer-0.3.8rc2/src/data_designer/engine/dataset_builders/utils/errors.py +0 -15
  177. data_designer-0.3.8rc2/src/data_designer/engine/errors.py +0 -51
  178. data_designer-0.3.8rc2/src/data_designer/engine/model_provider.py +0 -77
  179. data_designer-0.3.8rc2/src/data_designer/engine/models/__init__.py +0 -2
  180. data_designer-0.3.8rc2/src/data_designer/engine/models/errors.py +0 -300
  181. data_designer-0.3.8rc2/src/data_designer/engine/models/facade.py +0 -287
  182. data_designer-0.3.8rc2/src/data_designer/engine/models/factory.py +0 -42
  183. data_designer-0.3.8rc2/src/data_designer/engine/models/litellm_overrides.py +0 -179
  184. data_designer-0.3.8rc2/src/data_designer/engine/models/parsers/__init__.py +0 -2
  185. data_designer-0.3.8rc2/src/data_designer/engine/models/parsers/errors.py +0 -34
  186. data_designer-0.3.8rc2/src/data_designer/engine/models/parsers/parser.py +0 -235
  187. data_designer-0.3.8rc2/src/data_designer/engine/models/parsers/postprocessors.py +0 -93
  188. data_designer-0.3.8rc2/src/data_designer/engine/models/parsers/tag_parsers.py +0 -62
  189. data_designer-0.3.8rc2/src/data_designer/engine/models/parsers/types.py +0 -84
  190. data_designer-0.3.8rc2/src/data_designer/engine/models/recipes/base.py +0 -81
  191. data_designer-0.3.8rc2/src/data_designer/engine/models/recipes/response_recipes.py +0 -293
  192. data_designer-0.3.8rc2/src/data_designer/engine/models/registry.py +0 -146
  193. data_designer-0.3.8rc2/src/data_designer/engine/models/telemetry.py +0 -359
  194. data_designer-0.3.8rc2/src/data_designer/engine/models/usage.py +0 -73
  195. data_designer-0.3.8rc2/src/data_designer/engine/models/utils.py +0 -38
  196. data_designer-0.3.8rc2/src/data_designer/engine/processing/ginja/__init__.py +0 -2
  197. data_designer-0.3.8rc2/src/data_designer/engine/processing/ginja/ast.py +0 -65
  198. data_designer-0.3.8rc2/src/data_designer/engine/processing/ginja/environment.py +0 -463
  199. data_designer-0.3.8rc2/src/data_designer/engine/processing/ginja/exceptions.py +0 -56
  200. data_designer-0.3.8rc2/src/data_designer/engine/processing/ginja/record.py +0 -32
  201. data_designer-0.3.8rc2/src/data_designer/engine/processing/gsonschema/__init__.py +0 -2
  202. data_designer-0.3.8rc2/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -15
  203. data_designer-0.3.8rc2/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -83
  204. data_designer-0.3.8rc2/src/data_designer/engine/processing/gsonschema/types.py +0 -10
  205. data_designer-0.3.8rc2/src/data_designer/engine/processing/gsonschema/validators.py +0 -202
  206. data_designer-0.3.8rc2/src/data_designer/engine/processing/processors/base.py +0 -13
  207. data_designer-0.3.8rc2/src/data_designer/engine/processing/processors/drop_columns.py +0 -42
  208. data_designer-0.3.8rc2/src/data_designer/engine/processing/processors/registry.py +0 -25
  209. data_designer-0.3.8rc2/src/data_designer/engine/processing/processors/schema_transform.py +0 -49
  210. data_designer-0.3.8rc2/src/data_designer/engine/processing/utils.py +0 -169
  211. data_designer-0.3.8rc2/src/data_designer/engine/registry/base.py +0 -99
  212. data_designer-0.3.8rc2/src/data_designer/engine/registry/data_designer_registry.py +0 -39
  213. data_designer-0.3.8rc2/src/data_designer/engine/registry/errors.py +0 -12
  214. data_designer-0.3.8rc2/src/data_designer/engine/resources/managed_dataset_generator.py +0 -39
  215. data_designer-0.3.8rc2/src/data_designer/engine/resources/managed_dataset_repository.py +0 -197
  216. data_designer-0.3.8rc2/src/data_designer/engine/resources/managed_storage.py +0 -65
  217. data_designer-0.3.8rc2/src/data_designer/engine/resources/resource_provider.py +0 -77
  218. data_designer-0.3.8rc2/src/data_designer/engine/resources/seed_reader.py +0 -154
  219. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/column.py +0 -91
  220. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/constraints.py +0 -100
  221. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -217
  222. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -12
  223. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -347
  224. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -2
  225. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
  226. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -86
  227. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -171
  228. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/entities/errors.py +0 -10
  229. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -102
  230. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/entities/person.py +0 -144
  231. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -128
  232. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/errors.py +0 -26
  233. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/generator.py +0 -122
  234. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -64
  235. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/people_gen.py +0 -199
  236. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/person_constants.py +0 -56
  237. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/schema.py +0 -147
  238. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/schema_builder.py +0 -61
  239. data_designer-0.3.8rc2/src/data_designer/engine/sampling_gen/utils.py +0 -46
  240. data_designer-0.3.8rc2/src/data_designer/engine/secret_resolver.py +0 -82
  241. data_designer-0.3.8rc2/src/data_designer/engine/validation.py +0 -367
  242. data_designer-0.3.8rc2/src/data_designer/engine/validators/__init__.py +0 -19
  243. data_designer-0.3.8rc2/src/data_designer/engine/validators/base.py +0 -38
  244. data_designer-0.3.8rc2/src/data_designer/engine/validators/local_callable.py +0 -39
  245. data_designer-0.3.8rc2/src/data_designer/engine/validators/python.py +0 -254
  246. data_designer-0.3.8rc2/src/data_designer/engine/validators/remote.py +0 -89
  247. data_designer-0.3.8rc2/src/data_designer/engine/validators/sql.py +0 -65
  248. data_designer-0.3.8rc2/src/data_designer/errors.py +0 -7
  249. data_designer-0.3.8rc2/src/data_designer/essentials/__init__.py +0 -33
  250. data_designer-0.3.8rc2/src/data_designer/interface/__init__.py +0 -2
  251. data_designer-0.3.8rc2/src/data_designer/lazy_heavy_imports.py +0 -54
  252. data_designer-0.3.8rc2/src/data_designer/logging.py +0 -163
  253. data_designer-0.3.8rc2/src/data_designer/plugin_manager.py +0 -78
  254. data_designer-0.3.8rc2/src/data_designer/plugins/__init__.py +0 -8
  255. data_designer-0.3.8rc2/src/data_designer/plugins/errors.py +0 -15
  256. data_designer-0.3.8rc2/src/data_designer/plugins/plugin.py +0 -141
  257. data_designer-0.3.8rc2/src/data_designer/plugins/registry.py +0 -88
  258. data_designer-0.3.8rc2/src/data_designer/plugins/testing/__init__.py +0 -10
  259. data_designer-0.3.8rc2/src/data_designer/plugins/testing/stubs.py +0 -116
  260. data_designer-0.3.8rc2/src/data_designer/plugins/testing/utils.py +0 -20
  261. data_designer-0.3.8rc2/tests/config/analysis/conftest.py +0 -82
  262. data_designer-0.3.8rc2/tests/config/analysis/test_column_statistics.py +0 -299
  263. data_designer-0.3.8rc2/tests/config/analysis/test_dataset_profiler_results.py +0 -160
  264. data_designer-0.3.8rc2/tests/config/analysis/utils/test_reporting.py +0 -292
  265. data_designer-0.3.8rc2/tests/config/test_columns.py +0 -468
  266. data_designer-0.3.8rc2/tests/config/test_config_builder.py +0 -742
  267. data_designer-0.3.8rc2/tests/config/test_data_designer_config.py +0 -29
  268. data_designer-0.3.8rc2/tests/config/test_default_model_settings.py +0 -266
  269. data_designer-0.3.8rc2/tests/config/test_models.py +0 -387
  270. data_designer-0.3.8rc2/tests/config/test_processors.py +0 -139
  271. data_designer-0.3.8rc2/tests/config/test_sampler_constraints.py +0 -25
  272. data_designer-0.3.8rc2/tests/config/test_sampler_params.py +0 -141
  273. data_designer-0.3.8rc2/tests/config/test_seed.py +0 -56
  274. data_designer-0.3.8rc2/tests/config/test_seed_source.py +0 -78
  275. data_designer-0.3.8rc2/tests/config/test_validator_params.py +0 -59
  276. data_designer-0.3.8rc2/tests/config/utils/__init__.py +0 -2
  277. data_designer-0.3.8rc2/tests/config/utils/test_code_lang.py +0 -37
  278. data_designer-0.3.8rc2/tests/config/utils/test_info.py +0 -59
  279. data_designer-0.3.8rc2/tests/config/utils/test_io_helpers.py +0 -178
  280. data_designer-0.3.8rc2/tests/config/utils/test_misc.py +0 -75
  281. data_designer-0.3.8rc2/tests/config/utils/test_type_helpers.py +0 -162
  282. data_designer-0.3.8rc2/tests/config/utils/test_visualization.py +0 -94
  283. data_designer-0.3.8rc2/tests/conftest.py +0 -324
  284. data_designer-0.3.8rc2/tests/engine/analysis/column_profilers/test_base.py +0 -54
  285. data_designer-0.3.8rc2/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -292
  286. data_designer-0.3.8rc2/tests/engine/analysis/conftest.py +0 -159
  287. data_designer-0.3.8rc2/tests/engine/analysis/test_column_statistics_calculator.py +0 -79
  288. data_designer-0.3.8rc2/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -145
  289. data_designer-0.3.8rc2/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -2929
  290. data_designer-0.3.8rc2/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -27
  291. data_designer-0.3.8rc2/tests/engine/analysis/test_dataset_profiler.py +0 -130
  292. data_designer-0.3.8rc2/tests/engine/analysis/test_errors.py +0 -59
  293. data_designer-0.3.8rc2/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -357
  294. data_designer-0.3.8rc2/tests/engine/analysis/utils/test_judge_score_processing.py +0 -171
  295. data_designer-0.3.8rc2/tests/engine/column_generators/generators/__init__.py +0 -2
  296. data_designer-0.3.8rc2/tests/engine/column_generators/generators/test_column_generator_base.py +0 -85
  297. data_designer-0.3.8rc2/tests/engine/column_generators/generators/test_embedding.py +0 -47
  298. data_designer-0.3.8rc2/tests/engine/column_generators/generators/test_expression.py +0 -166
  299. data_designer-0.3.8rc2/tests/engine/column_generators/generators/test_llm_completion_generators.py +0 -350
  300. data_designer-0.3.8rc2/tests/engine/column_generators/generators/test_samplers.py +0 -131
  301. data_designer-0.3.8rc2/tests/engine/column_generators/generators/test_seed_dataset.py +0 -796
  302. data_designer-0.3.8rc2/tests/engine/column_generators/generators/test_validation.py +0 -248
  303. data_designer-0.3.8rc2/tests/engine/column_generators/test_registry.py +0 -39
  304. data_designer-0.3.8rc2/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -15
  305. data_designer-0.3.8rc2/tests/engine/column_generators/utils/test_generator_classification.py +0 -32
  306. data_designer-0.3.8rc2/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -97
  307. data_designer-0.3.8rc2/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -127
  308. data_designer-0.3.8rc2/tests/engine/conftest.py +0 -61
  309. data_designer-0.3.8rc2/tests/engine/dataset_builders/test_artifact_storage.py +0 -362
  310. data_designer-0.3.8rc2/tests/engine/dataset_builders/test_column_wise_builder.py +0 -388
  311. data_designer-0.3.8rc2/tests/engine/dataset_builders/test_multi_column_configs.py +0 -158
  312. data_designer-0.3.8rc2/tests/engine/dataset_builders/utils/test_concurrency.py +0 -577
  313. data_designer-0.3.8rc2/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -92
  314. data_designer-0.3.8rc2/tests/engine/dataset_builders/utils/test_dag.py +0 -113
  315. data_designer-0.3.8rc2/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -421
  316. data_designer-0.3.8rc2/tests/engine/models/conftest.py +0 -75
  317. data_designer-0.3.8rc2/tests/engine/models/parsers/test_parser.py +0 -175
  318. data_designer-0.3.8rc2/tests/engine/models/parsers/test_parsers_types.py +0 -96
  319. data_designer-0.3.8rc2/tests/engine/models/parsers/test_postprocessors.py +0 -122
  320. data_designer-0.3.8rc2/tests/engine/models/parsers/test_tag_parsers.py +0 -118
  321. data_designer-0.3.8rc2/tests/engine/models/recipes/test_recipe_base.py +0 -130
  322. data_designer-0.3.8rc2/tests/engine/models/recipes/test_response_recipes.py +0 -257
  323. data_designer-0.3.8rc2/tests/engine/models/stub_secrets.json +0 -3
  324. data_designer-0.3.8rc2/tests/engine/models/test_facade.py +0 -233
  325. data_designer-0.3.8rc2/tests/engine/models/test_litellm_overrides.py +0 -140
  326. data_designer-0.3.8rc2/tests/engine/models/test_model_errors.py +0 -231
  327. data_designer-0.3.8rc2/tests/engine/models/test_model_registry.py +0 -329
  328. data_designer-0.3.8rc2/tests/engine/models/test_model_utils.py +0 -36
  329. data_designer-0.3.8rc2/tests/engine/models/test_usage.py +0 -65
  330. data_designer-0.3.8rc2/tests/engine/processing/__init__.py +0 -2
  331. data_designer-0.3.8rc2/tests/engine/processing/ginja/__init__.py +0 -2
  332. data_designer-0.3.8rc2/tests/engine/processing/ginja/test_ast.py +0 -124
  333. data_designer-0.3.8rc2/tests/engine/processing/ginja/test_environment.py +0 -213
  334. data_designer-0.3.8rc2/tests/engine/processing/ginja/test_exceptions.py +0 -21
  335. data_designer-0.3.8rc2/tests/engine/processing/ginja/test_record.py +0 -25
  336. data_designer-0.3.8rc2/tests/engine/processing/gsonschema/__init__.py +0 -2
  337. data_designer-0.3.8rc2/tests/engine/processing/gsonschema/test_exceptions.py +0 -42
  338. data_designer-0.3.8rc2/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -368
  339. data_designer-0.3.8rc2/tests/engine/processing/gsonschema/test_types.py +0 -109
  340. data_designer-0.3.8rc2/tests/engine/processing/gsonschema/test_validators.py +0 -229
  341. data_designer-0.3.8rc2/tests/engine/processing/processors/__init__.py +0 -2
  342. data_designer-0.3.8rc2/tests/engine/processing/processors/test_drop_columns.py +0 -162
  343. data_designer-0.3.8rc2/tests/engine/processing/processors/test_registry.py +0 -18
  344. data_designer-0.3.8rc2/tests/engine/processing/processors/test_schema_transform.py +0 -135
  345. data_designer-0.3.8rc2/tests/engine/processing/test_utils.py +0 -141
  346. data_designer-0.3.8rc2/tests/engine/registry/__init__.py +0 -2
  347. data_designer-0.3.8rc2/tests/engine/registry/conftest.py +0 -37
  348. data_designer-0.3.8rc2/tests/engine/registry/test_base.py +0 -227
  349. data_designer-0.3.8rc2/tests/engine/registry/test_data_designer_registry.py +0 -215
  350. data_designer-0.3.8rc2/tests/engine/registry/test_errors.py +0 -63
  351. data_designer-0.3.8rc2/tests/engine/resources/__init__.py +0 -2
  352. data_designer-0.3.8rc2/tests/engine/resources/conftest.py +0 -62
  353. data_designer-0.3.8rc2/tests/engine/resources/test_managed_dataset_generator.py +0 -125
  354. data_designer-0.3.8rc2/tests/engine/resources/test_managed_dataset_repository.py +0 -221
  355. data_designer-0.3.8rc2/tests/engine/resources/test_managed_storage.py +0 -95
  356. data_designer-0.3.8rc2/tests/engine/resources/test_resource_provider.py +0 -40
  357. data_designer-0.3.8rc2/tests/engine/resources/test_seed_reader.py +0 -58
  358. data_designer-0.3.8rc2/tests/engine/sampling_gen/conftest.py +0 -306
  359. data_designer-0.3.8rc2/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -17
  360. data_designer-0.3.8rc2/tests/engine/sampling_gen/data_sources/test_sources.py +0 -369
  361. data_designer-0.3.8rc2/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -105
  362. data_designer-0.3.8rc2/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -61
  363. data_designer-0.3.8rc2/tests/engine/sampling_gen/entities/test_person.py +0 -286
  364. data_designer-0.3.8rc2/tests/engine/sampling_gen/entities/test_phone_number.py +0 -94
  365. data_designer-0.3.8rc2/tests/engine/sampling_gen/test_column.py +0 -101
  366. data_designer-0.3.8rc2/tests/engine/sampling_gen/test_constraints.py +0 -107
  367. data_designer-0.3.8rc2/tests/engine/sampling_gen/test_generator.py +0 -536
  368. data_designer-0.3.8rc2/tests/engine/sampling_gen/test_jinja_utils.py +0 -119
  369. data_designer-0.3.8rc2/tests/engine/sampling_gen/test_people_gen.py +0 -56
  370. data_designer-0.3.8rc2/tests/engine/sampling_gen/test_schema.py +0 -255
  371. data_designer-0.3.8rc2/tests/engine/sampling_gen/test_utils.py +0 -43
  372. data_designer-0.3.8rc2/tests/engine/test_compiler.py +0 -146
  373. data_designer-0.3.8rc2/tests/engine/test_configurable_task.py +0 -130
  374. data_designer-0.3.8rc2/tests/engine/test_dataset_metadata.py +0 -56
  375. data_designer-0.3.8rc2/tests/engine/test_engine_errors.py +0 -61
  376. data_designer-0.3.8rc2/tests/engine/test_model_provider.py +0 -61
  377. data_designer-0.3.8rc2/tests/engine/test_secret_resolver.py +0 -89
  378. data_designer-0.3.8rc2/tests/engine/test_validation.py +0 -300
  379. data_designer-0.3.8rc2/tests/engine/validators/test_local_callable.py +0 -40
  380. data_designer-0.3.8rc2/tests/engine/validators/test_python.py +0 -123
  381. data_designer-0.3.8rc2/tests/engine/validators/test_remote.py +0 -64
  382. data_designer-0.3.8rc2/tests/engine/validators/test_sql.py +0 -22
  383. data_designer-0.3.8rc2/tests/essentials/test_init.py +0 -326
  384. data_designer-0.3.8rc2/tests/plugins/test_plugin.py +0 -191
  385. data_designer-0.3.8rc2/tests/plugins/test_plugin_registry.py +0 -253
  386. data_designer-0.3.8rc2/tests/test_logging.py +0 -210
  387. data_designer-0.3.8rc2/tests/test_plugin_manager.py +0 -124
  388. data_designer-0.3.8rc2/tests_e2e/pyproject.toml +0 -38
  389. data_designer-0.3.8rc2/tests_e2e/src/data_designer_e2e_tests/plugins/__init__.py +0 -2
  390. data_designer-0.3.8rc2/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/__init__.py +0 -2
  391. data_designer-0.3.8rc2/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/config.py +0 -24
  392. data_designer-0.3.8rc2/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/impl.py +0 -20
  393. data_designer-0.3.8rc2/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/plugin.py +0 -10
  394. data_designer-0.3.8rc2/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/__init__.py +0 -2
  395. data_designer-0.3.8rc2/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/config.py +0 -13
  396. data_designer-0.3.8rc2/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/impl.py +0 -21
  397. data_designer-0.3.8rc2/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/plugin.py +0 -10
  398. data_designer-0.3.8rc2/tests_e2e/tests/test_e2e.py +0 -73
  399. data_designer-0.3.8rc2/tests_e2e/tests/test_seed.csv +0 -4
  400. data_designer-0.3.8rc2/uv.lock +0 -5248
  401. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/README.md +0 -0
  402. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/__init__.py +0 -0
  403. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/commands/download.py +0 -0
  404. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/commands/list.py +0 -0
  405. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/commands/models.py +0 -0
  406. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/commands/providers.py +0 -0
  407. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/commands/reset.py +0 -0
  408. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/controllers/__init__.py +0 -0
  409. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/controllers/download_controller.py +0 -0
  410. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/controllers/model_controller.py +0 -0
  411. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/controllers/provider_controller.py +0 -0
  412. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/forms/__init__.py +0 -0
  413. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/forms/builder.py +0 -0
  414. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/forms/field.py +0 -0
  415. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/forms/form.py +0 -0
  416. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/forms/model_builder.py +0 -0
  417. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/forms/provider_builder.py +0 -0
  418. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/main.py +0 -0
  419. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/repositories/__init__.py +0 -0
  420. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/repositories/base.py +0 -0
  421. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/repositories/model_repository.py +0 -0
  422. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/repositories/persona_repository.py +0 -0
  423. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/repositories/provider_repository.py +0 -0
  424. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/services/__init__.py +0 -0
  425. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/services/download_service.py +0 -0
  426. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/services/model_service.py +0 -0
  427. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/services/provider_service.py +0 -0
  428. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/ui.py +0 -0
  429. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/cli/utils.py +0 -0
  430. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/interface/errors.py +0 -0
  431. {data_designer-0.3.8rc2 → data_designer-0.4.0}/src/data_designer/interface/results.py +0 -0
  432. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/commands/test_download_command.py +0 -0
  433. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/commands/test_list_command.py +0 -0
  434. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/commands/test_models_command.py +0 -0
  435. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/commands/test_providers_command.py +0 -0
  436. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/commands/test_reset_command.py +0 -0
  437. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/conftest.py +0 -0
  438. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/controllers/test_model_controller.py +0 -0
  439. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/controllers/test_provider_controller.py +0 -0
  440. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/forms/test_field.py +0 -0
  441. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/forms/test_form.py +0 -0
  442. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/forms/test_model_builder.py +0 -0
  443. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/forms/test_provider_builder.py +0 -0
  444. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/repositories/test_model_repository.py +0 -0
  445. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/repositories/test_provider_repository.py +0 -0
  446. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/services/test_model_service.py +0 -0
  447. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/services/test_provider_service.py +0 -0
  448. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/cli/test_cli_utils.py +0 -0
  449. {data_designer-0.3.8rc2 → data_designer-0.4.0}/tests/interface/test_results.py +0 -0
@@ -79,8 +79,8 @@ venv.bak/
79
79
  *.tar.gz
80
80
  *.zip
81
81
 
82
- # Auto-generated version file
83
- src/data_designer/_version.py
82
+ # Auto-generated version files
83
+ **/_version.py
84
84
 
85
85
  # Local scratch space
86
86
  .scratch/
@@ -94,3 +94,7 @@ tests_e2e/uv.lock
94
94
 
95
95
  # Performance profiling
96
96
  perf_*.txt
97
+ NOTEPAD.md
98
+
99
+ # Build-time copy of README for data-designer package (copied from top-level during build)
100
+ packages/data-designer/README.md
@@ -1,9 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer
3
- Version: 0.3.8rc2
3
+ Version: 0.4.0
4
4
  Summary: General framework for synthetic data generation
5
5
  License-Expression: Apache-2.0
6
- License-File: LICENSE
7
6
  Classifier: Development Status :: 4 - Beta
8
7
  Classifier: Intended Audience :: Developers
9
8
  Classifier: Intended Audience :: Science/Research
@@ -15,33 +14,9 @@ Classifier: Programming Language :: Python :: 3.13
15
14
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
15
  Classifier: Topic :: Software Development
17
16
  Requires-Python: >=3.10
18
- Requires-Dist: anyascii<1,>=0.3.3
19
- Requires-Dist: duckdb<2,>=1.1.3
20
- Requires-Dist: faker<21,>=20.1.0
21
- Requires-Dist: httpx-retries<1,>=0.4.2
22
- Requires-Dist: httpx<1,>=0.27.2
23
- Requires-Dist: huggingface-hub<2,>=1.0.1
24
- Requires-Dist: jinja2<4,>=3.1.6
25
- Requires-Dist: json-repair<1,>=0.48.0
26
- Requires-Dist: jsonpath-rust-bindings<2,>=1.0
27
- Requires-Dist: litellm<1.80.12,>=1.73.6
28
- Requires-Dist: lxml<7,>=6.0.2
29
- Requires-Dist: marko<3,>=2.1.2
30
- Requires-Dist: networkx<4,>=3.0
31
- Requires-Dist: numpy<3,>=1.23.5
32
- Requires-Dist: pandas<3,>=2.3.3
17
+ Requires-Dist: data-designer-config
18
+ Requires-Dist: data-designer-engine
33
19
  Requires-Dist: prompt-toolkit<4,>=3.0.0
34
- Requires-Dist: pyarrow<20,>=19.0.1
35
- Requires-Dist: pydantic[email]<3,>=2.9.2
36
- Requires-Dist: pygments<3,>=2.19.2
37
- Requires-Dist: python-json-logger<4,>=3
38
- Requires-Dist: pyyaml<7,>=6.0.1
39
- Requires-Dist: requests<3,>=2.32.2
40
- Requires-Dist: rich<15,>=13.7.1
41
- Requires-Dist: ruff<1,>=0.14.10
42
- Requires-Dist: scipy<2,>=1.11.0
43
- Requires-Dist: sqlfluff<4,>=3.2.0
44
- Requires-Dist: tiktoken<1,>=0.8.0
45
20
  Requires-Dist: typer<1,>=0.12.0
46
21
  Description-Content-Type: text/markdown
47
22
 
@@ -104,26 +79,19 @@ export OPENROUTER_API_KEY="your-openrouter-api-key-here"
104
79
 
105
80
  ### 3. Start generating data!
106
81
  ```python
107
- from data_designer.essentials import (
108
- CategorySamplerParams,
109
- DataDesigner,
110
- DataDesignerConfigBuilder,
111
- LLMTextColumnConfig,
112
- PersonSamplerParams,
113
- SamplerColumnConfig,
114
- SamplerType,
115
- )
82
+ import data_designer.config as dd
83
+ from data_designer.interface import DataDesigner
116
84
 
117
85
  # Initialize with default settings
118
86
  data_designer = DataDesigner()
119
- config_builder = DataDesignerConfigBuilder()
87
+ config_builder = dd.DataDesignerConfigBuilder()
120
88
 
121
89
  # Add a product category
122
90
  config_builder.add_column(
123
- SamplerColumnConfig(
91
+ dd.SamplerColumnConfig(
124
92
  name="product_category",
125
- sampler_type=SamplerType.CATEGORY,
126
- params=CategorySamplerParams(
93
+ sampler_type=dd.SamplerType.CATEGORY,
94
+ params=dd.CategorySamplerParams(
127
95
  values=["Electronics", "Clothing", "Home & Kitchen", "Books"],
128
96
  ),
129
97
  )
@@ -131,7 +99,7 @@ config_builder.add_column(
131
99
 
132
100
  # Generate personalized customer reviews
133
101
  config_builder.add_column(
134
- LLMTextColumnConfig(
102
+ dd.LLMTextColumnConfig(
135
103
  name="review",
136
104
  model_alias="nvidia-text",
137
105
  prompt="Write a brief product review for a {{ product_category }} item you recently purchased.",
@@ -57,26 +57,19 @@ export OPENROUTER_API_KEY="your-openrouter-api-key-here"
57
57
 
58
58
  ### 3. Start generating data!
59
59
  ```python
60
- from data_designer.essentials import (
61
- CategorySamplerParams,
62
- DataDesigner,
63
- DataDesignerConfigBuilder,
64
- LLMTextColumnConfig,
65
- PersonSamplerParams,
66
- SamplerColumnConfig,
67
- SamplerType,
68
- )
60
+ import data_designer.config as dd
61
+ from data_designer.interface import DataDesigner
69
62
 
70
63
  # Initialize with default settings
71
64
  data_designer = DataDesigner()
72
- config_builder = DataDesignerConfigBuilder()
65
+ config_builder = dd.DataDesignerConfigBuilder()
73
66
 
74
67
  # Add a product category
75
68
  config_builder.add_column(
76
- SamplerColumnConfig(
69
+ dd.SamplerColumnConfig(
77
70
  name="product_category",
78
- sampler_type=SamplerType.CATEGORY,
79
- params=CategorySamplerParams(
71
+ sampler_type=dd.SamplerType.CATEGORY,
72
+ params=dd.CategorySamplerParams(
80
73
  values=["Electronics", "Clothing", "Home & Kitchen", "Books"],
81
74
  ),
82
75
  )
@@ -84,7 +77,7 @@ config_builder.add_column(
84
77
 
85
78
  # Generate personalized customer reviews
86
79
  config_builder.add_column(
87
- LLMTextColumnConfig(
80
+ dd.LLMTextColumnConfig(
88
81
  name="review",
89
82
  model_alias="nvidia-text",
90
83
  prompt="Write a brief product review for a {{ product_category }} item you recently purchased.",
@@ -0,0 +1,31 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Custom hatch metadata hook to sync README from root.
5
+
6
+ This hook runs during metadata resolution (before build hooks) to ensure
7
+ the README.md from the repository root is copied before hatchling validates
8
+ that the readme file exists.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import shutil
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ from hatchling.metadata.plugin.interface import MetadataHookInterface
18
+
19
+
20
+ class ReadmeSyncHook(MetadataHookInterface):
21
+ """Metadata hook that copies README.md from repository root before building."""
22
+
23
+ PLUGIN_NAME = "readme-sync"
24
+
25
+ def update(self, metadata: dict[str, Any]) -> None:
26
+ """Copy README.md from repository root to package directory."""
27
+ root_readme = Path(self.root) / ".." / ".." / "README.md"
28
+ package_readme = Path(self.root) / "README.md"
29
+
30
+ if root_readme.exists():
31
+ shutil.copy2(root_readme, package_readme)
@@ -0,0 +1,58 @@
1
+ [project]
2
+ name = "data-designer"
3
+ dynamic = ["version"]
4
+ description = "General framework for synthetic data generation"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = "Apache-2.0"
8
+
9
+ classifiers = [
10
+ "Development Status :: 4 - Beta",
11
+ "Intended Audience :: Developers",
12
+ "Intended Audience :: Science/Research",
13
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
14
+ "Topic :: Software Development",
15
+ "License :: OSI Approved :: Apache Software License",
16
+ "Programming Language :: Python :: 3.10",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Programming Language :: Python :: 3.13",
20
+ ]
21
+
22
+ dependencies = [
23
+ "data-designer-config",
24
+ "data-designer-engine",
25
+ "prompt-toolkit>=3.0.0,<4",
26
+ "typer>=0.12.0,<1",
27
+ ]
28
+
29
+ [project.scripts]
30
+ data-designer = "data_designer.cli:main"
31
+
32
+ [build-system]
33
+ requires = ["hatchling", "hatch-vcs"]
34
+ build-backend = "hatchling.build"
35
+
36
+ [tool.hatch.version]
37
+ source = "vcs"
38
+ fallback-version = "0.1.0.dev0"
39
+ raw-options = { root = "../.." }
40
+
41
+ [tool.hatch.build.hooks.vcs]
42
+ version-file = "src/data_designer/interface/_version.py"
43
+
44
+ [tool.hatch.metadata.hooks.custom]
45
+ path = "dev-tools/hatch_build.py"
46
+
47
+ [tool.hatch.build.targets.wheel]
48
+ packages = ["src/data_designer"]
49
+
50
+ [tool.ruff]
51
+ extend = "../../pyproject.toml"
52
+
53
+ [tool.uv]
54
+ package = true
55
+
56
+ [tool.uv.sources]
57
+ data-designer-config = { workspace = true }
58
+ data-designer-engine = { workspace = true }
@@ -0,0 +1,2 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,22 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from data_designer.config.default_model_settings import resolve_seed_default_model_settings
5
+ from data_designer.interface.data_designer import DataDesigner
6
+ from data_designer.interface.errors import (
7
+ DataDesignerGenerationError,
8
+ DataDesignerProfilingError,
9
+ )
10
+ from data_designer.interface.results import DatasetCreationResults
11
+ from data_designer.logging import configure_logging
12
+
13
+ configure_logging()
14
+ resolve_seed_default_model_settings()
15
+
16
+
17
+ __all__ = [
18
+ "DataDesigner",
19
+ "DataDesignerGenerationError",
20
+ "DataDesignerProfilingError",
21
+ "DatasetCreationResults",
22
+ ]
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.3.8rc2'
32
- __version_tuple__ = version_tuple = (0, 3, 8, 'rc2')
31
+ __version__ = version = '0.4.0'
32
+ __version_tuple__ = version_tuple = (0, 4, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -317,13 +317,7 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
317
317
  Args:
318
318
  run_config: A RunConfig instance containing runtime settings such as
319
319
  early shutdown behavior, batch sizing via `buffer_size`, and non-inference worker
320
- concurrency via `non_inference_max_parallel_workers`. Import RunConfig from
321
- data_designer.essentials.
322
-
323
- Example:
324
- >>> from data_designer.essentials import DataDesigner, RunConfig
325
- >>> dd = DataDesigner()
326
- >>> dd.set_run_config(RunConfig(disable_early_shutdown=True))
320
+ concurrency via `non_inference_max_parallel_workers`.
327
321
 
328
322
  Notes:
329
323
  When `disable_early_shutdown=True`, DataDesigner will never terminate generation early
@@ -85,16 +85,18 @@ def test_run_personas_with_all_flag(
85
85
  # Verify NGC check was called
86
86
  mock_check_ngc.assert_called_once()
87
87
 
88
- # Verify all 5 locales were downloaded
89
- assert mock_download.call_count == 5
88
+ # Verify all 7 locales were downloaded
89
+ assert mock_download.call_count == 7
90
90
 
91
91
  # Verify each locale was downloaded
92
92
  downloaded_locales = [call[0][0] for call in mock_download.call_args_list]
93
93
  assert "en_US" in downloaded_locales
94
94
  assert "en_IN" in downloaded_locales
95
+ assert "en_SG" in downloaded_locales
95
96
  assert "hi_Deva_IN" in downloaded_locales
96
97
  assert "hi_Latn_IN" in downloaded_locales
97
98
  assert "ja_JP" in downloaded_locales
99
+ assert "pt_BR" in downloaded_locales
98
100
 
99
101
 
100
102
  @patch.object(DownloadController, "_download_locale", return_value=True)
@@ -217,12 +219,14 @@ def test_determine_locales_with_all_flag(controller: DownloadController) -> None
217
219
  """Test _determine_locales returns all locales when all_locales=True."""
218
220
  result = controller._determine_locales(locales=None, all_locales=True)
219
221
 
220
- assert len(result) == 5
222
+ assert len(result) == 7
221
223
  assert "en_US" in result
222
224
  assert "en_IN" in result
225
+ assert "en_SG" in result
223
226
  assert "hi_Deva_IN" in result
224
227
  assert "hi_Latn_IN" in result
225
228
  assert "ja_JP" in result
229
+ assert "pt_BR" in result
226
230
 
227
231
 
228
232
  def test_determine_locales_with_valid_locale_flags(controller: DownloadController) -> None:
@@ -15,7 +15,7 @@ def repository() -> PersonaRepository:
15
15
  def test_init(repository: PersonaRepository) -> None:
16
16
  """Test repository initialization creates registry."""
17
17
  assert repository._registry is not None
18
- assert len(repository._registry.locales) == 5
18
+ assert len(repository._registry.locales) == 7
19
19
  assert repository._registry.dataset_prefix == "nemotron-personas-dataset-"
20
20
 
21
21
 
@@ -24,11 +24,11 @@ def test_list_all(repository: PersonaRepository) -> None:
24
24
  locales = repository.list_all()
25
25
 
26
26
  assert isinstance(locales, list)
27
- assert len(locales) == 5
27
+ assert len(locales) == 7
28
28
 
29
29
  # Verify all expected locales are present
30
30
  locale_codes = {locale.code for locale in locales}
31
- assert locale_codes == {"en_US", "en_IN", "hi_Deva_IN", "hi_Latn_IN", "ja_JP"}
31
+ assert locale_codes == {"en_US", "en_IN", "en_SG", "hi_Deva_IN", "hi_Latn_IN", "ja_JP", "pt_BR"}
32
32
 
33
33
  # Verify each locale has required fields
34
34
  for locale in locales:
@@ -51,12 +51,14 @@ def test_get_available_locales(service: DownloadService) -> None:
51
51
  locales = service.get_available_locales()
52
52
 
53
53
  assert isinstance(locales, dict)
54
- assert len(locales) == 5
54
+ assert len(locales) == 7
55
55
  assert "en_US" in locales
56
56
  assert "en_IN" in locales
57
+ assert "en_SG" in locales
57
58
  assert "hi_Deva_IN" in locales
58
59
  assert "hi_Latn_IN" in locales
59
60
  assert "ja_JP" in locales
61
+ assert "pt_BR" in locales
60
62
 
61
63
  # Verify values are locale codes (not descriptions)
62
64
  assert locales["en_US"] == "en_US"
@@ -0,0 +1,4 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ pytest_plugins = ["data_designer.config.testing.fixtures"]
@@ -20,11 +20,9 @@ from data_designer.config.run_config import RunConfig
20
20
  from data_designer.config.sampler_params import CategorySamplerParams, SamplerType
21
21
  from data_designer.config.seed_source import HuggingFaceSeedSource
22
22
  from data_designer.engine.secret_resolver import CompositeResolver, EnvironmentResolver, PlaintextResolver
23
+ from data_designer.engine.testing.stubs import StubHuggingFaceSeedReader
23
24
  from data_designer.interface.data_designer import DataDesigner
24
- from data_designer.interface.errors import (
25
- DataDesignerGenerationError,
26
- DataDesignerProfilingError,
27
- )
25
+ from data_designer.interface.errors import DataDesignerGenerationError, DataDesignerProfilingError
28
26
  from data_designer.lazy_heavy_imports import pd
29
27
 
30
28
  if TYPE_CHECKING:
@@ -56,6 +54,11 @@ def stub_model_providers():
56
54
  ]
57
55
 
58
56
 
57
+ @pytest.fixture
58
+ def stub_seed_reader():
59
+ return StubHuggingFaceSeedReader()
60
+
61
+
59
62
  def test_init_with_custom_secret_resolver(stub_artifact_path, stub_model_providers):
60
63
  """Test DataDesigner initialization with custom secret resolver."""
61
64
  designer = DataDesigner(
@@ -16,7 +16,8 @@ PERF_TEST_TIMEOUT_SECONDS = 30.0
16
16
  def test_import_performance():
17
17
  """Test that average import time never exceeds 6 seconds (1 cold start + 4 warm cache runs)."""
18
18
  # Get the project root (where Makefile is located)
19
- project_root = Path(__file__).parent.parent
19
+ # For workspace packages, need to go up to the workspace root
20
+ project_root = Path(__file__).parent.parent.parent.parent
20
21
 
21
22
  num_runs = 5
22
23
  import_times = []
@@ -1,74 +0,0 @@
1
- ---
2
- name: docs-searcher
3
- description: Search local documentation in the docs/ folder for content related to a topic. Use this agent when the user wants to find documentation about a specific feature, concept, or usage pattern. Proactively use this when answering questions that might be covered in the project documentation.
4
- tools: Glob, Grep, Read
5
- model: haiku
6
- permissionMode: bypassPermissions
7
- ---
8
-
9
- # Documentation Search Agent
10
-
11
- You are a documentation search specialist. Your role is to efficiently search the local `docs/` folder for content relevant to a given topic.
12
-
13
- ## Instructions
14
-
15
- When given a search topic, perform the following searches:
16
-
17
- 1. **Find all documentation files** in the docs/ folder:
18
- ```
19
- Glob pattern: "docs/**/*.md"
20
- ```
21
-
22
- 2. **Search for topic keywords** across all markdown files:
23
- ```
24
- Grep pattern: "<topic keywords>" in path: "docs/"
25
- ```
26
- - Try multiple variations of the search terms (singular/plural, related terms)
27
- - Use case-insensitive search (`-i: true`)
28
-
29
- 3. **Read relevant sections** from files with matches:
30
- - Read the matched files to get full context
31
- - Extract the most relevant sections around the matches
32
-
33
- 4. **Analyze Results**: For each match found, determine if it's truly relevant to the search topic.
34
-
35
- 5. **Output Format**: Return a structured markdown summary with:
36
- - Links to relevant documentation files
37
- - Brief excerpts showing the relevant content
38
- - A sentence explaining why each result is pertinent
39
-
40
- ## Output Template
41
-
42
- ```markdown
43
- ## Documentation Search Results for "<topic>"
44
-
45
- ### Relevant Documentation
46
-
47
- - **[docs/path/to/file.md](docs/path/to/file.md)**
48
- > Brief excerpt showing relevant content...
49
-
50
- Explanation of why this is relevant to the search topic.
51
-
52
- - **[docs/another/file.md](docs/another/file.md)**
53
- > Another relevant excerpt...
54
-
55
- Explanation of relevance.
56
-
57
- ### Summary
58
- Brief summary of what was found and any recommendations for the user.
59
- ```
60
-
61
- ## Important Notes
62
-
63
- - Only include results that are actually relevant to the search topic
64
- - If no relevant documentation is found, clearly state that
65
- - Keep excerpts concise but include enough context to be useful
66
- - Prioritize user guides and examples over API reference when both exist
67
- - If the docs/ folder doesn't exist or is empty, report that clearly
68
-
69
- ## Search Strategy
70
-
71
- 1. Start with exact keyword matches
72
- 2. If few results, try related terms or partial matches
73
- 3. Check file names for topic-related terms (e.g., searching "models" should check files named `models.md`, `model-config.md`, etc.)
74
- 4. Look at section headings within files for topic mentions
@@ -1,81 +0,0 @@
1
- ---
2
- name: github-searcher
3
- description: Search GitHub issues, discussions, and PRs for content related to a topic. Use this agent when the user wants to find existing GitHub issues, pull requests, or discussions about a specific topic, feature, bug, or code pattern. Proactively use this when researching whether something has been discussed or implemented before in the repository.
4
- tools: Bash
5
- model: haiku
6
- permissionMode: bypassPermissions
7
- ---
8
-
9
- # GitHub Content Search Agent
10
-
11
- You are a GitHub search specialist. Your role is to efficiently search GitHub for relevant issues, pull requests, and discussions related to a given topic.
12
-
13
- ## Instructions
14
-
15
- When given a search topic, perform the following searches:
16
-
17
- 1. **Search Issues** using the `gh` CLI:
18
- ```bash
19
- gh issue list --search "<topic>" --limit 20 --json number,title,url,body,state
20
- ```
21
-
22
- 2. **Search Pull Requests** using the `gh` CLI:
23
- ```bash
24
- gh pr list --search "<topic>" --limit 20 --json number,title,url,body,state
25
- ```
26
-
27
- 3. **Search Discussions** using the `gh` CLI (if the repository has discussions enabled):
28
- ```bash
29
- gh api graphql -f query='
30
- query($search: String!) {
31
- search(query: $search, type: DISCUSSION, first: 20) {
32
- nodes {
33
- ... on Discussion {
34
- title
35
- url
36
- body
37
- category { name }
38
- }
39
- }
40
- }
41
- }
42
- ' -f search="repo:{owner}/{repo} <topic>"
43
- ```
44
- Note: Get the owner/repo from `gh repo view --json nameWithOwner -q .nameWithOwner`
45
-
46
- 4. **Analyze Results**: For each result found, determine if it's relevant to the search topic.
47
-
48
- 5. **Output Format**: Return a markdown list with:
49
- - A link to each relevant item (issue, PR, or discussion)
50
- - A *single* sentence explaining why that link is pertinent to the search topic
51
-
52
- ## Output Template
53
-
54
- ```markdown
55
- ## GitHub Search Results for "<topic>"
56
-
57
- ### Issues
58
- - [Issue #123: Title](url) - Brief explanation of relevance.
59
- - [Issue #456: Title](url) - Brief explanation of relevance.
60
-
61
- ### Pull Requests
62
- - [PR #789: Title](url) - Brief explanation of relevance.
63
-
64
- ### Discussions
65
- - [Discussion: Title](url) - Brief explanation of relevance.
66
- ```
67
-
68
- ## Important Notes
69
-
70
- - Only include results that are actually relevant to the search topic
71
- - If a category (issues, PRs, discussions) has no relevant results, note "No relevant items found"
72
- - Keep descriptions to a single sentence
73
- - If discussions search fails (repository doesn't have discussions), skip that section
74
- - Prioritize open items over closed ones, but include relevant closed items too
75
-
76
- ## Command Guidelines
77
-
78
- - **NEVER use pipes or shell fallbacks** like `|| echo "..."` or `| grep ...` in your commands
79
- - Run each `gh` command directly without any error handling wrappers
80
- - If a command returns an error or empty result, handle it in your analysis logic, not with shell constructs
81
- - Run the three searches (issues, PRs, discussions) as separate Bash commands
@@ -1 +0,0 @@
1
- {}
@@ -1,22 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Bash(uv run:*)",
5
- "Bash(make install-dev-notebooks:*)",
6
- "Bash(make lint:*)",
7
- "Bash(curl:*)",
8
- "Bash(uv pip:*)",
9
- "Bash(make lint-fix:*)",
10
- "Bash(mv:*)",
11
- "Bash(make test:*)",
12
- "Bash(make serve-docs-locally:*)",
13
- "Bash(rm:*)",
14
- "Bash(ls:*)",
15
- "Bash(find:*)",
16
- "Bash(git -C /Users/johnnygreco/projects/nvidia/DataDesigner diff --stat)",
17
- "Bash(git cherry-pick:*)"
18
- ],
19
- "deny": [],
20
- "ask": []
21
- }
22
- }