data-designer 0.3.8rc1__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (438) hide show
  1. {data_designer-0.3.8rc1 → data_designer-0.4.0}/.gitignore +6 -4
  2. {data_designer-0.3.8rc1 → data_designer-0.4.0}/PKG-INFO +10 -42
  3. {data_designer-0.3.8rc1 → data_designer-0.4.0}/README.md +7 -14
  4. data_designer-0.4.0/dev-tools/hatch_build.py +31 -0
  5. data_designer-0.4.0/pyproject.toml +58 -0
  6. data_designer-0.4.0/src/data_designer/cli/commands/__init__.py +2 -0
  7. data_designer-0.4.0/src/data_designer/interface/__init__.py +22 -0
  8. {data_designer-0.3.8rc1/src/data_designer → data_designer-0.4.0/src/data_designer/interface}/_version.py +2 -2
  9. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/interface/data_designer.py +8 -11
  10. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/controllers/test_download_controller.py +7 -3
  11. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/repositories/test_persona_repository.py +3 -3
  12. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/services/test_download_service.py +3 -1
  13. data_designer-0.4.0/tests/conftest.py +4 -0
  14. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/interface/test_data_designer.py +7 -4
  15. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/test_import_perf.py +2 -1
  16. data_designer-0.3.8rc1/.github/ISSUE_TEMPLATE/bug-report.yml +0 -51
  17. data_designer-0.3.8rc1/.github/ISSUE_TEMPLATE/config.yml +0 -5
  18. data_designer-0.3.8rc1/.github/ISSUE_TEMPLATE/development-task.yml +0 -32
  19. data_designer-0.3.8rc1/.github/ISSUE_TEMPLATE/feature-request.yml +0 -45
  20. data_designer-0.3.8rc1/.github/workflows/build-docs.yml +0 -68
  21. data_designer-0.3.8rc1/.github/workflows/build-notebooks.yml +0 -30
  22. data_designer-0.3.8rc1/.github/workflows/check-colab-notebooks.yml +0 -55
  23. data_designer-0.3.8rc1/.github/workflows/ci.yml +0 -113
  24. data_designer-0.3.8rc1/.github/workflows/dco-assistant.yml +0 -44
  25. data_designer-0.3.8rc1/.github/workflows/pack-tutorials.yml +0 -77
  26. data_designer-0.3.8rc1/.github/workflows/semantic-pull-requests.yml +0 -26
  27. data_designer-0.3.8rc1/.pre-commit-config.yaml +0 -25
  28. data_designer-0.3.8rc1/AGENTS.md +0 -558
  29. data_designer-0.3.8rc1/CLAUDE.md +0 -3
  30. data_designer-0.3.8rc1/CODE_OF_CONDUCT.md +0 -76
  31. data_designer-0.3.8rc1/CONTRIBUTING.md +0 -236
  32. data_designer-0.3.8rc1/DCO +0 -34
  33. data_designer-0.3.8rc1/LICENSE +0 -201
  34. data_designer-0.3.8rc1/Makefile +0 -212
  35. data_designer-0.3.8rc1/VERSIONING.md +0 -90
  36. data_designer-0.3.8rc1/docs/CONTRIBUTING.md +0 -1
  37. data_designer-0.3.8rc1/docs/assets/palette-favicon.png +0 -0
  38. data_designer-0.3.8rc1/docs/assets/recipes/code_generation/text_to_python.py +0 -318
  39. data_designer-0.3.8rc1/docs/assets/recipes/code_generation/text_to_sql.py +0 -323
  40. data_designer-0.3.8rc1/docs/assets/recipes/qa_and_chat/multi_turn_chat.py +0 -204
  41. data_designer-0.3.8rc1/docs/assets/recipes/qa_and_chat/product_info_qa.py +0 -224
  42. data_designer-0.3.8rc1/docs/code_reference/analysis.md +0 -31
  43. data_designer-0.3.8rc1/docs/code_reference/column_configs.md +0 -8
  44. data_designer-0.3.8rc1/docs/code_reference/config_builder.md +0 -10
  45. data_designer-0.3.8rc1/docs/code_reference/data_designer_config.md +0 -7
  46. data_designer-0.3.8rc1/docs/code_reference/models.md +0 -11
  47. data_designer-0.3.8rc1/docs/code_reference/processors.md +0 -6
  48. data_designer-0.3.8rc1/docs/code_reference/sampler_params.md +0 -12
  49. data_designer-0.3.8rc1/docs/code_reference/validator_params.md +0 -6
  50. data_designer-0.3.8rc1/docs/colab_notebooks/1-the-basics.ipynb +0 -537
  51. data_designer-0.3.8rc1/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +0 -567
  52. data_designer-0.3.8rc1/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +0 -466
  53. data_designer-0.3.8rc1/docs/colab_notebooks/4-providing-images-as-context.ipynb +0 -530
  54. data_designer-0.3.8rc1/docs/concepts/columns.md +0 -152
  55. data_designer-0.3.8rc1/docs/concepts/models/configure-model-settings-with-the-cli.md +0 -136
  56. data_designer-0.3.8rc1/docs/concepts/models/custom-model-settings.md +0 -229
  57. data_designer-0.3.8rc1/docs/concepts/models/default-model-settings.md +0 -124
  58. data_designer-0.3.8rc1/docs/concepts/models/inference-parameters.md +0 -145
  59. data_designer-0.3.8rc1/docs/concepts/models/model-configs.md +0 -123
  60. data_designer-0.3.8rc1/docs/concepts/models/model-providers.md +0 -55
  61. data_designer-0.3.8rc1/docs/concepts/person_sampling.md +0 -217
  62. data_designer-0.3.8rc1/docs/concepts/processors.md +0 -153
  63. data_designer-0.3.8rc1/docs/concepts/validators.md +0 -340
  64. data_designer-0.3.8rc1/docs/css/mkdocstrings.css +0 -80
  65. data_designer-0.3.8rc1/docs/css/style.css +0 -179
  66. data_designer-0.3.8rc1/docs/images/top-models.png +0 -0
  67. data_designer-0.3.8rc1/docs/index.md +0 -48
  68. data_designer-0.3.8rc1/docs/installation.md +0 -29
  69. data_designer-0.3.8rc1/docs/js/toc-toggle.js +0 -25
  70. data_designer-0.3.8rc1/docs/notebook_source/1-the-basics.py +0 -341
  71. data_designer-0.3.8rc1/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py +0 -383
  72. data_designer-0.3.8rc1/docs/notebook_source/3-seeding-with-a-dataset.py +0 -292
  73. data_designer-0.3.8rc1/docs/notebook_source/4-providing-images-as-context.py +0 -311
  74. data_designer-0.3.8rc1/docs/notebook_source/README.md +0 -23
  75. data_designer-0.3.8rc1/docs/notebook_source/_README.md +0 -124
  76. data_designer-0.3.8rc1/docs/notebook_source/_pyproject.toml +0 -9
  77. data_designer-0.3.8rc1/docs/overrides/main.html +0 -31
  78. data_designer-0.3.8rc1/docs/plugins/available.md +0 -3
  79. data_designer-0.3.8rc1/docs/plugins/example.md +0 -280
  80. data_designer-0.3.8rc1/docs/plugins/overview.md +0 -45
  81. data_designer-0.3.8rc1/docs/quick-start.md +0 -88
  82. data_designer-0.3.8rc1/docs/recipes/cards.md +0 -81
  83. data_designer-0.3.8rc1/docs/recipes/code_generation/text_to_python.md +0 -5
  84. data_designer-0.3.8rc1/docs/recipes/code_generation/text_to_sql.md +0 -7
  85. data_designer-0.3.8rc1/docs/recipes/qa_and_chat/multi_turn_chat.md +0 -5
  86. data_designer-0.3.8rc1/docs/recipes/qa_and_chat/product_info_qa.md +0 -5
  87. data_designer-0.3.8rc1/docs/scripts/generate_colab_notebooks.py +0 -186
  88. data_designer-0.3.8rc1/mkdocs.yml +0 -144
  89. data_designer-0.3.8rc1/pyproject.toml +0 -140
  90. data_designer-0.3.8rc1/scripts/test_license_headers.py +0 -899
  91. data_designer-0.3.8rc1/scripts/update_license_headers.py +0 -373
  92. data_designer-0.3.8rc1/src/data_designer/__init__.py +0 -17
  93. data_designer-0.3.8rc1/src/data_designer/cli/commands/__init__.py +0 -2
  94. data_designer-0.3.8rc1/src/data_designer/config/__init__.py +0 -2
  95. data_designer-0.3.8rc1/src/data_designer/config/analysis/__init__.py +0 -2
  96. data_designer-0.3.8rc1/src/data_designer/config/analysis/column_profilers.py +0 -159
  97. data_designer-0.3.8rc1/src/data_designer/config/analysis/column_statistics.py +0 -421
  98. data_designer-0.3.8rc1/src/data_designer/config/analysis/dataset_profiler.py +0 -84
  99. data_designer-0.3.8rc1/src/data_designer/config/analysis/utils/errors.py +0 -10
  100. data_designer-0.3.8rc1/src/data_designer/config/analysis/utils/reporting.py +0 -192
  101. data_designer-0.3.8rc1/src/data_designer/config/base.py +0 -69
  102. data_designer-0.3.8rc1/src/data_designer/config/column_configs.py +0 -470
  103. data_designer-0.3.8rc1/src/data_designer/config/column_types.py +0 -141
  104. data_designer-0.3.8rc1/src/data_designer/config/config_builder.py +0 -595
  105. data_designer-0.3.8rc1/src/data_designer/config/data_designer_config.py +0 -40
  106. data_designer-0.3.8rc1/src/data_designer/config/dataset_builders.py +0 -13
  107. data_designer-0.3.8rc1/src/data_designer/config/dataset_metadata.py +0 -18
  108. data_designer-0.3.8rc1/src/data_designer/config/default_model_settings.py +0 -121
  109. data_designer-0.3.8rc1/src/data_designer/config/errors.py +0 -24
  110. data_designer-0.3.8rc1/src/data_designer/config/exports.py +0 -145
  111. data_designer-0.3.8rc1/src/data_designer/config/interface.py +0 -55
  112. data_designer-0.3.8rc1/src/data_designer/config/models.py +0 -455
  113. data_designer-0.3.8rc1/src/data_designer/config/preview_results.py +0 -41
  114. data_designer-0.3.8rc1/src/data_designer/config/processors.py +0 -148
  115. data_designer-0.3.8rc1/src/data_designer/config/run_config.py +0 -48
  116. data_designer-0.3.8rc1/src/data_designer/config/sampler_constraints.py +0 -52
  117. data_designer-0.3.8rc1/src/data_designer/config/sampler_params.py +0 -639
  118. data_designer-0.3.8rc1/src/data_designer/config/seed.py +0 -116
  119. data_designer-0.3.8rc1/src/data_designer/config/seed_source.py +0 -84
  120. data_designer-0.3.8rc1/src/data_designer/config/seed_source_types.py +0 -19
  121. data_designer-0.3.8rc1/src/data_designer/config/utils/code_lang.py +0 -82
  122. data_designer-0.3.8rc1/src/data_designer/config/utils/constants.py +0 -363
  123. data_designer-0.3.8rc1/src/data_designer/config/utils/errors.py +0 -21
  124. data_designer-0.3.8rc1/src/data_designer/config/utils/info.py +0 -94
  125. data_designer-0.3.8rc1/src/data_designer/config/utils/io_helpers.py +0 -258
  126. data_designer-0.3.8rc1/src/data_designer/config/utils/misc.py +0 -78
  127. data_designer-0.3.8rc1/src/data_designer/config/utils/numerical_helpers.py +0 -30
  128. data_designer-0.3.8rc1/src/data_designer/config/utils/type_helpers.py +0 -106
  129. data_designer-0.3.8rc1/src/data_designer/config/utils/visualization.py +0 -482
  130. data_designer-0.3.8rc1/src/data_designer/config/validator_params.py +0 -94
  131. data_designer-0.3.8rc1/src/data_designer/engine/__init__.py +0 -2
  132. data_designer-0.3.8rc1/src/data_designer/engine/analysis/column_profilers/base.py +0 -49
  133. data_designer-0.3.8rc1/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -153
  134. data_designer-0.3.8rc1/src/data_designer/engine/analysis/column_profilers/registry.py +0 -22
  135. data_designer-0.3.8rc1/src/data_designer/engine/analysis/column_statistics.py +0 -145
  136. data_designer-0.3.8rc1/src/data_designer/engine/analysis/dataset_profiler.py +0 -149
  137. data_designer-0.3.8rc1/src/data_designer/engine/analysis/errors.py +0 -9
  138. data_designer-0.3.8rc1/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -234
  139. data_designer-0.3.8rc1/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -132
  140. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/__init__.py +0 -2
  141. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/generators/__init__.py +0 -2
  142. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/generators/base.py +0 -122
  143. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/generators/embedding.py +0 -35
  144. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/generators/expression.py +0 -55
  145. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/generators/llm_completion.py +0 -113
  146. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/generators/samplers.py +0 -69
  147. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -144
  148. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/generators/validation.py +0 -140
  149. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/registry.py +0 -60
  150. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/utils/errors.py +0 -15
  151. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/utils/generator_classification.py +0 -43
  152. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -58
  153. data_designer-0.3.8rc1/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -100
  154. data_designer-0.3.8rc1/src/data_designer/engine/compiler.py +0 -97
  155. data_designer-0.3.8rc1/src/data_designer/engine/configurable_task.py +0 -71
  156. data_designer-0.3.8rc1/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -283
  157. data_designer-0.3.8rc1/src/data_designer/engine/dataset_builders/column_wise_builder.py +0 -338
  158. data_designer-0.3.8rc1/src/data_designer/engine/dataset_builders/errors.py +0 -15
  159. data_designer-0.3.8rc1/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -46
  160. data_designer-0.3.8rc1/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -2
  161. data_designer-0.3.8rc1/src/data_designer/engine/dataset_builders/utils/concurrency.py +0 -215
  162. data_designer-0.3.8rc1/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -62
  163. data_designer-0.3.8rc1/src/data_designer/engine/dataset_builders/utils/dag.py +0 -62
  164. data_designer-0.3.8rc1/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -200
  165. data_designer-0.3.8rc1/src/data_designer/engine/dataset_builders/utils/errors.py +0 -15
  166. data_designer-0.3.8rc1/src/data_designer/engine/errors.py +0 -51
  167. data_designer-0.3.8rc1/src/data_designer/engine/model_provider.py +0 -77
  168. data_designer-0.3.8rc1/src/data_designer/engine/models/__init__.py +0 -2
  169. data_designer-0.3.8rc1/src/data_designer/engine/models/errors.py +0 -300
  170. data_designer-0.3.8rc1/src/data_designer/engine/models/facade.py +0 -287
  171. data_designer-0.3.8rc1/src/data_designer/engine/models/factory.py +0 -42
  172. data_designer-0.3.8rc1/src/data_designer/engine/models/litellm_overrides.py +0 -179
  173. data_designer-0.3.8rc1/src/data_designer/engine/models/parsers/__init__.py +0 -2
  174. data_designer-0.3.8rc1/src/data_designer/engine/models/parsers/errors.py +0 -34
  175. data_designer-0.3.8rc1/src/data_designer/engine/models/parsers/parser.py +0 -235
  176. data_designer-0.3.8rc1/src/data_designer/engine/models/parsers/postprocessors.py +0 -93
  177. data_designer-0.3.8rc1/src/data_designer/engine/models/parsers/tag_parsers.py +0 -62
  178. data_designer-0.3.8rc1/src/data_designer/engine/models/parsers/types.py +0 -84
  179. data_designer-0.3.8rc1/src/data_designer/engine/models/recipes/base.py +0 -81
  180. data_designer-0.3.8rc1/src/data_designer/engine/models/recipes/response_recipes.py +0 -293
  181. data_designer-0.3.8rc1/src/data_designer/engine/models/registry.py +0 -146
  182. data_designer-0.3.8rc1/src/data_designer/engine/models/telemetry.py +0 -359
  183. data_designer-0.3.8rc1/src/data_designer/engine/models/usage.py +0 -73
  184. data_designer-0.3.8rc1/src/data_designer/engine/models/utils.py +0 -38
  185. data_designer-0.3.8rc1/src/data_designer/engine/processing/ginja/__init__.py +0 -2
  186. data_designer-0.3.8rc1/src/data_designer/engine/processing/ginja/ast.py +0 -65
  187. data_designer-0.3.8rc1/src/data_designer/engine/processing/ginja/environment.py +0 -463
  188. data_designer-0.3.8rc1/src/data_designer/engine/processing/ginja/exceptions.py +0 -56
  189. data_designer-0.3.8rc1/src/data_designer/engine/processing/ginja/record.py +0 -32
  190. data_designer-0.3.8rc1/src/data_designer/engine/processing/gsonschema/__init__.py +0 -2
  191. data_designer-0.3.8rc1/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -15
  192. data_designer-0.3.8rc1/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -83
  193. data_designer-0.3.8rc1/src/data_designer/engine/processing/gsonschema/types.py +0 -10
  194. data_designer-0.3.8rc1/src/data_designer/engine/processing/gsonschema/validators.py +0 -202
  195. data_designer-0.3.8rc1/src/data_designer/engine/processing/processors/base.py +0 -13
  196. data_designer-0.3.8rc1/src/data_designer/engine/processing/processors/drop_columns.py +0 -42
  197. data_designer-0.3.8rc1/src/data_designer/engine/processing/processors/registry.py +0 -25
  198. data_designer-0.3.8rc1/src/data_designer/engine/processing/processors/schema_transform.py +0 -49
  199. data_designer-0.3.8rc1/src/data_designer/engine/processing/utils.py +0 -169
  200. data_designer-0.3.8rc1/src/data_designer/engine/registry/base.py +0 -99
  201. data_designer-0.3.8rc1/src/data_designer/engine/registry/data_designer_registry.py +0 -39
  202. data_designer-0.3.8rc1/src/data_designer/engine/registry/errors.py +0 -12
  203. data_designer-0.3.8rc1/src/data_designer/engine/resources/managed_dataset_generator.py +0 -39
  204. data_designer-0.3.8rc1/src/data_designer/engine/resources/managed_dataset_repository.py +0 -197
  205. data_designer-0.3.8rc1/src/data_designer/engine/resources/managed_storage.py +0 -65
  206. data_designer-0.3.8rc1/src/data_designer/engine/resources/resource_provider.py +0 -77
  207. data_designer-0.3.8rc1/src/data_designer/engine/resources/seed_reader.py +0 -154
  208. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/column.py +0 -91
  209. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/constraints.py +0 -100
  210. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -217
  211. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -12
  212. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -347
  213. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -2
  214. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
  215. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -86
  216. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -171
  217. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/entities/errors.py +0 -10
  218. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -102
  219. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/entities/person.py +0 -144
  220. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -128
  221. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/errors.py +0 -26
  222. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/generator.py +0 -122
  223. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -64
  224. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/people_gen.py +0 -199
  225. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/person_constants.py +0 -56
  226. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/schema.py +0 -147
  227. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/schema_builder.py +0 -61
  228. data_designer-0.3.8rc1/src/data_designer/engine/sampling_gen/utils.py +0 -46
  229. data_designer-0.3.8rc1/src/data_designer/engine/secret_resolver.py +0 -82
  230. data_designer-0.3.8rc1/src/data_designer/engine/validation.py +0 -367
  231. data_designer-0.3.8rc1/src/data_designer/engine/validators/__init__.py +0 -19
  232. data_designer-0.3.8rc1/src/data_designer/engine/validators/base.py +0 -38
  233. data_designer-0.3.8rc1/src/data_designer/engine/validators/local_callable.py +0 -39
  234. data_designer-0.3.8rc1/src/data_designer/engine/validators/python.py +0 -254
  235. data_designer-0.3.8rc1/src/data_designer/engine/validators/remote.py +0 -89
  236. data_designer-0.3.8rc1/src/data_designer/engine/validators/sql.py +0 -65
  237. data_designer-0.3.8rc1/src/data_designer/errors.py +0 -7
  238. data_designer-0.3.8rc1/src/data_designer/essentials/__init__.py +0 -33
  239. data_designer-0.3.8rc1/src/data_designer/interface/__init__.py +0 -2
  240. data_designer-0.3.8rc1/src/data_designer/lazy_heavy_imports.py +0 -54
  241. data_designer-0.3.8rc1/src/data_designer/logging.py +0 -163
  242. data_designer-0.3.8rc1/src/data_designer/plugin_manager.py +0 -78
  243. data_designer-0.3.8rc1/src/data_designer/plugins/__init__.py +0 -8
  244. data_designer-0.3.8rc1/src/data_designer/plugins/errors.py +0 -15
  245. data_designer-0.3.8rc1/src/data_designer/plugins/plugin.py +0 -141
  246. data_designer-0.3.8rc1/src/data_designer/plugins/registry.py +0 -88
  247. data_designer-0.3.8rc1/src/data_designer/plugins/testing/__init__.py +0 -10
  248. data_designer-0.3.8rc1/src/data_designer/plugins/testing/stubs.py +0 -116
  249. data_designer-0.3.8rc1/src/data_designer/plugins/testing/utils.py +0 -20
  250. data_designer-0.3.8rc1/tests/config/analysis/conftest.py +0 -82
  251. data_designer-0.3.8rc1/tests/config/analysis/test_column_statistics.py +0 -299
  252. data_designer-0.3.8rc1/tests/config/analysis/test_dataset_profiler_results.py +0 -160
  253. data_designer-0.3.8rc1/tests/config/analysis/utils/test_reporting.py +0 -292
  254. data_designer-0.3.8rc1/tests/config/test_columns.py +0 -468
  255. data_designer-0.3.8rc1/tests/config/test_config_builder.py +0 -742
  256. data_designer-0.3.8rc1/tests/config/test_data_designer_config.py +0 -29
  257. data_designer-0.3.8rc1/tests/config/test_default_model_settings.py +0 -196
  258. data_designer-0.3.8rc1/tests/config/test_models.py +0 -387
  259. data_designer-0.3.8rc1/tests/config/test_processors.py +0 -139
  260. data_designer-0.3.8rc1/tests/config/test_sampler_constraints.py +0 -25
  261. data_designer-0.3.8rc1/tests/config/test_sampler_params.py +0 -141
  262. data_designer-0.3.8rc1/tests/config/test_seed.py +0 -56
  263. data_designer-0.3.8rc1/tests/config/test_seed_source.py +0 -78
  264. data_designer-0.3.8rc1/tests/config/test_validator_params.py +0 -59
  265. data_designer-0.3.8rc1/tests/config/utils/__init__.py +0 -2
  266. data_designer-0.3.8rc1/tests/config/utils/test_code_lang.py +0 -37
  267. data_designer-0.3.8rc1/tests/config/utils/test_info.py +0 -59
  268. data_designer-0.3.8rc1/tests/config/utils/test_io_helpers.py +0 -178
  269. data_designer-0.3.8rc1/tests/config/utils/test_misc.py +0 -75
  270. data_designer-0.3.8rc1/tests/config/utils/test_type_helpers.py +0 -162
  271. data_designer-0.3.8rc1/tests/config/utils/test_visualization.py +0 -94
  272. data_designer-0.3.8rc1/tests/conftest.py +0 -324
  273. data_designer-0.3.8rc1/tests/engine/analysis/column_profilers/test_base.py +0 -54
  274. data_designer-0.3.8rc1/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -292
  275. data_designer-0.3.8rc1/tests/engine/analysis/conftest.py +0 -159
  276. data_designer-0.3.8rc1/tests/engine/analysis/test_column_statistics_calculator.py +0 -79
  277. data_designer-0.3.8rc1/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -145
  278. data_designer-0.3.8rc1/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -2929
  279. data_designer-0.3.8rc1/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -27
  280. data_designer-0.3.8rc1/tests/engine/analysis/test_dataset_profiler.py +0 -130
  281. data_designer-0.3.8rc1/tests/engine/analysis/test_errors.py +0 -59
  282. data_designer-0.3.8rc1/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -357
  283. data_designer-0.3.8rc1/tests/engine/analysis/utils/test_judge_score_processing.py +0 -171
  284. data_designer-0.3.8rc1/tests/engine/column_generators/generators/__init__.py +0 -2
  285. data_designer-0.3.8rc1/tests/engine/column_generators/generators/test_column_generator_base.py +0 -85
  286. data_designer-0.3.8rc1/tests/engine/column_generators/generators/test_embedding.py +0 -47
  287. data_designer-0.3.8rc1/tests/engine/column_generators/generators/test_expression.py +0 -166
  288. data_designer-0.3.8rc1/tests/engine/column_generators/generators/test_llm_completion_generators.py +0 -350
  289. data_designer-0.3.8rc1/tests/engine/column_generators/generators/test_samplers.py +0 -131
  290. data_designer-0.3.8rc1/tests/engine/column_generators/generators/test_seed_dataset.py +0 -796
  291. data_designer-0.3.8rc1/tests/engine/column_generators/generators/test_validation.py +0 -248
  292. data_designer-0.3.8rc1/tests/engine/column_generators/test_registry.py +0 -39
  293. data_designer-0.3.8rc1/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -15
  294. data_designer-0.3.8rc1/tests/engine/column_generators/utils/test_generator_classification.py +0 -32
  295. data_designer-0.3.8rc1/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -97
  296. data_designer-0.3.8rc1/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -127
  297. data_designer-0.3.8rc1/tests/engine/conftest.py +0 -61
  298. data_designer-0.3.8rc1/tests/engine/dataset_builders/test_artifact_storage.py +0 -362
  299. data_designer-0.3.8rc1/tests/engine/dataset_builders/test_column_wise_builder.py +0 -390
  300. data_designer-0.3.8rc1/tests/engine/dataset_builders/test_multi_column_configs.py +0 -158
  301. data_designer-0.3.8rc1/tests/engine/dataset_builders/utils/test_concurrency.py +0 -577
  302. data_designer-0.3.8rc1/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -92
  303. data_designer-0.3.8rc1/tests/engine/dataset_builders/utils/test_dag.py +0 -113
  304. data_designer-0.3.8rc1/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -421
  305. data_designer-0.3.8rc1/tests/engine/models/conftest.py +0 -75
  306. data_designer-0.3.8rc1/tests/engine/models/parsers/test_parser.py +0 -175
  307. data_designer-0.3.8rc1/tests/engine/models/parsers/test_parsers_types.py +0 -96
  308. data_designer-0.3.8rc1/tests/engine/models/parsers/test_postprocessors.py +0 -122
  309. data_designer-0.3.8rc1/tests/engine/models/parsers/test_tag_parsers.py +0 -118
  310. data_designer-0.3.8rc1/tests/engine/models/recipes/test_recipe_base.py +0 -130
  311. data_designer-0.3.8rc1/tests/engine/models/recipes/test_response_recipes.py +0 -257
  312. data_designer-0.3.8rc1/tests/engine/models/stub_secrets.json +0 -3
  313. data_designer-0.3.8rc1/tests/engine/models/test_facade.py +0 -233
  314. data_designer-0.3.8rc1/tests/engine/models/test_litellm_overrides.py +0 -140
  315. data_designer-0.3.8rc1/tests/engine/models/test_model_errors.py +0 -231
  316. data_designer-0.3.8rc1/tests/engine/models/test_model_registry.py +0 -329
  317. data_designer-0.3.8rc1/tests/engine/models/test_model_utils.py +0 -36
  318. data_designer-0.3.8rc1/tests/engine/models/test_usage.py +0 -65
  319. data_designer-0.3.8rc1/tests/engine/processing/__init__.py +0 -2
  320. data_designer-0.3.8rc1/tests/engine/processing/ginja/__init__.py +0 -2
  321. data_designer-0.3.8rc1/tests/engine/processing/ginja/test_ast.py +0 -124
  322. data_designer-0.3.8rc1/tests/engine/processing/ginja/test_environment.py +0 -213
  323. data_designer-0.3.8rc1/tests/engine/processing/ginja/test_exceptions.py +0 -21
  324. data_designer-0.3.8rc1/tests/engine/processing/ginja/test_record.py +0 -25
  325. data_designer-0.3.8rc1/tests/engine/processing/gsonschema/__init__.py +0 -2
  326. data_designer-0.3.8rc1/tests/engine/processing/gsonschema/test_exceptions.py +0 -42
  327. data_designer-0.3.8rc1/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -368
  328. data_designer-0.3.8rc1/tests/engine/processing/gsonschema/test_types.py +0 -109
  329. data_designer-0.3.8rc1/tests/engine/processing/gsonschema/test_validators.py +0 -229
  330. data_designer-0.3.8rc1/tests/engine/processing/processors/__init__.py +0 -2
  331. data_designer-0.3.8rc1/tests/engine/processing/processors/test_drop_columns.py +0 -162
  332. data_designer-0.3.8rc1/tests/engine/processing/processors/test_registry.py +0 -18
  333. data_designer-0.3.8rc1/tests/engine/processing/processors/test_schema_transform.py +0 -135
  334. data_designer-0.3.8rc1/tests/engine/processing/test_utils.py +0 -141
  335. data_designer-0.3.8rc1/tests/engine/registry/__init__.py +0 -2
  336. data_designer-0.3.8rc1/tests/engine/registry/conftest.py +0 -37
  337. data_designer-0.3.8rc1/tests/engine/registry/test_base.py +0 -227
  338. data_designer-0.3.8rc1/tests/engine/registry/test_data_designer_registry.py +0 -215
  339. data_designer-0.3.8rc1/tests/engine/registry/test_errors.py +0 -63
  340. data_designer-0.3.8rc1/tests/engine/resources/__init__.py +0 -2
  341. data_designer-0.3.8rc1/tests/engine/resources/conftest.py +0 -62
  342. data_designer-0.3.8rc1/tests/engine/resources/test_managed_dataset_generator.py +0 -125
  343. data_designer-0.3.8rc1/tests/engine/resources/test_managed_dataset_repository.py +0 -221
  344. data_designer-0.3.8rc1/tests/engine/resources/test_managed_storage.py +0 -95
  345. data_designer-0.3.8rc1/tests/engine/resources/test_resource_provider.py +0 -40
  346. data_designer-0.3.8rc1/tests/engine/resources/test_seed_reader.py +0 -58
  347. data_designer-0.3.8rc1/tests/engine/sampling_gen/conftest.py +0 -306
  348. data_designer-0.3.8rc1/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -17
  349. data_designer-0.3.8rc1/tests/engine/sampling_gen/data_sources/test_sources.py +0 -369
  350. data_designer-0.3.8rc1/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -105
  351. data_designer-0.3.8rc1/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -61
  352. data_designer-0.3.8rc1/tests/engine/sampling_gen/entities/test_person.py +0 -286
  353. data_designer-0.3.8rc1/tests/engine/sampling_gen/entities/test_phone_number.py +0 -94
  354. data_designer-0.3.8rc1/tests/engine/sampling_gen/test_column.py +0 -101
  355. data_designer-0.3.8rc1/tests/engine/sampling_gen/test_constraints.py +0 -107
  356. data_designer-0.3.8rc1/tests/engine/sampling_gen/test_generator.py +0 -536
  357. data_designer-0.3.8rc1/tests/engine/sampling_gen/test_jinja_utils.py +0 -119
  358. data_designer-0.3.8rc1/tests/engine/sampling_gen/test_people_gen.py +0 -56
  359. data_designer-0.3.8rc1/tests/engine/sampling_gen/test_schema.py +0 -255
  360. data_designer-0.3.8rc1/tests/engine/sampling_gen/test_utils.py +0 -43
  361. data_designer-0.3.8rc1/tests/engine/test_compiler.py +0 -146
  362. data_designer-0.3.8rc1/tests/engine/test_configurable_task.py +0 -130
  363. data_designer-0.3.8rc1/tests/engine/test_dataset_metadata.py +0 -56
  364. data_designer-0.3.8rc1/tests/engine/test_engine_errors.py +0 -61
  365. data_designer-0.3.8rc1/tests/engine/test_model_provider.py +0 -61
  366. data_designer-0.3.8rc1/tests/engine/test_secret_resolver.py +0 -89
  367. data_designer-0.3.8rc1/tests/engine/test_validation.py +0 -300
  368. data_designer-0.3.8rc1/tests/engine/validators/test_local_callable.py +0 -40
  369. data_designer-0.3.8rc1/tests/engine/validators/test_python.py +0 -123
  370. data_designer-0.3.8rc1/tests/engine/validators/test_remote.py +0 -64
  371. data_designer-0.3.8rc1/tests/engine/validators/test_sql.py +0 -22
  372. data_designer-0.3.8rc1/tests/essentials/test_init.py +0 -326
  373. data_designer-0.3.8rc1/tests/plugins/test_plugin.py +0 -191
  374. data_designer-0.3.8rc1/tests/plugins/test_plugin_registry.py +0 -253
  375. data_designer-0.3.8rc1/tests/test_logging.py +0 -210
  376. data_designer-0.3.8rc1/tests/test_plugin_manager.py +0 -124
  377. data_designer-0.3.8rc1/tests_e2e/pyproject.toml +0 -38
  378. data_designer-0.3.8rc1/tests_e2e/src/data_designer_e2e_tests/plugins/__init__.py +0 -2
  379. data_designer-0.3.8rc1/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/__init__.py +0 -2
  380. data_designer-0.3.8rc1/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/config.py +0 -24
  381. data_designer-0.3.8rc1/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/impl.py +0 -20
  382. data_designer-0.3.8rc1/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/plugin.py +0 -10
  383. data_designer-0.3.8rc1/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/__init__.py +0 -2
  384. data_designer-0.3.8rc1/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/config.py +0 -13
  385. data_designer-0.3.8rc1/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/impl.py +0 -21
  386. data_designer-0.3.8rc1/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/plugin.py +0 -10
  387. data_designer-0.3.8rc1/tests_e2e/tests/test_e2e.py +0 -73
  388. data_designer-0.3.8rc1/tests_e2e/tests/test_seed.csv +0 -4
  389. data_designer-0.3.8rc1/uv.lock +0 -5248
  390. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/README.md +0 -0
  391. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/__init__.py +0 -0
  392. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/commands/download.py +0 -0
  393. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/commands/list.py +0 -0
  394. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/commands/models.py +0 -0
  395. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/commands/providers.py +0 -0
  396. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/commands/reset.py +0 -0
  397. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/controllers/__init__.py +0 -0
  398. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/controllers/download_controller.py +0 -0
  399. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/controllers/model_controller.py +0 -0
  400. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/controllers/provider_controller.py +0 -0
  401. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/forms/__init__.py +0 -0
  402. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/forms/builder.py +0 -0
  403. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/forms/field.py +0 -0
  404. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/forms/form.py +0 -0
  405. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/forms/model_builder.py +0 -0
  406. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/forms/provider_builder.py +0 -0
  407. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/main.py +0 -0
  408. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/repositories/__init__.py +0 -0
  409. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/repositories/base.py +0 -0
  410. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/repositories/model_repository.py +0 -0
  411. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/repositories/persona_repository.py +0 -0
  412. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/repositories/provider_repository.py +0 -0
  413. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/services/__init__.py +0 -0
  414. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/services/download_service.py +0 -0
  415. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/services/model_service.py +0 -0
  416. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/services/provider_service.py +0 -0
  417. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/ui.py +0 -0
  418. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/cli/utils.py +0 -0
  419. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/interface/errors.py +0 -0
  420. {data_designer-0.3.8rc1 → data_designer-0.4.0}/src/data_designer/interface/results.py +0 -0
  421. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/commands/test_download_command.py +0 -0
  422. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/commands/test_list_command.py +0 -0
  423. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/commands/test_models_command.py +0 -0
  424. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/commands/test_providers_command.py +0 -0
  425. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/commands/test_reset_command.py +0 -0
  426. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/conftest.py +0 -0
  427. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/controllers/test_model_controller.py +0 -0
  428. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/controllers/test_provider_controller.py +0 -0
  429. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/forms/test_field.py +0 -0
  430. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/forms/test_form.py +0 -0
  431. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/forms/test_model_builder.py +0 -0
  432. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/forms/test_provider_builder.py +0 -0
  433. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/repositories/test_model_repository.py +0 -0
  434. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/repositories/test_provider_repository.py +0 -0
  435. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/services/test_model_service.py +0 -0
  436. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/services/test_provider_service.py +0 -0
  437. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/cli/test_cli_utils.py +0 -0
  438. {data_designer-0.3.8rc1 → data_designer-0.4.0}/tests/interface/test_results.py +0 -0
@@ -79,14 +79,12 @@ venv.bak/
79
79
  *.tar.gz
80
80
  *.zip
81
81
 
82
- # Auto-generated version file
83
- src/data_designer/_version.py
82
+ # Auto-generated version files
83
+ **/_version.py
84
84
 
85
85
  # Local scratch space
86
86
  .scratch/
87
87
 
88
- .claude/
89
-
90
88
  docs/notebooks/
91
89
  docs/notebook_source/*.ipynb
92
90
  docs/notebook_source/*.csv
@@ -96,3 +94,7 @@ tests_e2e/uv.lock
96
94
 
97
95
  # Performance profiling
98
96
  perf_*.txt
97
+ NOTEPAD.md
98
+
99
+ # Build-time copy of README for data-designer package (copied from top-level during build)
100
+ packages/data-designer/README.md
@@ -1,9 +1,8 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer
3
- Version: 0.3.8rc1
3
+ Version: 0.4.0
4
4
  Summary: General framework for synthetic data generation
5
5
  License-Expression: Apache-2.0
6
- License-File: LICENSE
7
6
  Classifier: Development Status :: 4 - Beta
8
7
  Classifier: Intended Audience :: Developers
9
8
  Classifier: Intended Audience :: Science/Research
@@ -15,33 +14,9 @@ Classifier: Programming Language :: Python :: 3.13
15
14
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
15
  Classifier: Topic :: Software Development
17
16
  Requires-Python: >=3.10
18
- Requires-Dist: anyascii<1,>=0.3.3
19
- Requires-Dist: duckdb<2,>=1.1.3
20
- Requires-Dist: faker<21,>=20.1.0
21
- Requires-Dist: httpx-retries<1,>=0.4.2
22
- Requires-Dist: httpx<1,>=0.27.2
23
- Requires-Dist: huggingface-hub<2,>=1.0.1
24
- Requires-Dist: jinja2<4,>=3.1.6
25
- Requires-Dist: json-repair<1,>=0.48.0
26
- Requires-Dist: jsonpath-rust-bindings<2,>=1.0
27
- Requires-Dist: litellm<1.80.12,>=1.73.6
28
- Requires-Dist: lxml<7,>=6.0.2
29
- Requires-Dist: marko<3,>=2.1.2
30
- Requires-Dist: networkx<4,>=3.0
31
- Requires-Dist: numpy<3,>=1.23.5
32
- Requires-Dist: pandas<3,>=2.3.3
17
+ Requires-Dist: data-designer-config
18
+ Requires-Dist: data-designer-engine
33
19
  Requires-Dist: prompt-toolkit<4,>=3.0.0
34
- Requires-Dist: pyarrow<20,>=19.0.1
35
- Requires-Dist: pydantic[email]<3,>=2.9.2
36
- Requires-Dist: pygments<3,>=2.19.2
37
- Requires-Dist: python-json-logger<4,>=3
38
- Requires-Dist: pyyaml<7,>=6.0.1
39
- Requires-Dist: requests<3,>=2.32.2
40
- Requires-Dist: rich<15,>=13.7.1
41
- Requires-Dist: ruff<1,>=0.14.10
42
- Requires-Dist: scipy<2,>=1.11.0
43
- Requires-Dist: sqlfluff<4,>=3.2.0
44
- Requires-Dist: tiktoken<1,>=0.8.0
45
20
  Requires-Dist: typer<1,>=0.12.0
46
21
  Description-Content-Type: text/markdown
47
22
 
@@ -104,26 +79,19 @@ export OPENROUTER_API_KEY="your-openrouter-api-key-here"
104
79
 
105
80
  ### 3. Start generating data!
106
81
  ```python
107
- from data_designer.essentials import (
108
- CategorySamplerParams,
109
- DataDesigner,
110
- DataDesignerConfigBuilder,
111
- LLMTextColumnConfig,
112
- PersonSamplerParams,
113
- SamplerColumnConfig,
114
- SamplerType,
115
- )
82
+ import data_designer.config as dd
83
+ from data_designer.interface import DataDesigner
116
84
 
117
85
  # Initialize with default settings
118
86
  data_designer = DataDesigner()
119
- config_builder = DataDesignerConfigBuilder()
87
+ config_builder = dd.DataDesignerConfigBuilder()
120
88
 
121
89
  # Add a product category
122
90
  config_builder.add_column(
123
- SamplerColumnConfig(
91
+ dd.SamplerColumnConfig(
124
92
  name="product_category",
125
- sampler_type=SamplerType.CATEGORY,
126
- params=CategorySamplerParams(
93
+ sampler_type=dd.SamplerType.CATEGORY,
94
+ params=dd.CategorySamplerParams(
127
95
  values=["Electronics", "Clothing", "Home & Kitchen", "Books"],
128
96
  ),
129
97
  )
@@ -131,7 +99,7 @@ config_builder.add_column(
131
99
 
132
100
  # Generate personalized customer reviews
133
101
  config_builder.add_column(
134
- LLMTextColumnConfig(
102
+ dd.LLMTextColumnConfig(
135
103
  name="review",
136
104
  model_alias="nvidia-text",
137
105
  prompt="Write a brief product review for a {{ product_category }} item you recently purchased.",
@@ -57,26 +57,19 @@ export OPENROUTER_API_KEY="your-openrouter-api-key-here"
57
57
 
58
58
  ### 3. Start generating data!
59
59
  ```python
60
- from data_designer.essentials import (
61
- CategorySamplerParams,
62
- DataDesigner,
63
- DataDesignerConfigBuilder,
64
- LLMTextColumnConfig,
65
- PersonSamplerParams,
66
- SamplerColumnConfig,
67
- SamplerType,
68
- )
60
+ import data_designer.config as dd
61
+ from data_designer.interface import DataDesigner
69
62
 
70
63
  # Initialize with default settings
71
64
  data_designer = DataDesigner()
72
- config_builder = DataDesignerConfigBuilder()
65
+ config_builder = dd.DataDesignerConfigBuilder()
73
66
 
74
67
  # Add a product category
75
68
  config_builder.add_column(
76
- SamplerColumnConfig(
69
+ dd.SamplerColumnConfig(
77
70
  name="product_category",
78
- sampler_type=SamplerType.CATEGORY,
79
- params=CategorySamplerParams(
71
+ sampler_type=dd.SamplerType.CATEGORY,
72
+ params=dd.CategorySamplerParams(
80
73
  values=["Electronics", "Clothing", "Home & Kitchen", "Books"],
81
74
  ),
82
75
  )
@@ -84,7 +77,7 @@ config_builder.add_column(
84
77
 
85
78
  # Generate personalized customer reviews
86
79
  config_builder.add_column(
87
- LLMTextColumnConfig(
80
+ dd.LLMTextColumnConfig(
88
81
  name="review",
89
82
  model_alias="nvidia-text",
90
83
  prompt="Write a brief product review for a {{ product_category }} item you recently purchased.",
@@ -0,0 +1,31 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ """Custom hatch metadata hook to sync README from root.
5
+
6
+ This hook runs during metadata resolution (before build hooks) to ensure
7
+ the README.md from the repository root is copied before hatchling validates
8
+ that the readme file exists.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import shutil
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ from hatchling.metadata.plugin.interface import MetadataHookInterface
18
+
19
+
20
+ class ReadmeSyncHook(MetadataHookInterface):
21
+ """Metadata hook that copies README.md from repository root before building."""
22
+
23
+ PLUGIN_NAME = "readme-sync"
24
+
25
+ def update(self, metadata: dict[str, Any]) -> None:
26
+ """Copy README.md from repository root to package directory."""
27
+ root_readme = Path(self.root) / ".." / ".." / "README.md"
28
+ package_readme = Path(self.root) / "README.md"
29
+
30
+ if root_readme.exists():
31
+ shutil.copy2(root_readme, package_readme)
@@ -0,0 +1,58 @@
1
+ [project]
2
+ name = "data-designer"
3
+ dynamic = ["version"]
4
+ description = "General framework for synthetic data generation"
5
+ readme = "README.md"
6
+ requires-python = ">=3.10"
7
+ license = "Apache-2.0"
8
+
9
+ classifiers = [
10
+ "Development Status :: 4 - Beta",
11
+ "Intended Audience :: Developers",
12
+ "Intended Audience :: Science/Research",
13
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
14
+ "Topic :: Software Development",
15
+ "License :: OSI Approved :: Apache Software License",
16
+ "Programming Language :: Python :: 3.10",
17
+ "Programming Language :: Python :: 3.11",
18
+ "Programming Language :: Python :: 3.12",
19
+ "Programming Language :: Python :: 3.13",
20
+ ]
21
+
22
+ dependencies = [
23
+ "data-designer-config",
24
+ "data-designer-engine",
25
+ "prompt-toolkit>=3.0.0,<4",
26
+ "typer>=0.12.0,<1",
27
+ ]
28
+
29
+ [project.scripts]
30
+ data-designer = "data_designer.cli:main"
31
+
32
+ [build-system]
33
+ requires = ["hatchling", "hatch-vcs"]
34
+ build-backend = "hatchling.build"
35
+
36
+ [tool.hatch.version]
37
+ source = "vcs"
38
+ fallback-version = "0.1.0.dev0"
39
+ raw-options = { root = "../.." }
40
+
41
+ [tool.hatch.build.hooks.vcs]
42
+ version-file = "src/data_designer/interface/_version.py"
43
+
44
+ [tool.hatch.metadata.hooks.custom]
45
+ path = "dev-tools/hatch_build.py"
46
+
47
+ [tool.hatch.build.targets.wheel]
48
+ packages = ["src/data_designer"]
49
+
50
+ [tool.ruff]
51
+ extend = "../../pyproject.toml"
52
+
53
+ [tool.uv]
54
+ package = true
55
+
56
+ [tool.uv.sources]
57
+ data-designer-config = { workspace = true }
58
+ data-designer-engine = { workspace = true }
@@ -0,0 +1,2 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
@@ -0,0 +1,22 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from data_designer.config.default_model_settings import resolve_seed_default_model_settings
5
+ from data_designer.interface.data_designer import DataDesigner
6
+ from data_designer.interface.errors import (
7
+ DataDesignerGenerationError,
8
+ DataDesignerProfilingError,
9
+ )
10
+ from data_designer.interface.results import DatasetCreationResults
11
+ from data_designer.logging import configure_logging
12
+
13
+ configure_logging()
14
+ resolve_seed_default_model_settings()
15
+
16
+
17
+ __all__ = [
18
+ "DataDesigner",
19
+ "DataDesignerGenerationError",
20
+ "DataDesignerProfilingError",
21
+ "DatasetCreationResults",
22
+ ]
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.3.8rc1'
32
- __version_tuple__ = version_tuple = (0, 3, 8, 'rc1')
31
+ __version__ = version = '0.4.0'
32
+ __version_tuple__ = version_tuple = (0, 4, 0)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -12,9 +12,9 @@ from data_designer.config.config_builder import DataDesignerConfigBuilder
12
12
  from data_designer.config.data_designer_config import DataDesignerConfig
13
13
  from data_designer.config.default_model_settings import (
14
14
  get_default_model_configs,
15
- get_default_model_providers_missing_api_keys,
16
15
  get_default_provider_name,
17
16
  get_default_providers,
17
+ get_providers_with_missing_api_keys,
18
18
  )
19
19
  from data_designer.config.interface import DataDesignerInterface
20
20
  from data_designer.config.models import (
@@ -28,7 +28,6 @@ from data_designer.config.utils.constants import (
28
28
  MANAGED_ASSETS_PATH,
29
29
  MODEL_CONFIGS_FILE_PATH,
30
30
  MODEL_PROVIDERS_FILE_PATH,
31
- PREDEFINED_PROVIDERS,
32
31
  )
33
32
  from data_designer.config.utils.info import InfoType, InterfaceInfo
34
33
  from data_designer.engine.analysis.dataset_profiler import DataDesignerDatasetProfiler, DatasetProfilerConfig
@@ -317,13 +316,8 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
317
316
 
318
317
  Args:
319
318
  run_config: A RunConfig instance containing runtime settings such as
320
- early shutdown behavior and batch sizing via `buffer_size`. Import RunConfig from
321
- data_designer.essentials.
322
-
323
- Example:
324
- >>> from data_designer.essentials import DataDesigner, RunConfig
325
- >>> dd = DataDesigner()
326
- >>> dd.set_run_config(RunConfig(disable_early_shutdown=True))
319
+ early shutdown behavior, batch sizing via `buffer_size`, and non-inference worker
320
+ concurrency via `non_inference_max_parallel_workers`.
327
321
 
328
322
  Notes:
329
323
  When `disable_early_shutdown=True`, DataDesigner will never terminate generation early
@@ -334,8 +328,11 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
334
328
  def _resolve_model_providers(self, model_providers: list[ModelProvider] | None) -> list[ModelProvider]:
335
329
  if model_providers is None:
336
330
  model_providers = get_default_providers()
337
- missing_api_keys = get_default_model_providers_missing_api_keys()
338
- if len(missing_api_keys) == len(PREDEFINED_PROVIDERS):
331
+ # Check which providers have missing API keys (from YAML file or env vars)
332
+ providers_with_missing_keys = get_providers_with_missing_api_keys(model_providers)
333
+
334
+ if len(providers_with_missing_keys) == len(model_providers):
335
+ # All providers have missing API keys
339
336
  logger.warning(
340
337
  "🚨 You are trying to use a default model provider but your API keys are missing."
341
338
  "\n\t\t\tSet the API key for the default providers you intend to use and re-initialize the Data Designer object."
@@ -85,16 +85,18 @@ def test_run_personas_with_all_flag(
85
85
  # Verify NGC check was called
86
86
  mock_check_ngc.assert_called_once()
87
87
 
88
- # Verify all 5 locales were downloaded
89
- assert mock_download.call_count == 5
88
+ # Verify all 7 locales were downloaded
89
+ assert mock_download.call_count == 7
90
90
 
91
91
  # Verify each locale was downloaded
92
92
  downloaded_locales = [call[0][0] for call in mock_download.call_args_list]
93
93
  assert "en_US" in downloaded_locales
94
94
  assert "en_IN" in downloaded_locales
95
+ assert "en_SG" in downloaded_locales
95
96
  assert "hi_Deva_IN" in downloaded_locales
96
97
  assert "hi_Latn_IN" in downloaded_locales
97
98
  assert "ja_JP" in downloaded_locales
99
+ assert "pt_BR" in downloaded_locales
98
100
 
99
101
 
100
102
  @patch.object(DownloadController, "_download_locale", return_value=True)
@@ -217,12 +219,14 @@ def test_determine_locales_with_all_flag(controller: DownloadController) -> None
217
219
  """Test _determine_locales returns all locales when all_locales=True."""
218
220
  result = controller._determine_locales(locales=None, all_locales=True)
219
221
 
220
- assert len(result) == 5
222
+ assert len(result) == 7
221
223
  assert "en_US" in result
222
224
  assert "en_IN" in result
225
+ assert "en_SG" in result
223
226
  assert "hi_Deva_IN" in result
224
227
  assert "hi_Latn_IN" in result
225
228
  assert "ja_JP" in result
229
+ assert "pt_BR" in result
226
230
 
227
231
 
228
232
  def test_determine_locales_with_valid_locale_flags(controller: DownloadController) -> None:
@@ -15,7 +15,7 @@ def repository() -> PersonaRepository:
15
15
  def test_init(repository: PersonaRepository) -> None:
16
16
  """Test repository initialization creates registry."""
17
17
  assert repository._registry is not None
18
- assert len(repository._registry.locales) == 5
18
+ assert len(repository._registry.locales) == 7
19
19
  assert repository._registry.dataset_prefix == "nemotron-personas-dataset-"
20
20
 
21
21
 
@@ -24,11 +24,11 @@ def test_list_all(repository: PersonaRepository) -> None:
24
24
  locales = repository.list_all()
25
25
 
26
26
  assert isinstance(locales, list)
27
- assert len(locales) == 5
27
+ assert len(locales) == 7
28
28
 
29
29
  # Verify all expected locales are present
30
30
  locale_codes = {locale.code for locale in locales}
31
- assert locale_codes == {"en_US", "en_IN", "hi_Deva_IN", "hi_Latn_IN", "ja_JP"}
31
+ assert locale_codes == {"en_US", "en_IN", "en_SG", "hi_Deva_IN", "hi_Latn_IN", "ja_JP", "pt_BR"}
32
32
 
33
33
  # Verify each locale has required fields
34
34
  for locale in locales:
@@ -51,12 +51,14 @@ def test_get_available_locales(service: DownloadService) -> None:
51
51
  locales = service.get_available_locales()
52
52
 
53
53
  assert isinstance(locales, dict)
54
- assert len(locales) == 5
54
+ assert len(locales) == 7
55
55
  assert "en_US" in locales
56
56
  assert "en_IN" in locales
57
+ assert "en_SG" in locales
57
58
  assert "hi_Deva_IN" in locales
58
59
  assert "hi_Latn_IN" in locales
59
60
  assert "ja_JP" in locales
61
+ assert "pt_BR" in locales
60
62
 
61
63
  # Verify values are locale codes (not descriptions)
62
64
  assert locales["en_US"] == "en_US"
@@ -0,0 +1,4 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ pytest_plugins = ["data_designer.config.testing.fixtures"]
@@ -20,11 +20,9 @@ from data_designer.config.run_config import RunConfig
20
20
  from data_designer.config.sampler_params import CategorySamplerParams, SamplerType
21
21
  from data_designer.config.seed_source import HuggingFaceSeedSource
22
22
  from data_designer.engine.secret_resolver import CompositeResolver, EnvironmentResolver, PlaintextResolver
23
+ from data_designer.engine.testing.stubs import StubHuggingFaceSeedReader
23
24
  from data_designer.interface.data_designer import DataDesigner
24
- from data_designer.interface.errors import (
25
- DataDesignerGenerationError,
26
- DataDesignerProfilingError,
27
- )
25
+ from data_designer.interface.errors import DataDesignerGenerationError, DataDesignerProfilingError
28
26
  from data_designer.lazy_heavy_imports import pd
29
27
 
30
28
  if TYPE_CHECKING:
@@ -56,6 +54,11 @@ def stub_model_providers():
56
54
  ]
57
55
 
58
56
 
57
+ @pytest.fixture
58
+ def stub_seed_reader():
59
+ return StubHuggingFaceSeedReader()
60
+
61
+
59
62
  def test_init_with_custom_secret_resolver(stub_artifact_path, stub_model_providers):
60
63
  """Test DataDesigner initialization with custom secret resolver."""
61
64
  designer = DataDesigner(
@@ -16,7 +16,8 @@ PERF_TEST_TIMEOUT_SECONDS = 30.0
16
16
  def test_import_performance():
17
17
  """Test that average import time never exceeds 6 seconds (1 cold start + 4 warm cache runs)."""
18
18
  # Get the project root (where Makefile is located)
19
- project_root = Path(__file__).parent.parent
19
+ # For workspace packages, need to go up to the workspace root
20
+ project_root = Path(__file__).parent.parent.parent.parent
20
21
 
21
22
  num_runs = 5
22
23
  import_times = []
@@ -1,51 +0,0 @@
1
- name: 🐛 Bug Report
2
- description: Report a bug
3
- labels: ["bug"]
4
- body:
5
- - type: dropdown
6
- id: priority
7
- attributes:
8
- label: Priority Level
9
- description: How urgent is this issue?
10
- default: 2
11
- options:
12
- - Critical (Total blocker)
13
- - High (Major functionality broken)
14
- - Medium (Annoying but has workaround)
15
- - Low (Cosmetic / Minor)
16
- validations:
17
- required: true
18
- - type: textarea
19
- id: description
20
- attributes:
21
- label: Describe the bug
22
- placeholder: A clear and concise description of what the bug is.
23
- validations:
24
- required: true
25
- - type: textarea
26
- id: reproduction
27
- attributes:
28
- label: Steps/Code to reproduce bug
29
- description: |
30
- Please list *minimal* steps or code snippet for us to be able to reproduce the bug.
31
- A helpful guide on how to craft a minimal bug report: http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports
32
- placeholder: |
33
- Please include:
34
- - Minimal code example that reproduces the issue
35
- - Configuration used (if applicable)
36
- - Commands run (if applicable)
37
- - Error traceback
38
- validations:
39
- required: true
40
- - type: textarea
41
- id: expected
42
- attributes:
43
- label: Expected behavior
44
- placeholder: A clear and concise description of what you expected to happen.
45
- validations:
46
- required: true
47
- - type: textarea
48
- id: context
49
- attributes:
50
- label: Additional context
51
- placeholder: Add any other context about the problem here (e.g., screenshots, logs, browser version).
@@ -1,5 +0,0 @@
1
- blank_issues_enabled: false
2
- contact_links:
3
- - name: 💬 Ask a Question
4
- url: https://github.com/NVIDIA-NeMo/DataDesigner/discussions
5
- about: Please use GitHub Discussions for general questions.
@@ -1,32 +0,0 @@
1
- name: 🛠️ Development Task
2
- description: Track internal development work, refactoring, or infrastructure
3
- labels: ["task"]
4
- body:
5
- - type: dropdown
6
- id: priority
7
- attributes:
8
- label: Priority Level
9
- default: 1
10
- options:
11
- - High
12
- - Medium
13
- - Low
14
- validations:
15
- required: true
16
- - type: textarea
17
- id: summary
18
- attributes:
19
- label: Task Summary
20
- placeholder: What is the core objective of this task?
21
- validations:
22
- required: true
23
- - type: textarea
24
- id: technical-details
25
- attributes:
26
- label: Technical Details & Implementation Plan
27
- placeholder: Describe the technical approach, files affected, or logic changes.
28
- - type: input
29
- id: dependencies
30
- attributes:
31
- label: Dependencies
32
- placeholder: "e.g., Blocked by issue #123"
@@ -1,45 +0,0 @@
1
- name: ✨ Feature Request
2
- description: Put in a request for a new feature
3
- labels: ["enhancement"]
4
- body:
5
- - type: markdown
6
- attributes:
7
- value: |
8
- Please use this form to suggest new features or improvements.
9
- - type: dropdown
10
- id: priority
11
- attributes:
12
- label: Priority Level
13
- description: How important is this feature to your workflow?
14
- default: 2
15
- options:
16
- - Critical (Essential for use)
17
- - High (Major improvement)
18
- - Medium (Nice to have)
19
- - Low (Minor tweak)
20
- validations:
21
- required: true
22
- - type: textarea
23
- id: problem
24
- attributes:
25
- label: Is your feature request related to a problem? Please describe.
26
- placeholder: A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
27
- validations:
28
- required: true
29
- - type: textarea
30
- id: solution
31
- attributes:
32
- label: Describe the solution you'd like
33
- placeholder: A clear and concise description of what you want to happen.
34
- validations:
35
- required: true
36
- - type: textarea
37
- id: alternatives
38
- attributes:
39
- label: Describe alternatives you've considered
40
- placeholder: A clear and concise description of any alternative solutions or features you've considered.
41
- - type: textarea
42
- id: context
43
- attributes:
44
- label: Additional context
45
- placeholder: Add any other context or screenshots about the feature request here.
@@ -1,68 +0,0 @@
1
- name: Build docs
2
- on:
3
- workflow_dispatch:
4
- release:
5
- types:
6
- - published
7
-
8
- jobs:
9
- build-notebooks:
10
- uses: ./.github/workflows/build-notebooks.yml
11
- secrets: inherit
12
- deploy:
13
- needs: build-notebooks
14
- runs-on: ubuntu-latest
15
- permissions:
16
- contents: write
17
- steps:
18
- - name: Checkout repository
19
- uses: actions/checkout@v2
20
- - name: Install uv
21
- uses: astral-sh/setup-uv@v6
22
- with:
23
- version: "0.9.5"
24
- - name: Set up Python
25
- run: uv python install 3.11
26
- - name: Install dependencies for docs
27
- run: uv sync --group docs
28
- - name: Download artifact from previous step
29
- uses: actions/download-artifact@v5
30
- with:
31
- name: notebooks
32
- path: docs/notebooks
33
- - name: Find the latest existing release tag
34
- id: get_release
35
- run: |
36
- if [ "${{ github.event_name }}" == "release" ]; then
37
- LATEST_TAG="${{ github.event.release.tag_name }}"
38
- else
39
- echo "::notice::Running manually via workflow_dispatch. Fetching latest release tag..."
40
-
41
- gh auth status || echo "GitHub CLI is not authenticated, relying on GITHUB_TOKEN."
42
-
43
- # We use tr -d '\n' to remove the trailing newline for a clean tag string
44
- LATEST_TAG=$(gh release view --json tagName -q .tagName 2>/dev/null)
45
-
46
- if [ -z "$LATEST_TAG" ]; then
47
- echo "::error::Could not find the latest published release tag. Ensure a release exists."
48
- exit 1
49
- fi
50
- fi
51
-
52
- echo "Latest release tag found: $LATEST_TAG"
53
- echo "LATEST_TAG=$LATEST_TAG" >> $GITHUB_ENV
54
- env:
55
- GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
56
- - name: Extract version from release tag
57
- run: |
58
- # Remove the 'v' prefix and any suffix after a space
59
- VERSION=$(echo ${{ env.LATEST_TAG }} | sed 's/^v//' | sed 's/ .*$//')
60
- echo "::notice::Extracted version: $VERSION"
61
- echo "VERSION=$VERSION" >> $GITHUB_ENV
62
- - name: Setup doc deploy
63
- run: |
64
- git fetch origin gh-pages --depth=1
65
- git config --global user.name "github-actions[bot]"
66
- git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
67
- - name: Build and deploy docs
68
- run: uv run mike deploy --push --update-aliases ${{ env.VERSION }} latest