data-designer 0.1.4__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (410)
  1. data_designer-0.2.0/.github/workflows/build-docs.yml +68 -0
  2. data_designer-0.2.0/.github/workflows/check-colab-notebooks.yml +55 -0
  3. {data_designer-0.1.4 → data_designer-0.2.0}/.gitignore +1 -1
  4. {data_designer-0.1.4 → data_designer-0.2.0}/.pre-commit-config.yaml +1 -0
  5. {data_designer-0.1.4 → data_designer-0.2.0}/Makefile +7 -1
  6. {data_designer-0.1.4 → data_designer-0.2.0}/PKG-INFO +9 -9
  7. {data_designer-0.1.4 → data_designer-0.2.0}/README.md +8 -8
  8. data_designer-0.2.0/docs/assets/recipes/code_generation/text_to_python.py +318 -0
  9. data_designer-0.2.0/docs/assets/recipes/code_generation/text_to_sql.py +323 -0
  10. data_designer-0.2.0/docs/assets/recipes/qa_and_chat/multi_turn_chat.py +204 -0
  11. data_designer-0.2.0/docs/assets/recipes/qa_and_chat/product_info_qa.py +224 -0
  12. data_designer-0.2.0/docs/code_reference/analysis.md +31 -0
  13. data_designer-0.2.0/docs/code_reference/processors.md +6 -0
  14. data_designer-0.2.0/docs/colab_notebooks/1-the-basics.ipynb +540 -0
  15. data_designer-0.2.0/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +570 -0
  16. data_designer-0.2.0/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +468 -0
  17. data_designer-0.2.0/docs/colab_notebooks/4-providing-images-as-context.ipynb +531 -0
  18. {data_designer-0.1.4 → data_designer-0.2.0}/docs/concepts/columns.md +16 -0
  19. {data_designer-0.1.4 → data_designer-0.2.0}/docs/concepts/models/configure-model-settings-with-the-cli.md +4 -3
  20. data_designer-0.2.0/docs/concepts/models/custom-model-settings.md +229 -0
  21. {data_designer-0.1.4 → data_designer-0.2.0}/docs/concepts/models/default-model-settings.md +10 -4
  22. data_designer-0.2.0/docs/concepts/models/inference-parameters.md +148 -0
  23. data_designer-0.2.0/docs/concepts/models/model-configs.md +123 -0
  24. {data_designer-0.1.4 → data_designer-0.2.0}/docs/concepts/models/model-providers.md +5 -1
  25. data_designer-0.2.0/docs/concepts/person_sampling.md +185 -0
  26. data_designer-0.2.0/docs/concepts/processors.md +153 -0
  27. {data_designer-0.1.4 → data_designer-0.2.0}/docs/css/style.css +0 -5
  28. {data_designer-0.1.4 → data_designer-0.2.0}/docs/js/toc-toggle.js +7 -4
  29. {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/1-the-basics.py +9 -7
  30. {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py +9 -7
  31. {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/3-seeding-with-a-dataset.py +9 -7
  32. {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/4-providing-images-as-context.py +5 -11
  33. data_designer-0.2.0/docs/overrides/main.html +31 -0
  34. data_designer-0.2.0/docs/plugins/available.md +3 -0
  35. data_designer-0.2.0/docs/plugins/example.md +306 -0
  36. data_designer-0.2.0/docs/plugins/overview.md +45 -0
  37. data_designer-0.2.0/docs/recipes/cards.md +81 -0
  38. data_designer-0.2.0/docs/recipes/code_generation/text_to_python.md +5 -0
  39. data_designer-0.2.0/docs/recipes/code_generation/text_to_sql.md +7 -0
  40. data_designer-0.2.0/docs/recipes/qa_and_chat/multi_turn_chat.md +5 -0
  41. data_designer-0.2.0/docs/recipes/qa_and_chat/product_info_qa.md +5 -0
  42. data_designer-0.2.0/docs/scripts/generate_colab_notebooks.py +186 -0
  43. {data_designer-0.1.4 → data_designer-0.2.0}/mkdocs.yml +32 -5
  44. {data_designer-0.1.4 → data_designer-0.2.0}/pyproject.toml +11 -16
  45. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/_version.py +2 -2
  46. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/README.md +15 -1
  47. data_designer-0.2.0/src/data_designer/cli/commands/download.py +56 -0
  48. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/commands/list.py +4 -18
  49. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/controllers/__init__.py +2 -1
  50. data_designer-0.2.0/src/data_designer/cli/controllers/download_controller.py +217 -0
  51. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/controllers/model_controller.py +4 -3
  52. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/forms/field.py +65 -19
  53. data_designer-0.2.0/src/data_designer/cli/forms/model_builder.py +332 -0
  54. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/main.py +11 -1
  55. data_designer-0.2.0/src/data_designer/cli/repositories/persona_repository.py +88 -0
  56. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/services/__init__.py +2 -1
  57. data_designer-0.2.0/src/data_designer/cli/services/download_service.py +97 -0
  58. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/ui.py +131 -0
  59. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/utils.py +34 -0
  60. data_designer-0.2.0/src/data_designer/config/analysis/column_profilers.py +157 -0
  61. data_designer-0.2.0/src/data_designer/config/analysis/column_statistics.py +418 -0
  62. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/analysis/dataset_profiler.py +23 -5
  63. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/analysis/utils/reporting.py +3 -3
  64. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/base.py +3 -3
  65. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/column_configs.py +27 -6
  66. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/column_types.py +24 -17
  67. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/config_builder.py +34 -26
  68. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/data_designer_config.py +7 -7
  69. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/datastore.py +6 -6
  70. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/default_model_settings.py +27 -34
  71. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/exports.py +14 -1
  72. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/models.py +155 -29
  73. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/preview_results.py +5 -4
  74. data_designer-0.2.0/src/data_designer/config/processors.py +146 -0
  75. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/sampler_constraints.py +1 -2
  76. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/sampler_params.py +31 -31
  77. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/seed.py +1 -2
  78. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/code_lang.py +4 -5
  79. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/constants.py +31 -8
  80. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/io_helpers.py +5 -5
  81. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/misc.py +1 -4
  82. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/numerical_helpers.py +2 -2
  83. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/type_helpers.py +3 -3
  84. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/validation.py +39 -9
  85. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/visualization.py +62 -15
  86. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/validator_params.py +4 -8
  87. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/column_profilers/base.py +0 -7
  88. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +2 -3
  89. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/column_statistics.py +16 -16
  90. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/dataset_profiler.py +25 -4
  91. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +71 -49
  92. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/utils/judge_score_processing.py +5 -5
  93. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/generators/base.py +34 -0
  94. data_designer-0.2.0/src/data_designer/engine/column_generators/generators/embedding.py +45 -0
  95. data_designer-0.1.4/src/data_designer/engine/column_generators/generators/llm_generators.py → data_designer-0.2.0/src/data_designer/engine/column_generators/generators/llm_completion.py +17 -49
  96. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/registry.py +4 -2
  97. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +5 -6
  98. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/configurable_task.py +2 -2
  99. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/artifact_storage.py +14 -5
  100. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/column_wise_builder.py +12 -8
  101. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/utils/concurrency.py +6 -6
  102. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/facade.py +66 -9
  103. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/litellm_overrides.py +5 -6
  104. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/parsers/errors.py +2 -4
  105. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/parsers/parser.py +2 -3
  106. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/parsers/postprocessors.py +3 -4
  107. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/parsers/types.py +4 -4
  108. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/registry.py +20 -11
  109. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/usage.py +7 -9
  110. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/ginja/ast.py +1 -2
  111. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/processors/drop_columns.py +1 -1
  112. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/processors/registry.py +3 -0
  113. data_designer-0.2.0/src/data_designer/engine/processing/processors/schema_transform.py +53 -0
  114. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/utils.py +40 -2
  115. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/registry/base.py +12 -12
  116. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/constraints.py +1 -2
  117. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/data_sources/base.py +14 -14
  118. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/phone_number.py +1 -2
  119. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/people_gen.py +3 -7
  120. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/validators/base.py +2 -2
  121. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/interface/data_designer.py +12 -0
  122. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/interface/results.py +36 -0
  123. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/logging.py +2 -2
  124. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/plugin_manager.py +3 -3
  125. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/plugins/plugin.py +3 -3
  126. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/plugins/registry.py +2 -2
  127. data_designer-0.2.0/tests/cli/commands/test_download_command.py +71 -0
  128. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/conftest.py +5 -5
  129. data_designer-0.2.0/tests/cli/controllers/test_download_controller.py +398 -0
  130. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/controllers/test_model_controller.py +2 -2
  131. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/forms/test_field.py +142 -12
  132. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/forms/test_model_builder.py +211 -45
  133. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/repositories/test_model_repository.py +3 -1
  134. data_designer-0.2.0/tests/cli/repositories/test_persona_repository.py +171 -0
  135. data_designer-0.2.0/tests/cli/services/test_download_service.py +270 -0
  136. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/services/test_model_service.py +7 -3
  137. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/test_cli_utils.py +73 -0
  138. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/analysis/conftest.py +6 -6
  139. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/analysis/test_column_statistics.py +12 -12
  140. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_columns.py +37 -9
  141. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_config_builder.py +7 -7
  142. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_default_model_settings.py +36 -11
  143. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_models.py +165 -31
  144. data_designer-0.2.0/tests/config/test_processors.py +139 -0
  145. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_type_helpers.py +8 -8
  146. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_validation.py +37 -3
  147. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_visualization.py +16 -1
  148. {data_designer-0.1.4 → data_designer-0.2.0}/tests/conftest.py +2 -2
  149. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/column_profilers/test_base.py +0 -11
  150. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +19 -19
  151. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/conftest.py +5 -5
  152. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/test_column_statistics_calculator.py +4 -4
  153. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/utils/test_column_statistics_calculations.py +92 -48
  154. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/utils/test_judge_score_processing.py +35 -35
  155. data_designer-0.2.0/tests/engine/column_generators/generators/test_embedding.py +49 -0
  156. data_designer-0.1.4/tests/engine/column_generators/generators/test_llm_generators.py → data_designer-0.2.0/tests/engine/column_generators/generators/test_llm_completion_generators.py +2 -19
  157. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/test_registry.py +1 -1
  158. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/utils/test_judge_score_factory.py +22 -0
  159. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/test_column_wise_builder.py +5 -1
  160. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/conftest.py +15 -3
  161. data_designer-0.2.0/tests/engine/models/test_facade.py +224 -0
  162. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/test_model_registry.py +44 -34
  163. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/test_usage.py +12 -12
  164. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/processors/test_drop_columns.py +4 -2
  165. data_designer-0.2.0/tests/engine/processing/processors/test_schema_transform.py +137 -0
  166. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/test_utils.py +17 -0
  167. data_designer-0.2.0/tests/engine/resources/__init__.py +2 -0
  168. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/test_configurable_task.py +6 -4
  169. {data_designer-0.1.4 → data_designer-0.2.0}/tests/essentials/test_init.py +12 -0
  170. {data_designer-0.1.4 → data_designer-0.2.0}/tests/interface/test_data_designer.py +3 -1
  171. {data_designer-0.1.4 → data_designer-0.2.0}/tests/plugins/test_plugin_registry.py +1 -2
  172. {data_designer-0.1.4 → data_designer-0.2.0}/uv.lock +100 -0
  173. data_designer-0.1.4/.github/workflows/build-docs.yml +0 -39
  174. data_designer-0.1.4/docs/concepts/models/model-configs.md +0 -244
  175. data_designer-0.1.4/docs/concepts/person_sampling.md +0 -36
  176. data_designer-0.1.4/docs/concepts/plugins.md +0 -0
  177. data_designer-0.1.4/docs/overrides/main.html +0 -18
  178. data_designer-0.1.4/src/data_designer/cli/forms/model_builder.py +0 -125
  179. data_designer-0.1.4/src/data_designer/config/analysis/column_profilers.py +0 -89
  180. data_designer-0.1.4/src/data_designer/config/analysis/column_statistics.py +0 -274
  181. data_designer-0.1.4/src/data_designer/config/processors.py +0 -41
  182. data_designer-0.1.4/tests/config/test_processors.py +0 -66
  183. data_designer-0.1.4/tests/engine/models/test_facade.py +0 -174
  184. {data_designer-0.1.4 → data_designer-0.2.0}/.github/workflows/build-notebooks.yml +0 -0
  185. {data_designer-0.1.4 → data_designer-0.2.0}/.github/workflows/ci.yml +0 -0
  186. {data_designer-0.1.4 → data_designer-0.2.0}/.github/workflows/dco-assistant.yml +0 -0
  187. {data_designer-0.1.4 → data_designer-0.2.0}/.github/workflows/pack-tutorials.yml +0 -0
  188. {data_designer-0.1.4 → data_designer-0.2.0}/.github/workflows/semantic-pull-requests.yml +0 -0
  189. {data_designer-0.1.4 → data_designer-0.2.0}/AGENTS.md +0 -0
  190. {data_designer-0.1.4 → data_designer-0.2.0}/CLAUDE.md +0 -0
  191. {data_designer-0.1.4 → data_designer-0.2.0}/CODE_OF_CONDUCT.md +0 -0
  192. {data_designer-0.1.4 → data_designer-0.2.0}/CONTRIBUTING.md +0 -0
  193. {data_designer-0.1.4 → data_designer-0.2.0}/DCO +0 -0
  194. {data_designer-0.1.4 → data_designer-0.2.0}/LICENSE +0 -0
  195. {data_designer-0.1.4 → data_designer-0.2.0}/VERSIONING.md +0 -0
  196. {data_designer-0.1.4 → data_designer-0.2.0}/docs/CONTRIBUTING.md +0 -0
  197. {data_designer-0.1.4 → data_designer-0.2.0}/docs/assets/palette-favicon.png +0 -0
  198. {data_designer-0.1.4 → data_designer-0.2.0}/docs/code_reference/column_configs.md +0 -0
  199. {data_designer-0.1.4 → data_designer-0.2.0}/docs/code_reference/config_builder.md +0 -0
  200. {data_designer-0.1.4 → data_designer-0.2.0}/docs/code_reference/data_designer_config.md +0 -0
  201. {data_designer-0.1.4 → data_designer-0.2.0}/docs/code_reference/models.md +0 -0
  202. {data_designer-0.1.4 → data_designer-0.2.0}/docs/code_reference/sampler_params.md +0 -0
  203. {data_designer-0.1.4 → data_designer-0.2.0}/docs/code_reference/validator_params.md +0 -0
  204. {data_designer-0.1.4 → data_designer-0.2.0}/docs/concepts/validators.md +0 -0
  205. {data_designer-0.1.4 → data_designer-0.2.0}/docs/css/mkdocstrings.css +0 -0
  206. {data_designer-0.1.4 → data_designer-0.2.0}/docs/index.md +0 -0
  207. {data_designer-0.1.4 → data_designer-0.2.0}/docs/installation.md +0 -0
  208. {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/README.md +0 -0
  209. {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/_README.md +0 -0
  210. {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/_pyproject.toml +0 -0
  211. {data_designer-0.1.4 → data_designer-0.2.0}/docs/quick-start.md +0 -0
  212. {data_designer-0.1.4 → data_designer-0.2.0}/scripts/update_license_headers.py +0 -0
  213. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/__init__.py +0 -0
  214. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/__init__.py +0 -0
  215. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/commands/__init__.py +0 -0
  216. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/commands/models.py +0 -0
  217. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/commands/providers.py +0 -0
  218. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/commands/reset.py +0 -0
  219. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/controllers/provider_controller.py +0 -0
  220. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/forms/__init__.py +0 -0
  221. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/forms/builder.py +0 -0
  222. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/forms/form.py +0 -0
  223. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/forms/provider_builder.py +0 -0
  224. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/repositories/__init__.py +0 -0
  225. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/repositories/base.py +0 -0
  226. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/repositories/model_repository.py +0 -0
  227. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/repositories/provider_repository.py +0 -0
  228. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/services/model_service.py +0 -0
  229. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/services/provider_service.py +0 -0
  230. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/__init__.py +0 -0
  231. {data_designer-0.1.4/src/data_designer/engine → data_designer-0.2.0/src/data_designer/config/analysis}/__init__.py +0 -0
  232. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/analysis/utils/errors.py +0 -0
  233. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/dataset_builders.py +0 -0
  234. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/errors.py +0 -0
  235. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/interface.py +0 -0
  236. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/errors.py +0 -0
  237. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/info.py +0 -0
  238. {data_designer-0.1.4/src/data_designer/engine/column_generators → data_designer-0.2.0/src/data_designer/engine}/__init__.py +0 -0
  239. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
  240. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/errors.py +0 -0
  241. {data_designer-0.1.4/src/data_designer/engine/column_generators/generators → data_designer-0.2.0/src/data_designer/engine/column_generators}/__init__.py +0 -0
  242. {data_designer-0.1.4/src/data_designer/engine/dataset_builders/utils → data_designer-0.2.0/src/data_designer/engine/column_generators/generators}/__init__.py +0 -0
  243. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
  244. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
  245. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
  246. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
  247. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
  248. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
  249. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/errors.py +0 -0
  250. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
  251. {data_designer-0.1.4/src/data_designer/engine/models → data_designer-0.2.0/src/data_designer/engine/dataset_builders/utils}/__init__.py +0 -0
  252. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
  253. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
  254. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
  255. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
  256. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/errors.py +0 -0
  257. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/model_provider.py +0 -0
  258. {data_designer-0.1.4/src/data_designer/engine/models/parsers → data_designer-0.2.0/src/data_designer/engine/models}/__init__.py +0 -0
  259. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/errors.py +0 -0
  260. {data_designer-0.1.4/src/data_designer/engine/processing/ginja → data_designer-0.2.0/src/data_designer/engine/models/parsers}/__init__.py +0 -0
  261. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
  262. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/recipes/base.py +0 -0
  263. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
  264. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/utils.py +0 -0
  265. {data_designer-0.1.4/src/data_designer/engine/processing/gsonschema → data_designer-0.2.0/src/data_designer/engine/processing/ginja}/__init__.py +0 -0
  266. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/ginja/environment.py +0 -0
  267. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
  268. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/ginja/record.py +0 -0
  269. {data_designer-0.1.4/src/data_designer/engine/sampling_gen/entities → data_designer-0.2.0/src/data_designer/engine/processing/gsonschema}/__init__.py +0 -0
  270. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
  271. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
  272. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
  273. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
  274. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/processors/base.py +0 -0
  275. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
  276. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/registry/errors.py +0 -0
  277. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
  278. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
  279. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/resources/managed_storage.py +0 -0
  280. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/resources/resource_provider.py +0 -0
  281. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/resources/seed_dataset_data_store.py +0 -0
  282. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/column.py +0 -0
  283. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
  284. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
  285. {data_designer-0.1.4/src/data_designer/interface → data_designer-0.2.0/src/data_designer/engine/sampling_gen/entities}/__init__.py +0 -0
  286. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
  287. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -0
  288. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
  289. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
  290. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
  291. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
  292. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/errors.py +0 -0
  293. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/generator.py +0 -0
  294. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
  295. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
  296. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/schema.py +0 -0
  297. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
  298. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/utils.py +0 -0
  299. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/secret_resolver.py +0 -0
  300. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/validators/__init__.py +0 -0
  301. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/validators/local_callable.py +0 -0
  302. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/validators/python.py +0 -0
  303. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/validators/remote.py +0 -0
  304. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/validators/sql.py +0 -0
  305. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/errors.py +0 -0
  306. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/essentials/__init__.py +0 -0
  307. {data_designer-0.1.4/tests/config/utils → data_designer-0.2.0/src/data_designer/interface}/__init__.py +0 -0
  308. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/interface/errors.py +0 -0
  309. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/plugins/__init__.py +0 -0
  310. {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/plugins/errors.py +0 -0
  311. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/commands/test_list_command.py +0 -0
  312. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/commands/test_models_command.py +0 -0
  313. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/commands/test_providers_command.py +0 -0
  314. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/commands/test_reset_command.py +0 -0
  315. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/controllers/test_provider_controller.py +0 -0
  316. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/forms/test_form.py +0 -0
  317. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/forms/test_provider_builder.py +0 -0
  318. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/repositories/test_provider_repository.py +0 -0
  319. {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/services/test_provider_service.py +0 -0
  320. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/analysis/test_dataset_profiler_results.py +0 -0
  321. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/analysis/utils/test_reporting.py +0 -0
  322. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_data_designer_config.py +0 -0
  323. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_datastore.py +0 -0
  324. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_sampler_constraints.py +0 -0
  325. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_sampler_params.py +0 -0
  326. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_seed.py +0 -0
  327. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_validator_params.py +0 -0
  328. {data_designer-0.1.4/tests/engine/column_generators/generators → data_designer-0.2.0/tests/config/utils}/__init__.py +0 -0
  329. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_code_lang.py +0 -0
  330. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_info.py +0 -0
  331. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_io_helpers.py +0 -0
  332. {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_misc.py +0 -0
  333. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
  334. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
  335. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
  336. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/test_dataset_profiler.py +0 -0
  337. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/test_errors.py +0 -0
  338. {data_designer-0.1.4/tests/engine/processing → data_designer-0.2.0/tests/engine/column_generators/generators}/__init__.py +0 -0
  339. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
  340. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/generators/test_expression.py +0 -0
  341. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/generators/test_samplers.py +0 -0
  342. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
  343. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/generators/test_validation.py +0 -0
  344. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
  345. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
  346. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/conftest.py +0 -0
  347. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
  348. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
  349. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
  350. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
  351. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
  352. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
  353. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/parsers/test_parser.py +0 -0
  354. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/parsers/test_parsers_types.py +0 -0
  355. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/parsers/test_postprocessors.py +0 -0
  356. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
  357. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/recipes/test_recipe_base.py +0 -0
  358. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/recipes/test_response_recipes.py +0 -0
  359. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/stub_secrets.json +0 -0
  360. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/test_litellm_overrides.py +0 -0
  361. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/test_model_errors.py +0 -0
  362. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/test_model_utils.py +0 -0
  363. {data_designer-0.1.4/tests/engine/processing/ginja → data_designer-0.2.0/tests/engine/processing}/__init__.py +0 -0
  364. {data_designer-0.1.4/tests/engine/processing/gsonschema → data_designer-0.2.0/tests/engine/processing/ginja}/__init__.py +0 -0
  365. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/ginja/test_ast.py +0 -0
  366. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/ginja/test_environment.py +0 -0
  367. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/ginja/test_exceptions.py +0 -0
  368. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/ginja/test_record.py +0 -0
  369. {data_designer-0.1.4/tests/engine/processing/processors → data_designer-0.2.0/tests/engine/processing/gsonschema}/__init__.py +0 -0
  370. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
  371. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
  372. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/gsonschema/test_types.py +0 -0
  373. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/gsonschema/test_validators.py +0 -0
  374. {data_designer-0.1.4/tests/engine/registry → data_designer-0.2.0/tests/engine/processing/processors}/__init__.py +0 -0
  375. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/processors/test_registry.py +0 -0
  376. {data_designer-0.1.4/tests/engine/resources → data_designer-0.2.0/tests/engine/registry}/__init__.py +0 -0
  377. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/registry/conftest.py +0 -0
  378. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/registry/test_base.py +0 -0
  379. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/registry/test_data_designer_registry.py +0 -0
  380. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/registry/test_errors.py +0 -0
  381. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/resources/conftest.py +0 -0
  382. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
  383. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
  384. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/resources/test_managed_storage.py +0 -0
  385. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/resources/test_resource_provider.py +0 -0
  386. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/conftest.py +0 -0
  387. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
  388. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
  389. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
  390. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
  391. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/entities/test_person.py +0 -0
  392. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
  393. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_column.py +0 -0
  394. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_constraints.py +0 -0
  395. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_generator.py +0 -0
  396. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
  397. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_people_gen.py +0 -0
  398. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_schema.py +0 -0
  399. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_utils.py +0 -0
  400. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/test_engine_errors.py +0 -0
  401. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/test_model_provider.py +0 -0
  402. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/test_secret_resolver.py +0 -0
  403. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/validators/test_local_callable.py +0 -0
  404. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/validators/test_python.py +0 -0
  405. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/validators/test_remote.py +0 -0
  406. {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/validators/test_sql.py +0 -0
  407. {data_designer-0.1.4 → data_designer-0.2.0}/tests/interface/test_results.py +0 -0
  408. {data_designer-0.1.4 → data_designer-0.2.0}/tests/plugins/test_plugin.py +0 -0
  409. {data_designer-0.1.4 → data_designer-0.2.0}/tests/test_logging.py +0 -0
  410. {data_designer-0.1.4 → data_designer-0.2.0}/tests/test_plugin_manager.py +0 -0
@@ -0,0 +1,68 @@
1
+ name: Build docs
2
+ on:
3
+ workflow_dispatch:
4
+ release:
5
+ types:
6
+ - published
7
+
8
+ jobs:
9
+ build-notebooks:
10
+ uses: ./.github/workflows/build-notebooks.yml
11
+ secrets: inherit
12
+ deploy:
13
+ needs: build-notebooks
14
+ runs-on: ubuntu-latest
15
+ permissions:
16
+ contents: write
17
+ steps:
18
+ - name: Checkout repository
19
+ uses: actions/checkout@v2
20
+ - name: Install uv
21
+ uses: astral-sh/setup-uv@v6
22
+ with:
23
+ version: "0.9.5"
24
+ - name: Set up Python
25
+ run: uv python install
26
+ - name: Install dependencies for docs
27
+ run: uv sync --group docs
28
+ - name: Download artifact from previous step
29
+ uses: actions/download-artifact@v5
30
+ with:
31
+ name: notebooks
32
+ path: docs/notebooks
33
+ - name: Find the latest existing release tag
34
+ id: get_release
35
+ run: |
36
+ if [ "${{ github.event_name }}" == "release" ]; then
37
+ LATEST_TAG="${{ github.event.release.tag_name }}"
38
+ else
39
+ echo "::notice::Running manually via workflow_dispatch. Fetching latest release tag..."
40
+
41
+ gh auth status || echo "GitHub CLI is not authenticated, relying on GITHUB_TOKEN."
42
+
43
+ # Command substitution $(...) already strips the trailing newline, so the tag string is clean
44
+ LATEST_TAG=$(gh release view --json tagName -q .tagName 2>/dev/null)
45
+
46
+ if [ -z "$LATEST_TAG" ]; then
47
+ echo "::error::Could not find the latest published release tag. Ensure a release exists."
48
+ exit 1
49
+ fi
50
+ fi
51
+
52
+ echo "Latest release tag found: $LATEST_TAG"
53
+ echo "LATEST_TAG=$LATEST_TAG" >> $GITHUB_ENV
54
+ env:
55
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
56
+ - name: Extract version from release tag
57
+ run: |
58
+ # Remove the 'v' prefix and any suffix after a space
59
+ VERSION=$(echo ${{ env.LATEST_TAG }} | sed 's/^v//' | sed 's/ .*$//')
60
+ echo "::notice::Extracted version: $VERSION"
61
+ echo "VERSION=$VERSION" >> $GITHUB_ENV
62
+ - name: Setup doc deploy
63
+ run: |
64
+ git fetch origin gh-pages --depth=1
65
+ git config --global user.name "github-actions[bot]"
66
+ git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
67
+ - name: Build and deploy docs
68
+ run: uv run mike deploy --push --update-aliases ${{ env.VERSION }} latest
@@ -0,0 +1,55 @@
1
+ name: Check Colab notebooks
2
+
3
+ on:
4
+ push:
5
+ branches: [ main ]
6
+ paths:
7
+ - 'docs/notebook_source/*.py'
8
+ pull_request:
9
+ branches: [ main ]
10
+ paths:
11
+ - 'docs/notebook_source/*.py'
12
+ workflow_dispatch:
13
+
14
+ jobs:
15
+ check-colab-notebooks:
16
+ name: Check Colab Notebooks
17
+ runs-on: ubuntu-latest
18
+
19
+ steps:
20
+ - name: Checkout code
21
+ uses: actions/checkout@v4
22
+
23
+ - name: Install uv
24
+ uses: astral-sh/setup-uv@v5
25
+ with:
26
+ version: "latest"
27
+ python-version: "3.11"
28
+ enable-cache: true
29
+
30
+ - name: Install dependencies
31
+ run: |
32
+ uv sync --group notebooks --group docs
33
+
34
+ - name: Generate Colab notebooks
35
+ run: |
36
+ make generate-colab-notebooks
37
+
38
+ - name: Check for differences
39
+ run: |
40
+ # Get the diff, filtering out cell ID changes (which are randomly generated)
41
+ # Filter out: file markers (--- and +++), and "id" lines
42
+ MEANINGFUL_DIFF=$(git diff docs/colab_notebooks/ | grep -E '^[+-]' | grep -v '^---' | grep -v '^+++' | grep -vE '^[+-]\s*"id": "[0-9a-fA-F]+",?$' || true)
43
+
44
+ if [ -z "$MEANINGFUL_DIFF" ]; then
45
+ echo "✅ Colab notebooks are up-to-date (ignoring cell ID changes)"
46
+ else
47
+ echo "❌ Colab notebooks are out of sync with source files"
48
+ echo ""
49
+ echo "The generated notebooks differ from the committed ones."
50
+ echo "Please run 'make generate-colab-notebooks' locally and commit the changes."
51
+ echo ""
52
+ echo "Differences found:"
53
+ echo "$MEANINGFUL_DIFF"
54
+ exit 1
55
+ fi
@@ -90,4 +90,4 @@ src/data_designer/_version.py
90
90
  docs/notebooks/
91
91
  docs/notebook_source/*.ipynb
92
92
  docs/notebook_source/*.csv
93
- docs/notebook_source/artifacts/
93
+ docs/**/artifacts/
@@ -6,6 +6,7 @@ repos:
6
6
  - id: trailing-whitespace
7
7
  - id: end-of-file-fixer
8
8
  - id: check-yaml
9
+ exclude: ^mkdocs\.yml$
9
10
  - id: check-added-large-files
10
11
  - id: check-json
11
12
  - id: check-toml
@@ -36,6 +36,7 @@ help:
36
36
  @echo "🛠️ Utilities:"
37
37
  @echo " clean - Remove coverage reports and cache files"
38
38
  @echo " convert-execute-notebooks - Convert notebooks from .py to .ipynb using jupytext"
39
+ @echo " generate-colab-notebooks - Generate Colab-compatible notebooks"
39
40
  @echo " serve-docs-locally - Serve documentation locally"
40
41
  @echo " check-license-headers - Check if all files have license headers"
41
42
  @echo " update-license-headers - Add license headers to all files"
@@ -95,6 +96,11 @@ convert-execute-notebooks:
95
96
  rm docs/notebook_source/*.csv
96
97
  @echo "✅ Notebooks created in docs/notebooks/"
97
98
 
99
+ generate-colab-notebooks:
100
+ @echo "📓 Generating Colab-compatible notebooks..."
101
+ uv run --group notebooks python docs/scripts/generate_colab_notebooks.py
102
+ @echo "✅ Colab notebooks created in docs/colab_notebooks/"
103
+
98
104
  serve-docs-locally:
99
105
  @echo "📝 Building and serving docs..."
100
106
  uv sync --group docs
@@ -125,4 +131,4 @@ install-dev-notebooks:
125
131
  $(call install-pre-commit-hooks)
126
132
  @echo "✅ Dev + notebooks installation complete!"
127
133
 
128
- .PHONY: clean coverage format format-check lint lint-fix test check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks
134
+ .PHONY: clean coverage format format-check lint lint-fix test check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks generate-colab-notebooks
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer
3
- Version: 0.1.4
3
+ Version: 0.2.0
4
4
  Summary: General framework for synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -144,12 +144,12 @@ preview.display_sample_record()
144
144
 
145
145
  ### 📚 Learn more
146
146
 
147
- - **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/quick-start/)** – Detailed walkthrough with more examples
148
- - **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/)** – Step-by-step interactive tutorials
149
- - **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
150
- - **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
151
- - **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/concepts/models/model-configs/)** – Configure custom models and providers
152
- - **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
147
+ - **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/latest/quick-start/)** – Detailed walkthrough with more examples
148
+ - **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/)** – Step-by-step interactive tutorials
149
+ - **[Column Types](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
150
+ - **[Validators](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
151
+ - **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/models/model-configs/)** – Configure custom models and providers
152
+ - **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
153
153
 
154
154
  ### 🔧 Configure models via CLI
155
155
 
@@ -161,7 +161,7 @@ data-designer config list # View current settings
161
161
 
162
162
  ### 🤝 Get involved
163
163
 
164
- - **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING)** – Help improve Data Designer
164
+ - **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/latest/CONTRIBUTING)** – Help improve Data Designer
165
165
  - **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or make a feature request
166
166
 
167
167
  ---
@@ -178,7 +178,7 @@ If you use NeMo Data Designer in your research, please cite it using the followi
178
178
 
179
179
  ```bibtex
180
180
  @misc{nemo-data-designer,
181
- author = {The NeMo Data Designer Team},
181
+ author = {The NeMo Data Designer Team, NVIDIA},
182
182
  title = {NeMo Data Designer: A framework for generating synthetic data from scratch or based on your own seed data},
183
183
  howpublished = {\url{https://github.com/NVIDIA-NeMo/DataDesigner}},
184
184
  year = {2025},
@@ -95,12 +95,12 @@ preview.display_sample_record()
95
95
 
96
96
  ### 📚 Learn more
97
97
 
98
- - **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/quick-start/)** – Detailed walkthrough with more examples
99
- - **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/)** – Step-by-step interactive tutorials
100
- - **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
101
- - **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
102
- - **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/concepts/models/model-configs/)** – Configure custom models and providers
103
- - **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
98
+ - **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/latest/quick-start/)** – Detailed walkthrough with more examples
99
+ - **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/)** – Step-by-step interactive tutorials
100
+ - **[Column Types](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
101
+ - **[Validators](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
102
+ - **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/models/model-configs/)** – Configure custom models and providers
103
+ - **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
104
104
 
105
105
  ### 🔧 Configure models via CLI
106
106
 
@@ -112,7 +112,7 @@ data-designer config list # View current settings
112
112
 
113
113
  ### 🤝 Get involved
114
114
 
115
- - **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING)** – Help improve Data Designer
115
+ - **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/latest/CONTRIBUTING)** – Help improve Data Designer
116
116
  - **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or make a feature request
117
117
 
118
118
  ---
@@ -129,7 +129,7 @@ If you use NeMo Data Designer in your research, please cite it using the followi
129
129
 
130
130
  ```bibtex
131
131
  @misc{nemo-data-designer,
132
- author = {The NeMo Data Designer Team},
132
+ author = {The NeMo Data Designer Team, NVIDIA},
133
133
  title = {NeMo Data Designer: A framework for generating synthetic data from scratch or based on your own seed data},
134
134
  howpublished = {\url{https://github.com/NVIDIA-NeMo/DataDesigner}},
135
135
  year = {2025},
@@ -0,0 +1,318 @@
1
+ from pathlib import Path
2
+
3
+ from data_designer.essentials import (
4
+ CategorySamplerParams,
5
+ CodeLang,
6
+ CodeValidatorParams,
7
+ DataDesigner,
8
+ DataDesignerConfigBuilder,
9
+ LLMCodeColumnConfig,
10
+ LLMJudgeColumnConfig,
11
+ LLMTextColumnConfig,
12
+ SamplerColumnConfig,
13
+ SamplerType,
14
+ Score,
15
+ SubcategorySamplerParams,
16
+ ValidationColumnConfig,
17
+ ValidatorType,
18
+ )
19
+ from data_designer.interface.results import DatasetCreationResults
20
+
21
+
22
+ def build_config(model_alias: str) -> DataDesignerConfigBuilder:
23
+ config_builder = DataDesignerConfigBuilder()
24
+
25
+ config_builder.add_column(
26
+ SamplerColumnConfig(
27
+ name="industry_sector",
28
+ sampler_type=SamplerType.CATEGORY,
29
+ params=CategorySamplerParams(
30
+ values=[
31
+ "Healthcare",
32
+ "Finance",
33
+ "Technology",
34
+ ],
35
+ ),
36
+ ),
37
+ )
38
+
39
+ config_builder.add_column(
40
+ SamplerColumnConfig(
41
+ name="topic",
42
+ sampler_type=SamplerType.SUBCATEGORY,
43
+ params=SubcategorySamplerParams(
44
+ category="industry_sector",
45
+ values={
46
+ "Healthcare": [
47
+ "Electronic Health Records (EHR) Systems",
48
+ "Telemedicine Platforms",
49
+ "AI-Powered Diagnostic Tools",
50
+ ],
51
+ "Finance": [
52
+ "Fraud Detection Software",
53
+ "Automated Trading Systems",
54
+ "Personal Finance Apps",
55
+ ],
56
+ "Technology": [
57
+ "Cloud Computing Platforms",
58
+ "Artificial Intelligence and Machine Learning Platforms",
59
+ "DevOps and CI/CD Tools",
60
+ ],
61
+ },
62
+ ),
63
+ ),
64
+ )
65
+
66
+ config_builder.add_column(
67
+ SamplerColumnConfig(
68
+ name="code_complexity",
69
+ sampler_type=SamplerType.CATEGORY,
70
+ params=CategorySamplerParams(
71
+ values=[
72
+ "Beginner",
73
+ "Intermediate",
74
+ "Advanced",
75
+ ],
76
+ ),
77
+ ),
78
+ )
79
+
80
+ config_builder.add_column(
81
+ SamplerColumnConfig(
82
+ name="code_concept",
83
+ sampler_type=SamplerType.SUBCATEGORY,
84
+ params=SubcategorySamplerParams(
85
+ category="code_complexity",
86
+ values={
87
+ "Beginner": [
88
+ "Variables",
89
+ "Data Types",
90
+ "Functions",
91
+ "Loops",
92
+ "Classes",
93
+ ],
94
+ "Intermediate": [
95
+ "List Comprehensions",
96
+ "Object-oriented programming",
97
+ "Lambda Functions",
98
+ "Web frameworks",
99
+ "Pandas",
100
+ ],
101
+ "Advanced": [
102
+ "Multithreading",
103
+ "Context Managers",
104
+ "Generators",
105
+ ],
106
+ },
107
+ ),
108
+ ),
109
+ )
110
+
111
+ config_builder.add_column(
112
+ SamplerColumnConfig(
113
+ name="instruction_phrase",
114
+ sampler_type=SamplerType.CATEGORY,
115
+ params=CategorySamplerParams(
116
+ values=[
117
+ "Write a function that",
118
+ "Create a class that",
119
+ "Implement a script",
120
+ "Can you create a function",
121
+ "Develop a module that",
122
+ ],
123
+ ),
124
+ ),
125
+ )
126
+
127
+ config_builder.add_column(
128
+ LLMTextColumnConfig(
129
+ name="instruction",
130
+ model_alias=model_alias,
131
+ system_prompt=("You are an expert at generating clear and specific programming tasks."),
132
+ prompt=(
133
+ "Generate an instruction to create Python code that solves a specific problem.\n"
134
+ "Each instruction should begin with one of the following phrases: {{ instruction_phrase }}.\n\n"
135
+ "Important Guidelines:\n"
136
+ "* Industry Relevance: Ensure the instruction pertains to the {{ industry_sector }} sector and {{ topic }} topic.\n"
137
+ "* Code Complexity: Tailor the instruction to the {{ code_complexity }} level. Utilize relevant {{ code_concept }} where appropriate to match the complexity level.\n"
138
+ "* Clarity and Specificity: Make the problem statement clear and unambiguous. Provide sufficient context to understand the requirements without being overly verbose.\n"
139
+ "* Response Formatting: Do not include any markers such as ### Response ### in the instruction.\n"
140
+ ),
141
+ )
142
+ )
143
+
144
+ config_builder.add_column(
145
+ LLMCodeColumnConfig(
146
+ name="code_implementation",
147
+ model_alias=model_alias,
148
+ code_lang=CodeLang.PYTHON,
149
+ system_prompt=(
150
+ "You are an expert Python programmer who writes clean, efficient, and well-documented code."
151
+ ),
152
+ prompt=(
153
+ "Write Python code for the following instruction:\n"
154
+ "Instruction: {{ instruction }}\n\n"
155
+ "Important Guidelines:\n"
156
+ "* Code Quality: Your code should be clean, complete, self-contained, and accurate.\n"
157
+ "* Code Validity: Please ensure that your Python code is executable and does not contain any errors.\n"
158
+ "* Packages: Remember to import any necessary libraries, and to use all libraries you import.\n"
159
+ "* Complexity & Concepts: The code should be written at a {{ code_complexity }} level, making use of concepts such as {{code_concept}}.\n"
160
+ ),
161
+ )
162
+ )
163
+
164
+ config_builder.add_column(
165
+ LLMTextColumnConfig(
166
+ name="instruction",
167
+ model_alias=model_alias,
168
+ system_prompt=("You are an expert at generating clear and specific programming tasks."),
169
+ prompt=(
170
+ "Generate an instruction to create Python code that solves a specific problem.\n"
171
+ "Each instruction should begin with one of the following phrases: {{ instruction_phrase }}.\n\n"
172
+ "Important Guidelines:\n"
173
+ "* Industry Relevance: Ensure the instruction pertains to the {{ industry_sector }} sector and {{ topic }} topic.\n"
174
+ "* Code Complexity: Tailor the instruction to the {{ code_complexity }} level. Utilize relevant {{ code_concept }} where appropriate to match the complexity level.\n"
175
+ "* Clarity and Specificity: Make the problem statement clear and unambiguous. Provide sufficient context to understand the requirements without being overly verbose.\n"
176
+ "* Response Formatting: Do not include any markers such as ### Response ### in the instruction.\n"
177
+ ),
178
+ )
179
+ )
180
+
181
+ config_builder.add_column(
182
+ LLMCodeColumnConfig(
183
+ name="code_implementation",
184
+ model_alias=model_alias,
185
+ code_lang=CodeLang.PYTHON,
186
+ system_prompt=(
187
+ "You are an expert Python programmer who writes clean, efficient, and well-documented code."
188
+ ),
189
+ prompt=(
190
+ "Write Python code for the following instruction:\n"
191
+ "Instruction: {{ instruction }}\n\n"
192
+ "Important Guidelines:\n"
193
+ "* Code Quality: Your code should be clean, complete, self-contained, and accurate.\n"
194
+ "* Code Validity: Please ensure that your Python code is executable and does not contain any errors.\n"
195
+ "* Packages: Remember to import any necessary libraries, and to use all libraries you import.\n"
196
+ "* Complexity & Concepts: The code should be written at a {{ code_complexity }} level, making use of concepts such as {{ code_concept }}.\n"
197
+ ),
198
+ )
199
+ )
200
+
201
+ config_builder.add_column(
202
+ LLMJudgeColumnConfig(
203
+ name="code_judge_result",
204
+ model_alias=model_alias,
205
+ prompt=TEXT_TO_PYTHON_JUDGE_TEMPLATE,
206
+ scores=python_scoring,
207
+ )
208
+ )
209
+
210
+ config_builder.add_column(
211
+ ValidationColumnConfig(
212
+ name="code_validity_result",
213
+ validator_type=ValidatorType.CODE,
214
+ target_columns=["code_implementation"],
215
+ validator_params=CodeValidatorParams(
216
+ code_lang=CodeLang.PYTHON,
217
+ ),
218
+ batch_size=100,
219
+ )
220
+ )
221
+
222
+ return config_builder
223
+
224
+
225
+ def create_dataset(
226
+ config_builder: DataDesignerConfigBuilder,
227
+ num_records: int,
228
+ artifact_path: Path | str | None = None,
229
+ ) -> DatasetCreationResults:
230
+ data_designer = DataDesigner(artifact_path=artifact_path)
231
+ results = data_designer.create(config_builder, num_records=num_records)
232
+ return results
233
+
234
+
235
+ TEXT_TO_PYTHON_JUDGE_TEMPLATE = """\
236
+ You are an expert in Python programming, with specialized knowledge in software engineering, data science, and algorithmic problem-solving.
237
+
238
+ You think about potential flaws and errors in the code. You are a tough critic, but a fair one.
239
+
240
+ Take a deep breath and use the Python Code Quality Rubric below to score the **Generated Python Code** based on the INSTRUCTIONS.
241
+
242
+ #### INSTRUCTIONS
243
+ The Generated Python Code should be a valid response to the Natural Language Prompt below
244
+
245
+ Natural Language Prompt:
246
+ {{ instruction }}
247
+
248
+ Generated Python Code
249
+ {{ code_implementation }}
250
+ """
251
+
252
+
253
+ python_scoring = [
254
+ Score(
255
+ name="Relevance",
256
+ description="Adherence to INSTRUCTIONS and CONTEXT",
257
+ options={
258
+ 4: "Perfectly meets all specified requirements.",
259
+ 3: "Meets most requirements with minor deviations.",
260
+ 2: "Moderate deviation from the instructions.",
261
+ 1: "Significant deviations from the instructions.",
262
+ 0: "Does not adhere to the instructions.",
263
+ },
264
+ ),
265
+ Score(
266
+ name="Pythonic",
267
+ description="Pythonic Code and Best Practices (Does the code follow Python conventions and best practices?)",
268
+ options={
269
+ 4: "The code exemplifies Pythonic principles, making excellent use of Python-specific constructs, standard library modules and programming idioms; follows all relevant PEPs.",
270
+ 3: "The code closely follows Python conventions and adheres to many best practices; good use of Python-specific constructs, standard library modules and programming idioms.",
271
+ 2: "The code generally follows Python conventions but has room for better alignment with Pythonic practices.",
272
+ 1: "The code loosely follows Python conventions, with several deviations from best practices.",
273
+ 0: "The code does not follow Python conventions or best practices, using non-Pythonic approaches.",
274
+ },
275
+ ),
276
+ Score(
277
+ name="Readability",
278
+ description="Readability and Maintainability (Is the Python code easy to understand and maintain?)",
279
+ options={
280
+ 4: (
281
+ "The code is excellently formatted, follows PEP 8 guidelines, is elegantly concise and clear, uses meaningful variable names, "
282
+ "ensuring high readability and ease of maintenance; organizes complex logic well. Docstrings are given in a Google Docstring format."
283
+ ),
284
+ 3: "The code is well-formatted in the sense of code-as-documentation, making it relatively easy to understand and maintain; uses descriptive names and organizes logic clearly.",
285
+ 2: "The code is somewhat readable with basic formatting and some comments, but improvements are needed; needs better use of descriptive names and organization.",
286
+ 1: "The code has minimal formatting, making it hard to understand; lacks meaningful names and organization.",
287
+ 0: "The code is unreadable, with no attempt at formatting or description.",
288
+ },
289
+ ),
290
+ Score(
291
+ name="Efficiency",
292
+ description="Efficiency and Performance (Is the code optimized for performance?)",
293
+ options={
294
+ 4: "The solution is highly efficient, using appropriate data structures and algorithms; avoids unnecessary computations and optimizes for both time and space complexity.",
295
+ 3: "The solution is efficient, with good use of Python's built-in functions and libraries; minor areas for optimization.",
296
+ 2: "The solution is moderately efficient, but misses some opportunities for optimization; uses some inefficient patterns.",
297
+ 1: "The solution shows poor efficiency, with notable performance issues; lacks effective optimization techniques.",
298
+ 0: "The solution is highly inefficient; overlooks fundamental optimization practices, resulting in significant performance issues.",
299
+ },
300
+ ),
301
+ ]
302
+
303
+
304
+ if __name__ == "__main__":
305
+ from argparse import ArgumentParser
306
+
307
+ parser = ArgumentParser()
308
+ parser.add_argument("--model-alias", type=str, default="openai-text")
309
+ parser.add_argument("--num-records", type=int, default=5)
310
+ parser.add_argument("--artifact-path", type=str, default=None)
311
+ args = parser.parse_args()
312
+
313
+ config_builder = build_config(model_alias=args.model_alias)
314
+ results = create_dataset(config_builder, num_records=args.num_records, artifact_path=args.artifact_path)
315
+
316
+ print(f"Dataset saved to: {results.artifact_storage.final_dataset_path}")
317
+
318
+ results.load_analysis().to_report()