data-designer 0.1.0__tar.gz → 0.1.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (360)
  1. data_designer-0.1.1/.github/workflows/pack-tutorials.yml +74 -0
  2. {data_designer-0.1.0 → data_designer-0.1.1}/CONTRIBUTING.md +62 -68
  3. {data_designer-0.1.0 → data_designer-0.1.1}/PKG-INFO +26 -9
  4. {data_designer-0.1.0 → data_designer-0.1.1}/README.md +23 -6
  5. data_designer-0.1.1/docs/concepts/person_sampling.md +36 -0
  6. {data_designer-0.1.0 → data_designer-0.1.1}/docs/models/default-model-settings.md +1 -2
  7. data_designer-0.1.0/docs/notebooks/intro.md → data_designer-0.1.1/docs/notebooks/README.md +11 -19
  8. data_designer-0.1.1/docs/notebooks/pyproject.toml +9 -0
  9. {data_designer-0.1.0 → data_designer-0.1.1}/mkdocs.yml +2 -3
  10. {data_designer-0.1.0 → data_designer-0.1.1}/pyproject.toml +2 -2
  11. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/_version.py +2 -2
  12. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/datastore.py +1 -1
  13. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/default_model_settings.py +12 -8
  14. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/resources/seed_dataset_data_store.py +20 -2
  15. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/interface/data_designer.py +24 -3
  16. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/test_datastore.py +44 -4
  17. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/test_default_model_settings.py +8 -2
  18. data_designer-0.1.0/docs/concepts/persons.md +0 -240
  19. {data_designer-0.1.0 → data_designer-0.1.1}/.github/workflows/build-docs.yml +0 -0
  20. {data_designer-0.1.0 → data_designer-0.1.1}/.github/workflows/ci.yml +0 -0
  21. {data_designer-0.1.0 → data_designer-0.1.1}/.github/workflows/dco-assistant.yml +0 -0
  22. {data_designer-0.1.0 → data_designer-0.1.1}/.github/workflows/semantic-pull-requests.yml +0 -0
  23. {data_designer-0.1.0 → data_designer-0.1.1}/.gitignore +0 -0
  24. {data_designer-0.1.0 → data_designer-0.1.1}/.pre-commit-config.yaml +0 -0
  25. {data_designer-0.1.0 → data_designer-0.1.1}/AGENTS.md +0 -0
  26. {data_designer-0.1.0 → data_designer-0.1.1}/CLAUDE.md +0 -0
  27. {data_designer-0.1.0 → data_designer-0.1.1}/CODE_OF_CONDUCT.md +0 -0
  28. {data_designer-0.1.0 → data_designer-0.1.1}/DCO +0 -0
  29. {data_designer-0.1.0 → data_designer-0.1.1}/LICENSE +0 -0
  30. {data_designer-0.1.0 → data_designer-0.1.1}/Makefile +0 -0
  31. {data_designer-0.1.0 → data_designer-0.1.1}/VERSIONING.md +0 -0
  32. {data_designer-0.1.0 → data_designer-0.1.1}/docs/CONTRIBUTING.md +0 -0
  33. {data_designer-0.1.0 → data_designer-0.1.1}/docs/assets/palette-favicon.png +0 -0
  34. {data_designer-0.1.0 → data_designer-0.1.1}/docs/code_reference/column_configs.md +0 -0
  35. {data_designer-0.1.0 → data_designer-0.1.1}/docs/code_reference/config_builder.md +0 -0
  36. {data_designer-0.1.0 → data_designer-0.1.1}/docs/code_reference/data_designer_config.md +0 -0
  37. {data_designer-0.1.0 → data_designer-0.1.1}/docs/code_reference/sampler_params.md +0 -0
  38. {data_designer-0.1.0 → data_designer-0.1.1}/docs/code_reference/validator_params.md +0 -0
  39. {data_designer-0.1.0 → data_designer-0.1.1}/docs/concepts/columns.md +0 -0
  40. {data_designer-0.1.0 → data_designer-0.1.1}/docs/concepts/plugins.md +0 -0
  41. {data_designer-0.1.0 → data_designer-0.1.1}/docs/concepts/validators.md +0 -0
  42. {data_designer-0.1.0 → data_designer-0.1.1}/docs/css/mkdocstrings.css +0 -0
  43. {data_designer-0.1.0 → data_designer-0.1.1}/docs/css/style.css +0 -0
  44. {data_designer-0.1.0 → data_designer-0.1.1}/docs/index.md +0 -0
  45. {data_designer-0.1.0 → data_designer-0.1.1}/docs/installation.md +0 -0
  46. {data_designer-0.1.0 → data_designer-0.1.1}/docs/js/toc-toggle.js +0 -0
  47. {data_designer-0.1.0 → data_designer-0.1.1}/docs/models/configure-model-settings-with-the-cli.md +0 -0
  48. {data_designer-0.1.0 → data_designer-0.1.1}/docs/models/model-configs.md +0 -0
  49. {data_designer-0.1.0 → data_designer-0.1.1}/docs/models/model-providers.md +0 -0
  50. {data_designer-0.1.0 → data_designer-0.1.1}/docs/notebooks/.gitignore +0 -0
  51. {data_designer-0.1.0 → data_designer-0.1.1}/docs/notebooks/1-the-basics.ipynb +0 -0
  52. {data_designer-0.1.0 → data_designer-0.1.1}/docs/notebooks/2-structured-outputs-and-jinja-expressions.ipynb +0 -0
  53. {data_designer-0.1.0 → data_designer-0.1.1}/docs/notebooks/3-seeding-with-a-dataset.ipynb +0 -0
  54. {data_designer-0.1.0 → data_designer-0.1.1}/docs/quick-start.md +0 -0
  55. {data_designer-0.1.0 → data_designer-0.1.1}/scripts/update_license_headers.py +0 -0
  56. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/__init__.py +0 -0
  57. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/README.md +0 -0
  58. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/__init__.py +0 -0
  59. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/commands/__init__.py +0 -0
  60. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/commands/list.py +0 -0
  61. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/commands/models.py +0 -0
  62. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/commands/providers.py +0 -0
  63. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/commands/reset.py +0 -0
  64. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/controllers/__init__.py +0 -0
  65. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/controllers/model_controller.py +0 -0
  66. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/controllers/provider_controller.py +0 -0
  67. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/forms/__init__.py +0 -0
  68. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/forms/builder.py +0 -0
  69. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/forms/field.py +0 -0
  70. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/forms/form.py +0 -0
  71. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/forms/model_builder.py +0 -0
  72. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/forms/provider_builder.py +0 -0
  73. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/main.py +0 -0
  74. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/repositories/__init__.py +0 -0
  75. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/repositories/base.py +0 -0
  76. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/repositories/model_repository.py +0 -0
  77. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/repositories/provider_repository.py +0 -0
  78. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/services/__init__.py +0 -0
  79. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/services/model_service.py +0 -0
  80. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/services/provider_service.py +0 -0
  81. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/ui.py +0 -0
  82. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/cli/utils.py +0 -0
  83. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/__init__.py +0 -0
  84. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/analysis/column_profilers.py +0 -0
  85. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/analysis/column_statistics.py +0 -0
  86. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/analysis/dataset_profiler.py +0 -0
  87. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/analysis/utils/errors.py +0 -0
  88. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/analysis/utils/reporting.py +0 -0
  89. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/base.py +0 -0
  90. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/column_configs.py +0 -0
  91. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/column_types.py +0 -0
  92. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/config_builder.py +0 -0
  93. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/data_designer_config.py +0 -0
  94. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/dataset_builders.py +0 -0
  95. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/errors.py +0 -0
  96. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/interface.py +0 -0
  97. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/models.py +0 -0
  98. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/preview_results.py +0 -0
  99. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/processors.py +0 -0
  100. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/sampler_constraints.py +0 -0
  101. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/sampler_params.py +0 -0
  102. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/seed.py +0 -0
  103. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/utils/code_lang.py +0 -0
  104. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/utils/constants.py +0 -0
  105. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/utils/errors.py +0 -0
  106. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/utils/info.py +0 -0
  107. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/utils/io_helpers.py +0 -0
  108. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/utils/misc.py +0 -0
  109. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/utils/numerical_helpers.py +0 -0
  110. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/utils/type_helpers.py +0 -0
  111. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/utils/validation.py +0 -0
  112. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/utils/visualization.py +0 -0
  113. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/config/validator_params.py +0 -0
  114. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/__init__.py +0 -0
  115. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/analysis/column_profilers/base.py +0 -0
  116. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -0
  117. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
  118. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/analysis/column_statistics.py +0 -0
  119. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/analysis/dataset_profiler.py +0 -0
  120. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/analysis/errors.py +0 -0
  121. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -0
  122. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -0
  123. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/column_generators/__init__.py +0 -0
  124. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/column_generators/generators/__init__.py +0 -0
  125. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/column_generators/generators/base.py +0 -0
  126. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
  127. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/column_generators/generators/llm_generators.py +0 -0
  128. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
  129. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
  130. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
  131. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/column_generators/registry.py +0 -0
  132. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
  133. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -0
  134. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
  135. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/configurable_task.py +0 -0
  136. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -0
  137. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/dataset_builders/column_wise_builder.py +0 -0
  138. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/dataset_builders/errors.py +0 -0
  139. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
  140. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -0
  141. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/dataset_builders/utils/concurrency.py +0 -0
  142. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
  143. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
  144. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
  145. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
  146. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/errors.py +0 -0
  147. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/model_provider.py +0 -0
  148. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/__init__.py +0 -0
  149. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/errors.py +0 -0
  150. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/facade.py +0 -0
  151. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/litellm_overrides.py +0 -0
  152. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/parsers/__init__.py +0 -0
  153. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/parsers/errors.py +0 -0
  154. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/parsers/parser.py +0 -0
  155. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/parsers/postprocessors.py +0 -0
  156. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
  157. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/parsers/types.py +0 -0
  158. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/recipes/base.py +0 -0
  159. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
  160. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/registry.py +0 -0
  161. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/usage.py +0 -0
  162. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/models/utils.py +0 -0
  163. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/ginja/__init__.py +0 -0
  164. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/ginja/ast.py +0 -0
  165. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/ginja/environment.py +0 -0
  166. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
  167. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/ginja/record.py +0 -0
  168. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/gsonschema/__init__.py +0 -0
  169. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
  170. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
  171. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
  172. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
  173. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/processors/base.py +0 -0
  174. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/processors/drop_columns.py +0 -0
  175. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/processors/registry.py +0 -0
  176. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/processing/utils.py +0 -0
  177. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/registry/base.py +0 -0
  178. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
  179. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/registry/errors.py +0 -0
  180. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
  181. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
  182. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/resources/managed_storage.py +0 -0
  183. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/resources/resource_provider.py +0 -0
  184. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/column.py +0 -0
  185. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/constraints.py +0 -0
  186. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -0
  187. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
  188. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
  189. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -0
  190. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
  191. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -0
  192. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
  193. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
  194. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
  195. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
  196. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -0
  197. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/errors.py +0 -0
  198. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/generator.py +0 -0
  199. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
  200. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/people_gen.py +0 -0
  201. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
  202. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/schema.py +0 -0
  203. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
  204. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/sampling_gen/utils.py +0 -0
  205. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/secret_resolver.py +0 -0
  206. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/validators/__init__.py +0 -0
  207. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/validators/base.py +0 -0
  208. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/validators/local_callable.py +0 -0
  209. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/validators/python.py +0 -0
  210. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/validators/remote.py +0 -0
  211. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/engine/validators/sql.py +0 -0
  212. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/errors.py +0 -0
  213. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/essentials/__init__.py +0 -0
  214. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/interface/__init__.py +0 -0
  215. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/interface/errors.py +0 -0
  216. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/interface/results.py +0 -0
  217. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/logging.py +0 -0
  218. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/plugin_manager.py +0 -0
  219. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/plugins/__init__.py +0 -0
  220. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/plugins/errors.py +0 -0
  221. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/plugins/plugin.py +0 -0
  222. {data_designer-0.1.0 → data_designer-0.1.1}/src/data_designer/plugins/registry.py +0 -0
  223. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/commands/test_list_command.py +0 -0
  224. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/commands/test_models_command.py +0 -0
  225. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/commands/test_providers_command.py +0 -0
  226. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/commands/test_reset_command.py +0 -0
  227. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/conftest.py +0 -0
  228. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/controllers/test_model_controller.py +0 -0
  229. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/controllers/test_provider_controller.py +0 -0
  230. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/forms/test_field.py +0 -0
  231. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/forms/test_form.py +0 -0
  232. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/forms/test_model_builder.py +0 -0
  233. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/forms/test_provider_builder.py +0 -0
  234. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/repositories/test_model_repository.py +0 -0
  235. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/repositories/test_provider_repository.py +0 -0
  236. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/services/test_model_service.py +0 -0
  237. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/services/test_provider_service.py +0 -0
  238. {data_designer-0.1.0 → data_designer-0.1.1}/tests/cli/test_cli_utils.py +0 -0
  239. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/analysis/conftest.py +0 -0
  240. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/analysis/test_column_statistics.py +0 -0
  241. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/analysis/test_dataset_profiler_results.py +0 -0
  242. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/analysis/utils/test_reporting.py +0 -0
  243. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/test_columns.py +0 -0
  244. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/test_config_builder.py +0 -0
  245. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/test_data_designer_config.py +0 -0
  246. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/test_models.py +0 -0
  247. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/test_processors.py +0 -0
  248. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/test_sampler_constraints.py +0 -0
  249. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/test_sampler_params.py +0 -0
  250. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/test_seed.py +0 -0
  251. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/test_validator_params.py +0 -0
  252. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/utils/__init__.py +0 -0
  253. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/utils/test_code_lang.py +0 -0
  254. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/utils/test_info.py +0 -0
  255. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/utils/test_io_helpers.py +0 -0
  256. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/utils/test_misc.py +0 -0
  257. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/utils/test_type_helpers.py +0 -0
  258. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/utils/test_validation.py +0 -0
  259. {data_designer-0.1.0 → data_designer-0.1.1}/tests/config/utils/test_visualization.py +0 -0
  260. {data_designer-0.1.0 → data_designer-0.1.1}/tests/conftest.py +0 -0
  261. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/analysis/column_profilers/test_base.py +0 -0
  262. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
  263. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/analysis/conftest.py +0 -0
  264. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
  265. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
  266. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
  267. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
  268. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/analysis/test_dataset_profiler.py +0 -0
  269. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/analysis/test_errors.py +0 -0
  270. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
  271. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
  272. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/column_generators/generators/__init__.py +0 -0
  273. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
  274. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/column_generators/generators/test_expression.py +0 -0
  275. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/column_generators/generators/test_llm_generators.py +0 -0
  276. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/column_generators/generators/test_samplers.py +0 -0
  277. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
  278. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/column_generators/generators/test_validation.py +0 -0
  279. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/column_generators/test_registry.py +0 -0
  280. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
  281. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
  282. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
  283. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/conftest.py +0 -0
  284. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
  285. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/dataset_builders/test_column_wise_builder.py +0 -0
  286. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
  287. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
  288. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
  289. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
  290. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
  291. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/conftest.py +0 -0
  292. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/parsers/test_parser.py +0 -0
  293. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/parsers/test_parsers_types.py +0 -0
  294. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/parsers/test_postprocessors.py +0 -0
  295. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
  296. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/recipes/test_recipe_base.py +0 -0
  297. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/recipes/test_response_recipes.py +0 -0
  298. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/stub_secrets.json +0 -0
  299. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/test_facade.py +0 -0
  300. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/test_litellm_overrides.py +0 -0
  301. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/test_model_errors.py +0 -0
  302. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/test_model_registry.py +0 -0
  303. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/test_model_utils.py +0 -0
  304. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/models/test_usage.py +0 -0
  305. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/__init__.py +0 -0
  306. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/ginja/__init__.py +0 -0
  307. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/ginja/test_ast.py +0 -0
  308. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/ginja/test_environment.py +0 -0
  309. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/ginja/test_exceptions.py +0 -0
  310. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/ginja/test_record.py +0 -0
  311. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/gsonschema/__init__.py +0 -0
  312. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
  313. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
  314. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/gsonschema/test_types.py +0 -0
  315. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/gsonschema/test_validators.py +0 -0
  316. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/processors/__init__.py +0 -0
  317. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/processors/test_drop_columns.py +0 -0
  318. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/processors/test_registry.py +0 -0
  319. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/processing/test_utils.py +0 -0
  320. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/registry/__init__.py +0 -0
  321. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/registry/conftest.py +0 -0
  322. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/registry/test_base.py +0 -0
  323. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/registry/test_data_designer_registry.py +0 -0
  324. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/registry/test_errors.py +0 -0
  325. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/resources/__init__.py +0 -0
  326. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/resources/conftest.py +0 -0
  327. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
  328. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
  329. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/resources/test_managed_storage.py +0 -0
  330. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/resources/test_resource_provider.py +0 -0
  331. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/conftest.py +0 -0
  332. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
  333. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
  334. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
  335. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
  336. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/entities/test_person.py +0 -0
  337. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
  338. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/test_column.py +0 -0
  339. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/test_constraints.py +0 -0
  340. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/test_generator.py +0 -0
  341. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
  342. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/test_people_gen.py +0 -0
  343. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/test_schema.py +0 -0
  344. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/sampling_gen/test_utils.py +0 -0
  345. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/test_configurable_task.py +0 -0
  346. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/test_engine_errors.py +0 -0
  347. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/test_model_provider.py +0 -0
  348. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/test_secret_resolver.py +0 -0
  349. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/validators/test_local_callable.py +0 -0
  350. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/validators/test_python.py +0 -0
  351. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/validators/test_remote.py +0 -0
  352. {data_designer-0.1.0 → data_designer-0.1.1}/tests/engine/validators/test_sql.py +0 -0
  353. {data_designer-0.1.0 → data_designer-0.1.1}/tests/essentials/test_init.py +0 -0
  354. {data_designer-0.1.0 → data_designer-0.1.1}/tests/interface/test_data_designer.py +0 -0
  355. {data_designer-0.1.0 → data_designer-0.1.1}/tests/interface/test_results.py +0 -0
  356. {data_designer-0.1.0 → data_designer-0.1.1}/tests/plugins/test_plugin.py +0 -0
  357. {data_designer-0.1.0 → data_designer-0.1.1}/tests/plugins/test_plugin_registry.py +0 -0
  358. {data_designer-0.1.0 → data_designer-0.1.1}/tests/test_logging.py +0 -0
  359. {data_designer-0.1.0 → data_designer-0.1.1}/tests/test_plugin_manager.py +0 -0
  360. {data_designer-0.1.0 → data_designer-0.1.1}/uv.lock +0 -0
@@ -0,0 +1,74 @@
1
+ name: Pack Tutorials
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ release:
6
+ types: [published]
7
+
8
+ jobs:
9
+ zip_and_upload:
10
+ runs-on: ubuntu-latest
11
+ permissions:
12
+ contents: write
13
+
14
+ steps:
15
+ - name: Checkout repository
16
+ uses: actions/checkout@v4
17
+
18
+ - name: Set up environment variables
19
+ id: env_setup
20
+ run: |
21
+ echo "SOURCE_FOLDER_PATH=docs/notebooks" >> $GITHUB_ENV
22
+ echo "TARGET_FOLDER_NAME=data_designer_tutorial" >> $GITHUB_ENV
23
+ echo "ZIP_FILE_NAME=data_designer_tutorial.zip" >> $GITHUB_ENV
24
+
25
+ - name: Check if source folder exists
26
+ run: |
27
+ if [ ! -d "${{ env.SOURCE_FOLDER_PATH }}" ]; then
28
+ echo "::error::Source folder '${{ env.SOURCE_FOLDER_PATH }}' not found. Check the input value."
29
+ exit 1
30
+ fi
31
+
32
+ - name: Rename source folder
33
+ run: mv ${{ env.SOURCE_FOLDER_PATH }} ${{ env.TARGET_FOLDER_NAME }}
34
+
35
+ - name: Zip the target folder
36
+ run: |
37
+ zip -r ${{ env.ZIP_FILE_NAME }} ${{ env.TARGET_FOLDER_NAME }}
38
+ echo "Successfully created ${{ env.ZIP_FILE_NAME }}"
39
+
40
+ - name: Find the latest existing release tag
41
+ id: get_release
42
+ run: |
43
+ if [ "${{ github.event_name }}" == "release" ]; then
44
+ LATEST_TAG="${{ github.event.release.tag_name }}"
45
+ else
46
+ echo "::notice::Running manually via workflow_dispatch. Fetching latest release tag..."
47
+
48
+ gh auth status || echo "GitHub CLI is not authenticated, relying on GITHUB_TOKEN."
49
+
50
+ # Command substitution already strips the trailing newline, giving a clean tag string
51
+ LATEST_TAG=$(gh release view --json tagName -q .tagName 2>/dev/null)
52
+
53
+ if [ -z "$LATEST_TAG" ]; then
54
+ echo "::error::Could not find the latest published release tag. Ensure a release exists."
55
+ exit 1
56
+ fi
57
+ fi
58
+
59
+ echo "Latest release tag found: $LATEST_TAG"
60
+ echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
61
+ env:
62
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
63
+
64
+ - name: Upload zip file as release asset
65
+ uses: softprops/action-gh-release@v2
66
+ with:
67
+ tag_name: ${{ steps.get_release.outputs.tag }}
68
+ files: ${{ env.ZIP_FILE_NAME }}
69
+ draft: false
70
+ prerelease: false
71
+
72
+ - name: Cleanup
73
+ if: always()
74
+ run: rm -f ${{ env.ZIP_FILE_NAME }}
@@ -12,7 +12,6 @@ This guide will help you get started with the contribution process.
12
12
  - [Ways to Contribute](#ways-to-contribute)
13
13
  - [Feature Requests](#feature-requests)
14
14
  - [Development Guide](#development-guide)
15
- - [Code Quality Standards](#code-quality-standards)
16
15
  - [Submitting Changes](#submitting-changes)
17
16
  - [Code of Conduct](#code-of-conduct)
18
17
  - [Signing off on your work](#signing-off-on-your-work)
@@ -88,94 +87,97 @@ Data Designer uses [`uv`](https://github.com/astral-sh/uv) for dependency manage
88
87
  ### Initial Setup
89
88
  0. **Create or find an issue**
90
89
 
91
- Before starting work, ensure there's an issue tracking your contribution:
92
- - For bug fixes: Search [existing issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues) or [create a new one](https://github.com/NVIDIA-NeMo/DataDesigner/issues/new)
93
- - For new features: Open a [feature request](#feature-requests) to discuss the approach first
94
- - Comment on the issue to let maintainers know you're working on it
90
+ Before starting work, ensure there's an issue tracking your contribution:
91
+
92
+ - For bug fixes: Search [existing issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues) or [create a new one](https://github.com/NVIDIA-NeMo/DataDesigner/issues/new)
93
+ - For new features: Open a [feature request](#feature-requests) to discuss the approach first
94
+ - Comment on the issue to let maintainers know you're working on it
95
95
 
96
96
  1. **Fork and clone the repository**
97
97
 
98
- Start by [forking the Data Designer repository](https://github.com/NVIDIA-NeMo/DataDesigner/fork), then clone your fork and add the upstream remote:
98
+ Start by [forking the Data Designer repository](https://github.com/NVIDIA-NeMo/DataDesigner/fork), then clone your fork and add the upstream remote:
99
99
 
100
- ```bash
101
- git clone https://github.com/YOUR_GITHUB_USERNAME/DataDesigner.git
100
+ ```bash
101
+ git clone https://github.com/YOUR_GITHUB_USERNAME/DataDesigner.git
102
102
 
103
- cd DataDesigner
103
+ cd DataDesigner
104
104
 
105
- git remote add upstream https://github.com/NVIDIA-NeMo/DataDesigner.git
106
- ```
105
+ git remote add upstream https://github.com/NVIDIA-NeMo/DataDesigner.git
106
+ ```
107
107
 
108
108
  2. **Install dependencies**
109
109
 
110
- ```bash
111
- # Install project with dev dependencies
112
- make install-dev
110
+ ```bash
111
+ # Install project with dev dependencies
112
+ make install-dev
113
113
 
114
- # Or, if you use Jupyter / IPython for development
115
- make install-dev-notebooks
116
- ```
114
+ # Or, if you use Jupyter / IPython for development
115
+ make install-dev-notebooks
116
+ ```
117
117
 
118
118
  3. **Verify your setup**
119
119
 
120
- ```bash
121
- make test && make check-all
122
- ```
120
+ ```bash
121
+ make test && make check-all
122
+ ```
123
123
 
124
- If no errors are reported, you're ready to develop 🚀
124
+ If no errors are reported, you're ready to develop 🚀
125
125
 
126
126
  ### Making Changes
127
127
 
128
128
  1. **Create a feature branch**
129
129
 
130
- ```bash
131
- git checkout main
132
- git pull upstream main
133
- git checkout -b <username>/<type-of-change>/<issue-number>-<short-description>
134
- ```
135
-
136
- Example types of change:
137
- - `feat` for new features
138
- - `fix` for bug fixes
139
- - `docs` for documentation updates
140
- - `test` for testing changes
141
- - `refactor` for code refactoring
142
- - `chore` for chore tasks
143
- - `style` for style changes
144
- - `perf` for performance improvements
145
-
146
- Example branch name:
147
- - `johnnygreco/feat/123-add-xyz-generator` for a new feature by @johnnygreco, addressing issue #123
130
+ ```bash
131
+ git checkout main
132
+ git pull upstream main
133
+ git checkout -b <username>/<type-of-change>/<issue-number>-<short-description>
134
+ ```
135
+
136
+ Example types of change:
137
+
138
+ - `feat` for new features
139
+ - `fix` for bug fixes
140
+ - `docs` for documentation updates
141
+ - `test` for testing changes
142
+ - `refactor` for code refactoring
143
+ - `chore` for chore tasks
144
+ - `style` for style changes
145
+ - `perf` for performance improvements
146
+
147
+ Example branch name:
148
+
149
+ - `johnnygreco/feat/123-add-xyz-generator` for a new feature by @johnnygreco, addressing issue #123
148
150
 
149
151
  2. **Develop your changes**
150
152
 
151
- Please follow the patterns and conventions used throughout the codebase, as well as those outlined in [AGENTS.md](AGENTS.md).
153
+ Please follow the patterns and conventions used throughout the codebase, as well as those outlined in [AGENTS.md](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/AGENTS.md).
152
154
 
153
155
  3. **Test and validate**
154
156
 
155
- ```bash
156
- make check-all-fix # Format code and fix linting issues
157
- make test # Run all tests
158
- make coverage # Check test coverage (must be >90%)
159
- ```
157
+ ```bash
158
+ make check-all-fix # Format code and fix linting issues
159
+ make test # Run all tests
160
+ make coverage # Check test coverage (must be >90%)
161
+ ```
160
162
 
161
- **Writing tests**: Place tests in [tests/](tests/) mirroring the source structure. Use fixtures from [tests/conftest.py](tests/conftest.py), mock external services with `unittest.mock` or `pytest-httpx`, and test both success and failure cases. See [AGENTS.md](AGENTS.md) for patterns and examples.
163
+ **Writing tests**: Place tests in [tests/](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/tests/) mirroring the source structure. Use fixtures from [tests/conftest.py](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/tests/conftest.py), mock external services with `unittest.mock` or `pytest-httpx`, and test both success and failure cases. See [AGENTS.md](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/AGENTS.md) for patterns and examples.
162
164
 
163
165
  4. **Commit your work**
164
166
 
165
- Write clear, descriptive commit messages, optionally including a brief summary (50 characters or less) and reference issue numbers when applicable (e.g., "Fixes #123").
167
+ Write clear, descriptive commit messages, optionally including a brief summary (50 characters or less) and reference issue numbers when applicable (e.g., "Fixes #123").
166
168
 
167
- ```bash
168
- git commit -m "Add XYZ generator for synthetic data" -m "Fixes #123"
169
- ```
169
+ ```bash
170
+ git commit -m "Add XYZ generator for synthetic data" -m "Fixes #123"
171
+ ```
170
172
 
171
173
  5. **Stay up to date**
172
174
 
173
- Regularly sync your branch with upstream changes:
175
+ Regularly sync your branch with upstream changes:
174
176
 
175
- ```bash
176
- git fetch upstream
177
- git merge upstream/main
178
- ```
177
+ ```bash
178
+ git fetch upstream
179
+ git merge upstream/main
180
+ ```
179
181
 
180
182
  ## Submitting Changes
181
183
 
@@ -194,9 +196,9 @@ Ensure your changes meet the following criteria:
194
196
 
195
197
  1. **Push your changes** to your fork:
196
198
 
197
- ```bash
198
- git push origin <username>/<type-of-change>/<issue-number>-<short-description>
199
- ```
199
+ ```bash
200
+ git push origin <username>/<type-of-change>/<issue-number>-<short-description>
201
+ ```
200
202
 
201
203
  2. **Open a pull request** on GitHub from your fork to the main repository
202
204
 
@@ -213,7 +215,7 @@ Ensure your changes meet the following criteria:
213
215
  ## Code of Conduct
214
216
  Data Designer follows the Contributor Covenant Code of Conduct. We are committed to providing a welcoming and inclusive environment for all contributors.
215
217
 
216
- **Please read our complete [Code of Conduct](CODE_OF_CONDUCT.md)** for full details on our standards and expectations.
218
+ **Please read our complete [Code of Conduct](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/CODE_OF_CONDUCT.md)** for full details on our standards and expectations.
217
219
 
218
220
  ### License File Headers
219
221
  All code files that are added to this repository must include the appropriate NVIDIA copyright header:
@@ -225,17 +227,9 @@ All code files that are added to this repository must include the appropriate NV
225
227
 
226
228
  Use `make update-license-headers` to add headers automatically.
227
229
 
228
- ## Getting Help
229
- Need help with your contribution?
230
-
231
- - **Documentation**: Check the [documentation](docs/) and [AGENTS.md](AGENTS.md) for additional information
232
- - **Issues**: Browse [existing issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues) for similar questions
233
- - **Contact**: Reach out to the core maintainers at [data-designer@nvidia.com](mailto:data-designer@nvidia.com)
234
-
235
-
236
230
  ## Signing off on your work
237
231
 
238
- When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content and that the content you contribute may be provided under the project license. All contributors are asked to sign the Data Designer [Developer Certificate of Origin (DCO)](DCO) when submitting their first pull request. The process is automated by a bot that will comment on the pull request. Our DCO is the same as the Linux Foundation requires its contributors to sign.
232
+ When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content and that the content you contribute may be provided under the project license. All contributors are asked to sign the Data Designer [Developer Certificate of Origin (DCO)](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/DCO) when submitting their first pull request. The process is automated by a bot that will comment on the pull request. Our DCO is the same as the one the Linux Foundation requires its contributors to sign.
239
233
 
240
234
  ---
241
235
 
@@ -1,18 +1,18 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: General framework for synthetic data generation
5
+ License-Expression: Apache-2.0
5
6
  License-File: LICENSE
6
7
  Classifier: Development Status :: 4 - Beta
7
8
  Classifier: Intended Audience :: Developers
8
9
  Classifier: Intended Audience :: Science/Research
9
- Classifier: License :: Other/Proprietary License
10
+ Classifier: License :: OSI Approved :: Apache Software License
10
11
  Classifier: Programming Language :: Python :: 3.10
11
12
  Classifier: Programming Language :: Python :: 3.11
12
13
  Classifier: Programming Language :: Python :: 3.12
13
14
  Classifier: Programming Language :: Python :: 3.13
14
15
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
- Classifier: Topic :: Scientific/Engineering :: Human Machine Interfaces
16
16
  Classifier: Topic :: Software Development
17
17
  Requires-Python: >=3.10
18
18
  Requires-Dist: anyascii<1.0,>=0.3.3
@@ -51,7 +51,7 @@ Description-Content-Type: text/markdown
51
51
 
52
52
  [![CI](https://github.com/NVIDIA-NeMo/DataDesigner/actions/workflows/ci.yml/badge.svg)](https://github.com/NVIDIA-NeMo/DataDesigner/actions/workflows/ci.yml)
53
53
  [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
54
- [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/) [![NeMo Microservices](https://img.shields.io/badge/NeMo-Microservices-76b900)](https://docs.nvidia.com/nemo/microservices/latest/index.html)
54
+ [![Python 3.10 - 3.13](https://img.shields.io/badge/🐍_Python-3.10_|_3.11_|_3.12_|_3.13-blue.svg)](https://www.python.org/downloads/) [![NeMo Microservices](https://img.shields.io/badge/NeMo-Microservices-76b900)](https://docs.nvidia.com/nemo/microservices/latest/index.html) [![Code](https://img.shields.io/badge/Code-Documentation-8A2BE2.svg)](https://nvidia-nemo.github.io/DataDesigner/)
55
55
 
56
56
  **Generate high-quality synthetic datasets from scratch or using your own seed data.**
57
57
 
@@ -147,10 +147,12 @@ preview.display_sample_record()
147
147
 
148
148
  ### 📚 Learn more
149
149
 
150
- - **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner)** – Detailed walkthrough with more examples
151
- - **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/1-the-basics/)** – Step-by-step interactive tutorials
150
+ - **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/quick-start/)** – Detailed walkthrough with more examples
151
+ - **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/intro/)** – Step-by-step interactive tutorials
152
152
  - **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
153
+ - **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
153
154
  - **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/models/model-configs/)** – Configure custom models and providers
155
+ - **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
154
156
 
155
157
  ### 🔧 Configure models via CLI
156
158
 
@@ -162,12 +164,27 @@ data-designer config list # View current settings
162
164
 
163
165
  ### 🤝 Get involved
164
166
 
165
- - **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING.md)** – Help improve Data Designer
166
- - **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or request features
167
- - **[GitHub Discussions](https://github.com/NVIDIA-NeMo/DataDesigner/discussions)** – Ask questions and share ideas
167
+ - **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING)** – Help improve Data Designer
168
+ - **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or make a feature request
168
169
 
169
170
  ---
170
171
 
171
172
  ## License
172
173
 
173
174
  Apache License 2.0 – see [LICENSE](LICENSE) for details.
175
+
176
+ ---
177
+
178
+ ## Citation
179
+
180
+ If you use NeMo Data Designer in your research, please cite it using the following BibTeX entry:
181
+
182
+ ```bibtex
183
+ @misc{nemo-data-designer,
184
+ author = {The NeMo Data Designer Team},
185
+ title = {NeMo Data Designer: A framework for generating synthetic data from scratch or based on your own seed data},
186
+ howpublished = {\url{https://github.com/NVIDIA-NeMo/DataDesigner}},
187
+ year = {2025},
188
+ note = {GitHub Repository},
189
+ }
190
+ ```
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![CI](https://github.com/NVIDIA-NeMo/DataDesigner/actions/workflows/ci.yml/badge.svg)](https://github.com/NVIDIA-NeMo/DataDesigner/actions/workflows/ci.yml)
4
4
  [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
5
- [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/) [![NeMo Microservices](https://img.shields.io/badge/NeMo-Microservices-76b900)](https://docs.nvidia.com/nemo/microservices/latest/index.html)
5
+ [![Python 3.10 - 3.13](https://img.shields.io/badge/🐍_Python-3.10_|_3.11_|_3.12_|_3.13-blue.svg)](https://www.python.org/downloads/) [![NeMo Microservices](https://img.shields.io/badge/NeMo-Microservices-76b900)](https://docs.nvidia.com/nemo/microservices/latest/index.html) [![Code](https://img.shields.io/badge/Code-Documentation-8A2BE2.svg)](https://nvidia-nemo.github.io/DataDesigner/)
6
6
 
7
7
  **Generate high-quality synthetic datasets from scratch or using your own seed data.**
8
8
 
@@ -98,10 +98,12 @@ preview.display_sample_record()
98
98
 
99
99
  ### 📚 Learn more
100
100
 
101
- - **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner)** – Detailed walkthrough with more examples
102
- - **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/1-the-basics/)** – Step-by-step interactive tutorials
101
+ - **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/quick-start/)** – Detailed walkthrough with more examples
102
+ - **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/intro/)** – Step-by-step interactive tutorials
103
103
  - **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
104
+ - **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
104
105
  - **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/models/model-configs/)** – Configure custom models and providers
106
+ - **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
105
107
 
106
108
  ### 🔧 Configure models via CLI
107
109
 
@@ -113,12 +115,27 @@ data-designer config list # View current settings
113
115
 
114
116
  ### 🤝 Get involved
115
117
 
116
- - **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING.md)** – Help improve Data Designer
117
- - **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or request features
118
- - **[GitHub Discussions](https://github.com/NVIDIA-NeMo/DataDesigner/discussions)** – Ask questions and share ideas
118
+ - **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING)** – Help improve Data Designer
119
+ - **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or make a feature request
119
120
 
120
121
  ---
121
122
 
122
123
  ## License
123
124
 
124
125
  Apache License 2.0 – see [LICENSE](LICENSE) for details.
126
+
127
+ ---
128
+
129
+ ## Citation
130
+
131
+ If you use NeMo Data Designer in your research, please cite it using the following BibTeX entry:
132
+
133
+ ```bibtex
134
+ @misc{nemo-data-designer,
135
+ author = {The NeMo Data Designer Team},
136
+ title = {NeMo Data Designer: A framework for generating synthetic data from scratch or based on your own seed data},
137
+ howpublished = {\url{https://github.com/NVIDIA-NeMo/DataDesigner}},
138
+ year = {2025},
139
+ note = {GitHub Repository},
140
+ }
141
+ ```
@@ -0,0 +1,36 @@
1
+ # Person Sampling in Data Designer
2
+
3
+ Person sampling in Data Designer allows you to generate synthetic person data for your datasets using the Faker library.
4
+
5
+ ## Faker-Based Sampling
6
+
7
+ ### What It Does
8
+ Uses the Faker library to generate random personal information. The data is basic and not demographically accurate, but is useful for quick testing, prototyping, or when realistic demographic distributions are not relevant for your use case.
9
+
10
+ ### Features
11
+ - Gives you access to person attributes that Faker exposes
12
+ - Quick to set up with no additional downloads
13
+ - Generates random names, emails, addresses, phone numbers, etc.
14
+ - Supports [all Faker-supported locales](https://faker.readthedocs.io/en/master/locales.html)
15
+ - **Not demographically grounded** - data patterns don't reflect real-world demographics
16
+
17
+ ### Usage Example
18
+ ```python
19
+ from data_designer.essentials import (
20
+ SamplerColumnConfig,
21
+ SamplerType,
22
+ PersonFromFakerSamplerParams,
23
+ )
24
+
25
+ config_builder.add_column(
26
+ SamplerColumnConfig(
27
+ name="customer",
28
+ sampler_type=SamplerType.PERSON_FROM_FAKER,
29
+ params=PersonFromFakerSamplerParams(
30
+ locale="en_US",
31
+ age_range=[25, 65],
32
+ sex="Female",
33
+ ),
34
+ )
35
+ )
36
+ ```
@@ -92,5 +92,4 @@ Both methods operate on the same files, ensuring consistency across your entire
92
92
 
93
93
  - **[Configure Model Settings With the CLI](configure-model-settings-with-the-cli.md)**: Learn how to use the CLI to manage model settings.
94
94
  - **[Quick Start Guide](../quick-start.md)**: Get started with a simple example
95
- - **[Model Configuration Reference](../code_reference/config_builder.md)**: Detailed API documentation
96
- - **[Column Configurations](../code_reference/column_configs.md)**: Learn about all column types
95
+ - **[Model Configurations](model-configs.md)**: Learn about model configurations
@@ -6,21 +6,15 @@ Welcome to the Data Designer tutorial series! These hands-on notebooks will guid
6
6
 
7
7
  ### Local Setup Best Practices
8
8
 
9
- The notebooks can be downloaded using the link on the top of each of them.
10
- To run them locally, we recommend using a virtual environment to manage dependencies:
9
+ First, download the tutorial [from the release assets](https://github.com/NVIDIA-NeMo/DataDesigner/releases/latest/download/data_designer_tutorial.zip).
10
+ To run the tutorial notebooks locally, we recommend using a virtual environment to manage dependencies:
11
11
 
12
12
  === "uv (Recommended)"
13
13
 
14
14
  ```bash
15
- # Create a new project directory
16
- mkdir data-designer-tutorials
17
- cd data-designer-tutorials
18
-
19
- # Initialize a new uv project
20
- uv init
21
-
22
- # Add data-designer and jupyter
23
- uv add data-designer jupyter
15
+ # Extract tutorial notebooks
16
+ unzip data_designer_tutorial.zip
17
+ cd data_designer_tutorial
24
18
 
25
19
  # Launch Jupyter
26
20
  uv run jupyter notebook
@@ -29,15 +23,13 @@ To run them locally, we recommend using a virtual environment to manage dependen
29
23
  === "pip + venv"
30
24
 
31
25
  ```bash
32
- # Create a new project directory
33
- mkdir data-designer-tutorials
34
- cd data-designer-tutorials
26
+ # Extract tutorial notebooks
27
+ unzip data_designer_tutorial.zip
28
+ cd data_designer_tutorial
35
29
 
36
- # Create and activate a virtual environment
30
+ # Create Python virtual environment and install required packages
37
31
  python -m venv venv
38
32
  source venv/bin/activate
39
-
40
- # Install data-designer and jupyter
41
33
  pip install data-designer jupyter
42
34
 
43
35
  # Launch Jupyter
@@ -108,7 +100,7 @@ Understanding these concepts will help you make the most of the tutorials:
108
100
 
109
101
  - **[Columns](../concepts/columns.md)** - Learn about different column types (Sampler, LLM, Expression, Validation, etc.)
110
102
  - **[Validators](../concepts/validators.md)** - Understand how to validate generated data with Python, SQL, and remote validators
111
- - **[Plugins](../concepts/plugins.md)** - Extend Data Designer with custom functionality
103
+ - **[Person Sampling](../concepts/person_sampling.md)** - Learn how to sample realistic person data with demographic attributes
112
104
 
113
105
  ### Code Reference
114
106
 
@@ -117,4 +109,4 @@ Quick reference guides for the main configuration objects:
117
109
  - **[column_configs](../code_reference/column_configs.md)** - All column configuration types
118
110
  - **[config_builder](../code_reference/config_builder.md)** - The `DataDesignerConfigBuilder` API
119
111
  - **[data_designer_config](../code_reference/data_designer_config.md)** - Main configuration schema
120
- - **[validator_params](../code_reference/validator_params.md)** - Validator configuration options
112
+ - **[validator_params](../code_reference/validator_params.md)** - Validator configuration options
@@ -0,0 +1,9 @@
1
+ [project]
2
+ name = "data_designer_tutorial"
3
+ version = "0.1.0"
4
+ readme = "README.md"
5
+ requires-python = ">=3.11"
6
+ dependencies = [
7
+ "data-designer>=0.1.0",
8
+ "jupyter>=1.1.1",
9
+ ]
@@ -10,15 +10,14 @@ nav:
10
10
  - Concepts:
11
11
  - Columns: concepts/columns.md
12
12
  - Validators: concepts/validators.md
13
- - Persons: concepts/persons.md
14
- # - Plugins: concepts/plugins.md
13
+ - Person Sampling: concepts/person_sampling.md
15
14
  - Models:
16
15
  - Default Model Settings: models/default-model-settings.md
17
16
  - Configure with the CLI: models/configure-model-settings-with-the-cli.md
18
17
  - Model Providers: models/model-providers.md
19
18
  - Model Configs: models/model-configs.md
20
19
  - Tutorials:
21
- - Overview: notebooks/intro.md
20
+ - Overview: notebooks/README.md
22
21
  - The Basics: notebooks/1-the-basics.ipynb
23
22
  - Structured Outputs and Jinja Expressions: notebooks/2-structured-outputs-and-jinja-expressions.ipynb
24
23
  - Seeding with an External Dataset: notebooks/3-seeding-with-a-dataset.ipynb
@@ -4,15 +4,15 @@ dynamic = ["version"]
4
4
  description = "General framework for synthetic data generation"
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
7
+ license = "Apache-2.0"
7
8
 
8
9
  classifiers = [
9
10
  "Development Status :: 4 - Beta",
10
11
  "Intended Audience :: Developers",
11
12
  "Intended Audience :: Science/Research",
12
13
  "Topic :: Scientific/Engineering :: Artificial Intelligence",
13
- "Topic :: Scientific/Engineering :: Human Machine Interfaces",
14
14
  "Topic :: Software Development",
15
- "License :: Other/Proprietary License",
15
+ "License :: OSI Approved :: Apache Software License",
16
16
  "Programming Language :: Python :: 3.10",
17
17
  "Programming Language :: Python :: 3.11",
18
18
  "Programming Language :: Python :: 3.12",
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.1.0'
32
- __version_tuple__ = version_tuple = (0, 1, 0)
31
+ __version__ = version = '0.1.1'
32
+ __version_tuple__ = version_tuple = (0, 1, 1)
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -124,7 +124,7 @@ def _fetch_seed_dataset_column_names_from_datastore(
124
124
  raise InvalidFileFormatError(f"🛑 Unsupported file type: {filename!r}")
125
125
 
126
126
  datastore_settings = resolve_datastore_settings(datastore_settings)
127
- fs = HfFileSystem(endpoint=datastore_settings.endpoint, token=datastore_settings.token)
127
+ fs = HfFileSystem(endpoint=datastore_settings.endpoint, token=datastore_settings.token, skip_instance_cache=True)
128
128
 
129
129
  with fs.open(f"datasets/{repo_id}/{filename}") as f:
130
130
  return get_file_column_names(f, file_type)
@@ -4,6 +4,7 @@
4
4
 
5
5
  from functools import lru_cache
6
6
  import logging
7
+ import os
7
8
  from pathlib import Path
8
9
  from typing import Any, Literal, Optional
9
10
 
@@ -15,7 +16,6 @@ from .utils.constants import (
15
16
  PREDEFINED_PROVIDERS,
16
17
  PREDEFINED_PROVIDERS_MODEL_MAP,
17
18
  )
18
- from .utils.info import ConfigBuilderInfo, InfoType, InterfaceInfo
19
19
  from .utils.io_helpers import load_config_file, save_config_file
20
20
 
21
21
  logger = logging.getLogger(__name__)
@@ -75,7 +75,15 @@ def get_default_model_configs() -> list[ModelConfig]:
75
75
  config_dict = load_config_file(MODEL_CONFIGS_FILE_PATH)
76
76
  if "model_configs" in config_dict:
77
77
  return [ModelConfig.model_validate(mc) for mc in config_dict["model_configs"]]
78
- raise FileNotFoundError(f"Default model configs file not found at {str(MODEL_CONFIGS_FILE_PATH)!r}")
78
+ return []
79
+
80
+
81
+ def get_defaul_model_providers_missing_api_keys() -> list[str]:
82
+ missing_api_keys = []
83
+ for predefined_provider in PREDEFINED_PROVIDERS:
84
+ if os.environ.get(predefined_provider["api_key"]) is None:
85
+ missing_api_keys.append(predefined_provider["api_key"])
86
+ return missing_api_keys
79
87
 
80
88
 
81
89
  def get_default_providers() -> list[ModelProvider]:
@@ -91,21 +99,17 @@ def get_default_provider_name() -> Optional[str]:
91
99
 
92
100
  def resolve_seed_default_model_settings() -> None:
93
101
  if not MODEL_CONFIGS_FILE_PATH.exists():
94
- logger.info(
102
+ logger.debug(
95
103
  f"🍾 Default model configs were not found, so writing the following to {str(MODEL_CONFIGS_FILE_PATH)!r}"
96
104
  )
97
- config_builder_info = ConfigBuilderInfo(model_configs=get_builtin_model_configs())
98
- config_builder_info.display(info_type=InfoType.MODEL_CONFIGS)
99
105
  save_config_file(
100
106
  MODEL_CONFIGS_FILE_PATH, {"model_configs": [mc.model_dump() for mc in get_builtin_model_configs()]}
101
107
  )
102
108
 
103
109
  if not MODEL_PROVIDERS_FILE_PATH.exists():
104
- logger.info(
110
+ logger.debug(
105
111
  f"🪄 Default model providers were not found, so writing the following to {str(MODEL_PROVIDERS_FILE_PATH)!r}"
106
112
  )
107
- interface_info = InterfaceInfo(model_providers=get_builtin_model_providers())
108
- interface_info.display(info_type=InfoType.MODEL_PROVIDERS)
109
113
  save_config_file(
110
114
  MODEL_PROVIDERS_FILE_PATH, {"providers": [p.model_dump() for p in get_builtin_model_providers()]}
111
115
  )