data-designer 0.3.3__tar.gz → 0.3.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (435)
  1. {data_designer-0.3.3 → data_designer-0.3.5}/.gitignore +4 -1
  2. {data_designer-0.3.3 → data_designer-0.3.5}/AGENTS.md +143 -2
  3. {data_designer-0.3.3 → data_designer-0.3.5}/Makefile +53 -12
  4. {data_designer-0.3.3 → data_designer-0.3.5}/PKG-INFO +19 -4
  5. {data_designer-0.3.3 → data_designer-0.3.5}/README.md +18 -3
  6. {data_designer-0.3.3 → data_designer-0.3.5}/docs/colab_notebooks/1-the-basics.ipynb +31 -31
  7. {data_designer-0.3.3 → data_designer-0.3.5}/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +29 -29
  8. {data_designer-0.3.3 → data_designer-0.3.5}/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +27 -27
  9. {data_designer-0.3.3 → data_designer-0.3.5}/docs/colab_notebooks/4-providing-images-as-context.ipynb +35 -35
  10. data_designer-0.3.5/docs/images/top-models.png +0 -0
  11. {data_designer-0.3.3 → data_designer-0.3.5}/docs/plugins/example.md +9 -32
  12. {data_designer-0.3.3 → data_designer-0.3.5}/docs/scripts/generate_colab_notebooks.py +1 -1
  13. {data_designer-0.3.3 → data_designer-0.3.5}/scripts/update_license_headers.py +1 -1
  14. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/__init__.py +2 -0
  15. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/_version.py +2 -2
  16. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/__init__.py +2 -0
  17. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/commands/download.py +2 -0
  18. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/commands/list.py +2 -0
  19. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/commands/models.py +2 -0
  20. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/commands/providers.py +2 -0
  21. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/commands/reset.py +2 -0
  22. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/controllers/__init__.py +2 -0
  23. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/controllers/download_controller.py +2 -0
  24. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/controllers/model_controller.py +6 -1
  25. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/controllers/provider_controller.py +6 -1
  26. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/forms/__init__.py +2 -0
  27. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/forms/builder.py +2 -0
  28. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/forms/field.py +2 -0
  29. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/forms/form.py +2 -0
  30. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/forms/model_builder.py +2 -0
  31. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/forms/provider_builder.py +2 -0
  32. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/main.py +2 -0
  33. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/repositories/__init__.py +2 -0
  34. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/repositories/base.py +2 -0
  35. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/repositories/model_repository.py +2 -0
  36. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/repositories/persona_repository.py +2 -0
  37. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/repositories/provider_repository.py +2 -0
  38. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/services/__init__.py +2 -0
  39. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/services/download_service.py +2 -0
  40. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/services/model_service.py +2 -0
  41. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/services/provider_service.py +2 -0
  42. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/ui.py +2 -0
  43. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/utils.py +2 -0
  44. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/analysis/column_profilers.py +2 -0
  45. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/analysis/column_statistics.py +8 -5
  46. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/analysis/dataset_profiler.py +9 -3
  47. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/analysis/utils/errors.py +2 -0
  48. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/analysis/utils/reporting.py +7 -3
  49. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/base.py +1 -0
  50. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/column_configs.py +77 -7
  51. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/column_types.py +33 -36
  52. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/dataset_builders.py +2 -0
  53. data_designer-0.3.5/src/data_designer/config/dataset_metadata.py +18 -0
  54. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/default_model_settings.py +1 -0
  55. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/errors.py +2 -0
  56. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/exports.py +2 -0
  57. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/interface.py +3 -2
  58. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/models.py +7 -2
  59. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/preview_results.py +9 -1
  60. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/processors.py +2 -0
  61. data_designer-0.3.5/src/data_designer/config/run_config.py +48 -0
  62. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/sampler_constraints.py +2 -0
  63. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/sampler_params.py +7 -2
  64. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/seed.py +2 -0
  65. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/seed_source.py +9 -3
  66. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/seed_source_types.py +2 -0
  67. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/constants.py +2 -0
  68. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/errors.py +2 -0
  69. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/info.py +2 -0
  70. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/io_helpers.py +8 -3
  71. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/misc.py +2 -2
  72. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/numerical_helpers.py +2 -0
  73. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/type_helpers.py +2 -0
  74. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/visualization.py +19 -11
  75. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/validator_params.py +2 -0
  76. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/column_profilers/base.py +9 -8
  77. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +15 -19
  78. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/column_profilers/registry.py +2 -0
  79. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/column_statistics.py +5 -2
  80. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/dataset_profiler.py +12 -9
  81. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/errors.py +2 -0
  82. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +7 -4
  83. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/utils/judge_score_processing.py +7 -3
  84. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/base.py +26 -14
  85. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/embedding.py +4 -11
  86. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/expression.py +7 -16
  87. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/llm_completion.py +13 -47
  88. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/samplers.py +8 -14
  89. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/seed_dataset.py +9 -15
  90. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/validation.py +9 -20
  91. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/registry.py +2 -0
  92. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/utils/errors.py +2 -0
  93. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/utils/generator_classification.py +2 -0
  94. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +2 -0
  95. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +4 -2
  96. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/compiler.py +3 -6
  97. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/configurable_task.py +12 -13
  98. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/artifact_storage.py +87 -8
  99. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/column_wise_builder.py +34 -35
  100. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/errors.py +2 -0
  101. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/multi_column_configs.py +2 -0
  102. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/utils/concurrency.py +13 -4
  103. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +2 -0
  104. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/utils/dag.py +7 -2
  105. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +35 -25
  106. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/utils/errors.py +2 -0
  107. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/errors.py +2 -0
  108. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/model_provider.py +2 -0
  109. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/errors.py +23 -31
  110. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/facade.py +12 -9
  111. data_designer-0.3.5/src/data_designer/engine/models/factory.py +42 -0
  112. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/litellm_overrides.py +16 -11
  113. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/parsers/errors.py +2 -0
  114. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/parsers/parser.py +2 -2
  115. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/parsers/postprocessors.py +1 -0
  116. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/parsers/tag_parsers.py +2 -0
  117. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/parsers/types.py +2 -0
  118. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/recipes/base.py +2 -0
  119. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/recipes/response_recipes.py +2 -0
  120. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/registry.py +11 -18
  121. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/telemetry.py +6 -2
  122. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/ginja/ast.py +2 -0
  123. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/ginja/environment.py +2 -0
  124. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/ginja/exceptions.py +2 -0
  125. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/ginja/record.py +2 -0
  126. data_designer-0.3.5/src/data_designer/engine/processing/gsonschema/exceptions.py +15 -0
  127. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +2 -0
  128. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/gsonschema/types.py +2 -0
  129. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/gsonschema/validators.py +10 -6
  130. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/processors/base.py +1 -5
  131. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/processors/drop_columns.py +7 -10
  132. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/processors/registry.py +2 -0
  133. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/processors/schema_transform.py +7 -10
  134. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/utils.py +7 -3
  135. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/registry/base.py +2 -0
  136. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/registry/data_designer_registry.py +2 -0
  137. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/registry/errors.py +2 -0
  138. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/resources/managed_dataset_generator.py +6 -2
  139. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/resources/managed_dataset_repository.py +8 -5
  140. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/resources/managed_storage.py +2 -0
  141. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/resources/resource_provider.py +20 -1
  142. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/resources/seed_reader.py +7 -2
  143. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/column.py +2 -0
  144. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/constraints.py +8 -2
  145. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/data_sources/base.py +10 -7
  146. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/data_sources/errors.py +2 -0
  147. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/data_sources/sources.py +27 -22
  148. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +2 -2
  149. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +2 -0
  150. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/errors.py +2 -0
  151. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +2 -0
  152. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/person.py +2 -0
  153. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/phone_number.py +8 -1
  154. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/errors.py +2 -0
  155. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/generator.py +5 -4
  156. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/jinja_utils.py +7 -3
  157. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/people_gen.py +7 -7
  158. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/person_constants.py +2 -0
  159. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/schema.py +5 -1
  160. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/schema_builder.py +2 -0
  161. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/utils.py +7 -1
  162. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/secret_resolver.py +2 -0
  163. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validation.py +2 -2
  164. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validators/__init__.py +2 -0
  165. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validators/base.py +2 -0
  166. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validators/local_callable.py +7 -2
  167. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validators/python.py +7 -1
  168. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validators/remote.py +7 -1
  169. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validators/sql.py +8 -3
  170. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/errors.py +2 -0
  171. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/essentials/__init__.py +2 -0
  172. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/interface/data_designer.py +36 -39
  173. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/interface/errors.py +2 -0
  174. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/interface/results.py +9 -2
  175. data_designer-0.3.5/src/data_designer/lazy_heavy_imports.py +54 -0
  176. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/logging.py +2 -0
  177. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/__init__.py +2 -0
  178. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/errors.py +2 -0
  179. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/plugin.py +0 -1
  180. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/registry.py +2 -0
  181. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/testing/__init__.py +2 -0
  182. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/testing/stubs.py +21 -43
  183. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/testing/utils.py +2 -0
  184. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_columns.py +21 -0
  185. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/utils/test_misc.py +4 -4
  186. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/column_profilers/test_base.py +1 -17
  187. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/test_dataset_profiler.py +2 -32
  188. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_column_generator_base.py +5 -23
  189. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_embedding.py +5 -7
  190. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_expression.py +0 -8
  191. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_llm_completion_generators.py +22 -21
  192. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_seed_dataset.py +2 -3
  193. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_validation.py +3 -0
  194. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/conftest.py +1 -0
  195. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/test_artifact_storage.py +142 -22
  196. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/test_column_wise_builder.py +58 -43
  197. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/utils/test_concurrency.py +44 -0
  198. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +30 -2
  199. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/conftest.py +2 -1
  200. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/test_model_registry.py +3 -2
  201. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/processors/test_drop_columns.py +0 -7
  202. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/processors/test_schema_transform.py +0 -7
  203. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/resources/conftest.py +0 -1
  204. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/resources/test_resource_provider.py +5 -7
  205. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/test_compiler.py +3 -3
  206. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/test_configurable_task.py +6 -41
  207. data_designer-0.3.5/tests/engine/test_dataset_metadata.py +56 -0
  208. {data_designer-0.3.3 → data_designer-0.3.5}/tests/interface/test_data_designer.py +21 -20
  209. {data_designer-0.3.3 → data_designer-0.3.5}/tests/interface/test_results.py +45 -20
  210. data_designer-0.3.5/tests/test_import_perf.py +64 -0
  211. {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/src/data_designer_e2e_tests/plugins/column_generator/config.py +12 -0
  212. {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/src/data_designer_e2e_tests/plugins/column_generator/impl.py +2 -14
  213. {data_designer-0.3.3 → data_designer-0.3.5}/uv.lock +82 -82
  214. data_designer-0.3.3/src/data_designer/config/run_config.py +0 -34
  215. data_designer-0.3.3/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -8
  216. {data_designer-0.3.3 → data_designer-0.3.5}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
  217. {data_designer-0.3.3 → data_designer-0.3.5}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  218. {data_designer-0.3.3 → data_designer-0.3.5}/.github/ISSUE_TEMPLATE/development-task.yml +0 -0
  219. {data_designer-0.3.3 → data_designer-0.3.5}/.github/ISSUE_TEMPLATE/feature-request.yml +0 -0
  220. {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/build-docs.yml +0 -0
  221. {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/build-notebooks.yml +0 -0
  222. {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/check-colab-notebooks.yml +0 -0
  223. {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/ci.yml +0 -0
  224. {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/dco-assistant.yml +0 -0
  225. {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/pack-tutorials.yml +0 -0
  226. {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/semantic-pull-requests.yml +0 -0
  227. {data_designer-0.3.3 → data_designer-0.3.5}/.pre-commit-config.yaml +0 -0
  228. {data_designer-0.3.3 → data_designer-0.3.5}/CLAUDE.md +0 -0
  229. {data_designer-0.3.3 → data_designer-0.3.5}/CODE_OF_CONDUCT.md +0 -0
  230. {data_designer-0.3.3 → data_designer-0.3.5}/CONTRIBUTING.md +0 -0
  231. {data_designer-0.3.3 → data_designer-0.3.5}/DCO +0 -0
  232. {data_designer-0.3.3 → data_designer-0.3.5}/LICENSE +0 -0
  233. {data_designer-0.3.3 → data_designer-0.3.5}/VERSIONING.md +0 -0
  234. {data_designer-0.3.3 → data_designer-0.3.5}/docs/CONTRIBUTING.md +0 -0
  235. {data_designer-0.3.3 → data_designer-0.3.5}/docs/assets/palette-favicon.png +0 -0
  236. {data_designer-0.3.3 → data_designer-0.3.5}/docs/assets/recipes/code_generation/text_to_python.py +0 -0
  237. {data_designer-0.3.3 → data_designer-0.3.5}/docs/assets/recipes/code_generation/text_to_sql.py +0 -0
  238. {data_designer-0.3.3 → data_designer-0.3.5}/docs/assets/recipes/qa_and_chat/multi_turn_chat.py +0 -0
  239. {data_designer-0.3.3 → data_designer-0.3.5}/docs/assets/recipes/qa_and_chat/product_info_qa.py +0 -0
  240. {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/analysis.md +0 -0
  241. {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/column_configs.md +0 -0
  242. {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/config_builder.md +0 -0
  243. {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/data_designer_config.md +0 -0
  244. {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/models.md +0 -0
  245. {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/processors.md +0 -0
  246. {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/sampler_params.md +0 -0
  247. {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/validator_params.md +0 -0
  248. {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/columns.md +0 -0
  249. {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/models/configure-model-settings-with-the-cli.md +0 -0
  250. {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/models/custom-model-settings.md +0 -0
  251. {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/models/default-model-settings.md +0 -0
  252. {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/models/inference-parameters.md +0 -0
  253. {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/models/model-configs.md +0 -0
  254. {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/models/model-providers.md +0 -0
  255. {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/person_sampling.md +0 -0
  256. {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/processors.md +0 -0
  257. {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/validators.md +0 -0
  258. {data_designer-0.3.3 → data_designer-0.3.5}/docs/css/mkdocstrings.css +0 -0
  259. {data_designer-0.3.3 → data_designer-0.3.5}/docs/css/style.css +0 -0
  260. {data_designer-0.3.3 → data_designer-0.3.5}/docs/index.md +0 -0
  261. {data_designer-0.3.3 → data_designer-0.3.5}/docs/installation.md +0 -0
  262. {data_designer-0.3.3 → data_designer-0.3.5}/docs/js/toc-toggle.js +0 -0
  263. {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/1-the-basics.py +0 -0
  264. {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py +0 -0
  265. {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/3-seeding-with-a-dataset.py +0 -0
  266. {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/4-providing-images-as-context.py +0 -0
  267. {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/README.md +0 -0
  268. {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/_README.md +0 -0
  269. {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/_pyproject.toml +0 -0
  270. {data_designer-0.3.3 → data_designer-0.3.5}/docs/overrides/main.html +0 -0
  271. {data_designer-0.3.3 → data_designer-0.3.5}/docs/plugins/available.md +0 -0
  272. {data_designer-0.3.3 → data_designer-0.3.5}/docs/plugins/overview.md +0 -0
  273. {data_designer-0.3.3 → data_designer-0.3.5}/docs/quick-start.md +0 -0
  274. {data_designer-0.3.3 → data_designer-0.3.5}/docs/recipes/cards.md +0 -0
  275. {data_designer-0.3.3 → data_designer-0.3.5}/docs/recipes/code_generation/text_to_python.md +0 -0
  276. {data_designer-0.3.3 → data_designer-0.3.5}/docs/recipes/code_generation/text_to_sql.md +0 -0
  277. {data_designer-0.3.3 → data_designer-0.3.5}/docs/recipes/qa_and_chat/multi_turn_chat.md +0 -0
  278. {data_designer-0.3.3 → data_designer-0.3.5}/docs/recipes/qa_and_chat/product_info_qa.md +0 -0
  279. {data_designer-0.3.3 → data_designer-0.3.5}/mkdocs.yml +0 -0
  280. {data_designer-0.3.3 → data_designer-0.3.5}/pyproject.toml +1 -1
  281. {data_designer-0.3.3 → data_designer-0.3.5}/scripts/test_license_headers.py +0 -0
  282. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/README.md +0 -0
  283. {data_designer-0.3.3/e2e_tests/src/data_designer_e2e_tests/plugins → data_designer-0.3.5/src/data_designer/cli/commands}/__init__.py +0 -0
  284. {data_designer-0.3.3/e2e_tests/src/data_designer_e2e_tests/plugins/column_generator → data_designer-0.3.5/src/data_designer/config}/__init__.py +0 -0
  285. {data_designer-0.3.3/e2e_tests/src/data_designer_e2e_tests/plugins/seed_reader → data_designer-0.3.5/src/data_designer/config/analysis}/__init__.py +0 -0
  286. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/config_builder.py +0 -0
  287. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/data_designer_config.py +0 -0
  288. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/code_lang.py +0 -0
  289. {data_designer-0.3.3/src/data_designer/cli/commands → data_designer-0.3.5/src/data_designer/engine}/__init__.py +0 -0
  290. {data_designer-0.3.3/src/data_designer/config → data_designer-0.3.5/src/data_designer/engine/column_generators}/__init__.py +0 -0
  291. {data_designer-0.3.3/src/data_designer/config/analysis → data_designer-0.3.5/src/data_designer/engine/column_generators/generators}/__init__.py +0 -0
  292. {data_designer-0.3.3/src/data_designer/engine → data_designer-0.3.5/src/data_designer/engine/dataset_builders/utils}/__init__.py +0 -0
  293. {data_designer-0.3.3/src/data_designer/engine/column_generators → data_designer-0.3.5/src/data_designer/engine/models}/__init__.py +0 -0
  294. {data_designer-0.3.3/src/data_designer/engine/column_generators/generators → data_designer-0.3.5/src/data_designer/engine/models/parsers}/__init__.py +0 -0
  295. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/usage.py +0 -0
  296. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/utils.py +0 -0
  297. {data_designer-0.3.3/src/data_designer/engine/dataset_builders/utils → data_designer-0.3.5/src/data_designer/engine/processing/ginja}/__init__.py +0 -0
  298. {data_designer-0.3.3/src/data_designer/engine/models → data_designer-0.3.5/src/data_designer/engine/processing/gsonschema}/__init__.py +0 -0
  299. {data_designer-0.3.3/src/data_designer/engine/models/parsers → data_designer-0.3.5/src/data_designer/engine/sampling_gen/entities}/__init__.py +0 -0
  300. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
  301. {data_designer-0.3.3/src/data_designer/engine/processing/ginja → data_designer-0.3.5/src/data_designer/interface}/__init__.py +0 -0
  302. {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugin_manager.py +0 -0
  303. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/commands/test_download_command.py +0 -0
  304. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/commands/test_list_command.py +0 -0
  305. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/commands/test_models_command.py +0 -0
  306. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/commands/test_providers_command.py +0 -0
  307. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/commands/test_reset_command.py +0 -0
  308. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/conftest.py +0 -0
  309. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/controllers/test_download_controller.py +0 -0
  310. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/controllers/test_model_controller.py +0 -0
  311. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/controllers/test_provider_controller.py +0 -0
  312. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/forms/test_field.py +0 -0
  313. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/forms/test_form.py +0 -0
  314. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/forms/test_model_builder.py +0 -0
  315. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/forms/test_provider_builder.py +0 -0
  316. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/repositories/test_model_repository.py +0 -0
  317. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/repositories/test_persona_repository.py +0 -0
  318. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/repositories/test_provider_repository.py +0 -0
  319. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/services/test_download_service.py +0 -0
  320. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/services/test_model_service.py +0 -0
  321. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/services/test_provider_service.py +0 -0
  322. {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/test_cli_utils.py +0 -0
  323. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/analysis/conftest.py +0 -0
  324. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/analysis/test_column_statistics.py +0 -0
  325. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/analysis/test_dataset_profiler_results.py +0 -0
  326. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/analysis/utils/test_reporting.py +0 -0
  327. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_config_builder.py +0 -0
  328. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_data_designer_config.py +0 -0
  329. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_default_model_settings.py +0 -0
  330. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_models.py +0 -0
  331. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_processors.py +0 -0
  332. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_sampler_constraints.py +0 -0
  333. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_sampler_params.py +0 -0
  334. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_seed.py +0 -0
  335. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_seed_source.py +0 -0
  336. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_validator_params.py +0 -0
  337. {data_designer-0.3.3/src/data_designer/engine/processing/gsonschema → data_designer-0.3.5/tests/config/utils}/__init__.py +0 -0
  338. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/utils/test_code_lang.py +0 -0
  339. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/utils/test_info.py +0 -0
  340. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/utils/test_io_helpers.py +0 -0
  341. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/utils/test_type_helpers.py +0 -0
  342. {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/utils/test_visualization.py +0 -0
  343. {data_designer-0.3.3 → data_designer-0.3.5}/tests/conftest.py +0 -0
  344. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
  345. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/conftest.py +0 -0
  346. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
  347. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
  348. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
  349. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
  350. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/test_errors.py +0 -0
  351. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
  352. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
  353. {data_designer-0.3.3/src/data_designer/engine/sampling_gen/entities → data_designer-0.3.5/tests/engine/column_generators/generators}/__init__.py +0 -0
  354. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_samplers.py +0 -0
  355. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/test_registry.py +0 -0
  356. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
  357. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/utils/test_generator_classification.py +0 -0
  358. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
  359. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
  360. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
  361. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
  362. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
  363. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/parsers/test_parser.py +0 -0
  364. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/parsers/test_parsers_types.py +0 -0
  365. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/parsers/test_postprocessors.py +0 -0
  366. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
  367. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/recipes/test_recipe_base.py +0 -0
  368. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/recipes/test_response_recipes.py +0 -0
  369. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/stub_secrets.json +0 -0
  370. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/test_facade.py +0 -0
  371. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/test_litellm_overrides.py +0 -0
  372. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/test_model_errors.py +0 -0
  373. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/test_model_utils.py +0 -0
  374. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/test_usage.py +0 -0
  375. {data_designer-0.3.3/src/data_designer/interface → data_designer-0.3.5/tests/engine/processing}/__init__.py +0 -0
  376. {data_designer-0.3.3/tests/config/utils → data_designer-0.3.5/tests/engine/processing/ginja}/__init__.py +0 -0
  377. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/ginja/test_ast.py +0 -0
  378. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/ginja/test_environment.py +0 -0
  379. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/ginja/test_exceptions.py +0 -0
  380. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/ginja/test_record.py +0 -0
  381. {data_designer-0.3.3/tests/engine/column_generators/generators → data_designer-0.3.5/tests/engine/processing/gsonschema}/__init__.py +0 -0
  382. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
  383. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
  384. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/gsonschema/test_types.py +0 -0
  385. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/gsonschema/test_validators.py +0 -0
  386. {data_designer-0.3.3/tests/engine/processing → data_designer-0.3.5/tests/engine/processing/processors}/__init__.py +0 -0
  387. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/processors/test_registry.py +0 -0
  388. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/test_utils.py +0 -0
  389. {data_designer-0.3.3/tests/engine/processing/ginja → data_designer-0.3.5/tests/engine/registry}/__init__.py +0 -0
  390. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/registry/conftest.py +0 -0
  391. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/registry/test_base.py +0 -0
  392. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/registry/test_data_designer_registry.py +0 -0
  393. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/registry/test_errors.py +0 -0
  394. {data_designer-0.3.3/tests/engine/processing/gsonschema → data_designer-0.3.5/tests/engine/resources}/__init__.py +0 -0
  395. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
  396. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
  397. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/resources/test_managed_storage.py +0 -0
  398. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/resources/test_seed_reader.py +0 -0
  399. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/conftest.py +0 -0
  400. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
  401. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
  402. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
  403. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
  404. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/entities/test_person.py +0 -0
  405. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
  406. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_column.py +0 -0
  407. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_constraints.py +0 -0
  408. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_generator.py +0 -0
  409. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
  410. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_people_gen.py +0 -0
  411. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_schema.py +0 -0
  412. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_utils.py +0 -0
  413. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/test_engine_errors.py +0 -0
  414. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/test_model_provider.py +0 -0
  415. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/test_secret_resolver.py +0 -0
  416. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/test_validation.py +0 -0
  417. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/validators/test_local_callable.py +0 -0
  418. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/validators/test_python.py +0 -0
  419. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/validators/test_remote.py +0 -0
  420. {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/validators/test_sql.py +0 -0
  421. {data_designer-0.3.3 → data_designer-0.3.5}/tests/essentials/test_init.py +0 -0
  422. {data_designer-0.3.3 → data_designer-0.3.5}/tests/plugins/test_plugin.py +0 -0
  423. {data_designer-0.3.3 → data_designer-0.3.5}/tests/plugins/test_plugin_registry.py +0 -0
  424. {data_designer-0.3.3 → data_designer-0.3.5}/tests/test_logging.py +0 -0
  425. {data_designer-0.3.3 → data_designer-0.3.5}/tests/test_plugin_manager.py +0 -0
  426. {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/pyproject.toml +0 -0
  427. {data_designer-0.3.3/tests/engine/processing/processors → data_designer-0.3.5/tests_e2e/src/data_designer_e2e_tests/plugins}/__init__.py +0 -0
  428. {data_designer-0.3.3/tests/engine/registry → data_designer-0.3.5/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator}/__init__.py +0 -0
  429. {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/src/data_designer_e2e_tests/plugins/column_generator/plugin.py +0 -0
  430. {data_designer-0.3.3/tests/engine/resources → data_designer-0.3.5/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader}/__init__.py +0 -0
  431. {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/src/data_designer_e2e_tests/plugins/seed_reader/config.py +0 -0
  432. {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/src/data_designer_e2e_tests/plugins/seed_reader/impl.py +0 -0
  433. {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/src/data_designer_e2e_tests/plugins/seed_reader/plugin.py +0 -0
  434. {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/tests/test_e2e.py +0 -0
  435. {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/tests/test_seed.csv +0 -0
@@ -92,4 +92,7 @@ docs/notebook_source/*.ipynb
92
92
  docs/notebook_source/*.csv
93
93
  docs/**/artifacts/
94
94
 
95
- e2e_tests/uv.lock
95
+ tests_e2e/uv.lock
96
+
97
+ # Performance profiling
98
+ perf_*.txt
@@ -158,12 +158,13 @@ Type annotations are REQUIRED for all code in this project. This is strictly enf
158
158
  ### Import Style
159
159
 
160
160
  - **ALWAYS** use absolute imports, never relative imports
161
- - Place imports at module level, not inside functions
161
+ - Place imports at module level, not inside functions (exception: when it is unavoidable for performance reasons)
162
162
  - Import sorting is handled by `ruff`'s `isort` - imports should be grouped and sorted:
163
163
  1. Standard library imports
164
- 2. Third-party imports
164
+ 2. Third-party imports (use `lazy_heavy_imports` for heavy libraries)
165
165
  3. First-party imports (`data_designer`)
166
166
  - Use standard import conventions (enforced by `ICN`)
167
+ - See the [Lazy Loading and TYPE_CHECKING](#lazy-loading-and-type_checking) section for optimization guidelines
167
168
 
168
169
  ```python
169
170
  # Good
@@ -184,6 +185,146 @@ Type annotations are REQUIRED for all code in this project. This is strictly enf
184
185
  path = Path(filename)
185
186
  ```
186
187
 
188
+ ### Lazy Loading and TYPE_CHECKING
189
+
190
+ This project uses lazy loading for heavy third-party dependencies to optimize import performance.
191
+
192
+ #### When to Use Lazy Loading
193
+
194
+ **Heavy third-party libraries** (>100ms import cost) should be lazy-loaded via `lazy_heavy_imports.py`:
195
+
196
+ ```python
197
+ # ❌ Don't import directly
198
+ import pandas as pd
199
+ import numpy as np
200
+
201
+ # ✅ Use lazy loading with IDE support
202
+ from typing import TYPE_CHECKING
203
+ from data_designer.lazy_heavy_imports import pd, np
204
+
205
+ if TYPE_CHECKING:
206
+ import pandas as pd # For IDE autocomplete and type hints
207
+ import numpy as np
208
+ ```
209
+
210
+ This pattern provides:
211
+ - Runtime lazy loading (fast startup)
212
+ - Full IDE support (autocomplete, type hints)
213
+ - Type checker validation
214
+
215
+ **See [lazy_heavy_imports.py](src/data_designer/lazy_heavy_imports.py) for the current list of lazy-loaded libraries.**
216
+
217
+ #### Adding New Heavy Dependencies
218
+
219
+ If you add a new dependency with significant import cost (>100ms):
220
+
221
+ 1. **Add to `lazy_heavy_imports.py`:**
222
+ ```python
223
+ _LAZY_IMPORTS = {
224
+ # ... existing entries ...
225
+ "your_lib": "your_library_name",
226
+ }
227
+ ```
228
+
229
+ 2. **Update imports across codebase:**
230
+ ```python
231
+ from typing import TYPE_CHECKING
232
+ from data_designer.lazy_heavy_imports import your_lib
233
+
234
+ if TYPE_CHECKING:
235
+ import your_library_name as your_lib # For IDE support
236
+ ```
237
+
238
+ 3. **Verify with performance test:**
239
+ ```bash
240
+ make perf-import CLEAN=1
241
+ ```
242
+
243
+ #### Using TYPE_CHECKING Blocks
244
+
245
+ `TYPE_CHECKING` blocks defer imports that are only needed for type hints, preventing circular dependencies and reducing import time.
246
+
247
+ **For internal data_designer imports:**
248
+
249
+ ```python
250
+ from __future__ import annotations # Always include at top
251
+
252
+ from typing import TYPE_CHECKING
253
+
254
+ # Runtime imports
255
+ from pathlib import Path
256
+ from data_designer.config.base import ConfigBase
257
+
258
+ if TYPE_CHECKING:
259
+ # Type-only imports - only visible to type checkers
260
+ from data_designer.engine.models.facade import ModelFacade
261
+
262
+ def get_model(model: ModelFacade) -> str:
263
+ return model.name
264
+ ```
265
+
266
+ **For lazy-loaded libraries (see pattern in "When to Use Lazy Loading" above):**
267
+ - Import from `lazy_heavy_imports` for runtime
268
+ - Add full import in `TYPE_CHECKING` block for IDE support
269
+
270
+ **Rules for TYPE_CHECKING:**
271
+
272
+ ✅ **DO put in TYPE_CHECKING:**
273
+ - Internal `data_designer` imports used **only** in type hints
274
+ - Imports that would cause circular dependencies
275
+ - **Full imports of lazy-loaded libraries for IDE support** (e.g., `import pandas as pd` in addition to runtime `from data_designer.lazy_heavy_imports import pd`)
276
+
277
+ ❌ **DON'T put in TYPE_CHECKING:**
278
+ - **Standard library imports** (`Path`, `Any`, `Callable`, `Literal`, `TypeAlias`, etc.)
279
+ - **Pydantic model types** used in field definitions (needed at runtime for validation)
280
+ - **Types used in discriminated unions** (Pydantic needs them at runtime)
281
+ - **Any import used at runtime** (instantiation, method calls, base classes, etc.)
282
+
283
+ **Examples:**
284
+
285
+ ```python
286
+ # ✅ CORRECT - Lazy-loaded library with IDE support
287
+ from typing import TYPE_CHECKING
288
+ from data_designer.lazy_heavy_imports import pd
289
+
290
+ if TYPE_CHECKING:
291
+ import pandas as pd # IDE gets full type hints
292
+
293
+ def load_data(path: str) -> pd.DataFrame: # IDE understands pd.DataFrame
294
+ return pd.read_csv(path)
295
+
296
+ # ✅ CORRECT - Standard library NOT in TYPE_CHECKING
297
+ from pathlib import Path
298
+ from typing import Any
299
+
300
+ def process_file(path: Path) -> Any:
301
+ return path.read_text()
302
+
303
+ # ✅ CORRECT - Internal type-only import
304
+ from typing import TYPE_CHECKING
305
+
306
+ if TYPE_CHECKING:
307
+ from data_designer.engine.models.facade import ModelFacade
308
+
309
+ def get_model(model: ModelFacade) -> str: # Only used in type hint
310
+ return model.name
311
+
312
+ # ❌ INCORRECT - Pydantic field type in TYPE_CHECKING
313
+ from typing import TYPE_CHECKING
314
+
315
+ if TYPE_CHECKING:
316
+ from data_designer.config.models import ModelConfig # Wrong!
317
+
318
+ class MyConfig(BaseModel):
319
+ model: ModelConfig # Pydantic needs this at runtime!
320
+
321
+ # ✅ CORRECT - Pydantic field type at runtime
322
+ from data_designer.config.models import ModelConfig
323
+
324
+ class MyConfig(BaseModel):
325
+ model: ModelConfig
326
+ ```
327
+
187
328
  ### Naming Conventions (PEP 8)
188
329
 
189
330
  Follow PEP 8 naming conventions:
@@ -45,14 +45,25 @@ help:
45
45
  @echo " check-license-headers - Check if all files have license headers"
46
46
  @echo " update-license-headers - Add license headers to all files"
47
47
  @echo ""
48
+ @echo "⚡ Performance:"
49
+ @echo " perf-import - Profile import time and show summary"
50
+ @echo " perf-import CLEAN=1 - Clean cache, then profile import time"
51
+ @echo " perf-import NOFILE=1 - Profile without writing to file (for CI)"
52
+ @echo ""
48
53
  @echo "═════════════════════════════════════════════════════════════"
49
54
  @echo "💡 Tip: Run 'make <command>' to execute any command above"
50
55
  @echo ""
51
56
 
52
- clean:
53
- @echo "🧹 Cleaning up coverage reports and cache files..."
54
- rm -rf htmlcov .coverage .pytest_cache
57
+ clean-pycache:
58
+ @echo "🧹 Cleaning up Python cache files..."
55
59
  find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
60
+ find . -type f -name "*.pyc" -delete 2>/dev/null || true
61
+ @echo "✅ Cache cleaned!"
62
+
63
+ clean: clean-pycache
64
+ @echo "🧹 Cleaning up coverage reports and test cache..."
65
+ rm -rf htmlcov .coverage .pytest_cache
66
+ @echo "✅ Cleaned!"
56
67
 
57
68
  coverage:
58
69
  @echo "📊 Running tests with coverage analysis..."
@@ -67,22 +78,22 @@ check-all-fix: format lint-fix
67
78
 
68
79
  format:
69
80
  @echo "📐 Formatting code with ruff..."
70
- uv run ruff format src/ tests/ scripts/ e2e_tests/ --exclude '**/src/data_designer/_version.py'
81
+ uv run ruff format src/ tests/ scripts/ tests_e2e/ --exclude '**/src/data_designer/_version.py'
71
82
  @echo "✅ Formatting complete!"
72
83
 
73
84
  format-check:
74
85
  @echo "📐 Checking code formatting with ruff..."
75
- uv run ruff format --check src/ tests/ scripts/ e2e_tests/ --exclude '**/src/data_designer/_version.py'
86
+ uv run ruff format --check src/ tests/ scripts/ tests_e2e/ --exclude '**/src/data_designer/_version.py'
76
87
  @echo "✅ Formatting check complete! Run 'make format' to auto-fix issues."
77
88
 
78
89
  lint:
79
90
  @echo "🔍 Linting code with ruff..."
80
- uv run ruff check --output-format=full src/ tests/ scripts/ e2e_tests/ --exclude '**/src/data_designer/_version.py'
91
+ uv run ruff check --output-format=full src/ tests/ scripts/ tests_e2e/ --exclude '**/src/data_designer/_version.py'
81
92
  @echo "✅ Linting complete! Run 'make lint-fix' to auto-fix issues."
82
93
 
83
94
  lint-fix:
84
95
  @echo "🔍 Fixing linting issues with ruff..."
85
- uv run ruff check --fix src/ tests/ scripts/ e2e_tests/ --exclude '**/src/data_designer/_version.py'
96
+ uv run ruff check --fix src/ tests/ scripts/ tests_e2e/ --exclude '**/src/data_designer/_version.py'
86
97
  @echo "✅ Linting with autofix complete!"
87
98
 
88
99
  test:
@@ -91,9 +102,9 @@ test:
91
102
 
92
103
  test-e2e:
93
104
  @echo "🧹 Cleaning e2e test environment..."
94
- rm -rf e2e_tests/uv.lock e2e_tests/.pycache e2e_tests/.venv
105
+ rm -rf tests_e2e/uv.lock tests_e2e/.pycache tests_e2e/.venv
95
106
  @echo "🧪 Running e2e tests..."
96
- uv run --no-cache --refresh --directory e2e_tests pytest -s
107
+ uv run --no-cache --refresh --directory tests_e2e pytest -s
97
108
 
98
109
  test-run-tutorials:
99
110
  @echo "🧪 Running tutorials as e2e tests..."
@@ -101,7 +112,7 @@ test-run-tutorials:
101
112
  trap "rm -rf $$TUTORIAL_WORKDIR" EXIT; \
102
113
  for f in docs/notebook_source/*.py; do \
103
114
  echo " 📓 Running $$f..."; \
104
- (cd "$$TUTORIAL_WORKDIR" && uv run python "$(REPO_PATH)/$$f") || exit 1; \
115
+ (cd "$$TUTORIAL_WORKDIR" && uv run --project "$(REPO_PATH)" --group notebooks python "$(REPO_PATH)/$$f") || exit 1; \
105
116
  done; \
106
117
  echo "🧹 Cleaning up tutorial artifacts..."; \
107
118
  rm -rf "$$TUTORIAL_WORKDIR"; \
@@ -113,7 +124,7 @@ test-run-recipes:
113
124
  trap "rm -rf $$RECIPE_WORKDIR" EXIT; \
114
125
  for f in docs/assets/recipes/**/*.py; do \
115
126
  echo " 📜 Running $$f..."; \
116
- (cd "$$RECIPE_WORKDIR" && uv run python "$(REPO_PATH)/$$f" --model-alias nvidia-text --artifact-path "$$RECIPE_WORKDIR" --num-records 5) || exit 1; \
127
+ (cd "$$RECIPE_WORKDIR" && uv run --project "$(REPO_PATH)" --group notebooks python "$(REPO_PATH)/$$f" --model-alias nvidia-text --artifact-path "$$RECIPE_WORKDIR" --num-records 5) || exit 1; \
117
128
  done; \
118
129
  echo "🧹 Cleaning up recipe artifacts..."; \
119
130
  rm -rf "$$RECIPE_WORKDIR"; \
@@ -168,4 +179,34 @@ install-dev-notebooks:
168
179
  $(call install-pre-commit-hooks)
169
180
  @echo "✅ Dev + notebooks installation complete!"
170
181
 
171
- .PHONY: clean coverage format format-check lint lint-fix test test-e2e test-run-tutorials test-run-recipes test-run-all-examples check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks generate-colab-notebooks
182
+ perf-import:
183
+ ifdef CLEAN
184
+ @$(MAKE) clean-pycache
185
+ endif
186
+ @echo "⚡ Profiling import time for data_designer.essentials..."
187
+ ifdef NOFILE
188
+ @PERF_OUTPUT=$$(uv run python -X importtime -c "import data_designer.essentials" 2>&1); \
189
+ echo "$$PERF_OUTPUT"; \
190
+ echo ""; \
191
+ echo "Summary:"; \
192
+ echo "$$PERF_OUTPUT" | tail -1 | awk '{printf " Total: %.3fs\n", $$5/1000000}'; \
193
+ echo ""; \
194
+ echo "💡 Top 10 slowest imports:"; \
195
+ printf "%-12s %-12s %s\n" "Self (s)" "Cumulative (s)" "Module"; \
196
+ printf "%-12s %-12s %s\n" "--------" "--------------" "------"; \
197
+ echo "$$PERF_OUTPUT" | grep "import time:" | sort -rn -k5 | head -10 | awk '{printf "%-12.3f %-12.3f %s", $$3/1000000, $$5/1000000, $$7; for(i=8;i<=NF;i++) printf " %s", $$i; printf "\n"}'
198
+ else
199
+ @PERF_FILE="perf_import_$$(date +%Y%m%d_%H%M%S).txt"; \
200
+ uv run python -X importtime -c "import data_designer.essentials" > "$$PERF_FILE" 2>&1; \
201
+ echo "📊 Import profile saved to $$PERF_FILE"; \
202
+ echo ""; \
203
+ echo "Summary:"; \
204
+ tail -1 "$$PERF_FILE" | awk '{printf " Total: %.3fs\n", $$5/1000000}'; \
205
+ echo ""; \
206
+ echo "💡 Top 10 slowest imports:"; \
207
+ printf "%-12s %-12s %s\n" "Self (s)" "Cumulative (s)" "Module"; \
208
+ printf "%-12s %-12s %s\n" "--------" "--------------" "------"; \
209
+ grep "import time:" "$$PERF_FILE" | sort -rn -k5 | head -10 | awk '{printf "%-12.3f %-12.3f %s", $$3/1000000, $$5/1000000, $$7; for(i=8;i<=NF;i++) printf " %s", $$i; printf "\n"}'
210
+ endif
211
+
212
+ .PHONY: clean clean-pycache coverage format format-check lint lint-fix test test-e2e test-run-tutorials test-run-recipes test-run-all-examples check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks generate-colab-notebooks perf-import
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer
3
- Version: 0.3.3
3
+ Version: 0.3.5
4
4
  Summary: General framework for synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  License-File: LICENSE
@@ -87,12 +87,19 @@ make install
87
87
 
88
88
  ### 2. Set your API key
89
89
 
90
- Get your API key from [build.nvidia.com](https://build.nvidia.com) or [OpenAI](https://platform.openai.com/api-keys):
90
+ Start with one of our default model providers:
91
91
 
92
+ - [NVIDIA Build API](https://build.nvidia.com)
93
+ - [OpenAI](https://platform.openai.com/api-keys)
94
+ - [OpenRouter](https://openrouter.ai)
95
+
96
+ Grab your API key(s) using the above links and set one or more of the following environment variables:
92
97
  ```bash
93
98
  export NVIDIA_API_KEY="your-api-key-here"
94
- # Or use OpenAI
99
+
95
100
  export OPENAI_API_KEY="your-openai-api-key-here"
101
+
102
+ export OPENROUTER_API_KEY="your-openrouter-api-key-here"
96
103
  ```
97
104
 
98
105
  ### 3. Start generating data!
@@ -127,7 +134,7 @@ config_builder.add_column(
127
134
  LLMTextColumnConfig(
128
135
  name="review",
129
136
  model_alias="nvidia-text",
130
- prompt="""Write a brief product review for a {{ product_category }} item you recently purchased.""",
137
+ prompt="Write a brief product review for a {{ product_category }} item you recently purchased.",
131
138
  )
132
139
  )
133
140
 
@@ -193,6 +200,14 @@ The value `openai/gpt-oss-20b` would be collected.
193
200
 
194
201
  To disable telemetry capture, set `NEMO_TELEMETRY_ENABLED=false`.
195
202
 
203
+ ### Top Models
204
+
205
+ This chart represents the breakdown of models used for Data Designer across all synthetic data generation jobs from 12/18/2025 to 1/14/2026.
206
+
207
+ ![Top models used for synthetic data generation](docs/images/top-models.png)
208
+
209
+ _Last updated on 1/14/2026_
210
+
196
211
  ---
197
212
 
198
213
  ## License
@@ -40,12 +40,19 @@ make install
40
40
 
41
41
  ### 2. Set your API key
42
42
 
43
- Get your API key from [build.nvidia.com](https://build.nvidia.com) or [OpenAI](https://platform.openai.com/api-keys):
43
+ Start with one of our default model providers:
44
44
 
45
+ - [NVIDIA Build API](https://build.nvidia.com)
46
+ - [OpenAI](https://platform.openai.com/api-keys)
47
+ - [OpenRouter](https://openrouter.ai)
48
+
49
+ Grab your API key(s) using the above links and set one or more of the following environment variables:
45
50
  ```bash
46
51
  export NVIDIA_API_KEY="your-api-key-here"
47
- # Or use OpenAI
52
+
48
53
  export OPENAI_API_KEY="your-openai-api-key-here"
54
+
55
+ export OPENROUTER_API_KEY="your-openrouter-api-key-here"
49
56
  ```
50
57
 
51
58
  ### 3. Start generating data!
@@ -80,7 +87,7 @@ config_builder.add_column(
80
87
  LLMTextColumnConfig(
81
88
  name="review",
82
89
  model_alias="nvidia-text",
83
- prompt="""Write a brief product review for a {{ product_category }} item you recently purchased.""",
90
+ prompt="Write a brief product review for a {{ product_category }} item you recently purchased.",
84
91
  )
85
92
  )
86
93
 
@@ -146,6 +153,14 @@ The value `openai/gpt-oss-20b` would be collected.
146
153
 
147
154
  To disable telemetry capture, set `NEMO_TELEMETRY_ENABLED=false`.
148
155
 
156
+ ### Top Models
157
+
158
+ This chart represents the breakdown of models used for Data Designer across all synthetic data generation jobs from 12/18/2025 to 1/14/2026.
159
+
160
+ ![Top models used for synthetic data generation](docs/images/top-models.png)
161
+
162
+ _Last updated on 1/14/2026_
163
+
149
164
  ---
150
165
 
151
166
  ## License
@@ -2,7 +2,7 @@
2
2
  "cells": [
3
3
  {
4
4
  "cell_type": "markdown",
5
- "id": "adb0c3cd",
5
+ "id": "fcbfacc7",
6
6
  "metadata": {},
7
7
  "source": [
8
8
  "# 🎨 Data Designer Tutorial: The Basics\n",
@@ -14,7 +14,7 @@
14
14
  },
15
15
  {
16
16
  "cell_type": "markdown",
17
- "id": "73d42bda",
17
+ "id": "e25b987d",
18
18
  "metadata": {},
19
19
  "source": [
20
20
  "### ⚡ Colab Setup\n",
@@ -25,7 +25,7 @@
25
25
  {
26
26
  "cell_type": "code",
27
27
  "execution_count": null,
28
- "id": "c6675487",
28
+ "id": "d8194911",
29
29
  "metadata": {},
30
30
  "outputs": [],
31
31
  "source": [
@@ -36,7 +36,7 @@
36
36
  {
37
37
  "cell_type": "code",
38
38
  "execution_count": null,
39
- "id": "b98ebeee",
39
+ "id": "00ffb95b",
40
40
  "metadata": {},
41
41
  "outputs": [],
42
42
  "source": [
@@ -53,7 +53,7 @@
53
53
  },
54
54
  {
55
55
  "cell_type": "markdown",
56
- "id": "6e2355bb",
56
+ "id": "2c5c31e2",
57
57
  "metadata": {},
58
58
  "source": [
59
59
  "### 📦 Import the essentials\n",
@@ -64,7 +64,7 @@
64
64
  {
65
65
  "cell_type": "code",
66
66
  "execution_count": null,
67
- "id": "95bf5ae3",
67
+ "id": "a7a9489f",
68
68
  "metadata": {},
69
69
  "outputs": [],
70
70
  "source": [
@@ -85,7 +85,7 @@
85
85
  },
86
86
  {
87
87
  "cell_type": "markdown",
88
- "id": "ee906a92",
88
+ "id": "141b77e2",
89
89
  "metadata": {},
90
90
  "source": [
91
91
  "### ⚙️ Initialize the Data Designer interface\n",
@@ -98,7 +98,7 @@
98
98
  {
99
99
  "cell_type": "code",
100
100
  "execution_count": null,
101
- "id": "15b35cb8",
101
+ "id": "2bca9124",
102
102
  "metadata": {},
103
103
  "outputs": [],
104
104
  "source": [
@@ -107,7 +107,7 @@
107
107
  },
108
108
  {
109
109
  "cell_type": "markdown",
110
- "id": "999ce38a",
110
+ "id": "d4142887",
111
111
  "metadata": {},
112
112
  "source": [
113
113
  "### 🎛️ Define model configurations\n",
@@ -124,7 +124,7 @@
124
124
  {
125
125
  "cell_type": "code",
126
126
  "execution_count": null,
127
- "id": "f5ab65d5",
127
+ "id": "b3762937",
128
128
  "metadata": {},
129
129
  "outputs": [],
130
130
  "source": [
@@ -154,7 +154,7 @@
154
154
  },
155
155
  {
156
156
  "cell_type": "markdown",
157
- "id": "912d7954",
157
+ "id": "fec3a11f",
158
158
  "metadata": {},
159
159
  "source": [
160
160
  "### 🏗️ Initialize the Data Designer Config Builder\n",
@@ -169,7 +169,7 @@
169
169
  {
170
170
  "cell_type": "code",
171
171
  "execution_count": null,
172
- "id": "0ba83688",
172
+ "id": "5324dbec",
173
173
  "metadata": {},
174
174
  "outputs": [],
175
175
  "source": [
@@ -178,7 +178,7 @@
178
178
  },
179
179
  {
180
180
  "cell_type": "markdown",
181
- "id": "ee2de21d",
181
+ "id": "9acab1b8",
182
182
  "metadata": {},
183
183
  "source": [
184
184
  "## 🎲 Getting started with sampler columns\n",
@@ -195,7 +195,7 @@
195
195
  {
196
196
  "cell_type": "code",
197
197
  "execution_count": null,
198
- "id": "3009cfa5",
198
+ "id": "9916a82e",
199
199
  "metadata": {},
200
200
  "outputs": [],
201
201
  "source": [
@@ -204,7 +204,7 @@
204
204
  },
205
205
  {
206
206
  "cell_type": "markdown",
207
- "id": "c42c890f",
207
+ "id": "8452819f",
208
208
  "metadata": {},
209
209
  "source": [
210
210
  "Let's start designing our product review dataset by adding product category and subcategory columns.\n"
@@ -213,7 +213,7 @@
213
213
  {
214
214
  "cell_type": "code",
215
215
  "execution_count": null,
216
- "id": "16251687",
216
+ "id": "3c99a9fe",
217
217
  "metadata": {},
218
218
  "outputs": [],
219
219
  "source": [
@@ -294,7 +294,7 @@
294
294
  },
295
295
  {
296
296
  "cell_type": "markdown",
297
- "id": "ff41c919",
297
+ "id": "9028e845",
298
298
  "metadata": {},
299
299
  "source": [
300
300
  "Next, let's add samplers to generate data related to the customer and their review.\n"
@@ -303,7 +303,7 @@
303
303
  {
304
304
  "cell_type": "code",
305
305
  "execution_count": null,
306
- "id": "78ec15fc",
306
+ "id": "b64910fc",
307
307
  "metadata": {},
308
308
  "outputs": [],
309
309
  "source": [
@@ -340,7 +340,7 @@
340
340
  },
341
341
  {
342
342
  "cell_type": "markdown",
343
- "id": "8ac2131e",
343
+ "id": "3c56ff50",
344
344
  "metadata": {},
345
345
  "source": [
346
346
  "## 🦜 LLM-generated columns\n",
@@ -355,7 +355,7 @@
355
355
  {
356
356
  "cell_type": "code",
357
357
  "execution_count": null,
358
- "id": "d8232495",
358
+ "id": "db03dbae",
359
359
  "metadata": {},
360
360
  "outputs": [],
361
361
  "source": [
@@ -391,7 +391,7 @@
391
391
  },
392
392
  {
393
393
  "cell_type": "markdown",
394
- "id": "9105ddf2",
394
+ "id": "ee137104",
395
395
  "metadata": {},
396
396
  "source": [
397
397
  "### 🔁 Iteration is key – preview the dataset!\n",
@@ -408,7 +408,7 @@
408
408
  {
409
409
  "cell_type": "code",
410
410
  "execution_count": null,
411
- "id": "a6b02b74",
411
+ "id": "d318e88f",
412
412
  "metadata": {},
413
413
  "outputs": [],
414
414
  "source": [
@@ -418,7 +418,7 @@
418
418
  {
419
419
  "cell_type": "code",
420
420
  "execution_count": null,
421
- "id": "74a4a386",
421
+ "id": "a6327bd4",
422
422
  "metadata": {},
423
423
  "outputs": [],
424
424
  "source": [
@@ -429,7 +429,7 @@
429
429
  {
430
430
  "cell_type": "code",
431
431
  "execution_count": null,
432
- "id": "b1be4d95",
432
+ "id": "9d9f8ba0",
433
433
  "metadata": {},
434
434
  "outputs": [],
435
435
  "source": [
@@ -439,7 +439,7 @@
439
439
  },
440
440
  {
441
441
  "cell_type": "markdown",
442
- "id": "c2871df4",
442
+ "id": "5b1df1d7",
443
443
  "metadata": {},
444
444
  "source": [
445
445
  "### 📊 Analyze the generated data\n",
@@ -452,7 +452,7 @@
452
452
  {
453
453
  "cell_type": "code",
454
454
  "execution_count": null,
455
- "id": "b2beb476",
455
+ "id": "0c19da33",
456
456
  "metadata": {},
457
457
  "outputs": [],
458
458
  "source": [
@@ -462,7 +462,7 @@
462
462
  },
463
463
  {
464
464
  "cell_type": "markdown",
465
- "id": "765fb038",
465
+ "id": "0e35f0f5",
466
466
  "metadata": {},
467
467
  "source": [
468
468
  "### 🆙 Scale up!\n",
@@ -475,7 +475,7 @@
475
475
  {
476
476
  "cell_type": "code",
477
477
  "execution_count": null,
478
- "id": "f4fc625e",
478
+ "id": "07bdf600",
479
479
  "metadata": {},
480
480
  "outputs": [],
481
481
  "source": [
@@ -485,7 +485,7 @@
485
485
  {
486
486
  "cell_type": "code",
487
487
  "execution_count": null,
488
- "id": "58989731",
488
+ "id": "27959f17",
489
489
  "metadata": {},
490
490
  "outputs": [],
491
491
  "source": [
@@ -498,7 +498,7 @@
498
498
  {
499
499
  "cell_type": "code",
500
500
  "execution_count": null,
501
- "id": "80351a24",
501
+ "id": "953b6749",
502
502
  "metadata": {},
503
503
  "outputs": [],
504
504
  "source": [
@@ -510,7 +510,7 @@
510
510
  },
511
511
  {
512
512
  "cell_type": "markdown",
513
- "id": "cc79ae74",
513
+ "id": "471fb9a5",
514
514
  "metadata": {},
515
515
  "source": [
516
516
  "## ⏭️ Next Steps\n",