data-designer 0.3.3__tar.gz → 0.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_designer-0.3.3 → data_designer-0.3.5}/.gitignore +4 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/AGENTS.md +143 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/Makefile +53 -12
- {data_designer-0.3.3 → data_designer-0.3.5}/PKG-INFO +19 -4
- {data_designer-0.3.3 → data_designer-0.3.5}/README.md +18 -3
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/colab_notebooks/1-the-basics.ipynb +31 -31
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +29 -29
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +27 -27
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/colab_notebooks/4-providing-images-as-context.ipynb +35 -35
- data_designer-0.3.5/docs/images/top-models.png +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/plugins/example.md +9 -32
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/scripts/generate_colab_notebooks.py +1 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/scripts/update_license_headers.py +1 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/__init__.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/_version.py +2 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/__init__.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/commands/download.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/commands/list.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/commands/models.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/commands/providers.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/commands/reset.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/controllers/__init__.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/controllers/download_controller.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/controllers/model_controller.py +6 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/controllers/provider_controller.py +6 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/forms/__init__.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/forms/builder.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/forms/field.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/forms/form.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/forms/model_builder.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/forms/provider_builder.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/main.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/repositories/__init__.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/repositories/base.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/repositories/model_repository.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/repositories/persona_repository.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/repositories/provider_repository.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/services/__init__.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/services/download_service.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/services/model_service.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/services/provider_service.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/ui.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/utils.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/analysis/column_profilers.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/analysis/column_statistics.py +8 -5
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/analysis/dataset_profiler.py +9 -3
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/analysis/utils/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/analysis/utils/reporting.py +7 -3
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/base.py +1 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/column_configs.py +77 -7
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/column_types.py +33 -36
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/dataset_builders.py +2 -0
- data_designer-0.3.5/src/data_designer/config/dataset_metadata.py +18 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/default_model_settings.py +1 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/exports.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/interface.py +3 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/models.py +7 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/preview_results.py +9 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/processors.py +2 -0
- data_designer-0.3.5/src/data_designer/config/run_config.py +48 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/sampler_constraints.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/sampler_params.py +7 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/seed.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/seed_source.py +9 -3
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/seed_source_types.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/constants.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/info.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/io_helpers.py +8 -3
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/misc.py +2 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/numerical_helpers.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/type_helpers.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/visualization.py +19 -11
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/validator_params.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/column_profilers/base.py +9 -8
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +15 -19
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/column_profilers/registry.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/column_statistics.py +5 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/dataset_profiler.py +12 -9
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +7 -4
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/analysis/utils/judge_score_processing.py +7 -3
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/base.py +26 -14
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/embedding.py +4 -11
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/expression.py +7 -16
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/llm_completion.py +13 -47
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/samplers.py +8 -14
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/seed_dataset.py +9 -15
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/generators/validation.py +9 -20
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/registry.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/utils/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/utils/generator_classification.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +4 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/compiler.py +3 -6
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/configurable_task.py +12 -13
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/artifact_storage.py +87 -8
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/column_wise_builder.py +34 -35
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/multi_column_configs.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/utils/concurrency.py +13 -4
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/utils/dag.py +7 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +35 -25
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/dataset_builders/utils/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/model_provider.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/errors.py +23 -31
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/facade.py +12 -9
- data_designer-0.3.5/src/data_designer/engine/models/factory.py +42 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/litellm_overrides.py +16 -11
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/parsers/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/parsers/parser.py +2 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/parsers/postprocessors.py +1 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/parsers/tag_parsers.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/parsers/types.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/recipes/base.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/recipes/response_recipes.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/registry.py +11 -18
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/telemetry.py +6 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/ginja/ast.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/ginja/environment.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/ginja/exceptions.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/ginja/record.py +2 -0
- data_designer-0.3.5/src/data_designer/engine/processing/gsonschema/exceptions.py +15 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/gsonschema/types.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/gsonschema/validators.py +10 -6
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/processors/base.py +1 -5
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/processors/drop_columns.py +7 -10
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/processors/registry.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/processors/schema_transform.py +7 -10
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/processing/utils.py +7 -3
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/registry/base.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/registry/data_designer_registry.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/registry/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/resources/managed_dataset_generator.py +6 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/resources/managed_dataset_repository.py +8 -5
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/resources/managed_storage.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/resources/resource_provider.py +20 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/resources/seed_reader.py +7 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/column.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/constraints.py +8 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/data_sources/base.py +10 -7
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/data_sources/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/data_sources/sources.py +27 -22
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +2 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/person.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/phone_number.py +8 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/generator.py +5 -4
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/jinja_utils.py +7 -3
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/people_gen.py +7 -7
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/person_constants.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/schema.py +5 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/schema_builder.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/utils.py +7 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/secret_resolver.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validation.py +2 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validators/__init__.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validators/base.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validators/local_callable.py +7 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validators/python.py +7 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validators/remote.py +7 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/validators/sql.py +8 -3
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/essentials/__init__.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/interface/data_designer.py +36 -39
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/interface/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/interface/results.py +9 -2
- data_designer-0.3.5/src/data_designer/lazy_heavy_imports.py +54 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/logging.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/__init__.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/errors.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/plugin.py +0 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/registry.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/testing/__init__.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/testing/stubs.py +21 -43
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugins/testing/utils.py +2 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_columns.py +21 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/utils/test_misc.py +4 -4
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/column_profilers/test_base.py +1 -17
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/test_dataset_profiler.py +2 -32
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_column_generator_base.py +5 -23
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_embedding.py +5 -7
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_expression.py +0 -8
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_llm_completion_generators.py +22 -21
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_seed_dataset.py +2 -3
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_validation.py +3 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/conftest.py +1 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/test_artifact_storage.py +142 -22
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/test_column_wise_builder.py +58 -43
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/utils/test_concurrency.py +44 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +30 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/conftest.py +2 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/test_model_registry.py +3 -2
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/processors/test_drop_columns.py +0 -7
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/processors/test_schema_transform.py +0 -7
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/resources/conftest.py +0 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/resources/test_resource_provider.py +5 -7
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/test_compiler.py +3 -3
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/test_configurable_task.py +6 -41
- data_designer-0.3.5/tests/engine/test_dataset_metadata.py +56 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/interface/test_data_designer.py +21 -20
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/interface/test_results.py +45 -20
- data_designer-0.3.5/tests/test_import_perf.py +64 -0
- {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/src/data_designer_e2e_tests/plugins/column_generator/config.py +12 -0
- {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/src/data_designer_e2e_tests/plugins/column_generator/impl.py +2 -14
- {data_designer-0.3.3 → data_designer-0.3.5}/uv.lock +82 -82
- data_designer-0.3.3/src/data_designer/config/run_config.py +0 -34
- data_designer-0.3.3/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -8
- {data_designer-0.3.3 → data_designer-0.3.5}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/.github/ISSUE_TEMPLATE/development-task.yml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/.github/ISSUE_TEMPLATE/feature-request.yml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/build-docs.yml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/build-notebooks.yml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/check-colab-notebooks.yml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/ci.yml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/dco-assistant.yml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/pack-tutorials.yml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/.github/workflows/semantic-pull-requests.yml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/.pre-commit-config.yaml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/CLAUDE.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/CODE_OF_CONDUCT.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/CONTRIBUTING.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/DCO +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/LICENSE +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/VERSIONING.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/CONTRIBUTING.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/assets/palette-favicon.png +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/assets/recipes/code_generation/text_to_python.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/assets/recipes/code_generation/text_to_sql.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/assets/recipes/qa_and_chat/multi_turn_chat.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/assets/recipes/qa_and_chat/product_info_qa.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/analysis.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/column_configs.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/config_builder.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/data_designer_config.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/models.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/processors.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/sampler_params.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/code_reference/validator_params.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/columns.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/models/configure-model-settings-with-the-cli.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/models/custom-model-settings.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/models/default-model-settings.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/models/inference-parameters.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/models/model-configs.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/models/model-providers.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/person_sampling.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/processors.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/concepts/validators.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/css/mkdocstrings.css +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/css/style.css +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/index.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/installation.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/js/toc-toggle.js +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/1-the-basics.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/3-seeding-with-a-dataset.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/4-providing-images-as-context.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/README.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/_README.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/notebook_source/_pyproject.toml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/overrides/main.html +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/plugins/available.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/plugins/overview.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/quick-start.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/recipes/cards.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/recipes/code_generation/text_to_python.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/recipes/code_generation/text_to_sql.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/recipes/qa_and_chat/multi_turn_chat.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/docs/recipes/qa_and_chat/product_info_qa.md +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/mkdocs.yml +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/pyproject.toml +1 -1
- {data_designer-0.3.3 → data_designer-0.3.5}/scripts/test_license_headers.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/cli/README.md +0 -0
- {data_designer-0.3.3/e2e_tests/src/data_designer_e2e_tests/plugins → data_designer-0.3.5/src/data_designer/cli/commands}/__init__.py +0 -0
- {data_designer-0.3.3/e2e_tests/src/data_designer_e2e_tests/plugins/column_generator → data_designer-0.3.5/src/data_designer/config}/__init__.py +0 -0
- {data_designer-0.3.3/e2e_tests/src/data_designer_e2e_tests/plugins/seed_reader → data_designer-0.3.5/src/data_designer/config/analysis}/__init__.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/config_builder.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/data_designer_config.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/config/utils/code_lang.py +0 -0
- {data_designer-0.3.3/src/data_designer/cli/commands → data_designer-0.3.5/src/data_designer/engine}/__init__.py +0 -0
- {data_designer-0.3.3/src/data_designer/config → data_designer-0.3.5/src/data_designer/engine/column_generators}/__init__.py +0 -0
- {data_designer-0.3.3/src/data_designer/config/analysis → data_designer-0.3.5/src/data_designer/engine/column_generators/generators}/__init__.py +0 -0
- {data_designer-0.3.3/src/data_designer/engine → data_designer-0.3.5/src/data_designer/engine/dataset_builders/utils}/__init__.py +0 -0
- {data_designer-0.3.3/src/data_designer/engine/column_generators → data_designer-0.3.5/src/data_designer/engine/models}/__init__.py +0 -0
- {data_designer-0.3.3/src/data_designer/engine/column_generators/generators → data_designer-0.3.5/src/data_designer/engine/models/parsers}/__init__.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/usage.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/models/utils.py +0 -0
- {data_designer-0.3.3/src/data_designer/engine/dataset_builders/utils → data_designer-0.3.5/src/data_designer/engine/processing/ginja}/__init__.py +0 -0
- {data_designer-0.3.3/src/data_designer/engine/models → data_designer-0.3.5/src/data_designer/engine/processing/gsonschema}/__init__.py +0 -0
- {data_designer-0.3.3/src/data_designer/engine/models/parsers → data_designer-0.3.5/src/data_designer/engine/sampling_gen/entities}/__init__.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
- {data_designer-0.3.3/src/data_designer/engine/processing/ginja → data_designer-0.3.5/src/data_designer/interface}/__init__.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/src/data_designer/plugin_manager.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/commands/test_download_command.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/commands/test_list_command.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/commands/test_models_command.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/commands/test_providers_command.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/commands/test_reset_command.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/conftest.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/controllers/test_download_controller.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/controllers/test_model_controller.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/controllers/test_provider_controller.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/forms/test_field.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/forms/test_form.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/forms/test_model_builder.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/forms/test_provider_builder.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/repositories/test_model_repository.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/repositories/test_persona_repository.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/repositories/test_provider_repository.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/services/test_download_service.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/services/test_model_service.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/services/test_provider_service.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/cli/test_cli_utils.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/analysis/conftest.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/analysis/test_column_statistics.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/analysis/test_dataset_profiler_results.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/analysis/utils/test_reporting.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_config_builder.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_data_designer_config.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_default_model_settings.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_models.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_processors.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_sampler_constraints.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_sampler_params.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_seed.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_seed_source.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/test_validator_params.py +0 -0
- {data_designer-0.3.3/src/data_designer/engine/processing/gsonschema → data_designer-0.3.5/tests/config/utils}/__init__.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/utils/test_code_lang.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/utils/test_info.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/utils/test_io_helpers.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/utils/test_type_helpers.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/config/utils/test_visualization.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/conftest.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/conftest.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/test_errors.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
- {data_designer-0.3.3/src/data_designer/engine/sampling_gen/entities → data_designer-0.3.5/tests/engine/column_generators/generators}/__init__.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/generators/test_samplers.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/test_registry.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/utils/test_generator_classification.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/parsers/test_parser.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/parsers/test_parsers_types.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/parsers/test_postprocessors.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/recipes/test_recipe_base.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/recipes/test_response_recipes.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/stub_secrets.json +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/test_facade.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/test_litellm_overrides.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/test_model_errors.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/test_model_utils.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/models/test_usage.py +0 -0
- {data_designer-0.3.3/src/data_designer/interface → data_designer-0.3.5/tests/engine/processing}/__init__.py +0 -0
- {data_designer-0.3.3/tests/config/utils → data_designer-0.3.5/tests/engine/processing/ginja}/__init__.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/ginja/test_ast.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/ginja/test_environment.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/ginja/test_exceptions.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/ginja/test_record.py +0 -0
- {data_designer-0.3.3/tests/engine/column_generators/generators → data_designer-0.3.5/tests/engine/processing/gsonschema}/__init__.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/gsonschema/test_types.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/gsonschema/test_validators.py +0 -0
- {data_designer-0.3.3/tests/engine/processing → data_designer-0.3.5/tests/engine/processing/processors}/__init__.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/processors/test_registry.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/processing/test_utils.py +0 -0
- {data_designer-0.3.3/tests/engine/processing/ginja → data_designer-0.3.5/tests/engine/registry}/__init__.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/registry/conftest.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/registry/test_base.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/registry/test_data_designer_registry.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/registry/test_errors.py +0 -0
- {data_designer-0.3.3/tests/engine/processing/gsonschema → data_designer-0.3.5/tests/engine/resources}/__init__.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/resources/test_managed_storage.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/resources/test_seed_reader.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/conftest.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/entities/test_person.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_column.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_constraints.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_generator.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_people_gen.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_schema.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/sampling_gen/test_utils.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/test_engine_errors.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/test_model_provider.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/test_secret_resolver.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/test_validation.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/validators/test_local_callable.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/validators/test_python.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/validators/test_remote.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/engine/validators/test_sql.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/essentials/test_init.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/plugins/test_plugin.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/plugins/test_plugin_registry.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/test_logging.py +0 -0
- {data_designer-0.3.3 → data_designer-0.3.5}/tests/test_plugin_manager.py +0 -0
- {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/pyproject.toml +0 -0
- {data_designer-0.3.3/tests/engine/processing/processors → data_designer-0.3.5/tests_e2e/src/data_designer_e2e_tests/plugins}/__init__.py +0 -0
- {data_designer-0.3.3/tests/engine/registry → data_designer-0.3.5/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator}/__init__.py +0 -0
- {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/src/data_designer_e2e_tests/plugins/column_generator/plugin.py +0 -0
- {data_designer-0.3.3/tests/engine/resources → data_designer-0.3.5/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader}/__init__.py +0 -0
- {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/src/data_designer_e2e_tests/plugins/seed_reader/config.py +0 -0
- {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/src/data_designer_e2e_tests/plugins/seed_reader/impl.py +0 -0
- {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/src/data_designer_e2e_tests/plugins/seed_reader/plugin.py +0 -0
- {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/tests/test_e2e.py +0 -0
- {data_designer-0.3.3/e2e_tests → data_designer-0.3.5/tests_e2e}/tests/test_seed.csv +0 -0
|
@@ -158,12 +158,13 @@ Type annotations are REQUIRED for all code in this project. This is strictly enf
|
|
|
158
158
|
### Import Style
|
|
159
159
|
|
|
160
160
|
- **ALWAYS** use absolute imports, never relative imports
|
|
161
|
-
- Place imports at module level, not inside functions
|
|
161
|
+
- Place imports at module level, not inside functions (exception: when a function-level import is unavoidable for performance reasons)
|
|
162
162
|
- Import sorting is handled by `ruff`'s `isort` - imports should be grouped and sorted:
|
|
163
163
|
1. Standard library imports
|
|
164
|
-
2. Third-party imports
|
|
164
|
+
2. Third-party imports (use `lazy_heavy_imports` for heavy libraries)
|
|
165
165
|
3. First-party imports (`data_designer`)
|
|
166
166
|
- Use standard import conventions (enforced by `ICN`)
|
|
167
|
+
- See [Lazy Loading and TYPE_CHECKING](#lazy-loading-and-type_checking) section for optimization guidelines
|
|
167
168
|
|
|
168
169
|
```python
|
|
169
170
|
# Good
|
|
@@ -184,6 +185,146 @@ Type annotations are REQUIRED for all code in this project. This is strictly enf
|
|
|
184
185
|
path = Path(filename)
|
|
185
186
|
```
|
|
186
187
|
|
|
188
|
+
### Lazy Loading and TYPE_CHECKING
|
|
189
|
+
|
|
190
|
+
This project uses lazy loading for heavy third-party dependencies to optimize import performance.
|
|
191
|
+
|
|
192
|
+
#### When to Use Lazy Loading
|
|
193
|
+
|
|
194
|
+
**Heavy third-party libraries** (>100ms import cost) should be lazy-loaded via `lazy_heavy_imports.py`:
|
|
195
|
+
|
|
196
|
+
```python
|
|
197
|
+
# ❌ Don't import directly
|
|
198
|
+
import pandas as pd
|
|
199
|
+
import numpy as np
|
|
200
|
+
|
|
201
|
+
# ✅ Use lazy loading with IDE support
|
|
202
|
+
from typing import TYPE_CHECKING
|
|
203
|
+
from data_designer.lazy_heavy_imports import pd, np
|
|
204
|
+
|
|
205
|
+
if TYPE_CHECKING:
|
|
206
|
+
import pandas as pd # For IDE autocomplete and type hints
|
|
207
|
+
import numpy as np
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
This pattern provides:
|
|
211
|
+
- Runtime lazy loading (fast startup)
|
|
212
|
+
- Full IDE support (autocomplete, type hints)
|
|
213
|
+
- Type checker validation
|
|
214
|
+
|
|
215
|
+
**See [lazy_heavy_imports.py](src/data_designer/lazy_heavy_imports.py) for the current list of lazy-loaded libraries.**
|
|
216
|
+
|
|
217
|
+
#### Adding New Heavy Dependencies
|
|
218
|
+
|
|
219
|
+
If you add a new dependency with a significant import cost (>100ms):
|
|
220
|
+
|
|
221
|
+
1. **Add to `lazy_heavy_imports.py`:**
|
|
222
|
+
```python
|
|
223
|
+
_LAZY_IMPORTS = {
|
|
224
|
+
# ... existing entries ...
|
|
225
|
+
"your_lib": "your_library_name",
|
|
226
|
+
}
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
2. **Update imports across codebase:**
|
|
230
|
+
```python
|
|
231
|
+
from typing import TYPE_CHECKING
|
|
232
|
+
from data_designer.lazy_heavy_imports import your_lib
|
|
233
|
+
|
|
234
|
+
if TYPE_CHECKING:
|
|
235
|
+
import your_library_name as your_lib # For IDE support
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
3. **Verify with performance test:**
|
|
239
|
+
```bash
|
|
240
|
+
make perf-import CLEAN=1
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
#### Using TYPE_CHECKING Blocks
|
|
244
|
+
|
|
245
|
+
`TYPE_CHECKING` blocks defer imports that are only needed for type hints, preventing circular dependencies and reducing import time.
|
|
246
|
+
|
|
247
|
+
**For internal data_designer imports:**
|
|
248
|
+
|
|
249
|
+
```python
|
|
250
|
+
from __future__ import annotations # Always include at top
|
|
251
|
+
|
|
252
|
+
from typing import TYPE_CHECKING
|
|
253
|
+
|
|
254
|
+
# Runtime imports
|
|
255
|
+
from pathlib import Path
|
|
256
|
+
from data_designer.config.base import ConfigBase
|
|
257
|
+
|
|
258
|
+
if TYPE_CHECKING:
|
|
259
|
+
# Type-only imports - only visible to type checkers
|
|
260
|
+
from data_designer.engine.models.facade import ModelFacade
|
|
261
|
+
|
|
262
|
+
def get_model(model: ModelFacade) -> str:
|
|
263
|
+
return model.name
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
**For lazy-loaded libraries (see the pattern in "When to Use Lazy Loading" above):**
|
|
267
|
+
- Import from `lazy_heavy_imports` for runtime
|
|
268
|
+
- Add full import in `TYPE_CHECKING` block for IDE support
|
|
269
|
+
|
|
270
|
+
**Rules for TYPE_CHECKING:**
|
|
271
|
+
|
|
272
|
+
✅ **DO put in TYPE_CHECKING:**
|
|
273
|
+
- Internal `data_designer` imports used **only** in type hints
|
|
274
|
+
- Imports that would cause circular dependencies
|
|
275
|
+
- **Full imports of lazy-loaded libraries for IDE support** (e.g., `import pandas as pd` in addition to runtime `from data_designer.lazy_heavy_imports import pd`)
|
|
276
|
+
|
|
277
|
+
❌ **DON'T put in TYPE_CHECKING:**
|
|
278
|
+
- **Standard library imports** (`Path`, `Any`, `Callable`, `Literal`, `TypeAlias`, etc.)
|
|
279
|
+
- **Pydantic model types** used in field definitions (needed at runtime for validation)
|
|
280
|
+
- **Types used in discriminated unions** (Pydantic needs them at runtime)
|
|
281
|
+
- **Any import used at runtime** (instantiation, method calls, base classes, etc.)
|
|
282
|
+
|
|
283
|
+
**Examples:**
|
|
284
|
+
|
|
285
|
+
```python
|
|
286
|
+
# ✅ CORRECT - Lazy-loaded library with IDE support
|
|
287
|
+
from typing import TYPE_CHECKING
|
|
288
|
+
from data_designer.lazy_heavy_imports import pd
|
|
289
|
+
|
|
290
|
+
if TYPE_CHECKING:
|
|
291
|
+
import pandas as pd # IDE gets full type hints
|
|
292
|
+
|
|
293
|
+
def load_data(path: str) -> pd.DataFrame: # IDE understands pd.DataFrame
|
|
294
|
+
return pd.read_csv(path)
|
|
295
|
+
|
|
296
|
+
# ✅ CORRECT - Standard library NOT in TYPE_CHECKING
|
|
297
|
+
from pathlib import Path
|
|
298
|
+
from typing import Any
|
|
299
|
+
|
|
300
|
+
def process_file(path: Path) -> Any:
|
|
301
|
+
return path.read_text()
|
|
302
|
+
|
|
303
|
+
# ✅ CORRECT - Internal type-only import
|
|
304
|
+
from typing import TYPE_CHECKING
|
|
305
|
+
|
|
306
|
+
if TYPE_CHECKING:
|
|
307
|
+
from data_designer.engine.models.facade import ModelFacade
|
|
308
|
+
|
|
309
|
+
def get_model(model: ModelFacade) -> str: # Only used in type hint
|
|
310
|
+
return model.name
|
|
311
|
+
|
|
312
|
+
# ❌ INCORRECT - Pydantic field type in TYPE_CHECKING
|
|
313
|
+
from typing import TYPE_CHECKING
|
|
314
|
+
|
|
315
|
+
if TYPE_CHECKING:
|
|
316
|
+
from data_designer.config.models import ModelConfig # Wrong!
|
|
317
|
+
|
|
318
|
+
class MyConfig(BaseModel):
|
|
319
|
+
model: ModelConfig # Pydantic needs this at runtime!
|
|
320
|
+
|
|
321
|
+
# ✅ CORRECT - Pydantic field type at runtime
|
|
322
|
+
from data_designer.config.models import ModelConfig
|
|
323
|
+
|
|
324
|
+
class MyConfig(BaseModel):
|
|
325
|
+
model: ModelConfig
|
|
326
|
+
```
|
|
327
|
+
|
|
187
328
|
### Naming Conventions (PEP 8)
|
|
188
329
|
|
|
189
330
|
Follow PEP 8 naming conventions:
|
|
@@ -45,14 +45,25 @@ help:
|
|
|
45
45
|
@echo " check-license-headers - Check if all files have license headers"
|
|
46
46
|
@echo " update-license-headers - Add license headers to all files"
|
|
47
47
|
@echo ""
|
|
48
|
+
@echo "⚡ Performance:"
|
|
49
|
+
@echo " perf-import - Profile import time and show summary"
|
|
50
|
+
@echo " perf-import CLEAN=1 - Clean cache, then profile import time"
|
|
51
|
+
@echo " perf-import NOFILE=1 - Profile without writing to file (for CI)"
|
|
52
|
+
@echo ""
|
|
48
53
|
@echo "═════════════════════════════════════════════════════════════"
|
|
49
54
|
@echo "💡 Tip: Run 'make <command>' to execute any command above"
|
|
50
55
|
@echo ""
|
|
51
56
|
|
|
52
|
-
clean:
|
|
53
|
-
@echo "🧹 Cleaning up
|
|
54
|
-
rm -rf htmlcov .coverage .pytest_cache
|
|
57
|
+
clean-pycache:
|
|
58
|
+
@echo "🧹 Cleaning up Python cache files..."
|
|
55
59
|
find . -type d -name __pycache__ -exec rm -rf {} + 2>/dev/null || true
|
|
60
|
+
find . -type f -name "*.pyc" -delete 2>/dev/null || true
|
|
61
|
+
@echo "✅ Cache cleaned!"
|
|
62
|
+
|
|
63
|
+
clean: clean-pycache
|
|
64
|
+
@echo "🧹 Cleaning up coverage reports and test cache..."
|
|
65
|
+
rm -rf htmlcov .coverage .pytest_cache
|
|
66
|
+
@echo "✅ Cleaned!"
|
|
56
67
|
|
|
57
68
|
coverage:
|
|
58
69
|
@echo "📊 Running tests with coverage analysis..."
|
|
@@ -67,22 +78,22 @@ check-all-fix: format lint-fix
|
|
|
67
78
|
|
|
68
79
|
format:
|
|
69
80
|
@echo "📐 Formatting code with ruff..."
|
|
70
|
-
uv run ruff format src/ tests/ scripts/
|
|
81
|
+
uv run ruff format src/ tests/ scripts/ tests_e2e/ --exclude '**/src/data_designer/_version.py'
|
|
71
82
|
@echo "✅ Formatting complete!"
|
|
72
83
|
|
|
73
84
|
format-check:
|
|
74
85
|
@echo "📐 Checking code formatting with ruff..."
|
|
75
|
-
uv run ruff format --check src/ tests/ scripts/
|
|
86
|
+
uv run ruff format --check src/ tests/ scripts/ tests_e2e/ --exclude '**/src/data_designer/_version.py'
|
|
76
87
|
@echo "✅ Formatting check complete! Run 'make format' to auto-fix issues."
|
|
77
88
|
|
|
78
89
|
lint:
|
|
79
90
|
@echo "🔍 Linting code with ruff..."
|
|
80
|
-
uv run ruff check --output-format=full src/ tests/ scripts/
|
|
91
|
+
uv run ruff check --output-format=full src/ tests/ scripts/ tests_e2e/ --exclude '**/src/data_designer/_version.py'
|
|
81
92
|
@echo "✅ Linting complete! Run 'make lint-fix' to auto-fix issues."
|
|
82
93
|
|
|
83
94
|
lint-fix:
|
|
84
95
|
@echo "🔍 Fixing linting issues with ruff..."
|
|
85
|
-
uv run ruff check --fix src/ tests/ scripts/
|
|
96
|
+
uv run ruff check --fix src/ tests/ scripts/ tests_e2e/ --exclude '**/src/data_designer/_version.py'
|
|
86
97
|
@echo "✅ Linting with autofix complete!"
|
|
87
98
|
|
|
88
99
|
test:
|
|
@@ -91,9 +102,9 @@ test:
|
|
|
91
102
|
|
|
92
103
|
test-e2e:
|
|
93
104
|
@echo "🧹 Cleaning e2e test environment..."
|
|
94
|
-
rm -rf
|
|
105
|
+
rm -rf tests_e2e/uv.lock tests_e2e/.pycache tests_e2e/.venv
|
|
95
106
|
@echo "🧪 Running e2e tests..."
|
|
96
|
-
uv run --no-cache --refresh --directory
|
|
107
|
+
uv run --no-cache --refresh --directory tests_e2e pytest -s
|
|
97
108
|
|
|
98
109
|
test-run-tutorials:
|
|
99
110
|
@echo "🧪 Running tutorials as e2e tests..."
|
|
@@ -101,7 +112,7 @@ test-run-tutorials:
|
|
|
101
112
|
trap "rm -rf $$TUTORIAL_WORKDIR" EXIT; \
|
|
102
113
|
for f in docs/notebook_source/*.py; do \
|
|
103
114
|
echo " 📓 Running $$f..."; \
|
|
104
|
-
(cd "$$TUTORIAL_WORKDIR" && uv run python "$(REPO_PATH)/$$f") || exit 1; \
|
|
115
|
+
(cd "$$TUTORIAL_WORKDIR" && uv run --project "$(REPO_PATH)" --group notebooks python "$(REPO_PATH)/$$f") || exit 1; \
|
|
105
116
|
done; \
|
|
106
117
|
echo "🧹 Cleaning up tutorial artifacts..."; \
|
|
107
118
|
rm -rf "$$TUTORIAL_WORKDIR"; \
|
|
@@ -113,7 +124,7 @@ test-run-recipes:
|
|
|
113
124
|
trap "rm -rf $$RECIPE_WORKDIR" EXIT; \
|
|
114
125
|
for f in docs/assets/recipes/**/*.py; do \
|
|
115
126
|
echo " 📜 Running $$f..."; \
|
|
116
|
-
(cd "$$RECIPE_WORKDIR" && uv run python "$(REPO_PATH)/$$f" --model-alias nvidia-text --artifact-path "$$RECIPE_WORKDIR" --num-records 5) || exit 1; \
|
|
127
|
+
(cd "$$RECIPE_WORKDIR" && uv run --project "$(REPO_PATH)" --group notebooks python "$(REPO_PATH)/$$f" --model-alias nvidia-text --artifact-path "$$RECIPE_WORKDIR" --num-records 5) || exit 1; \
|
|
117
128
|
done; \
|
|
118
129
|
echo "🧹 Cleaning up recipe artifacts..."; \
|
|
119
130
|
rm -rf "$$RECIPE_WORKDIR"; \
|
|
@@ -168,4 +179,34 @@ install-dev-notebooks:
|
|
|
168
179
|
$(call install-pre-commit-hooks)
|
|
169
180
|
@echo "✅ Dev + notebooks installation complete!"
|
|
170
181
|
|
|
171
|
-
|
|
182
|
+
perf-import:
|
|
183
|
+
ifdef CLEAN
|
|
184
|
+
@$(MAKE) clean-pycache
|
|
185
|
+
endif
|
|
186
|
+
@echo "⚡ Profiling import time for data_designer.essentials..."
|
|
187
|
+
ifdef NOFILE
|
|
188
|
+
@PERF_OUTPUT=$$(uv run python -X importtime -c "import data_designer.essentials" 2>&1); \
|
|
189
|
+
echo "$$PERF_OUTPUT"; \
|
|
190
|
+
echo ""; \
|
|
191
|
+
echo "Summary:"; \
|
|
192
|
+
echo "$$PERF_OUTPUT" | tail -1 | awk '{printf " Total: %.3fs\n", $$5/1000000}'; \
|
|
193
|
+
echo ""; \
|
|
194
|
+
echo "💡 Top 10 slowest imports:"; \
|
|
195
|
+
printf "%-12s %-12s %s\n" "Self (s)" "Cumulative (s)" "Module"; \
|
|
196
|
+
printf "%-12s %-12s %s\n" "--------" "--------------" "------"; \
|
|
197
|
+
echo "$$PERF_OUTPUT" | grep "import time:" | sort -rn -k5 | head -10 | awk '{printf "%-12.3f %-12.3f %s", $$3/1000000, $$5/1000000, $$7; for(i=8;i<=NF;i++) printf " %s", $$i; printf "\n"}'
|
|
198
|
+
else
|
|
199
|
+
@PERF_FILE="perf_import_$$(date +%Y%m%d_%H%M%S).txt"; \
|
|
200
|
+
uv run python -X importtime -c "import data_designer.essentials" > "$$PERF_FILE" 2>&1; \
|
|
201
|
+
echo "📊 Import profile saved to $$PERF_FILE"; \
|
|
202
|
+
echo ""; \
|
|
203
|
+
echo "Summary:"; \
|
|
204
|
+
tail -1 "$$PERF_FILE" | awk '{printf " Total: %.3fs\n", $$5/1000000}'; \
|
|
205
|
+
echo ""; \
|
|
206
|
+
echo "💡 Top 10 slowest imports:"; \
|
|
207
|
+
printf "%-12s %-12s %s\n" "Self (s)" "Cumulative (s)" "Module"; \
|
|
208
|
+
printf "%-12s %-12s %s\n" "--------" "--------------" "------"; \
|
|
209
|
+
grep "import time:" "$$PERF_FILE" | sort -rn -k5 | head -10 | awk '{printf "%-12.3f %-12.3f %s", $$3/1000000, $$5/1000000, $$7; for(i=8;i<=NF;i++) printf " %s", $$i; printf "\n"}'
|
|
210
|
+
endif
|
|
211
|
+
|
|
212
|
+
.PHONY: clean clean-pycache coverage format format-check lint lint-fix test test-e2e test-run-tutorials test-run-recipes test-run-all-examples check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks generate-colab-notebooks perf-import
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: data-designer
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.5
|
|
4
4
|
Summary: General framework for synthetic data generation
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -87,12 +87,19 @@ make install
|
|
|
87
87
|
|
|
88
88
|
### 2. Set your API key
|
|
89
89
|
|
|
90
|
-
|
|
90
|
+
Start with one of our default model providers:
|
|
91
91
|
|
|
92
|
+
- [NVIDIA Build API](https://build.nvidia.com)
|
|
93
|
+
- [OpenAI](https://platform.openai.com/api-keys)
|
|
94
|
+
- [OpenRouter](https://openrouter.ai)
|
|
95
|
+
|
|
96
|
+
Grab your API key(s) using the above links and set one or more of the following environment variables:
|
|
92
97
|
```bash
|
|
93
98
|
export NVIDIA_API_KEY="your-api-key-here"
|
|
94
|
-
|
|
99
|
+
|
|
95
100
|
export OPENAI_API_KEY="your-openai-api-key-here"
|
|
101
|
+
|
|
102
|
+
export OPENROUTER_API_KEY="your-openrouter-api-key-here"
|
|
96
103
|
```
|
|
97
104
|
|
|
98
105
|
### 3. Start generating data!
|
|
@@ -127,7 +134,7 @@ config_builder.add_column(
|
|
|
127
134
|
LLMTextColumnConfig(
|
|
128
135
|
name="review",
|
|
129
136
|
model_alias="nvidia-text",
|
|
130
|
-
prompt="
|
|
137
|
+
prompt="Write a brief product review for a {{ product_category }} item you recently purchased.",
|
|
131
138
|
)
|
|
132
139
|
)
|
|
133
140
|
|
|
@@ -193,6 +200,14 @@ The value `openai/gpt-oss-20b` would be collected.
|
|
|
193
200
|
|
|
194
201
|
To disable telemetry capture, set `NEMO_TELEMETRY_ENABLED=false`.
|
|
195
202
|
|
|
203
|
+
### Top Models
|
|
204
|
+
|
|
205
|
+
This chart represents the breakdown of models used for Data Designer across all synthetic data generation jobs from 12/18/2025 to 1/14/2026.
|
|
206
|
+
|
|
207
|
+

|
|
208
|
+
|
|
209
|
+
_Last updated on 1/14/2026_
|
|
210
|
+
|
|
196
211
|
---
|
|
197
212
|
|
|
198
213
|
## License
|
|
@@ -40,12 +40,19 @@ make install
|
|
|
40
40
|
|
|
41
41
|
### 2. Set your API key
|
|
42
42
|
|
|
43
|
-
|
|
43
|
+
Start with one of our default model providers:
|
|
44
44
|
|
|
45
|
+
- [NVIDIA Build API](https://build.nvidia.com)
|
|
46
|
+
- [OpenAI](https://platform.openai.com/api-keys)
|
|
47
|
+
- [OpenRouter](https://openrouter.ai)
|
|
48
|
+
|
|
49
|
+
Grab your API key(s) using the above links and set one or more of the following environment variables:
|
|
45
50
|
```bash
|
|
46
51
|
export NVIDIA_API_KEY="your-api-key-here"
|
|
47
|
-
|
|
52
|
+
|
|
48
53
|
export OPENAI_API_KEY="your-openai-api-key-here"
|
|
54
|
+
|
|
55
|
+
export OPENROUTER_API_KEY="your-openrouter-api-key-here"
|
|
49
56
|
```
|
|
50
57
|
|
|
51
58
|
### 3. Start generating data!
|
|
@@ -80,7 +87,7 @@ config_builder.add_column(
|
|
|
80
87
|
LLMTextColumnConfig(
|
|
81
88
|
name="review",
|
|
82
89
|
model_alias="nvidia-text",
|
|
83
|
-
prompt="
|
|
90
|
+
prompt="Write a brief product review for a {{ product_category }} item you recently purchased.",
|
|
84
91
|
)
|
|
85
92
|
)
|
|
86
93
|
|
|
@@ -146,6 +153,14 @@ The value `openai/gpt-oss-20b` would be collected.
|
|
|
146
153
|
|
|
147
154
|
To disable telemetry capture, set `NEMO_TELEMETRY_ENABLED=false`.
|
|
148
155
|
|
|
156
|
+
### Top Models
|
|
157
|
+
|
|
158
|
+
This chart represents the breakdown of models used for Data Designer across all synthetic data generation jobs from 12/18/2025 to 1/14/2026.
|
|
159
|
+
|
|
160
|
+

|
|
161
|
+
|
|
162
|
+
_Last updated on 1/14/2026_
|
|
163
|
+
|
|
149
164
|
---
|
|
150
165
|
|
|
151
166
|
## License
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"cells": [
|
|
3
3
|
{
|
|
4
4
|
"cell_type": "markdown",
|
|
5
|
-
"id": "
|
|
5
|
+
"id": "fcbfacc7",
|
|
6
6
|
"metadata": {},
|
|
7
7
|
"source": [
|
|
8
8
|
"# 🎨 Data Designer Tutorial: The Basics\n",
|
|
@@ -14,7 +14,7 @@
|
|
|
14
14
|
},
|
|
15
15
|
{
|
|
16
16
|
"cell_type": "markdown",
|
|
17
|
-
"id": "
|
|
17
|
+
"id": "e25b987d",
|
|
18
18
|
"metadata": {},
|
|
19
19
|
"source": [
|
|
20
20
|
"### ⚡ Colab Setup\n",
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
{
|
|
26
26
|
"cell_type": "code",
|
|
27
27
|
"execution_count": null,
|
|
28
|
-
"id": "
|
|
28
|
+
"id": "d8194911",
|
|
29
29
|
"metadata": {},
|
|
30
30
|
"outputs": [],
|
|
31
31
|
"source": [
|
|
@@ -36,7 +36,7 @@
|
|
|
36
36
|
{
|
|
37
37
|
"cell_type": "code",
|
|
38
38
|
"execution_count": null,
|
|
39
|
-
"id": "
|
|
39
|
+
"id": "00ffb95b",
|
|
40
40
|
"metadata": {},
|
|
41
41
|
"outputs": [],
|
|
42
42
|
"source": [
|
|
@@ -53,7 +53,7 @@
|
|
|
53
53
|
},
|
|
54
54
|
{
|
|
55
55
|
"cell_type": "markdown",
|
|
56
|
-
"id": "
|
|
56
|
+
"id": "2c5c31e2",
|
|
57
57
|
"metadata": {},
|
|
58
58
|
"source": [
|
|
59
59
|
"### 📦 Import the essentials\n",
|
|
@@ -64,7 +64,7 @@
|
|
|
64
64
|
{
|
|
65
65
|
"cell_type": "code",
|
|
66
66
|
"execution_count": null,
|
|
67
|
-
"id": "
|
|
67
|
+
"id": "a7a9489f",
|
|
68
68
|
"metadata": {},
|
|
69
69
|
"outputs": [],
|
|
70
70
|
"source": [
|
|
@@ -85,7 +85,7 @@
|
|
|
85
85
|
},
|
|
86
86
|
{
|
|
87
87
|
"cell_type": "markdown",
|
|
88
|
-
"id": "
|
|
88
|
+
"id": "141b77e2",
|
|
89
89
|
"metadata": {},
|
|
90
90
|
"source": [
|
|
91
91
|
"### ⚙️ Initialize the Data Designer interface\n",
|
|
@@ -98,7 +98,7 @@
|
|
|
98
98
|
{
|
|
99
99
|
"cell_type": "code",
|
|
100
100
|
"execution_count": null,
|
|
101
|
-
"id": "
|
|
101
|
+
"id": "2bca9124",
|
|
102
102
|
"metadata": {},
|
|
103
103
|
"outputs": [],
|
|
104
104
|
"source": [
|
|
@@ -107,7 +107,7 @@
|
|
|
107
107
|
},
|
|
108
108
|
{
|
|
109
109
|
"cell_type": "markdown",
|
|
110
|
-
"id": "
|
|
110
|
+
"id": "d4142887",
|
|
111
111
|
"metadata": {},
|
|
112
112
|
"source": [
|
|
113
113
|
"### 🎛️ Define model configurations\n",
|
|
@@ -124,7 +124,7 @@
|
|
|
124
124
|
{
|
|
125
125
|
"cell_type": "code",
|
|
126
126
|
"execution_count": null,
|
|
127
|
-
"id": "
|
|
127
|
+
"id": "b3762937",
|
|
128
128
|
"metadata": {},
|
|
129
129
|
"outputs": [],
|
|
130
130
|
"source": [
|
|
@@ -154,7 +154,7 @@
|
|
|
154
154
|
},
|
|
155
155
|
{
|
|
156
156
|
"cell_type": "markdown",
|
|
157
|
-
"id": "
|
|
157
|
+
"id": "fec3a11f",
|
|
158
158
|
"metadata": {},
|
|
159
159
|
"source": [
|
|
160
160
|
"### 🏗️ Initialize the Data Designer Config Builder\n",
|
|
@@ -169,7 +169,7 @@
|
|
|
169
169
|
{
|
|
170
170
|
"cell_type": "code",
|
|
171
171
|
"execution_count": null,
|
|
172
|
-
"id": "
|
|
172
|
+
"id": "5324dbec",
|
|
173
173
|
"metadata": {},
|
|
174
174
|
"outputs": [],
|
|
175
175
|
"source": [
|
|
@@ -178,7 +178,7 @@
|
|
|
178
178
|
},
|
|
179
179
|
{
|
|
180
180
|
"cell_type": "markdown",
|
|
181
|
-
"id": "
|
|
181
|
+
"id": "9acab1b8",
|
|
182
182
|
"metadata": {},
|
|
183
183
|
"source": [
|
|
184
184
|
"## 🎲 Getting started with sampler columns\n",
|
|
@@ -195,7 +195,7 @@
|
|
|
195
195
|
{
|
|
196
196
|
"cell_type": "code",
|
|
197
197
|
"execution_count": null,
|
|
198
|
-
"id": "
|
|
198
|
+
"id": "9916a82e",
|
|
199
199
|
"metadata": {},
|
|
200
200
|
"outputs": [],
|
|
201
201
|
"source": [
|
|
@@ -204,7 +204,7 @@
|
|
|
204
204
|
},
|
|
205
205
|
{
|
|
206
206
|
"cell_type": "markdown",
|
|
207
|
-
"id": "
|
|
207
|
+
"id": "8452819f",
|
|
208
208
|
"metadata": {},
|
|
209
209
|
"source": [
|
|
210
210
|
"Let's start designing our product review dataset by adding product category and subcategory columns.\n"
|
|
@@ -213,7 +213,7 @@
|
|
|
213
213
|
{
|
|
214
214
|
"cell_type": "code",
|
|
215
215
|
"execution_count": null,
|
|
216
|
-
"id": "
|
|
216
|
+
"id": "3c99a9fe",
|
|
217
217
|
"metadata": {},
|
|
218
218
|
"outputs": [],
|
|
219
219
|
"source": [
|
|
@@ -294,7 +294,7 @@
|
|
|
294
294
|
},
|
|
295
295
|
{
|
|
296
296
|
"cell_type": "markdown",
|
|
297
|
-
"id": "
|
|
297
|
+
"id": "9028e845",
|
|
298
298
|
"metadata": {},
|
|
299
299
|
"source": [
|
|
300
300
|
"Next, let's add samplers to generate data related to the customer and their review.\n"
|
|
@@ -303,7 +303,7 @@
|
|
|
303
303
|
{
|
|
304
304
|
"cell_type": "code",
|
|
305
305
|
"execution_count": null,
|
|
306
|
-
"id": "
|
|
306
|
+
"id": "b64910fc",
|
|
307
307
|
"metadata": {},
|
|
308
308
|
"outputs": [],
|
|
309
309
|
"source": [
|
|
@@ -340,7 +340,7 @@
|
|
|
340
340
|
},
|
|
341
341
|
{
|
|
342
342
|
"cell_type": "markdown",
|
|
343
|
-
"id": "
|
|
343
|
+
"id": "3c56ff50",
|
|
344
344
|
"metadata": {},
|
|
345
345
|
"source": [
|
|
346
346
|
"## 🦜 LLM-generated columns\n",
|
|
@@ -355,7 +355,7 @@
|
|
|
355
355
|
{
|
|
356
356
|
"cell_type": "code",
|
|
357
357
|
"execution_count": null,
|
|
358
|
-
"id": "
|
|
358
|
+
"id": "db03dbae",
|
|
359
359
|
"metadata": {},
|
|
360
360
|
"outputs": [],
|
|
361
361
|
"source": [
|
|
@@ -391,7 +391,7 @@
|
|
|
391
391
|
},
|
|
392
392
|
{
|
|
393
393
|
"cell_type": "markdown",
|
|
394
|
-
"id": "
|
|
394
|
+
"id": "ee137104",
|
|
395
395
|
"metadata": {},
|
|
396
396
|
"source": [
|
|
397
397
|
"### 🔁 Iteration is key – preview the dataset!\n",
|
|
@@ -408,7 +408,7 @@
|
|
|
408
408
|
{
|
|
409
409
|
"cell_type": "code",
|
|
410
410
|
"execution_count": null,
|
|
411
|
-
"id": "
|
|
411
|
+
"id": "d318e88f",
|
|
412
412
|
"metadata": {},
|
|
413
413
|
"outputs": [],
|
|
414
414
|
"source": [
|
|
@@ -418,7 +418,7 @@
|
|
|
418
418
|
{
|
|
419
419
|
"cell_type": "code",
|
|
420
420
|
"execution_count": null,
|
|
421
|
-
"id": "
|
|
421
|
+
"id": "a6327bd4",
|
|
422
422
|
"metadata": {},
|
|
423
423
|
"outputs": [],
|
|
424
424
|
"source": [
|
|
@@ -429,7 +429,7 @@
|
|
|
429
429
|
{
|
|
430
430
|
"cell_type": "code",
|
|
431
431
|
"execution_count": null,
|
|
432
|
-
"id": "
|
|
432
|
+
"id": "9d9f8ba0",
|
|
433
433
|
"metadata": {},
|
|
434
434
|
"outputs": [],
|
|
435
435
|
"source": [
|
|
@@ -439,7 +439,7 @@
|
|
|
439
439
|
},
|
|
440
440
|
{
|
|
441
441
|
"cell_type": "markdown",
|
|
442
|
-
"id": "
|
|
442
|
+
"id": "5b1df1d7",
|
|
443
443
|
"metadata": {},
|
|
444
444
|
"source": [
|
|
445
445
|
"### 📊 Analyze the generated data\n",
|
|
@@ -452,7 +452,7 @@
|
|
|
452
452
|
{
|
|
453
453
|
"cell_type": "code",
|
|
454
454
|
"execution_count": null,
|
|
455
|
-
"id": "
|
|
455
|
+
"id": "0c19da33",
|
|
456
456
|
"metadata": {},
|
|
457
457
|
"outputs": [],
|
|
458
458
|
"source": [
|
|
@@ -462,7 +462,7 @@
|
|
|
462
462
|
},
|
|
463
463
|
{
|
|
464
464
|
"cell_type": "markdown",
|
|
465
|
-
"id": "
|
|
465
|
+
"id": "0e35f0f5",
|
|
466
466
|
"metadata": {},
|
|
467
467
|
"source": [
|
|
468
468
|
"### 🆙 Scale up!\n",
|
|
@@ -475,7 +475,7 @@
|
|
|
475
475
|
{
|
|
476
476
|
"cell_type": "code",
|
|
477
477
|
"execution_count": null,
|
|
478
|
-
"id": "
|
|
478
|
+
"id": "07bdf600",
|
|
479
479
|
"metadata": {},
|
|
480
480
|
"outputs": [],
|
|
481
481
|
"source": [
|
|
@@ -485,7 +485,7 @@
|
|
|
485
485
|
{
|
|
486
486
|
"cell_type": "code",
|
|
487
487
|
"execution_count": null,
|
|
488
|
-
"id": "
|
|
488
|
+
"id": "27959f17",
|
|
489
489
|
"metadata": {},
|
|
490
490
|
"outputs": [],
|
|
491
491
|
"source": [
|
|
@@ -498,7 +498,7 @@
|
|
|
498
498
|
{
|
|
499
499
|
"cell_type": "code",
|
|
500
500
|
"execution_count": null,
|
|
501
|
-
"id": "
|
|
501
|
+
"id": "953b6749",
|
|
502
502
|
"metadata": {},
|
|
503
503
|
"outputs": [],
|
|
504
504
|
"source": [
|
|
@@ -510,7 +510,7 @@
|
|
|
510
510
|
},
|
|
511
511
|
{
|
|
512
512
|
"cell_type": "markdown",
|
|
513
|
-
"id": "
|
|
513
|
+
"id": "471fb9a5",
|
|
514
514
|
"metadata": {},
|
|
515
515
|
"source": [
|
|
516
516
|
"## ⏭️ Next Steps\n",
|