data-designer 0.2.1__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_designer-0.2.1 → data_designer-0.2.2}/PKG-INFO +27 -29
- {data_designer-0.2.1 → data_designer-0.2.2}/pyproject.toml +44 -45
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/_version.py +2 -2
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/config_builder.py +22 -1
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/generators/base.py +11 -8
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/validators/python.py +28 -25
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/test_config_builder.py +95 -8
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/conftest.py +1 -1
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/column_generators/generators/test_llm_completion_generators.py +1 -1
- {data_designer-0.2.1 → data_designer-0.2.2}/uv.lock +1255 -1090
- {data_designer-0.2.1 → data_designer-0.2.2}/.github/workflows/build-docs.yml +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/.github/workflows/build-notebooks.yml +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/.github/workflows/check-colab-notebooks.yml +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/.github/workflows/ci.yml +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/.github/workflows/dco-assistant.yml +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/.github/workflows/pack-tutorials.yml +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/.github/workflows/semantic-pull-requests.yml +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/.gitignore +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/.pre-commit-config.yaml +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/AGENTS.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/CLAUDE.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/CODE_OF_CONDUCT.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/CONTRIBUTING.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/DCO +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/LICENSE +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/Makefile +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/README.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/VERSIONING.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/CONTRIBUTING.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/assets/palette-favicon.png +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/assets/recipes/code_generation/text_to_python.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/assets/recipes/code_generation/text_to_sql.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/assets/recipes/qa_and_chat/multi_turn_chat.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/assets/recipes/qa_and_chat/product_info_qa.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/code_reference/analysis.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/code_reference/column_configs.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/code_reference/config_builder.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/code_reference/data_designer_config.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/code_reference/models.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/code_reference/processors.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/code_reference/sampler_params.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/code_reference/validator_params.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/colab_notebooks/1-the-basics.ipynb +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/colab_notebooks/4-providing-images-as-context.ipynb +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/concepts/columns.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/concepts/models/configure-model-settings-with-the-cli.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/concepts/models/custom-model-settings.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/concepts/models/default-model-settings.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/concepts/models/inference-parameters.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/concepts/models/model-configs.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/concepts/models/model-providers.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/concepts/person_sampling.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/concepts/processors.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/concepts/validators.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/css/mkdocstrings.css +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/css/style.css +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/index.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/installation.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/js/toc-toggle.js +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/notebook_source/1-the-basics.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/notebook_source/3-seeding-with-a-dataset.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/notebook_source/4-providing-images-as-context.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/notebook_source/README.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/notebook_source/_README.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/notebook_source/_pyproject.toml +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/overrides/main.html +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/plugins/available.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/plugins/example.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/plugins/overview.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/quick-start.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/recipes/cards.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/recipes/code_generation/text_to_python.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/recipes/code_generation/text_to_sql.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/recipes/qa_and_chat/multi_turn_chat.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/recipes/qa_and_chat/product_info_qa.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/docs/scripts/generate_colab_notebooks.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/mkdocs.yml +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/scripts/update_license_headers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/README.md +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/commands/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/commands/download.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/commands/list.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/commands/models.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/commands/providers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/commands/reset.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/controllers/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/controllers/download_controller.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/controllers/model_controller.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/controllers/provider_controller.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/forms/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/forms/builder.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/forms/field.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/forms/form.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/forms/model_builder.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/forms/provider_builder.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/main.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/repositories/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/repositories/base.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/repositories/model_repository.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/repositories/persona_repository.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/repositories/provider_repository.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/services/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/services/download_service.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/services/model_service.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/services/provider_service.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/ui.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/cli/utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/analysis/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/analysis/column_profilers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/analysis/column_statistics.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/analysis/dataset_profiler.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/analysis/utils/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/analysis/utils/reporting.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/base.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/column_configs.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/column_types.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/data_designer_config.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/dataset_builders.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/datastore.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/default_model_settings.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/exports.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/interface.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/models.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/preview_results.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/processors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/sampler_constraints.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/sampler_params.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/seed.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/utils/code_lang.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/utils/constants.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/utils/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/utils/info.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/utils/io_helpers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/utils/misc.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/utils/numerical_helpers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/utils/type_helpers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/utils/validation.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/utils/visualization.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/config/validator_params.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/analysis/column_profilers/base.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/analysis/column_statistics.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/analysis/dataset_profiler.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/analysis/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/generators/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/generators/embedding.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/generators/llm_completion.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/registry.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/configurable_task.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/dataset_builders/column_wise_builder.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/dataset_builders/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/dataset_builders/utils/concurrency.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/model_provider.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/facade.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/litellm_overrides.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/parsers/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/parsers/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/parsers/parser.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/parsers/postprocessors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/parsers/types.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/recipes/base.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/registry.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/telemetry.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/usage.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/models/utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/ginja/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/ginja/ast.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/ginja/environment.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/ginja/record.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/gsonschema/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/processors/base.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/processors/drop_columns.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/processors/registry.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/processors/schema_transform.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/processing/utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/registry/base.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/registry/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/resources/managed_storage.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/resources/resource_provider.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/resources/seed_dataset_data_store.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/column.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/constraints.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/generator.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/people_gen.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/schema.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/sampling_gen/utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/secret_resolver.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/validators/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/validators/base.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/validators/local_callable.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/validators/remote.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/engine/validators/sql.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/essentials/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/interface/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/interface/data_designer.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/interface/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/interface/results.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/logging.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/plugin_manager.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/plugins/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/plugins/errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/plugins/plugin.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/src/data_designer/plugins/registry.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/commands/test_download_command.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/commands/test_list_command.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/commands/test_models_command.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/commands/test_providers_command.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/commands/test_reset_command.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/conftest.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/controllers/test_download_controller.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/controllers/test_model_controller.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/controllers/test_provider_controller.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/forms/test_field.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/forms/test_form.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/forms/test_model_builder.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/forms/test_provider_builder.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/repositories/test_model_repository.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/repositories/test_persona_repository.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/repositories/test_provider_repository.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/services/test_download_service.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/services/test_model_service.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/services/test_provider_service.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/cli/test_cli_utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/analysis/conftest.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/analysis/test_column_statistics.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/analysis/test_dataset_profiler_results.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/analysis/utils/test_reporting.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/test_columns.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/test_data_designer_config.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/test_datastore.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/test_default_model_settings.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/test_models.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/test_processors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/test_sampler_constraints.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/test_sampler_params.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/test_seed.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/test_validator_params.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/utils/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/utils/test_code_lang.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/utils/test_info.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/utils/test_io_helpers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/utils/test_misc.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/utils/test_type_helpers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/utils/test_validation.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/config/utils/test_visualization.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/analysis/column_profilers/test_base.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/analysis/conftest.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/analysis/test_dataset_profiler.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/analysis/test_errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/column_generators/generators/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/column_generators/generators/test_embedding.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/column_generators/generators/test_expression.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/column_generators/generators/test_samplers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/column_generators/generators/test_validation.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/column_generators/test_registry.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/conftest.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/dataset_builders/test_column_wise_builder.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/conftest.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/parsers/test_parser.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/parsers/test_parsers_types.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/parsers/test_postprocessors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/recipes/test_recipe_base.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/recipes/test_response_recipes.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/stub_secrets.json +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/test_facade.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/test_litellm_overrides.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/test_model_errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/test_model_registry.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/test_model_utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/models/test_usage.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/ginja/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/ginja/test_ast.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/ginja/test_environment.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/ginja/test_exceptions.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/ginja/test_record.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/gsonschema/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/gsonschema/test_types.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/gsonschema/test_validators.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/processors/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/processors/test_drop_columns.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/processors/test_registry.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/processors/test_schema_transform.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/processing/test_utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/registry/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/registry/conftest.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/registry/test_base.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/registry/test_data_designer_registry.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/registry/test_errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/resources/__init__.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/resources/conftest.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/resources/test_managed_storage.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/resources/test_resource_provider.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/conftest.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/entities/test_person.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/test_column.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/test_constraints.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/test_generator.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/test_people_gen.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/test_schema.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/sampling_gen/test_utils.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/test_configurable_task.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/test_engine_errors.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/test_model_provider.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/test_secret_resolver.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/validators/test_local_callable.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/validators/test_python.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/validators/test_remote.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/engine/validators/test_sql.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/essentials/test_init.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/interface/test_data_designer.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/interface/test_results.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/plugins/test_plugin.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/plugins/test_plugin_registry.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/test_logging.py +0 -0
- {data_designer-0.2.1 → data_designer-0.2.2}/tests/test_plugin_manager.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: data-designer
|
|
3
|
-
Version: 0.2.1
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: General framework for synthetic data generation
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -15,36 +15,34 @@ Classifier: Programming Language :: Python :: 3.13
|
|
|
15
15
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
16
|
Classifier: Topic :: Software Development
|
|
17
17
|
Requires-Python: >=3.10
|
|
18
|
-
Requires-Dist: anyascii<1
|
|
19
|
-
Requires-Dist:
|
|
20
|
-
Requires-Dist:
|
|
21
|
-
Requires-Dist:
|
|
22
|
-
Requires-Dist: httpx
|
|
23
|
-
Requires-Dist:
|
|
24
|
-
Requires-Dist: huggingface-hub>=0.34.4
|
|
18
|
+
Requires-Dist: anyascii<1,>=0.3.3
|
|
19
|
+
Requires-Dist: duckdb<2,>=1.1.3
|
|
20
|
+
Requires-Dist: faker<21,>=20.1.0
|
|
21
|
+
Requires-Dist: httpx-retries<1,>=0.4.2
|
|
22
|
+
Requires-Dist: httpx<1,>=0.27.2
|
|
23
|
+
Requires-Dist: huggingface-hub<2,>=1.0.1
|
|
25
24
|
Requires-Dist: jinja2<4,>=3.1.6
|
|
26
|
-
Requires-Dist: json-repair
|
|
27
|
-
Requires-Dist: jsonpath-rust-bindings
|
|
28
|
-
Requires-Dist: litellm
|
|
29
|
-
Requires-Dist: lxml
|
|
30
|
-
Requires-Dist: marko
|
|
31
|
-
Requires-Dist: networkx
|
|
32
|
-
Requires-Dist: numpy
|
|
33
|
-
Requires-Dist: pandas
|
|
34
|
-
Requires-Dist: prompt-toolkit
|
|
35
|
-
Requires-Dist: pyarrow
|
|
36
|
-
Requires-Dist: pydantic
|
|
37
|
-
Requires-Dist:
|
|
38
|
-
Requires-Dist:
|
|
39
|
-
Requires-Dist:
|
|
40
|
-
Requires-Dist: pyyaml>=6.0.1
|
|
25
|
+
Requires-Dist: json-repair<1,>=0.48.0
|
|
26
|
+
Requires-Dist: jsonpath-rust-bindings<2,>=1.0
|
|
27
|
+
Requires-Dist: litellm<2,>=1.73.6
|
|
28
|
+
Requires-Dist: lxml<7,>=6.0.2
|
|
29
|
+
Requires-Dist: marko<3,>=2.1.2
|
|
30
|
+
Requires-Dist: networkx<4,>=3.0
|
|
31
|
+
Requires-Dist: numpy<3,>=1.23.5
|
|
32
|
+
Requires-Dist: pandas<3,>=2.3.3
|
|
33
|
+
Requires-Dist: prompt-toolkit<4,>=3.0.0
|
|
34
|
+
Requires-Dist: pyarrow<20,>=19.0.1
|
|
35
|
+
Requires-Dist: pydantic[email]<3,>=2.9.2
|
|
36
|
+
Requires-Dist: pygments<3,>=2.19.2
|
|
37
|
+
Requires-Dist: python-json-logger<4,>=3
|
|
38
|
+
Requires-Dist: pyyaml<7,>=6.0.1
|
|
41
39
|
Requires-Dist: requests<3,>=2.32.2
|
|
42
|
-
Requires-Dist: rich
|
|
43
|
-
Requires-Dist: ruff
|
|
44
|
-
Requires-Dist: scipy
|
|
45
|
-
Requires-Dist: sqlfluff
|
|
46
|
-
Requires-Dist: tiktoken
|
|
47
|
-
Requires-Dist: typer
|
|
40
|
+
Requires-Dist: rich<14,>=13.7.1
|
|
41
|
+
Requires-Dist: ruff<1,>=0.14.10
|
|
42
|
+
Requires-Dist: scipy<2,>=1.11.0
|
|
43
|
+
Requires-Dist: sqlfluff<4,>=3.2.0
|
|
44
|
+
Requires-Dist: tiktoken<1,>=0.8.0
|
|
45
|
+
Requires-Dist: typer<1,>=0.12.0
|
|
48
46
|
Description-Content-Type: text/markdown
|
|
49
47
|
|
|
50
48
|
# 🎨 NeMo Data Designer
|
|
@@ -20,36 +20,34 @@ classifiers = [
|
|
|
20
20
|
]
|
|
21
21
|
|
|
22
22
|
dependencies = [
|
|
23
|
-
"
|
|
23
|
+
"anyascii>=0.3.3,<1",
|
|
24
|
+
"duckdb>=1.1.3,<2",
|
|
25
|
+
"faker>=20.1.0,<21",
|
|
26
|
+
"httpx-retries>=0.4.2,<1",
|
|
27
|
+
"httpx>=0.27.2,<1",
|
|
28
|
+
"huggingface-hub>=1.0.1,<2",
|
|
24
29
|
"jinja2>=3.1.6,<4",
|
|
25
|
-
"
|
|
26
|
-
"
|
|
27
|
-
"
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"
|
|
32
|
-
"
|
|
30
|
+
"json-repair>=0.48.0,<1",
|
|
31
|
+
"jsonpath-rust-bindings>=1.0,<2",
|
|
32
|
+
"litellm>=1.73.6,<2",
|
|
33
|
+
"lxml>=6.0.2,<7",
|
|
34
|
+
"marko>=2.1.2,<3",
|
|
35
|
+
"networkx>=3.0,<4",
|
|
36
|
+
"numpy>=1.23.5,<3",
|
|
37
|
+
"pandas>=2.3.3,<3",
|
|
38
|
+
"prompt-toolkit>=3.0.0,<4",
|
|
39
|
+
"pyarrow>=19.0.1,<20",
|
|
40
|
+
"pydantic[email]>=2.9.2,<3",
|
|
41
|
+
"pygments>=2.19.2,<3",
|
|
42
|
+
"python-json-logger>=3,<4",
|
|
43
|
+
"pyyaml>=6.0.1,<7",
|
|
33
44
|
"requests<3,>=2.32.2",
|
|
34
|
-
"rich>=13.7.1",
|
|
35
|
-
"
|
|
36
|
-
"
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"
|
|
40
|
-
"httpx>=0.27.2",
|
|
41
|
-
"httpx-retries>=0.4.2",
|
|
42
|
-
"json-repair==0.48.0",
|
|
43
|
-
"jsonpath-rust-bindings>=1.0",
|
|
44
|
-
"litellm==1.73.6",
|
|
45
|
-
"lxml>=6.0.2",
|
|
46
|
-
"marko==2.1.2",
|
|
47
|
-
"networkx==3.0",
|
|
48
|
-
"pydantic[email]>=2.9.2",
|
|
49
|
-
"scipy>=1.11.0",
|
|
50
|
-
"sqlfluff==3.2.0",
|
|
51
|
-
"tiktoken>=0.8.0",
|
|
52
|
-
"ruff==0.12.3",
|
|
45
|
+
"rich>=13.7.1,<14",
|
|
46
|
+
"ruff>=0.14.10,<1",
|
|
47
|
+
"scipy>=1.11.0,<2",
|
|
48
|
+
"sqlfluff>=3.2.0,<4",
|
|
49
|
+
"tiktoken>=0.8.0,<1",
|
|
50
|
+
"typer>=0.12.0,<1",
|
|
53
51
|
]
|
|
54
52
|
|
|
55
53
|
[project.scripts]
|
|
@@ -57,27 +55,28 @@ data-designer = "data_designer.cli:main"
|
|
|
57
55
|
|
|
58
56
|
[dependency-groups]
|
|
59
57
|
dev = [
|
|
60
|
-
"jsonpath-ng
|
|
61
|
-
"pre-commit>=4.0.0",
|
|
62
|
-
"pytest>=
|
|
63
|
-
"pytest-
|
|
64
|
-
"pytest-
|
|
65
|
-
"pytest-
|
|
66
|
-
"pytest
|
|
58
|
+
"jsonpath-ng>=1.5.3,<2",
|
|
59
|
+
"pre-commit>=4.0.0,<5",
|
|
60
|
+
"pytest-asyncio>=0.24.0,<1",
|
|
61
|
+
"pytest-cov>=7.0.0,<8",
|
|
62
|
+
"pytest-env>=1.2.0,<2",
|
|
63
|
+
"pytest-httpx>=0.35.0,<1",
|
|
64
|
+
"pytest>=8.3.3,<9",
|
|
67
65
|
]
|
|
68
66
|
docs = [
|
|
69
|
-
"
|
|
70
|
-
"
|
|
71
|
-
"
|
|
72
|
-
"mkdocs-material>=9.6.22",
|
|
73
|
-
"mkdocs-
|
|
74
|
-
"mkdocs
|
|
75
|
-
"
|
|
67
|
+
"datasets>=4.0.0,<5",
|
|
68
|
+
"mike>=2.1.3,<3",
|
|
69
|
+
"mkdocs-jupyter>=0.25.1,<1",
|
|
70
|
+
"mkdocs-material>=9.6.22,<10",
|
|
71
|
+
"mkdocs-redirects>=1.2.2,<2",
|
|
72
|
+
"mkdocs>=1.6.1,<2",
|
|
73
|
+
"mkdocstrings-python>=1.18.2,<2",
|
|
74
|
+
"mkdocstrings>=0.30.1,<1",
|
|
76
75
|
]
|
|
77
76
|
notebooks = [
|
|
78
|
-
"
|
|
79
|
-
"
|
|
80
|
-
"pillow>=12.0.0",
|
|
77
|
+
"ipykernel>=6.29.0,<7",
|
|
78
|
+
"jupyter>=1.0.0,<2",
|
|
79
|
+
"pillow>=12.0.0,<13",
|
|
81
80
|
]
|
|
82
81
|
|
|
83
82
|
[build-system]
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.2.1'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 2, 1)
|
|
31
|
+
__version__ = version = '0.2.2'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 2, 2)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -224,6 +224,9 @@ class DataDesignerConfigBuilder:
|
|
|
224
224
|
|
|
225
225
|
Returns:
|
|
226
226
|
The current Data Designer config builder instance.
|
|
227
|
+
|
|
228
|
+
Raises:
|
|
229
|
+
BuilderConfigurationError: If the column name collides with an existing seed dataset column.
|
|
227
230
|
"""
|
|
228
231
|
if column_config is None:
|
|
229
232
|
if name is None or column_type is None:
|
|
@@ -240,6 +243,13 @@ class DataDesignerConfigBuilder:
|
|
|
240
243
|
f"{', '.join([t.__name__ for t in allowed_column_configs])}"
|
|
241
244
|
)
|
|
242
245
|
|
|
246
|
+
existing_config = self._column_configs.get(column_config.name)
|
|
247
|
+
if existing_config is not None and isinstance(existing_config, SeedDatasetColumnConfig):
|
|
248
|
+
raise BuilderConfigurationError(
|
|
249
|
+
f"🛑 Column {column_config.name!r} already exists as a seed dataset column. "
|
|
250
|
+
"Please use a different column name or update the seed dataset."
|
|
251
|
+
)
|
|
252
|
+
|
|
243
253
|
self._column_configs[column_config.name] = column_config
|
|
244
254
|
return self
|
|
245
255
|
|
|
@@ -578,7 +588,18 @@ class DataDesignerConfigBuilder:
|
|
|
578
588
|
|
|
579
589
|
Returns:
|
|
580
590
|
The current Data Designer config builder instance.
|
|
591
|
+
|
|
592
|
+
Raises:
|
|
593
|
+
BuilderConfigurationError: If any seed dataset column name collides with an existing column.
|
|
581
594
|
"""
|
|
595
|
+
seed_column_names = fetch_seed_dataset_column_names(dataset_reference)
|
|
596
|
+
colliding_columns = [name for name in seed_column_names if name in self._column_configs]
|
|
597
|
+
if colliding_columns:
|
|
598
|
+
raise BuilderConfigurationError(
|
|
599
|
+
f"🛑 Seed dataset column(s) {colliding_columns} collide with existing column(s). "
|
|
600
|
+
"Please remove the conflicting columns or use a seed dataset with different column names."
|
|
601
|
+
)
|
|
602
|
+
|
|
582
603
|
self._seed_config = SeedConfig(
|
|
583
604
|
dataset=dataset_reference.dataset,
|
|
584
605
|
sampling_strategy=sampling_strategy,
|
|
@@ -587,7 +608,7 @@ class DataDesignerConfigBuilder:
|
|
|
587
608
|
self.set_seed_datastore_settings(
|
|
588
609
|
dataset_reference.datastore_settings if hasattr(dataset_reference, "datastore_settings") else None
|
|
589
610
|
)
|
|
590
|
-
for column_name in fetch_seed_dataset_column_names(dataset_reference):
|
|
611
|
+
for column_name in seed_column_names:
|
|
591
612
|
self._column_configs[column_name] = SeedDatasetColumnConfig(name=column_name)
|
|
592
613
|
return self
|
|
593
614
|
|
|
@@ -1,23 +1,27 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
4
6
|
import functools
|
|
5
7
|
import logging
|
|
6
8
|
from abc import ABC, abstractmethod
|
|
7
|
-
from
|
|
9
|
+
from enum import Enum
|
|
10
|
+
from typing import TYPE_CHECKING, overload
|
|
8
11
|
|
|
9
12
|
import pandas as pd
|
|
10
13
|
|
|
11
|
-
from data_designer.config.column_types import COLUMN_TYPE_EMOJI_MAP
|
|
12
|
-
from data_designer.config.models import BaseInferenceParams, ModelConfig
|
|
13
|
-
from data_designer.config.utils.type_helpers import StrEnum
|
|
14
14
|
from data_designer.engine.configurable_task import ConfigurableTask, ConfigurableTaskMetadata, DataT, TaskConfigT
|
|
15
|
-
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from data_designer.config.models import BaseInferenceParams, ModelConfig
|
|
18
|
+
from data_designer.engine.models.facade import ModelFacade
|
|
19
|
+
|
|
16
20
|
|
|
17
21
|
logger = logging.getLogger(__name__)
|
|
18
22
|
|
|
19
23
|
|
|
20
|
-
class GenerationStrategy(StrEnum):
|
|
24
|
+
class GenerationStrategy(str, Enum):
|
|
21
25
|
CELL_BY_CELL = "cell_by_cell"
|
|
22
26
|
FULL_COLUMN = "full_column"
|
|
23
27
|
|
|
@@ -82,8 +86,7 @@ class WithModelGeneration:
|
|
|
82
86
|
return self.model_config.inference_parameters
|
|
83
87
|
|
|
84
88
|
def log_pre_generation(self) -> None:
|
|
85
|
-
|
|
86
|
-
logger.info(f"{emoji} Preparing {self.config.column_type} column generation")
|
|
89
|
+
logger.info(f"Preparing {self.config.column_type} column generation")
|
|
87
90
|
logger.info(f" |-- column name: {self.config.name!r}")
|
|
88
91
|
logger.info(f" |-- model config:\n{self.model_config.model_dump_json(indent=4)}")
|
|
89
92
|
if self.model_config.provider is None:
|
|
@@ -2,8 +2,8 @@
|
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
import ast
|
|
5
|
+
import json
|
|
5
6
|
import logging
|
|
6
|
-
import re
|
|
7
7
|
import subprocess
|
|
8
8
|
import tempfile
|
|
9
9
|
from collections import defaultdict
|
|
@@ -179,9 +179,8 @@ class PythonValidator(BaseValidator):
|
|
|
179
179
|
for file in Path(codebase_path).glob("*.py"):
|
|
180
180
|
processed[file.stem] = PythonLinterMessages()
|
|
181
181
|
|
|
182
|
-
# Run ruff linter
|
|
182
|
+
# Run ruff linter with JSON output
|
|
183
183
|
ruff_bin = find_ruff_bin()
|
|
184
|
-
env = {"NO_COLOR": "1"}
|
|
185
184
|
|
|
186
185
|
ruff_exec = subprocess.run(
|
|
187
186
|
[
|
|
@@ -189,9 +188,9 @@ class PythonValidator(BaseValidator):
|
|
|
189
188
|
"check",
|
|
190
189
|
"--select",
|
|
191
190
|
"E,F6,F7,F8,SIM,PLC,PLE,PLR,PLW",
|
|
191
|
+
"--output-format=json",
|
|
192
192
|
codebase_path,
|
|
193
193
|
],
|
|
194
|
-
env=env,
|
|
195
194
|
text=True,
|
|
196
195
|
capture_output=True,
|
|
197
196
|
check=False,
|
|
@@ -199,30 +198,34 @@ class PythonValidator(BaseValidator):
|
|
|
199
198
|
)
|
|
200
199
|
ruff_output = ruff_exec.stdout
|
|
201
200
|
|
|
202
|
-
# Parse
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
errors = re.findall(pattern, ruff_output)
|
|
201
|
+
# Parse JSON output
|
|
202
|
+
try:
|
|
203
|
+
diagnostics = json.loads(ruff_output)
|
|
204
|
+
except json.JSONDecodeError as e:
|
|
205
|
+
raise RuntimeError(f"Failed to parse ruff JSON output: {e}")
|
|
208
206
|
|
|
209
|
-
if
|
|
210
|
-
|
|
207
|
+
if not diagnostics:
|
|
208
|
+
return processed # no errors or warnings
|
|
211
209
|
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
210
|
+
for diagnostic in diagnostics:
|
|
211
|
+
filename = diagnostic["filename"]
|
|
212
|
+
code = diagnostic["code"]
|
|
213
|
+
location = diagnostic["location"]
|
|
214
|
+
message = diagnostic["message"]
|
|
215
|
+
|
|
216
|
+
# Extract alphabetic prefix from code for type mapping
|
|
217
|
+
alpha_prefix = "".join(c for c in code if c.isalpha())
|
|
218
|
+
error_type = TYPE_FROM_SYMBOL.get(alpha_prefix, "warning")
|
|
219
|
+
|
|
220
|
+
processed[Path(filename).stem].add(
|
|
221
|
+
PythonLinterMessage(
|
|
222
|
+
type=error_type,
|
|
223
|
+
symbol=code,
|
|
224
|
+
line=location["row"],
|
|
225
|
+
column=location["column"],
|
|
226
|
+
message=message,
|
|
223
227
|
)
|
|
224
|
-
|
|
225
|
-
raise RuntimeError("ruff's output not in expected format")
|
|
228
|
+
)
|
|
226
229
|
|
|
227
230
|
return processed
|
|
228
231
|
|
|
@@ -42,14 +42,14 @@ class DummyStructuredModel(BaseModel):
|
|
|
42
42
|
@pytest.fixture
|
|
43
43
|
def mock_fetch_seed_dataset_column_names():
|
|
44
44
|
with patch("data_designer.config.config_builder.fetch_seed_dataset_column_names") as mock_fetch_seed:
|
|
45
|
-
mock_fetch_seed.return_value = ["id", "name", "
|
|
45
|
+
mock_fetch_seed.return_value = ["id", "name", "city", "country"]
|
|
46
46
|
yield mock_fetch_seed
|
|
47
47
|
|
|
48
48
|
|
|
49
49
|
@pytest.fixture
|
|
50
50
|
def stub_data_designer_builder(stub_data_designer_builder_config_str):
|
|
51
51
|
with patch("data_designer.config.config_builder.fetch_seed_dataset_column_names") as mock_fetch_seed:
|
|
52
|
-
mock_fetch_seed.return_value = ["id", "name", "
|
|
52
|
+
mock_fetch_seed.return_value = ["id", "name", "city", "country"]
|
|
53
53
|
yield DataDesignerConfigBuilder.from_config(config=stub_data_designer_builder_config_str)
|
|
54
54
|
|
|
55
55
|
|
|
@@ -404,25 +404,25 @@ def test_delete_constraints(stub_data_designer_builder):
|
|
|
404
404
|
|
|
405
405
|
|
|
406
406
|
def test_delete_column(stub_data_designer_builder):
|
|
407
|
-
assert len(stub_data_designer_builder.get_columns_of_type(DataDesignerColumnType.SAMPLER)) ==
|
|
407
|
+
assert len(stub_data_designer_builder.get_columns_of_type(DataDesignerColumnType.SAMPLER)) == 4
|
|
408
408
|
stub_data_designer_builder.delete_column(column_name="code_id")
|
|
409
|
-
assert len(stub_data_designer_builder.get_columns_of_type(DataDesignerColumnType.SAMPLER)) ==
|
|
409
|
+
assert len(stub_data_designer_builder.get_columns_of_type(DataDesignerColumnType.SAMPLER)) == 3
|
|
410
410
|
|
|
411
411
|
with pytest.raises(
|
|
412
412
|
BuilderConfigurationError, match="Seed columns cannot be deleted. Please update the seed dataset instead."
|
|
413
413
|
):
|
|
414
|
-
stub_data_designer_builder.delete_column(column_name="
|
|
414
|
+
stub_data_designer_builder.delete_column(column_name="id")
|
|
415
415
|
|
|
416
416
|
|
|
417
417
|
def test_getters(stub_data_designer_builder):
|
|
418
|
-
assert len(stub_data_designer_builder.get_column_configs()) ==
|
|
418
|
+
assert len(stub_data_designer_builder.get_column_configs()) == 12
|
|
419
419
|
assert stub_data_designer_builder.get_column_config(name="code_id").name == "code_id"
|
|
420
420
|
assert len(stub_data_designer_builder.get_constraints(target_column="age")) == 1
|
|
421
421
|
assert len(stub_data_designer_builder.get_llm_gen_columns()) == 3
|
|
422
|
-
assert len(stub_data_designer_builder.get_columns_of_type(DataDesignerColumnType.SAMPLER)) ==
|
|
422
|
+
assert len(stub_data_designer_builder.get_columns_of_type(DataDesignerColumnType.SAMPLER)) == 4
|
|
423
423
|
assert len(stub_data_designer_builder.get_columns_excluding_type(DataDesignerColumnType.SAMPLER)) == 8
|
|
424
424
|
assert stub_data_designer_builder.get_seed_config().dataset == "test-repo/testing/data.csv"
|
|
425
|
-
assert stub_data_designer_builder.num_columns_of_type(DataDesignerColumnType.SAMPLER) ==
|
|
425
|
+
assert stub_data_designer_builder.num_columns_of_type(DataDesignerColumnType.SAMPLER) == 4
|
|
426
426
|
|
|
427
427
|
|
|
428
428
|
def test_write_config(stub_data_designer_builder):
|
|
@@ -759,3 +759,90 @@ def test_delete_model_config(stub_empty_builder):
|
|
|
759
759
|
|
|
760
760
|
assert result is stub_empty_builder
|
|
761
761
|
assert len(stub_empty_builder.model_configs) == 2
|
|
762
|
+
|
|
763
|
+
|
|
764
|
+
def test_add_column_collision_with_seed_dataset(stub_empty_builder: DataDesignerConfigBuilder) -> None:
|
|
765
|
+
"""Test that adding a column that collides with a seed dataset column raises an error."""
|
|
766
|
+
datastore_settings = DatastoreSettings(endpoint="https://huggingface.co", token="test-token")
|
|
767
|
+
|
|
768
|
+
with patch("data_designer.config.config_builder.fetch_seed_dataset_column_names") as mock_fetch:
|
|
769
|
+
mock_fetch.return_value = ["id", "name", "age"]
|
|
770
|
+
stub_empty_builder.with_seed_dataset(
|
|
771
|
+
DatastoreSeedDatasetReference(dataset="test-repo/test-data.parquet", datastore_settings=datastore_settings)
|
|
772
|
+
)
|
|
773
|
+
|
|
774
|
+
with pytest.raises(
|
|
775
|
+
BuilderConfigurationError,
|
|
776
|
+
match="Column 'id' already exists as a seed dataset column",
|
|
777
|
+
):
|
|
778
|
+
stub_empty_builder.add_column(
|
|
779
|
+
name="id",
|
|
780
|
+
column_type=DataDesignerColumnType.SAMPLER,
|
|
781
|
+
sampler_type=SamplerType.UUID,
|
|
782
|
+
)
|
|
783
|
+
|
|
784
|
+
with pytest.raises(
|
|
785
|
+
BuilderConfigurationError,
|
|
786
|
+
match="Column 'name' already exists as a seed dataset column",
|
|
787
|
+
):
|
|
788
|
+
stub_empty_builder.add_column(
|
|
789
|
+
LLMTextColumnConfig(
|
|
790
|
+
name="name",
|
|
791
|
+
prompt="Write a name",
|
|
792
|
+
model_alias="stub-model",
|
|
793
|
+
)
|
|
794
|
+
)
|
|
795
|
+
|
|
796
|
+
|
|
797
|
+
def test_with_seed_dataset_collision_with_existing_columns(stub_empty_builder: DataDesignerConfigBuilder) -> None:
|
|
798
|
+
"""Test that adding a seed dataset with columns that collide with existing columns raises an error."""
|
|
799
|
+
stub_empty_builder.add_column(
|
|
800
|
+
name="name",
|
|
801
|
+
column_type=DataDesignerColumnType.LLM_TEXT,
|
|
802
|
+
prompt="Write a name",
|
|
803
|
+
model_alias="stub-model",
|
|
804
|
+
)
|
|
805
|
+
stub_empty_builder.add_column(
|
|
806
|
+
name="age",
|
|
807
|
+
column_type=DataDesignerColumnType.SAMPLER,
|
|
808
|
+
sampler_type=SamplerType.UNIFORM,
|
|
809
|
+
params={"low": 1, "high": 100},
|
|
810
|
+
)
|
|
811
|
+
|
|
812
|
+
datastore_settings = DatastoreSettings(endpoint="https://huggingface.co", token="test-token")
|
|
813
|
+
|
|
814
|
+
with patch("data_designer.config.config_builder.fetch_seed_dataset_column_names") as mock_fetch:
|
|
815
|
+
mock_fetch.return_value = ["id", "name", "age", "city"]
|
|
816
|
+
with pytest.raises(
|
|
817
|
+
BuilderConfigurationError,
|
|
818
|
+
match=r"Seed dataset column\(s\) \['name', 'age'\] collide with existing column\(s\)",
|
|
819
|
+
):
|
|
820
|
+
stub_empty_builder.with_seed_dataset(
|
|
821
|
+
DatastoreSeedDatasetReference(
|
|
822
|
+
dataset="test-repo/test-data.parquet", datastore_settings=datastore_settings
|
|
823
|
+
)
|
|
824
|
+
)
|
|
825
|
+
|
|
826
|
+
assert stub_empty_builder.get_seed_config() is None
|
|
827
|
+
assert len(stub_empty_builder.get_columns_of_type(DataDesignerColumnType.SEED_DATASET)) == 0
|
|
828
|
+
|
|
829
|
+
|
|
830
|
+
def test_with_seed_dataset_no_collision(stub_empty_builder: DataDesignerConfigBuilder) -> None:
|
|
831
|
+
"""Test that adding a seed dataset with non-colliding columns works fine."""
|
|
832
|
+
stub_empty_builder.add_column(
|
|
833
|
+
name="unique_column",
|
|
834
|
+
column_type=DataDesignerColumnType.SAMPLER,
|
|
835
|
+
sampler_type=SamplerType.UUID,
|
|
836
|
+
)
|
|
837
|
+
|
|
838
|
+
datastore_settings = DatastoreSettings(endpoint="https://huggingface.co", token="test-token")
|
|
839
|
+
|
|
840
|
+
with patch("data_designer.config.config_builder.fetch_seed_dataset_column_names") as mock_fetch:
|
|
841
|
+
mock_fetch.return_value = ["id", "name", "age"]
|
|
842
|
+
stub_empty_builder.with_seed_dataset(
|
|
843
|
+
DatastoreSeedDatasetReference(dataset="test-repo/test-data.parquet", datastore_settings=datastore_settings)
|
|
844
|
+
)
|
|
845
|
+
|
|
846
|
+
assert stub_empty_builder.get_seed_config() is not None
|
|
847
|
+
assert len(stub_empty_builder.get_columns_of_type(DataDesignerColumnType.SEED_DATASET)) == 3
|
|
848
|
+
assert len(stub_empty_builder.get_columns_of_type(DataDesignerColumnType.SAMPLER)) == 1
|
|
@@ -164,7 +164,7 @@ def stub_empty_builder(stub_model_configs: list[ModelConfig]) -> DataDesignerCon
|
|
|
164
164
|
@pytest.fixture
|
|
165
165
|
def stub_complete_builder(stub_data_designer_builder_config_str: str) -> DataDesignerConfigBuilder:
|
|
166
166
|
with patch("data_designer.config.config_builder.fetch_seed_dataset_column_names") as mock_fetch:
|
|
167
|
-
mock_fetch.return_value = ["id", "name", "
|
|
167
|
+
mock_fetch.return_value = ["id", "name", "city", "country"]
|
|
168
168
|
return DataDesignerConfigBuilder.from_config(config=stub_data_designer_builder_config_str)
|
|
169
169
|
|
|
170
170
|
|
|
@@ -102,7 +102,7 @@ def test_log_pre_generation(mock_logger):
|
|
|
102
102
|
generator.log_pre_generation()
|
|
103
103
|
|
|
104
104
|
assert mock_logger.info.call_count == 3
|
|
105
|
-
mock_logger.info.assert_any_call("
|
|
105
|
+
mock_logger.info.assert_any_call("Preparing llm-text column generation")
|
|
106
106
|
mock_logger.info.assert_any_call(" |-- column name: 'test_column'")
|
|
107
107
|
mock_logger.info.assert_any_call(' |-- model config:\n{"test": "config"}')
|
|
108
108
|
|