data-designer 0.3.8__tar.gz → 0.3.8rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.gitignore +2 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/PKG-INFO +1 -1
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/recipes/cards.md +0 -3
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/mkdocs.yml +0 -1
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/_version.py +2 -2
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/default_model_settings.py +6 -14
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/run_config.py +0 -3
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/utils/constants.py +0 -2
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/dataset_builders/column_wise_builder.py +5 -2
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/dataset_builders/utils/concurrency.py +3 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +3 -7
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/interface/data_designer.py +5 -8
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/controllers/test_download_controller.py +3 -7
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/repositories/test_persona_repository.py +3 -3
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/services/test_download_service.py +1 -3
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/test_default_model_settings.py +6 -76
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/dataset_builders/test_column_wise_builder.py +6 -4
- data_designer-0.3.8/.claude/agents/docs-searcher.md +0 -74
- data_designer-0.3.8/.claude/agents/github-searcher.md +0 -81
- data_designer-0.3.8/.claude/settings.json +0 -1
- data_designer-0.3.8/.claude/settings.local.json +0 -22
- data_designer-0.3.8/.claude/skills/new-sdg/SKILL.md +0 -117
- data_designer-0.3.8/.claude/skills/search-docs/SKILL.md +0 -16
- data_designer-0.3.8/.claude/skills/search-github/SKILL.md +0 -16
- data_designer-0.3.8/docs/code_reference/run_config.md +0 -6
- data_designer-0.3.8/packages/data-designer/src/data_designer/interface/_version.py +0 -34
- data_designer-0.3.8/packages/data-designer-config/src/data_designer/config/_version.py +0 -34
- data_designer-0.3.8/packages/data-designer-engine/src/data_designer/engine/_version.py +0 -34
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.github/ISSUE_TEMPLATE/development-task.yml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.github/ISSUE_TEMPLATE/feature-request.yml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.github/workflows/build-docs.yml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.github/workflows/build-notebooks.yml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.github/workflows/check-colab-notebooks.yml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.github/workflows/ci.yml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.github/workflows/dco-assistant.yml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.github/workflows/pack-tutorials.yml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.github/workflows/semantic-pull-requests.yml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/.pre-commit-config.yaml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/AGENTS.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/CLAUDE.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/CODE_OF_CONDUCT.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/CONTRIBUTING.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/DCO +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/LICENSE +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/Makefile +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/README.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/VERSIONING.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/CONTRIBUTING.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/assets/palette-favicon.png +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/assets/recipes/code_generation/text_to_python.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/assets/recipes/code_generation/text_to_sql.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/assets/recipes/qa_and_chat/multi_turn_chat.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/assets/recipes/qa_and_chat/product_info_qa.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/code_reference/analysis.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/code_reference/column_configs.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/code_reference/config_builder.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/code_reference/data_designer_config.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/code_reference/models.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/code_reference/processors.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/code_reference/sampler_params.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/code_reference/validator_params.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/colab_notebooks/1-the-basics.ipynb +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/colab_notebooks/4-providing-images-as-context.ipynb +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/concepts/columns.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/concepts/models/configure-model-settings-with-the-cli.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/concepts/models/custom-model-settings.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/concepts/models/default-model-settings.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/concepts/models/inference-parameters.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/concepts/models/model-configs.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/concepts/models/model-providers.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/concepts/person_sampling.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/concepts/processors.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/concepts/validators.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/css/mkdocstrings.css +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/css/style.css +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/images/top-models.png +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/index.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/installation.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/js/toc-toggle.js +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/notebook_source/1-the-basics.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/notebook_source/3-seeding-with-a-dataset.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/notebook_source/4-providing-images-as-context.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/notebook_source/README.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/notebook_source/_README.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/notebook_source/_pyproject.toml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/overrides/main.html +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/plugins/available.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/plugins/example.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/plugins/overview.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/quick-start.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/recipes/code_generation/text_to_python.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/recipes/code_generation/text_to_sql.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/recipes/qa_and_chat/multi_turn_chat.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/recipes/qa_and_chat/product_info_qa.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/docs/scripts/generate_colab_notebooks.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/pyproject.toml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/scripts/test_license_headers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/scripts/update_license_headers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/README.md +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/commands/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/commands/download.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/commands/list.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/commands/models.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/commands/providers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/commands/reset.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/controllers/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/controllers/download_controller.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/controllers/model_controller.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/controllers/provider_controller.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/forms/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/forms/builder.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/forms/field.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/forms/form.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/forms/model_builder.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/forms/provider_builder.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/main.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/repositories/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/repositories/base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/repositories/model_repository.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/repositories/persona_repository.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/repositories/provider_repository.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/services/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/services/download_service.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/services/model_service.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/services/provider_service.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/ui.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/cli/utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/analysis/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/analysis/column_profilers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/analysis/column_statistics.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/analysis/dataset_profiler.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/analysis/utils/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/analysis/utils/reporting.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/column_configs.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/column_types.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/config_builder.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/data_designer_config.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/dataset_builders.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/dataset_metadata.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/exports.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/interface.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/models.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/preview_results.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/processors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/sampler_constraints.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/sampler_params.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/seed.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/seed_source.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/seed_source_types.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/utils/code_lang.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/utils/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/utils/info.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/utils/io_helpers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/utils/misc.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/utils/numerical_helpers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/utils/type_helpers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/utils/visualization.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/validator_params.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/analysis/column_profilers/base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/analysis/column_statistics.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/analysis/dataset_profiler.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/analysis/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/generators/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/generators/base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/generators/embedding.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/generators/llm_completion.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/registry.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/utils/generator_classification.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/compiler.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/configurable_task.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/dataset_builders/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/model_provider.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/facade.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/factory.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/litellm_overrides.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/parsers/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/parsers/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/parsers/parser.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/parsers/postprocessors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/parsers/types.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/recipes/base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/registry.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/telemetry.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/usage.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/models/utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/ginja/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/ginja/ast.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/ginja/environment.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/ginja/record.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/gsonschema/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/processors/base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/processors/drop_columns.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/processors/registry.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/processors/schema_transform.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/processing/utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/registry/base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/registry/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/resources/managed_storage.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/resources/resource_provider.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/resources/seed_reader.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/column.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/constraints.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/generator.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/people_gen.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/schema.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/sampling_gen/utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/secret_resolver.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/validation.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/validators/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/validators/base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/validators/local_callable.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/validators/python.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/validators/remote.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/engine/validators/sql.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/essentials/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/interface/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/interface/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/interface/results.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/lazy_heavy_imports.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/logging.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/plugin_manager.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/plugins/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/plugins/errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/plugins/plugin.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/plugins/registry.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/plugins/testing/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/plugins/testing/stubs.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/plugins/testing/utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/commands/test_download_command.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/commands/test_list_command.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/commands/test_models_command.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/commands/test_providers_command.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/commands/test_reset_command.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/conftest.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/controllers/test_model_controller.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/controllers/test_provider_controller.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/forms/test_field.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/forms/test_form.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/forms/test_model_builder.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/forms/test_provider_builder.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/repositories/test_model_repository.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/repositories/test_provider_repository.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/services/test_model_service.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/services/test_provider_service.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/test_cli_utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/analysis/conftest.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/analysis/test_column_statistics.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/analysis/test_dataset_profiler_results.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/analysis/utils/test_reporting.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/test_columns.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/test_config_builder.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/test_data_designer_config.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/test_models.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/test_processors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/test_sampler_constraints.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/test_sampler_params.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/test_seed.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/test_seed_source.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/test_validator_params.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/utils/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/utils/test_code_lang.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/utils/test_info.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/utils/test_io_helpers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/utils/test_misc.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/utils/test_type_helpers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/config/utils/test_visualization.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/conftest.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/analysis/column_profilers/test_base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/analysis/conftest.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/analysis/test_dataset_profiler.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/analysis/test_errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/generators/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/generators/test_embedding.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/generators/test_expression.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/generators/test_llm_completion_generators.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/generators/test_samplers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/generators/test_validation.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/test_registry.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/utils/test_generator_classification.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/conftest.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/conftest.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/parsers/test_parser.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/parsers/test_parsers_types.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/parsers/test_postprocessors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/recipes/test_recipe_base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/recipes/test_response_recipes.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/stub_secrets.json +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/test_facade.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/test_litellm_overrides.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/test_model_errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/test_model_registry.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/test_model_utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/models/test_usage.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/ginja/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/ginja/test_ast.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/ginja/test_environment.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/ginja/test_exceptions.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/ginja/test_record.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/gsonschema/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/gsonschema/test_types.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/gsonschema/test_validators.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/processors/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/processors/test_drop_columns.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/processors/test_registry.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/processors/test_schema_transform.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/processing/test_utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/registry/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/registry/conftest.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/registry/test_base.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/registry/test_data_designer_registry.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/registry/test_errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/resources/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/resources/conftest.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/resources/test_managed_storage.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/resources/test_resource_provider.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/resources/test_seed_reader.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/conftest.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/entities/test_person.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/test_column.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/test_constraints.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/test_generator.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/test_people_gen.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/test_schema.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/sampling_gen/test_utils.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/test_compiler.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/test_configurable_task.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/test_dataset_metadata.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/test_engine_errors.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/test_model_provider.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/test_secret_resolver.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/test_validation.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/validators/test_local_callable.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/validators/test_python.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/validators/test_remote.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/engine/validators/test_sql.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/essentials/test_init.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/interface/test_data_designer.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/interface/test_results.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/plugins/test_plugin.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/plugins/test_plugin_registry.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/test_import_perf.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/test_logging.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/test_plugin_manager.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests_e2e/pyproject.toml +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests_e2e/src/data_designer_e2e_tests/plugins/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/config.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/impl.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/plugin.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/__init__.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/config.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/impl.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/plugin.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests_e2e/tests/test_e2e.py +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/tests_e2e/tests/test_seed.csv +0 -0
- {data_designer-0.3.8 → data_designer-0.3.8rc1}/uv.lock +0 -0
|
@@ -6,9 +6,6 @@ Each recipe is a self-contained example that can be run independently.
|
|
|
6
6
|
!!! question "New to Data Designer?"
|
|
7
7
|
Recipes provide working code for specific use cases without detailed explanations. If you're learning Data Designer for the first time, we recommend starting with our [tutorial notebooks](../../notebooks/), which offer step-by-step guidance and explain core concepts. Once you're familiar with the basics, return here for practical, ready-to-use implementations.
|
|
8
8
|
|
|
9
|
-
!!! tip Prerequisite
|
|
10
|
-
These recipes use the Open AI model provider by default. Ensure your OpenAI model provider has been set up using the Data Designer CLI before running a recipe.
|
|
11
|
-
|
|
12
9
|
<div class="grid cards" markdown>
|
|
13
10
|
|
|
14
11
|
- :material-snake:{ .lg .middle } **Text to Python**
|
|
@@ -43,7 +43,6 @@ nav:
|
|
|
43
43
|
- column_configs: code_reference/column_configs.md
|
|
44
44
|
- config_builder: code_reference/config_builder.md
|
|
45
45
|
- data_designer_config: code_reference/data_designer_config.md
|
|
46
|
-
- run_config: code_reference/run_config.md
|
|
47
46
|
- sampler_params: code_reference/sampler_params.md
|
|
48
47
|
- validator_params: code_reference/validator_params.md
|
|
49
48
|
- processors: code_reference/processors.md
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.3.
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 3, 8)
|
|
31
|
+
__version__ = version = '0.3.8rc1'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 8, 'rc1')
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
{data_designer-0.3.8 → data_designer-0.3.8rc1}/src/data_designer/config/default_model_settings.py
RENAMED
|
@@ -71,20 +71,12 @@ def get_default_model_configs() -> list[ModelConfig]:
|
|
|
71
71
|
return []
|
|
72
72
|
|
|
73
73
|
|
|
74
|
-
def
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
providers_with_missing_keys.append(provider)
|
|
81
|
-
elif provider.api_key.isupper() and "_" in provider.api_key:
|
|
82
|
-
# Looks like an environment variable name, check if it's set
|
|
83
|
-
if os.environ.get(provider.api_key) is None:
|
|
84
|
-
providers_with_missing_keys.append(provider)
|
|
85
|
-
# else: It's an actual API key value (not an env var), so it's valid
|
|
86
|
-
|
|
87
|
-
return providers_with_missing_keys
|
|
74
|
+
def get_default_model_providers_missing_api_keys() -> list[str]:
|
|
75
|
+
missing_api_keys = []
|
|
76
|
+
for predefined_provider in PREDEFINED_PROVIDERS:
|
|
77
|
+
if os.environ.get(predefined_provider["api_key"]) is None:
|
|
78
|
+
missing_api_keys.append(predefined_provider["api_key"])
|
|
79
|
+
return missing_api_keys
|
|
88
80
|
|
|
89
81
|
|
|
90
82
|
def get_default_providers() -> list[ModelProvider]:
|
|
@@ -26,8 +26,6 @@ class RunConfig(ConfigBase):
|
|
|
26
26
|
buffer_size: Number of records to process in each batch during dataset generation.
|
|
27
27
|
A batch is processed end-to-end (column generation, post-batch processors, and writing the batch
|
|
28
28
|
to artifact storage) before moving on to the next batch. Must be > 0. Default is 1000.
|
|
29
|
-
non_inference_max_parallel_workers: Maximum number of worker threads used for non-inference
|
|
30
|
-
cell-by-cell generators. Must be >= 1. Default is 4.
|
|
31
29
|
max_conversation_restarts: Maximum number of full conversation restarts permitted when
|
|
32
30
|
generation tasks call `ModelFacade.generate(...)`. Must be >= 0. Default is 5.
|
|
33
31
|
max_conversation_correction_steps: Maximum number of correction rounds permitted within a
|
|
@@ -39,7 +37,6 @@ class RunConfig(ConfigBase):
|
|
|
39
37
|
shutdown_error_rate: float = Field(default=0.5, ge=0.0, le=1.0)
|
|
40
38
|
shutdown_error_window: int = Field(default=10, ge=0)
|
|
41
39
|
buffer_size: int = Field(default=1000, gt=0)
|
|
42
|
-
non_inference_max_parallel_workers: int = Field(default=4, ge=1)
|
|
43
40
|
max_conversation_restarts: int = Field(default=5, ge=0)
|
|
44
41
|
max_conversation_correction_steps: int = Field(default=0, ge=0)
|
|
45
42
|
|
|
@@ -353,11 +353,9 @@ PREDEFINED_PROVIDERS_MODEL_MAP = {
|
|
|
353
353
|
NEMOTRON_PERSONAS_DATASET_SIZES = {
|
|
354
354
|
"en_US": "1.24 GB",
|
|
355
355
|
"en_IN": "2.39 GB",
|
|
356
|
-
"en_SG": "0.30 GB",
|
|
357
356
|
"hi_Deva_IN": "4.14 GB",
|
|
358
357
|
"hi_Latn_IN": "2.7 GB",
|
|
359
358
|
"ja_JP": "1.69 GB",
|
|
360
|
-
"pt_BR": "2.33 GB",
|
|
361
359
|
}
|
|
362
360
|
|
|
363
361
|
LOCALES_WITH_MANAGED_DATASETS = list[str](NEMOTRON_PERSONAS_DATASET_SIZES.keys())
|
|
@@ -31,7 +31,10 @@ from data_designer.engine.compiler import compile_data_designer_config
|
|
|
31
31
|
from data_designer.engine.dataset_builders.artifact_storage import SDG_CONFIG_FILENAME, ArtifactStorage
|
|
32
32
|
from data_designer.engine.dataset_builders.errors import DatasetGenerationError, DatasetProcessingError
|
|
33
33
|
from data_designer.engine.dataset_builders.multi_column_configs import MultiColumnConfig
|
|
34
|
-
from data_designer.engine.dataset_builders.utils.concurrency import
|
|
34
|
+
from data_designer.engine.dataset_builders.utils.concurrency import (
|
|
35
|
+
MAX_CONCURRENCY_PER_NON_LLM_GENERATOR,
|
|
36
|
+
ConcurrentThreadExecutor,
|
|
37
|
+
)
|
|
35
38
|
from data_designer.engine.dataset_builders.utils.config_compiler import compile_dataset_builder_column_configs
|
|
36
39
|
from data_designer.engine.dataset_builders.utils.dataset_batch_manager import DatasetBatchManager
|
|
37
40
|
from data_designer.engine.models.telemetry import InferenceEvent, NemoSourceEnum, TaskStatusEnum, TelemetryHandler
|
|
@@ -199,7 +202,7 @@ class ColumnWiseDatasetBuilder:
|
|
|
199
202
|
self.batch_manager.add_records(df.to_dict(orient="records"))
|
|
200
203
|
|
|
201
204
|
def _run_cell_by_cell_generator(self, generator: ColumnGenerator) -> None:
|
|
202
|
-
max_workers =
|
|
205
|
+
max_workers = MAX_CONCURRENCY_PER_NON_LLM_GENERATOR
|
|
203
206
|
if isinstance(generator, ColumnGeneratorWithModel):
|
|
204
207
|
max_workers = generator.inference_parameters.max_parallel_requests
|
|
205
208
|
self._fan_out_with_threads(generator, max_workers=max_workers)
|
|
@@ -16,6 +16,9 @@ from data_designer.engine.errors import DataDesignerRuntimeError, ErrorTrap
|
|
|
16
16
|
|
|
17
17
|
logger = logging.getLogger(__name__)
|
|
18
18
|
|
|
19
|
+
# Constants
|
|
20
|
+
MAX_CONCURRENCY_PER_NON_LLM_GENERATOR = 4
|
|
21
|
+
|
|
19
22
|
|
|
20
23
|
class ExecutorResults(BaseModel):
|
|
21
24
|
failure_threshold: float = 0.0 # Error rate threshold
|
|
@@ -40,16 +40,13 @@ PII_FIELDS = [
|
|
|
40
40
|
"state",
|
|
41
41
|
"email_address",
|
|
42
42
|
"phone_number",
|
|
43
|
-
# Brazil-specific fields
|
|
44
|
-
"race",
|
|
45
43
|
# Japan-specific fields
|
|
46
44
|
"area",
|
|
47
45
|
"prefecture",
|
|
48
46
|
"zone",
|
|
49
|
-
# Brazil and India shared fields
|
|
50
|
-
"religion",
|
|
51
47
|
# India-specific fields
|
|
52
48
|
"district",
|
|
49
|
+
"religion",
|
|
53
50
|
"education_degree",
|
|
54
51
|
"first_language",
|
|
55
52
|
"second_language",
|
|
@@ -81,10 +78,9 @@ PERSONA_FIELDS = [
|
|
|
81
78
|
# Japan-specific persona fields
|
|
82
79
|
"aspects",
|
|
83
80
|
"digital_skills",
|
|
84
|
-
# Brazil and India shared persona fields
|
|
85
|
-
"religious_persona",
|
|
86
|
-
"religious_background",
|
|
87
81
|
# India-specific persona fields
|
|
88
82
|
"linguistic_persona",
|
|
83
|
+
"religious_persona",
|
|
89
84
|
"linguistic_background",
|
|
85
|
+
"religious_background",
|
|
90
86
|
]
|
|
@@ -12,9 +12,9 @@ from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
|
12
12
|
from data_designer.config.data_designer_config import DataDesignerConfig
|
|
13
13
|
from data_designer.config.default_model_settings import (
|
|
14
14
|
get_default_model_configs,
|
|
15
|
+
get_default_model_providers_missing_api_keys,
|
|
15
16
|
get_default_provider_name,
|
|
16
17
|
get_default_providers,
|
|
17
|
-
get_providers_with_missing_api_keys,
|
|
18
18
|
)
|
|
19
19
|
from data_designer.config.interface import DataDesignerInterface
|
|
20
20
|
from data_designer.config.models import (
|
|
@@ -28,6 +28,7 @@ from data_designer.config.utils.constants import (
|
|
|
28
28
|
MANAGED_ASSETS_PATH,
|
|
29
29
|
MODEL_CONFIGS_FILE_PATH,
|
|
30
30
|
MODEL_PROVIDERS_FILE_PATH,
|
|
31
|
+
PREDEFINED_PROVIDERS,
|
|
31
32
|
)
|
|
32
33
|
from data_designer.config.utils.info import InfoType, InterfaceInfo
|
|
33
34
|
from data_designer.engine.analysis.dataset_profiler import DataDesignerDatasetProfiler, DatasetProfilerConfig
|
|
@@ -316,8 +317,7 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
316
317
|
|
|
317
318
|
Args:
|
|
318
319
|
run_config: A RunConfig instance containing runtime settings such as
|
|
319
|
-
early shutdown behavior
|
|
320
|
-
concurrency via `non_inference_max_parallel_workers`. Import RunConfig from
|
|
320
|
+
early shutdown behavior and batch sizing via `buffer_size`. Import RunConfig from
|
|
321
321
|
data_designer.essentials.
|
|
322
322
|
|
|
323
323
|
Example:
|
|
@@ -334,11 +334,8 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
334
334
|
def _resolve_model_providers(self, model_providers: list[ModelProvider] | None) -> list[ModelProvider]:
|
|
335
335
|
if model_providers is None:
|
|
336
336
|
model_providers = get_default_providers()
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
if len(providers_with_missing_keys) == len(model_providers):
|
|
341
|
-
# All providers have missing API keys
|
|
337
|
+
missing_api_keys = get_default_model_providers_missing_api_keys()
|
|
338
|
+
if len(missing_api_keys) == len(PREDEFINED_PROVIDERS):
|
|
342
339
|
logger.warning(
|
|
343
340
|
"🚨 You are trying to use a default model provider but your API keys are missing."
|
|
344
341
|
"\n\t\t\tSet the API key for the default providers you intend to use and re-initialize the Data Designer object."
|
{data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/controllers/test_download_controller.py
RENAMED
|
@@ -85,18 +85,16 @@ def test_run_personas_with_all_flag(
|
|
|
85
85
|
# Verify NGC check was called
|
|
86
86
|
mock_check_ngc.assert_called_once()
|
|
87
87
|
|
|
88
|
-
# Verify all
|
|
89
|
-
assert mock_download.call_count ==
|
|
88
|
+
# Verify all 5 locales were downloaded
|
|
89
|
+
assert mock_download.call_count == 5
|
|
90
90
|
|
|
91
91
|
# Verify each locale was downloaded
|
|
92
92
|
downloaded_locales = [call[0][0] for call in mock_download.call_args_list]
|
|
93
93
|
assert "en_US" in downloaded_locales
|
|
94
94
|
assert "en_IN" in downloaded_locales
|
|
95
|
-
assert "en_SG" in downloaded_locales
|
|
96
95
|
assert "hi_Deva_IN" in downloaded_locales
|
|
97
96
|
assert "hi_Latn_IN" in downloaded_locales
|
|
98
97
|
assert "ja_JP" in downloaded_locales
|
|
99
|
-
assert "pt_BR" in downloaded_locales
|
|
100
98
|
|
|
101
99
|
|
|
102
100
|
@patch.object(DownloadController, "_download_locale", return_value=True)
|
|
@@ -219,14 +217,12 @@ def test_determine_locales_with_all_flag(controller: DownloadController) -> None
|
|
|
219
217
|
"""Test _determine_locales returns all locales when all_locales=True."""
|
|
220
218
|
result = controller._determine_locales(locales=None, all_locales=True)
|
|
221
219
|
|
|
222
|
-
assert len(result) ==
|
|
220
|
+
assert len(result) == 5
|
|
223
221
|
assert "en_US" in result
|
|
224
222
|
assert "en_IN" in result
|
|
225
|
-
assert "en_SG" in result
|
|
226
223
|
assert "hi_Deva_IN" in result
|
|
227
224
|
assert "hi_Latn_IN" in result
|
|
228
225
|
assert "ja_JP" in result
|
|
229
|
-
assert "pt_BR" in result
|
|
230
226
|
|
|
231
227
|
|
|
232
228
|
def test_determine_locales_with_valid_locale_flags(controller: DownloadController) -> None:
|
{data_designer-0.3.8 → data_designer-0.3.8rc1}/tests/cli/repositories/test_persona_repository.py
RENAMED
|
@@ -15,7 +15,7 @@ def repository() -> PersonaRepository:
|
|
|
15
15
|
def test_init(repository: PersonaRepository) -> None:
|
|
16
16
|
"""Test repository initialization creates registry."""
|
|
17
17
|
assert repository._registry is not None
|
|
18
|
-
assert len(repository._registry.locales) ==
|
|
18
|
+
assert len(repository._registry.locales) == 5
|
|
19
19
|
assert repository._registry.dataset_prefix == "nemotron-personas-dataset-"
|
|
20
20
|
|
|
21
21
|
|
|
@@ -24,11 +24,11 @@ def test_list_all(repository: PersonaRepository) -> None:
|
|
|
24
24
|
locales = repository.list_all()
|
|
25
25
|
|
|
26
26
|
assert isinstance(locales, list)
|
|
27
|
-
assert len(locales) ==
|
|
27
|
+
assert len(locales) == 5
|
|
28
28
|
|
|
29
29
|
# Verify all expected locales are present
|
|
30
30
|
locale_codes = {locale.code for locale in locales}
|
|
31
|
-
assert locale_codes == {"en_US", "en_IN", "
|
|
31
|
+
assert locale_codes == {"en_US", "en_IN", "hi_Deva_IN", "hi_Latn_IN", "ja_JP"}
|
|
32
32
|
|
|
33
33
|
# Verify each locale has required fields
|
|
34
34
|
for locale in locales:
|
|
@@ -51,14 +51,12 @@ def test_get_available_locales(service: DownloadService) -> None:
|
|
|
51
51
|
locales = service.get_available_locales()
|
|
52
52
|
|
|
53
53
|
assert isinstance(locales, dict)
|
|
54
|
-
assert len(locales) ==
|
|
54
|
+
assert len(locales) == 5
|
|
55
55
|
assert "en_US" in locales
|
|
56
56
|
assert "en_IN" in locales
|
|
57
|
-
assert "en_SG" in locales
|
|
58
57
|
assert "hi_Deva_IN" in locales
|
|
59
58
|
assert "hi_Latn_IN" in locales
|
|
60
59
|
assert "ja_JP" in locales
|
|
61
|
-
assert "pt_BR" in locales
|
|
62
60
|
|
|
63
61
|
# Verify values are locale codes (not descriptions)
|
|
64
62
|
assert locales["en_US"] == "en_US"
|
|
@@ -13,12 +13,12 @@ from data_designer.config.default_model_settings import (
|
|
|
13
13
|
get_builtin_model_providers,
|
|
14
14
|
get_default_inference_parameters,
|
|
15
15
|
get_default_model_configs,
|
|
16
|
+
get_default_model_providers_missing_api_keys,
|
|
16
17
|
get_default_provider_name,
|
|
17
18
|
get_default_providers,
|
|
18
|
-
get_providers_with_missing_api_keys,
|
|
19
19
|
resolve_seed_default_model_settings,
|
|
20
20
|
)
|
|
21
|
-
from data_designer.config.models import ChatCompletionInferenceParams, EmbeddingInferenceParams
|
|
21
|
+
from data_designer.config.models import ChatCompletionInferenceParams, EmbeddingInferenceParams
|
|
22
22
|
from data_designer.config.utils.visualization import get_nvidia_api_key, get_openai_api_key
|
|
23
23
|
|
|
24
24
|
|
|
@@ -190,77 +190,7 @@ def test_resolve_seed_default_model_settings(tmp_path: Path):
|
|
|
190
190
|
assert providers_data == {"providers": [p.model_dump() for p in get_builtin_model_providers()]}
|
|
191
191
|
|
|
192
192
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
ModelProvider(name="provider1", endpoint="http://test1.com", api_key="NVIDIA_API_KEY"), # env var
|
|
198
|
-
ModelProvider(name="provider2", endpoint="http://test2.com", api_key="sk-actual-key-12345"), # actual key
|
|
199
|
-
ModelProvider(name="provider3", endpoint="http://test3.com", api_key=None), # no key
|
|
200
|
-
]
|
|
201
|
-
|
|
202
|
-
with patch("data_designer.config.default_model_settings.os.environ.get") as mock_env:
|
|
203
|
-
# Mock env to have NVIDIA_API_KEY set but not MISSING_VAR
|
|
204
|
-
def mock_get(key: str) -> str | None:
|
|
205
|
-
return "test-key" if key == "NVIDIA_API_KEY" else None
|
|
206
|
-
|
|
207
|
-
mock_env.side_effect = mock_get
|
|
208
|
-
|
|
209
|
-
missing = get_providers_with_missing_api_keys(providers)
|
|
210
|
-
|
|
211
|
-
# provider1 has env var set -> OK
|
|
212
|
-
# provider2 has actual API key -> OK
|
|
213
|
-
# provider3 has no API key -> MISSING
|
|
214
|
-
assert len(missing) == 1
|
|
215
|
-
assert missing[0].name == "provider3"
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
def test_get_providers_with_missing_api_keys_env_var_not_set():
|
|
219
|
-
"""Test detection when environment variable is not set."""
|
|
220
|
-
providers = [
|
|
221
|
-
ModelProvider(name="provider1", endpoint="http://test1.com", api_key="MISSING_ENV_VAR"),
|
|
222
|
-
]
|
|
223
|
-
|
|
224
|
-
with patch("data_designer.config.default_model_settings.os.environ.get", return_value=None):
|
|
225
|
-
missing = get_providers_with_missing_api_keys(providers)
|
|
226
|
-
assert len(missing) == 1
|
|
227
|
-
assert missing[0].name == "provider1"
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
def test_get_providers_with_missing_api_keys_all_valid():
|
|
231
|
-
"""Test when all providers have valid API keys."""
|
|
232
|
-
providers = [
|
|
233
|
-
ModelProvider(name="provider1", endpoint="http://test1.com", api_key="sk-actual-key-1"),
|
|
234
|
-
ModelProvider(name="provider2", endpoint="http://test2.com", api_key="sk-actual-key-2"),
|
|
235
|
-
]
|
|
236
|
-
|
|
237
|
-
missing = get_providers_with_missing_api_keys(providers)
|
|
238
|
-
assert len(missing) == 0
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
def test_get_providers_with_missing_api_keys_all_missing():
|
|
242
|
-
"""Test when all providers have missing API keys."""
|
|
243
|
-
providers = [
|
|
244
|
-
ModelProvider(name="provider1", endpoint="http://test1.com", api_key="MISSING_VAR_1"),
|
|
245
|
-
ModelProvider(name="provider2", endpoint="http://test2.com", api_key=None),
|
|
246
|
-
]
|
|
247
|
-
|
|
248
|
-
with patch("data_designer.config.default_model_settings.os.environ.get", return_value=None):
|
|
249
|
-
missing = get_providers_with_missing_api_keys(providers)
|
|
250
|
-
assert len(missing) == 2
|
|
251
|
-
assert {p.name for p in missing} == {"provider1", "provider2"}
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
def test_get_providers_with_missing_api_keys_mixed_case():
|
|
255
|
-
"""Test that lowercase API keys are treated as actual keys, not env vars."""
|
|
256
|
-
providers = [
|
|
257
|
-
ModelProvider(name="provider1", endpoint="http://test1.com", api_key="lowercase_key"),
|
|
258
|
-
ModelProvider(name="provider2", endpoint="http://test2.com", api_key="UPPERCASE_KEY"),
|
|
259
|
-
]
|
|
260
|
-
|
|
261
|
-
with patch("data_designer.config.default_model_settings.os.environ.get", return_value=None):
|
|
262
|
-
missing = get_providers_with_missing_api_keys(providers)
|
|
263
|
-
# provider1 has lowercase key (treated as actual key) -> OK
|
|
264
|
-
# provider2 has uppercase key but env var not set -> MISSING
|
|
265
|
-
assert len(missing) == 1
|
|
266
|
-
assert missing[0].name == "provider2"
|
|
193
|
+
@patch("data_designer.config.default_model_settings.os.environ.get")
|
|
194
|
+
def test_get_default_model_providers_missing_api_keys(mock_environ_get):
|
|
195
|
+
mock_environ_get.return_value = None
|
|
196
|
+
assert get_default_model_providers_missing_api_keys() == ["NVIDIA_API_KEY", "OPENAI_API_KEY", "OPENROUTER_API_KEY"]
|
|
@@ -15,7 +15,10 @@ from data_designer.config.processors import DropColumnsProcessorConfig
|
|
|
15
15
|
from data_designer.config.run_config import RunConfig
|
|
16
16
|
from data_designer.config.sampler_params import SamplerType, UUIDSamplerParams
|
|
17
17
|
from data_designer.engine.column_generators.generators.base import GenerationStrategy
|
|
18
|
-
from data_designer.engine.dataset_builders.column_wise_builder import
|
|
18
|
+
from data_designer.engine.dataset_builders.column_wise_builder import (
|
|
19
|
+
MAX_CONCURRENCY_PER_NON_LLM_GENERATOR,
|
|
20
|
+
ColumnWiseDatasetBuilder,
|
|
21
|
+
)
|
|
19
22
|
from data_designer.engine.dataset_builders.errors import DatasetGenerationError
|
|
20
23
|
from data_designer.engine.models.telemetry import InferenceEvent, NemoSourceEnum, TaskStatusEnum
|
|
21
24
|
from data_designer.engine.models.usage import ModelUsageStats, TokenUsageStats
|
|
@@ -240,9 +243,8 @@ def test_column_wise_dataset_builder_initialize_processors(stub_column_wise_buil
|
|
|
240
243
|
assert processors[BuildStage.POST_BATCH][0].config.column_names == ["column_to_drop"]
|
|
241
244
|
|
|
242
245
|
|
|
243
|
-
def
|
|
244
|
-
|
|
245
|
-
assert run_config.non_inference_max_parallel_workers == 4
|
|
246
|
+
def test_constants_max_concurrency_constant():
|
|
247
|
+
assert MAX_CONCURRENCY_PER_NON_LLM_GENERATOR == 4
|
|
246
248
|
|
|
247
249
|
|
|
248
250
|
@patch("data_designer.engine.dataset_builders.column_wise_builder.TelemetryHandler")
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: docs-searcher
|
|
3
|
-
description: Search local documentation in the docs/ folder for content related to a topic. Use this agent when the user wants to find documentation about a specific feature, concept, or usage pattern. Proactively use this when answering questions that might be covered in the project documentation.
|
|
4
|
-
tools: Glob, Grep, Read
|
|
5
|
-
model: haiku
|
|
6
|
-
permissionMode: bypassPermissions
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
# Documentation Search Agent
|
|
10
|
-
|
|
11
|
-
You are a documentation search specialist. Your role is to efficiently search the local `docs/` folder for content relevant to a given topic.
|
|
12
|
-
|
|
13
|
-
## Instructions
|
|
14
|
-
|
|
15
|
-
When given a search topic, perform the following searches:
|
|
16
|
-
|
|
17
|
-
1. **Find all documentation files** in the docs/ folder:
|
|
18
|
-
```
|
|
19
|
-
Glob pattern: "docs/**/*.md"
|
|
20
|
-
```
|
|
21
|
-
|
|
22
|
-
2. **Search for topic keywords** across all markdown files:
|
|
23
|
-
```
|
|
24
|
-
Grep pattern: "<topic keywords>" in path: "docs/"
|
|
25
|
-
```
|
|
26
|
-
- Try multiple variations of the search terms (singular/plural, related terms)
|
|
27
|
-
- Use case-insensitive search (`-i: true`)
|
|
28
|
-
|
|
29
|
-
3. **Read relevant sections** from files with matches:
|
|
30
|
-
- Read the matched files to get full context
|
|
31
|
-
- Extract the most relevant sections around the matches
|
|
32
|
-
|
|
33
|
-
4. **Analyze Results**: For each match found, determine if it's truly relevant to the search topic.
|
|
34
|
-
|
|
35
|
-
5. **Output Format**: Return a structured markdown summary with:
|
|
36
|
-
- Links to relevant documentation files
|
|
37
|
-
- Brief excerpts showing the relevant content
|
|
38
|
-
- A sentence explaining why each result is pertinent
|
|
39
|
-
|
|
40
|
-
## Output Template
|
|
41
|
-
|
|
42
|
-
```markdown
|
|
43
|
-
## Documentation Search Results for "<topic>"
|
|
44
|
-
|
|
45
|
-
### Relevant Documentation
|
|
46
|
-
|
|
47
|
-
- **[docs/path/to/file.md](docs/path/to/file.md)**
|
|
48
|
-
> Brief excerpt showing relevant content...
|
|
49
|
-
|
|
50
|
-
Explanation of why this is relevant to the search topic.
|
|
51
|
-
|
|
52
|
-
- **[docs/another/file.md](docs/another/file.md)**
|
|
53
|
-
> Another relevant excerpt...
|
|
54
|
-
|
|
55
|
-
Explanation of relevance.
|
|
56
|
-
|
|
57
|
-
### Summary
|
|
58
|
-
Brief summary of what was found and any recommendations for the user.
|
|
59
|
-
```
|
|
60
|
-
|
|
61
|
-
## Important Notes
|
|
62
|
-
|
|
63
|
-
- Only include results that are actually relevant to the search topic
|
|
64
|
-
- If no relevant documentation is found, clearly state that
|
|
65
|
-
- Keep excerpts concise but include enough context to be useful
|
|
66
|
-
- Prioritize user guides and examples over API reference when both exist
|
|
67
|
-
- If the docs/ folder doesn't exist or is empty, report that clearly
|
|
68
|
-
|
|
69
|
-
## Search Strategy
|
|
70
|
-
|
|
71
|
-
1. Start with exact keyword matches
|
|
72
|
-
2. If few results, try related terms or partial matches
|
|
73
|
-
3. Check file names for topic-related terms (e.g., searching "models" should check files named `models.md`, `model-config.md`, etc.)
|
|
74
|
-
4. Look at section headings within files for topic mentions
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: github-searcher
|
|
3
|
-
description: Search GitHub issues, discussions, and PRs for content related to a topic. Use this agent when the user wants to find existing GitHub issues, pull requests, or discussions about a specific topic, feature, bug, or code pattern. Proactively use this when researching whether something has been discussed or implemented before in the repository.
|
|
4
|
-
tools: Bash
|
|
5
|
-
model: haiku
|
|
6
|
-
permissionMode: bypassPermissions
|
|
7
|
-
---
|
|
8
|
-
|
|
9
|
-
# GitHub Content Search Agent
|
|
10
|
-
|
|
11
|
-
You are a GitHub search specialist. Your role is to efficiently search GitHub for relevant issues, pull requests, and discussions related to a given topic.
|
|
12
|
-
|
|
13
|
-
## Instructions
|
|
14
|
-
|
|
15
|
-
When given a search topic, perform the following searches:
|
|
16
|
-
|
|
17
|
-
1. **Search Issues** using the `gh` CLI:
|
|
18
|
-
```bash
|
|
19
|
-
gh issue list --search "<topic>" --limit 20 --json number,title,url,body,state
|
|
20
|
-
```
|
|
21
|
-
|
|
22
|
-
2. **Search Pull Requests** using the `gh` CLI:
|
|
23
|
-
```bash
|
|
24
|
-
gh pr list --search "<topic>" --limit 20 --json number,title,url,body,state
|
|
25
|
-
```
|
|
26
|
-
|
|
27
|
-
3. **Search Discussions** using the `gh` CLI (if the repository has discussions enabled):
|
|
28
|
-
```bash
|
|
29
|
-
gh api graphql -f query='
|
|
30
|
-
query($search: String!) {
|
|
31
|
-
search(query: $search, type: DISCUSSION, first: 20) {
|
|
32
|
-
nodes {
|
|
33
|
-
... on Discussion {
|
|
34
|
-
title
|
|
35
|
-
url
|
|
36
|
-
body
|
|
37
|
-
category { name }
|
|
38
|
-
}
|
|
39
|
-
}
|
|
40
|
-
}
|
|
41
|
-
}
|
|
42
|
-
' -f search="repo:{owner}/{repo} <topic>"
|
|
43
|
-
```
|
|
44
|
-
Note: Get the owner/repo from `gh repo view --json nameWithOwner -q .nameWithOwner`
|
|
45
|
-
|
|
46
|
-
4. **Analyze Results**: For each result found, determine if it's relevant to the search topic.
|
|
47
|
-
|
|
48
|
-
5. **Output Format**: Return a markdown list with:
|
|
49
|
-
- A link to each relevant item (issue, PR, or discussion)
|
|
50
|
-
- A *single* sentence explaining why that link is pertinent to the search topic
|
|
51
|
-
|
|
52
|
-
## Output Template
|
|
53
|
-
|
|
54
|
-
```markdown
|
|
55
|
-
## GitHub Search Results for "<topic>"
|
|
56
|
-
|
|
57
|
-
### Issues
|
|
58
|
-
- [Issue #123: Title](url) - Brief explanation of relevance.
|
|
59
|
-
- [Issue #456: Title](url) - Brief explanation of relevance.
|
|
60
|
-
|
|
61
|
-
### Pull Requests
|
|
62
|
-
- [PR #789: Title](url) - Brief explanation of relevance.
|
|
63
|
-
|
|
64
|
-
### Discussions
|
|
65
|
-
- [Discussion: Title](url) - Brief explanation of relevance.
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
## Important Notes
|
|
69
|
-
|
|
70
|
-
- Only include results that are actually relevant to the search topic
|
|
71
|
-
- If a category (issues, PRs, discussions) has no relevant results, note "No relevant items found"
|
|
72
|
-
- Keep descriptions to a single sentence
|
|
73
|
-
- If discussions search fails (repository doesn't have discussions), skip that section
|
|
74
|
-
- Prioritize open items over closed ones, but include relevant closed items too
|
|
75
|
-
|
|
76
|
-
## Command Guidelines
|
|
77
|
-
|
|
78
|
-
- **NEVER use pipes or shell fallbacks** like `|| echo "..."` or `| grep ...` in your commands
|
|
79
|
-
- Run each `gh` command directly without any error handling wrappers
|
|
80
|
-
- If a command returns an error or empty result, handle it in your analysis logic, not with shell constructs
|
|
81
|
-
- Run the three searches (issues, PRs, discussions) as separate Bash commands
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{}
|
|
@@ -1,22 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"permissions": {
|
|
3
|
-
"allow": [
|
|
4
|
-
"Bash(uv run:*)",
|
|
5
|
-
"Bash(make install-dev-notebooks:*)",
|
|
6
|
-
"Bash(make lint:*)",
|
|
7
|
-
"Bash(curl:*)",
|
|
8
|
-
"Bash(uv pip:*)",
|
|
9
|
-
"Bash(make lint-fix:*)",
|
|
10
|
-
"Bash(mv:*)",
|
|
11
|
-
"Bash(make test:*)",
|
|
12
|
-
"Bash(make serve-docs-locally:*)",
|
|
13
|
-
"Bash(rm:*)",
|
|
14
|
-
"Bash(ls:*)",
|
|
15
|
-
"Bash(find:*)",
|
|
16
|
-
"Bash(git -C /Users/johnnygreco/projects/nvidia/DataDesigner diff --stat)",
|
|
17
|
-
"Bash(git cherry-pick:*)"
|
|
18
|
-
],
|
|
19
|
-
"deny": [],
|
|
20
|
-
"ask": []
|
|
21
|
-
}
|
|
22
|
-
}
|