data-designer 0.3.8rc1__tar.gz → 0.3.8rc2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer-0.3.8rc2/.claude/agents/docs-searcher.md +74 -0
- data_designer-0.3.8rc2/.claude/agents/github-searcher.md +81 -0
- data_designer-0.3.8rc2/.claude/settings.json +1 -0
- data_designer-0.3.8rc2/.claude/settings.local.json +22 -0
- data_designer-0.3.8rc2/.claude/skills/new-sdg/SKILL.md +117 -0
- data_designer-0.3.8rc2/.claude/skills/search-docs/SKILL.md +16 -0
- data_designer-0.3.8rc2/.claude/skills/search-github/SKILL.md +16 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.gitignore +0 -2
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/PKG-INFO +1 -1
- data_designer-0.3.8rc2/docs/code_reference/run_config.md +6 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/recipes/cards.md +3 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/mkdocs.yml +1 -0
- data_designer-0.3.8rc2/packages/data-designer/src/data_designer/interface/_version.py +34 -0
- data_designer-0.3.8rc2/packages/data-designer-config/src/data_designer/config/_version.py +34 -0
- data_designer-0.3.8rc2/packages/data-designer-engine/src/data_designer/engine/_version.py +34 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/_version.py +2 -2
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/default_model_settings.py +14 -6
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/run_config.py +3 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/dataset_builders/column_wise_builder.py +2 -5
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/dataset_builders/utils/concurrency.py +0 -3
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/interface/data_designer.py +8 -5
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/test_default_model_settings.py +76 -6
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/dataset_builders/test_column_wise_builder.py +4 -6
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.github/ISSUE_TEMPLATE/bug-report.yml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.github/ISSUE_TEMPLATE/config.yml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.github/ISSUE_TEMPLATE/development-task.yml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.github/ISSUE_TEMPLATE/feature-request.yml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.github/workflows/build-docs.yml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.github/workflows/build-notebooks.yml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.github/workflows/check-colab-notebooks.yml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.github/workflows/ci.yml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.github/workflows/dco-assistant.yml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.github/workflows/pack-tutorials.yml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.github/workflows/semantic-pull-requests.yml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/.pre-commit-config.yaml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/AGENTS.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/CLAUDE.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/CODE_OF_CONDUCT.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/CONTRIBUTING.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/DCO +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/LICENSE +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/Makefile +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/README.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/VERSIONING.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/CONTRIBUTING.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/assets/palette-favicon.png +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/assets/recipes/code_generation/text_to_python.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/assets/recipes/code_generation/text_to_sql.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/assets/recipes/qa_and_chat/multi_turn_chat.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/assets/recipes/qa_and_chat/product_info_qa.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/code_reference/analysis.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/code_reference/column_configs.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/code_reference/config_builder.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/code_reference/data_designer_config.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/code_reference/models.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/code_reference/processors.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/code_reference/sampler_params.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/code_reference/validator_params.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/colab_notebooks/1-the-basics.ipynb +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/colab_notebooks/4-providing-images-as-context.ipynb +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/concepts/columns.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/concepts/models/configure-model-settings-with-the-cli.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/concepts/models/custom-model-settings.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/concepts/models/default-model-settings.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/concepts/models/inference-parameters.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/concepts/models/model-configs.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/concepts/models/model-providers.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/concepts/person_sampling.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/concepts/processors.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/concepts/validators.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/css/mkdocstrings.css +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/css/style.css +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/images/top-models.png +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/index.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/installation.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/js/toc-toggle.js +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/notebook_source/1-the-basics.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/notebook_source/3-seeding-with-a-dataset.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/notebook_source/4-providing-images-as-context.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/notebook_source/README.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/notebook_source/_README.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/notebook_source/_pyproject.toml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/overrides/main.html +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/plugins/available.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/plugins/example.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/plugins/overview.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/quick-start.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/recipes/code_generation/text_to_python.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/recipes/code_generation/text_to_sql.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/recipes/qa_and_chat/multi_turn_chat.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/recipes/qa_and_chat/product_info_qa.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/docs/scripts/generate_colab_notebooks.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/pyproject.toml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/scripts/test_license_headers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/scripts/update_license_headers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/README.md +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/commands/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/commands/download.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/commands/list.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/commands/models.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/commands/providers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/commands/reset.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/controllers/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/controllers/download_controller.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/controllers/model_controller.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/controllers/provider_controller.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/forms/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/forms/builder.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/forms/field.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/forms/form.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/forms/model_builder.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/forms/provider_builder.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/main.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/repositories/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/repositories/base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/repositories/model_repository.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/repositories/persona_repository.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/repositories/provider_repository.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/services/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/services/download_service.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/services/model_service.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/services/provider_service.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/ui.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/cli/utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/analysis/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/analysis/column_profilers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/analysis/column_statistics.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/analysis/dataset_profiler.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/analysis/utils/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/analysis/utils/reporting.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/column_configs.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/column_types.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/config_builder.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/data_designer_config.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/dataset_builders.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/dataset_metadata.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/exports.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/interface.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/models.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/preview_results.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/processors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/sampler_constraints.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/sampler_params.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/seed.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/seed_source.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/seed_source_types.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/utils/code_lang.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/utils/constants.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/utils/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/utils/info.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/utils/io_helpers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/utils/misc.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/utils/numerical_helpers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/utils/type_helpers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/utils/visualization.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/validator_params.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/analysis/column_profilers/base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/analysis/column_statistics.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/analysis/dataset_profiler.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/analysis/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/generators/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/generators/base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/generators/embedding.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/generators/llm_completion.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/registry.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/utils/generator_classification.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/compiler.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/configurable_task.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/dataset_builders/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/model_provider.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/facade.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/factory.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/litellm_overrides.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/parsers/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/parsers/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/parsers/parser.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/parsers/postprocessors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/parsers/types.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/recipes/base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/registry.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/telemetry.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/usage.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/models/utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/ginja/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/ginja/ast.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/ginja/environment.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/ginja/record.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/gsonschema/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/processors/base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/processors/drop_columns.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/processors/registry.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/processors/schema_transform.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/processing/utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/registry/base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/registry/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/resources/managed_storage.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/resources/resource_provider.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/resources/seed_reader.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/column.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/constraints.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/generator.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/people_gen.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/schema.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/sampling_gen/utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/secret_resolver.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/validation.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/validators/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/validators/base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/validators/local_callable.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/validators/python.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/validators/remote.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/engine/validators/sql.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/essentials/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/interface/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/interface/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/interface/results.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/lazy_heavy_imports.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/logging.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/plugin_manager.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/plugins/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/plugins/errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/plugins/plugin.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/plugins/registry.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/plugins/testing/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/plugins/testing/stubs.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/plugins/testing/utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/commands/test_download_command.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/commands/test_list_command.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/commands/test_models_command.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/commands/test_providers_command.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/commands/test_reset_command.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/conftest.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/controllers/test_download_controller.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/controllers/test_model_controller.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/controllers/test_provider_controller.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/forms/test_field.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/forms/test_form.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/forms/test_model_builder.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/forms/test_provider_builder.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/repositories/test_model_repository.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/repositories/test_persona_repository.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/repositories/test_provider_repository.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/services/test_download_service.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/services/test_model_service.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/services/test_provider_service.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/cli/test_cli_utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/analysis/conftest.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/analysis/test_column_statistics.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/analysis/test_dataset_profiler_results.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/analysis/utils/test_reporting.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/test_columns.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/test_config_builder.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/test_data_designer_config.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/test_models.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/test_processors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/test_sampler_constraints.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/test_sampler_params.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/test_seed.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/test_seed_source.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/test_validator_params.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/utils/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/utils/test_code_lang.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/utils/test_info.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/utils/test_io_helpers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/utils/test_misc.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/utils/test_type_helpers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/config/utils/test_visualization.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/conftest.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/analysis/column_profilers/test_base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/analysis/conftest.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/analysis/test_dataset_profiler.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/analysis/test_errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/generators/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/generators/test_embedding.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/generators/test_expression.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/generators/test_llm_completion_generators.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/generators/test_samplers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/generators/test_validation.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/test_registry.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/utils/test_generator_classification.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/conftest.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/conftest.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/parsers/test_parser.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/parsers/test_parsers_types.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/parsers/test_postprocessors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/recipes/test_recipe_base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/recipes/test_response_recipes.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/stub_secrets.json +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/test_facade.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/test_litellm_overrides.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/test_model_errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/test_model_registry.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/test_model_utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/models/test_usage.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/ginja/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/ginja/test_ast.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/ginja/test_environment.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/ginja/test_exceptions.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/ginja/test_record.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/gsonschema/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/gsonschema/test_types.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/gsonschema/test_validators.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/processors/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/processors/test_drop_columns.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/processors/test_registry.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/processors/test_schema_transform.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/processing/test_utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/registry/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/registry/conftest.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/registry/test_base.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/registry/test_data_designer_registry.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/registry/test_errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/resources/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/resources/conftest.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/resources/test_managed_storage.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/resources/test_resource_provider.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/resources/test_seed_reader.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/conftest.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/entities/test_person.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/test_column.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/test_constraints.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/test_generator.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/test_people_gen.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/test_schema.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/sampling_gen/test_utils.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/test_compiler.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/test_configurable_task.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/test_dataset_metadata.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/test_engine_errors.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/test_model_provider.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/test_secret_resolver.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/test_validation.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/validators/test_local_callable.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/validators/test_python.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/validators/test_remote.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/engine/validators/test_sql.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/essentials/test_init.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/interface/test_data_designer.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/interface/test_results.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/plugins/test_plugin.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/plugins/test_plugin_registry.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/test_import_perf.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/test_logging.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests/test_plugin_manager.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests_e2e/pyproject.toml +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests_e2e/src/data_designer_e2e_tests/plugins/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/config.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/impl.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests_e2e/src/data_designer_e2e_tests/plugins/column_generator/plugin.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/__init__.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/config.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/impl.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests_e2e/src/data_designer_e2e_tests/plugins/seed_reader/plugin.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests_e2e/tests/test_e2e.py +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/tests_e2e/tests/test_seed.csv +0 -0
- {data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/uv.lock +0 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: docs-searcher
|
|
3
|
+
description: Search local documentation in the docs/ folder for content related to a topic. Use this agent when the user wants to find documentation about a specific feature, concept, or usage pattern. Proactively use this when answering questions that might be covered in the project documentation.
|
|
4
|
+
tools: Glob, Grep, Read
|
|
5
|
+
model: haiku
|
|
6
|
+
permissionMode: bypassPermissions
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# Documentation Search Agent
|
|
10
|
+
|
|
11
|
+
You are a documentation search specialist. Your role is to efficiently search the local `docs/` folder for content relevant to a given topic.
|
|
12
|
+
|
|
13
|
+
## Instructions
|
|
14
|
+
|
|
15
|
+
When given a search topic, perform the following searches:
|
|
16
|
+
|
|
17
|
+
1. **Find all documentation files** in the docs/ folder:
|
|
18
|
+
```
|
|
19
|
+
Glob pattern: "docs/**/*.md"
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
2. **Search for topic keywords** across all markdown files:
|
|
23
|
+
```
|
|
24
|
+
Grep pattern: "<topic keywords>" in path: "docs/"
|
|
25
|
+
```
|
|
26
|
+
- Try multiple variations of the search terms (singular/plural, related terms)
|
|
27
|
+
- Use case-insensitive search (`-i: true`)
|
|
28
|
+
|
|
29
|
+
3. **Read relevant sections** from files with matches:
|
|
30
|
+
- Read the matched files to get full context
|
|
31
|
+
- Extract the most relevant sections around the matches
|
|
32
|
+
|
|
33
|
+
4. **Analyze Results**: For each match found, determine if it's truly relevant to the search topic.
|
|
34
|
+
|
|
35
|
+
5. **Output Format**: Return a structured markdown summary with:
|
|
36
|
+
- Links to relevant documentation files
|
|
37
|
+
- Brief excerpts showing the relevant content
|
|
38
|
+
- A sentence explaining why each result is pertinent
|
|
39
|
+
|
|
40
|
+
## Output Template
|
|
41
|
+
|
|
42
|
+
```markdown
|
|
43
|
+
## Documentation Search Results for "<topic>"
|
|
44
|
+
|
|
45
|
+
### Relevant Documentation
|
|
46
|
+
|
|
47
|
+
- **[docs/path/to/file.md](docs/path/to/file.md)**
|
|
48
|
+
> Brief excerpt showing relevant content...
|
|
49
|
+
|
|
50
|
+
Explanation of why this is relevant to the search topic.
|
|
51
|
+
|
|
52
|
+
- **[docs/another/file.md](docs/another/file.md)**
|
|
53
|
+
> Another relevant excerpt...
|
|
54
|
+
|
|
55
|
+
Explanation of relevance.
|
|
56
|
+
|
|
57
|
+
### Summary
|
|
58
|
+
Brief summary of what was found and any recommendations for the user.
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Important Notes
|
|
62
|
+
|
|
63
|
+
- Only include results that are actually relevant to the search topic
|
|
64
|
+
- If no relevant documentation is found, clearly state that
|
|
65
|
+
- Keep excerpts concise but include enough context to be useful
|
|
66
|
+
- Prioritize user guides and examples over API reference when both exist
|
|
67
|
+
- If the docs/ folder doesn't exist or is empty, report that clearly
|
|
68
|
+
|
|
69
|
+
## Search Strategy
|
|
70
|
+
|
|
71
|
+
1. Start with exact keyword matches
|
|
72
|
+
2. If few results, try related terms or partial matches
|
|
73
|
+
3. Check file names for topic-related terms (e.g., searching "models" should check files named `models.md`, `model-config.md`, etc.)
|
|
74
|
+
4. Look at section headings within files for topic mentions
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: github-searcher
|
|
3
|
+
description: Search GitHub issues, discussions, and PRs for content related to a topic. Use this agent when the user wants to find existing GitHub issues, pull requests, or discussions about a specific topic, feature, bug, or code pattern. Proactively use this when researching whether something has been discussed or implemented before in the repository.
|
|
4
|
+
tools: Bash
|
|
5
|
+
model: haiku
|
|
6
|
+
permissionMode: bypassPermissions
|
|
7
|
+
---
|
|
8
|
+
|
|
9
|
+
# GitHub Content Search Agent
|
|
10
|
+
|
|
11
|
+
You are a GitHub search specialist. Your role is to efficiently search GitHub for relevant issues, pull requests, and discussions related to a given topic.
|
|
12
|
+
|
|
13
|
+
## Instructions
|
|
14
|
+
|
|
15
|
+
When given a search topic, perform the following searches:
|
|
16
|
+
|
|
17
|
+
1. **Search Issues** using the `gh` CLI:
|
|
18
|
+
```bash
|
|
19
|
+
gh issue list --search "<topic>" --limit 20 --json number,title,url,body,state
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
2. **Search Pull Requests** using the `gh` CLI:
|
|
23
|
+
```bash
|
|
24
|
+
gh pr list --search "<topic>" --limit 20 --json number,title,url,body,state
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
3. **Search Discussions** using the `gh` CLI (if the repository has discussions enabled):
|
|
28
|
+
```bash
|
|
29
|
+
gh api graphql -f query='
|
|
30
|
+
query($search: String!) {
|
|
31
|
+
search(query: $search, type: DISCUSSION, first: 20) {
|
|
32
|
+
nodes {
|
|
33
|
+
... on Discussion {
|
|
34
|
+
title
|
|
35
|
+
url
|
|
36
|
+
body
|
|
37
|
+
category { name }
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
' -f search="repo:{owner}/{repo} <topic>"
|
|
43
|
+
```
|
|
44
|
+
Note: Get the owner/repo from `gh repo view --json nameWithOwner -q .nameWithOwner`
|
|
45
|
+
|
|
46
|
+
4. **Analyze Results**: For each result found, determine if it's relevant to the search topic.
|
|
47
|
+
|
|
48
|
+
5. **Output Format**: Return a markdown list with:
|
|
49
|
+
- A link to each relevant item (issue, PR, or discussion)
|
|
50
|
+
- A *single* sentence explaining why that link is pertinent to the search topic
|
|
51
|
+
|
|
52
|
+
## Output Template
|
|
53
|
+
|
|
54
|
+
```markdown
|
|
55
|
+
## GitHub Search Results for "<topic>"
|
|
56
|
+
|
|
57
|
+
### Issues
|
|
58
|
+
- [Issue #123: Title](url) - Brief explanation of relevance.
|
|
59
|
+
- [Issue #456: Title](url) - Brief explanation of relevance.
|
|
60
|
+
|
|
61
|
+
### Pull Requests
|
|
62
|
+
- [PR #789: Title](url) - Brief explanation of relevance.
|
|
63
|
+
|
|
64
|
+
### Discussions
|
|
65
|
+
- [Discussion: Title](url) - Brief explanation of relevance.
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Important Notes
|
|
69
|
+
|
|
70
|
+
- Only include results that are actually relevant to the search topic
|
|
71
|
+
- If a category (issues, PRs, discussions) has no relevant results, note "No relevant items found"
|
|
72
|
+
- Keep descriptions to a single sentence
|
|
73
|
+
- If discussions search fails (repository doesn't have discussions), skip that section
|
|
74
|
+
- Prioritize open items over closed ones, but include relevant closed items too
|
|
75
|
+
|
|
76
|
+
## Command Guidelines
|
|
77
|
+
|
|
78
|
+
- **NEVER use pipes or shell fallbacks** like `|| echo "..."` or `| grep ...` in your commands
|
|
79
|
+
- Run each `gh` command directly without any error handling wrappers
|
|
80
|
+
- If a command returns an error or empty result, handle it in your analysis logic, not with shell constructs
|
|
81
|
+
- Run the three searches (issues, PRs, discussions) as separate Bash commands
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"Bash(uv run:*)",
|
|
5
|
+
"Bash(make install-dev-notebooks:*)",
|
|
6
|
+
"Bash(make lint:*)",
|
|
7
|
+
"Bash(curl:*)",
|
|
8
|
+
"Bash(uv pip:*)",
|
|
9
|
+
"Bash(make lint-fix:*)",
|
|
10
|
+
"Bash(mv:*)",
|
|
11
|
+
"Bash(make test:*)",
|
|
12
|
+
"Bash(make serve-docs-locally:*)",
|
|
13
|
+
"Bash(rm:*)",
|
|
14
|
+
"Bash(ls:*)",
|
|
15
|
+
"Bash(find:*)",
|
|
16
|
+
"Bash(git -C /Users/johnnygreco/projects/nvidia/DataDesigner diff --stat)",
|
|
17
|
+
"Bash(git cherry-pick:*)"
|
|
18
|
+
],
|
|
19
|
+
"deny": [],
|
|
20
|
+
"ask": []
|
|
21
|
+
}
|
|
22
|
+
}
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: new-sdg
|
|
3
|
+
description: Implement a new synthetic data generator using NeMo Data Designer by defining its configuration and executing a preview job.
|
|
4
|
+
argument-hint: <dataset-description>
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Your Goal
|
|
8
|
+
|
|
9
|
+
Implement a new synthetic data generator using NeMo Data Designer to match the user's specifications below.
|
|
10
|
+
|
|
11
|
+
<dataset-description>
|
|
12
|
+
**$ARGUMENTS**
|
|
13
|
+
</dataset-description>
|
|
14
|
+
|
|
15
|
+
## Getting Exact Specifications
|
|
16
|
+
|
|
17
|
+
The user will provide you with some description, but it is likely that you
|
|
18
|
+
do not have enough information to precisely define what they want. It is hard
|
|
19
|
+
for a user to define everything up front. Ask follow-up questions to the user
|
|
20
|
+
using the AskUser tool to narrow down on precisely what they want.
|
|
21
|
+
|
|
22
|
+
Common things to make precise are:
|
|
23
|
+
|
|
24
|
+
- IMPORTANT: What the "axes of diversity" are -- e.g. what should be well represented and diverse in the resulting dataset.
|
|
25
|
+
- The kind and nature of any input data to the dataset.
|
|
26
|
+
- What variables should be randomized.
|
|
27
|
+
- The schema of the final dataset.
|
|
28
|
+
- The structure of any required structured output columns.
|
|
29
|
+
- What facets of the output dataset are important to capture.
|
|
30
|
+
|
|
31
|
+
## Interactive, Iterative Design
|
|
32
|
+
|
|
33
|
+
> USER: Request
|
|
34
|
+
> YOU: Clarifying AskUser Questions
|
|
35
|
+
> YOU: Script Implementation (with preview)
|
|
36
|
+
> YOU: Script Execution
|
|
37
|
+
> YOU: Result Presentation
|
|
38
|
+
> YOU: Followup Questions
|
|
39
|
+
> USER: Respond
|
|
40
|
+
> YOU: ...repeat...
|
|
41
|
+
|
|
42
|
+
Very often, the initial implementation will not conform precisely to what the user wants. You are to engage in an **iterative design loop** with the user. As shown
|
|
43
|
+
in the example below, you will construct a configuration, then review its outputs,
|
|
44
|
+
present those outputs to the user, and ask follow up questions.
|
|
45
|
+
|
|
46
|
+
Depending on the user responses, you will then edit the script, re-run it, present the user with the results, ask follow-ups, and so on. When showing results to the user DO NOT SUMMARIZE content, it is *very important* that you show them the records as-is so they can make thoughtful decisions.
|
|
47
|
+
|
|
48
|
+
DO NOT disengage from this **iterative design loop** unless commanded by the user.
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
## Implementing a NeMo Data Designer Synthetic Data Generator
|
|
52
|
+
|
|
53
|
+
- You will be writing a new python script for execution.
|
|
54
|
+
- The script should be made in the current working directory, so `$(pwd)/script-name.py`.
|
|
55
|
+
- Implement the script as a stand-alone, `uv`-executable script (https://docs.astral.sh/uv/guides/scripts/#creating-a-python-script).
|
|
56
|
+
- The script should depend on the latest version of `data-designer`.
|
|
57
|
+
- Include other third-party dependencies only if the job requires it.
|
|
58
|
+
- Model aliases are required when defining LLM generation columns.
|
|
59
|
+
- Before implementing, make sure to use the Explore tool to understand the src/ and docs/.
|
|
60
|
+
- Review available model aliases and providers.
|
|
61
|
+
- You will need to ask the user what Model Provider they want to use via AskUser tool.
|
|
62
|
+
- You may use Web Search to find any information you need to help you construct the SDG, since real-world grounding is key to a good dataset.
|
|
63
|
+
- If you need to use a large number of categories for a sampler, just build a pandas DataFrame and use it as a Seed dataset.
|
|
64
|
+
|
|
65
|
+
### Model Aliases and Providers
|
|
66
|
+
|
|
67
|
+
View known model aliases and providers with the following command. You will need a longer timeout on first run (package first-time boot).
|
|
68
|
+
|
|
69
|
+
```bash
|
|
70
|
+
uv run --with data-designer data-designer config list
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
### Real World Seed Data
|
|
74
|
+
|
|
75
|
+
Depending on user requirements, you may need to access real-world datasets to serve as Seed datasets for your Data Designer SDG.
|
|
76
|
+
In these cases, you may use Web Search tools to search for datasets available on HuggingFace, and use the `datasets` python library
|
|
77
|
+
to load them. You will have to convert them to Pandas DataFrames in these cases.
|
|
78
|
+
|
|
79
|
+
If you do use real-world data, pay attention to file sizes and avoid large file transfers. Only download small sections of datasets or use a streaming option.
|
|
80
|
+
|
|
81
|
+
### Example
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
# /// script
|
|
85
|
+
# dependencies = [
|
|
86
|
+
# "data-designer",
|
|
87
|
+
# ]
|
|
88
|
+
# ///
|
|
89
|
+
|
|
90
|
+
# ... data designer config_builder implementation
|
|
91
|
+
|
|
92
|
+
def build_config() -> DataDesignerConfigBuilder:
|
|
93
|
+
"""Implements the definition of the synthetic data generator.
|
|
94
|
+
"""
|
|
95
|
+
config_builder = DataDesignerConfigBuilder()
|
|
96
|
+
|
|
97
|
+
## Add whatever columns need to be added
|
|
98
|
+
# config_builder.add_column(...)
|
|
99
|
+
# config_builder.add_column(...)
|
|
100
|
+
# config_builder.add_column(...)
|
|
101
|
+
|
|
102
|
+
return config_builder
|
|
103
|
+
|
|
104
|
+
if __name__ == "__main__":
|
|
105
|
+
config_builder = build_config()
|
|
106
|
+
designer = DataDesigner()
|
|
107
|
+
preview = designer.preview(config_builder=config_builder)
|
|
108
|
+
|
|
109
|
+
# The following command will print a random sample record
|
|
110
|
+
# which you can present to the user
|
|
111
|
+
preview.display_sample_record()
|
|
112
|
+
|
|
113
|
+
# The raw data is located in this Pandas DataFrame object.
|
|
114
|
+
# You can implement code to display some or all of this
|
|
115
|
+
# to STDOUT so you can see the outputs and report to the user.
|
|
116
|
+
preview.dataset
|
|
117
|
+
```
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: search-docs
|
|
3
|
+
description: Search local documentation in the docs/ folder for content related to a topic
|
|
4
|
+
argument-hint: <search-topic>
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Documentation Search
|
|
8
|
+
|
|
9
|
+
Use the `docs-searcher` subagent to search local documentation for content related to: **$ARGUMENTS**
|
|
10
|
+
|
|
11
|
+
Call the Task tool with:
|
|
12
|
+
- `subagent_type: "docs-searcher"`
|
|
13
|
+
- `mode: "bypassPermissions"`
|
|
14
|
+
- `prompt`: the search topic
|
|
15
|
+
|
|
16
|
+
Report the results back to the user exactly as returned by the agent.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: search-github
|
|
3
|
+
description: Search GitHub issues, discussions, and PRs for content related to a topic
|
|
4
|
+
argument-hint: <search-topic>
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# GitHub Search
|
|
8
|
+
|
|
9
|
+
Use the `github-searcher` subagent to search GitHub for content related to: **$ARGUMENTS**
|
|
10
|
+
|
|
11
|
+
Call the Task tool with:
|
|
12
|
+
- `subagent_type: "github-searcher"`
|
|
13
|
+
- `mode: "bypassPermissions"`
|
|
14
|
+
- `prompt`: the search topic
|
|
15
|
+
|
|
16
|
+
Report the results back to the user exactly as returned by the agent.
|
|
@@ -6,6 +6,9 @@ Each recipe is a self-contained example that can be run independently.
|
|
|
6
6
|
!!! question "New to Data Designer?"
|
|
7
7
|
Recipes provide working code for specific use cases without detailed explanations. If you're learning Data Designer for the first time, we recommend starting with our [tutorial notebooks](../../notebooks/), which offer step-by-step guidance and explain core concepts. Once you're familiar with the basics, return here for practical, ready-to-use implementations.
|
|
8
8
|
|
|
9
|
+
!!! tip "Prerequisite"
|
|
10
|
+
These recipes use the OpenAI model provider by default. Ensure your OpenAI model provider has been set up using the Data Designer CLI before running a recipe.
|
|
11
|
+
|
|
9
12
|
<div class="grid cards" markdown>
|
|
10
13
|
|
|
11
14
|
- :material-snake:{ .lg .middle } **Text to Python**
|
|
@@ -43,6 +43,7 @@ nav:
|
|
|
43
43
|
- column_configs: code_reference/column_configs.md
|
|
44
44
|
- config_builder: code_reference/config_builder.md
|
|
45
45
|
- data_designer_config: code_reference/data_designer_config.md
|
|
46
|
+
- run_config: code_reference/run_config.md
|
|
46
47
|
- sampler_params: code_reference/sampler_params.md
|
|
47
48
|
- validator_params: code_reference/validator_params.md
|
|
48
49
|
- processors: code_reference/processors.md
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '0.3.8rc2.dev38+ga53f0df07.d20260126'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 8, 'rc2', 'dev38', 'ga53f0df07.d20260126')
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '0.3.8rc2.dev38+ga53f0df07.d20260126'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 8, 'rc2', 'dev38', 'ga53f0df07.d20260126')
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# file generated by setuptools-scm
|
|
2
|
+
# don't change, don't track in version control
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"__version__",
|
|
6
|
+
"__version_tuple__",
|
|
7
|
+
"version",
|
|
8
|
+
"version_tuple",
|
|
9
|
+
"__commit_id__",
|
|
10
|
+
"commit_id",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
TYPE_CHECKING = False
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from typing import Tuple
|
|
16
|
+
from typing import Union
|
|
17
|
+
|
|
18
|
+
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
+
COMMIT_ID = Union[str, None]
|
|
20
|
+
else:
|
|
21
|
+
VERSION_TUPLE = object
|
|
22
|
+
COMMIT_ID = object
|
|
23
|
+
|
|
24
|
+
version: str
|
|
25
|
+
__version__: str
|
|
26
|
+
__version_tuple__: VERSION_TUPLE
|
|
27
|
+
version_tuple: VERSION_TUPLE
|
|
28
|
+
commit_id: COMMIT_ID
|
|
29
|
+
__commit_id__: COMMIT_ID
|
|
30
|
+
|
|
31
|
+
__version__ = version = '0.3.8rc2.dev38+ga53f0df07.d20260126'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 8, 'rc2', 'dev38', 'ga53f0df07.d20260126')
|
|
33
|
+
|
|
34
|
+
__commit_id__ = commit_id = None
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.3.
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 3, 8, '
|
|
31
|
+
__version__ = version = '0.3.8rc2'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 3, 8, 'rc2')
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
{data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/config/default_model_settings.py
RENAMED
|
@@ -71,12 +71,20 @@ def get_default_model_configs() -> list[ModelConfig]:
|
|
|
71
71
|
return []
|
|
72
72
|
|
|
73
73
|
|
|
74
|
-
def
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
74
|
+
def get_providers_with_missing_api_keys(providers: list[ModelProvider]) -> list[ModelProvider]:
|
|
75
|
+
providers_with_missing_keys = []
|
|
76
|
+
|
|
77
|
+
for provider in providers:
|
|
78
|
+
if provider.api_key is None:
|
|
79
|
+
# No API key specified at all
|
|
80
|
+
providers_with_missing_keys.append(provider)
|
|
81
|
+
elif provider.api_key.isupper() and "_" in provider.api_key:
|
|
82
|
+
# Looks like an environment variable name, check if it's set
|
|
83
|
+
if os.environ.get(provider.api_key) is None:
|
|
84
|
+
providers_with_missing_keys.append(provider)
|
|
85
|
+
# else: It's an actual API key value (not an env var), so it's valid
|
|
86
|
+
|
|
87
|
+
return providers_with_missing_keys
|
|
80
88
|
|
|
81
89
|
|
|
82
90
|
def get_default_providers() -> list[ModelProvider]:
|
|
@@ -26,6 +26,8 @@ class RunConfig(ConfigBase):
|
|
|
26
26
|
buffer_size: Number of records to process in each batch during dataset generation.
|
|
27
27
|
A batch is processed end-to-end (column generation, post-batch processors, and writing the batch
|
|
28
28
|
to artifact storage) before moving on to the next batch. Must be > 0. Default is 1000.
|
|
29
|
+
non_inference_max_parallel_workers: Maximum number of worker threads used for non-inference
|
|
30
|
+
cell-by-cell generators. Must be >= 1. Default is 4.
|
|
29
31
|
max_conversation_restarts: Maximum number of full conversation restarts permitted when
|
|
30
32
|
generation tasks call `ModelFacade.generate(...)`. Must be >= 0. Default is 5.
|
|
31
33
|
max_conversation_correction_steps: Maximum number of correction rounds permitted within a
|
|
@@ -37,6 +39,7 @@ class RunConfig(ConfigBase):
|
|
|
37
39
|
shutdown_error_rate: float = Field(default=0.5, ge=0.0, le=1.0)
|
|
38
40
|
shutdown_error_window: int = Field(default=10, ge=0)
|
|
39
41
|
buffer_size: int = Field(default=1000, gt=0)
|
|
42
|
+
non_inference_max_parallel_workers: int = Field(default=4, ge=1)
|
|
40
43
|
max_conversation_restarts: int = Field(default=5, ge=0)
|
|
41
44
|
max_conversation_correction_steps: int = Field(default=0, ge=0)
|
|
42
45
|
|
|
@@ -31,10 +31,7 @@ from data_designer.engine.compiler import compile_data_designer_config
|
|
|
31
31
|
from data_designer.engine.dataset_builders.artifact_storage import SDG_CONFIG_FILENAME, ArtifactStorage
|
|
32
32
|
from data_designer.engine.dataset_builders.errors import DatasetGenerationError, DatasetProcessingError
|
|
33
33
|
from data_designer.engine.dataset_builders.multi_column_configs import MultiColumnConfig
|
|
34
|
-
from data_designer.engine.dataset_builders.utils.concurrency import
|
|
35
|
-
MAX_CONCURRENCY_PER_NON_LLM_GENERATOR,
|
|
36
|
-
ConcurrentThreadExecutor,
|
|
37
|
-
)
|
|
34
|
+
from data_designer.engine.dataset_builders.utils.concurrency import ConcurrentThreadExecutor
|
|
38
35
|
from data_designer.engine.dataset_builders.utils.config_compiler import compile_dataset_builder_column_configs
|
|
39
36
|
from data_designer.engine.dataset_builders.utils.dataset_batch_manager import DatasetBatchManager
|
|
40
37
|
from data_designer.engine.models.telemetry import InferenceEvent, NemoSourceEnum, TaskStatusEnum, TelemetryHandler
|
|
@@ -202,7 +199,7 @@ class ColumnWiseDatasetBuilder:
|
|
|
202
199
|
self.batch_manager.add_records(df.to_dict(orient="records"))
|
|
203
200
|
|
|
204
201
|
def _run_cell_by_cell_generator(self, generator: ColumnGenerator) -> None:
|
|
205
|
-
max_workers =
|
|
202
|
+
max_workers = self._resource_provider.run_config.non_inference_max_parallel_workers
|
|
206
203
|
if isinstance(generator, ColumnGeneratorWithModel):
|
|
207
204
|
max_workers = generator.inference_parameters.max_parallel_requests
|
|
208
205
|
self._fan_out_with_threads(generator, max_workers=max_workers)
|
|
@@ -16,9 +16,6 @@ from data_designer.engine.errors import DataDesignerRuntimeError, ErrorTrap
|
|
|
16
16
|
|
|
17
17
|
logger = logging.getLogger(__name__)
|
|
18
18
|
|
|
19
|
-
# Constants
|
|
20
|
-
MAX_CONCURRENCY_PER_NON_LLM_GENERATOR = 4
|
|
21
|
-
|
|
22
19
|
|
|
23
20
|
class ExecutorResults(BaseModel):
|
|
24
21
|
failure_threshold: float = 0.0 # Error rate threshold
|
{data_designer-0.3.8rc1 → data_designer-0.3.8rc2}/src/data_designer/interface/data_designer.py
RENAMED
|
@@ -12,9 +12,9 @@ from data_designer.config.config_builder import DataDesignerConfigBuilder
|
|
|
12
12
|
from data_designer.config.data_designer_config import DataDesignerConfig
|
|
13
13
|
from data_designer.config.default_model_settings import (
|
|
14
14
|
get_default_model_configs,
|
|
15
|
-
get_default_model_providers_missing_api_keys,
|
|
16
15
|
get_default_provider_name,
|
|
17
16
|
get_default_providers,
|
|
17
|
+
get_providers_with_missing_api_keys,
|
|
18
18
|
)
|
|
19
19
|
from data_designer.config.interface import DataDesignerInterface
|
|
20
20
|
from data_designer.config.models import (
|
|
@@ -28,7 +28,6 @@ from data_designer.config.utils.constants import (
|
|
|
28
28
|
MANAGED_ASSETS_PATH,
|
|
29
29
|
MODEL_CONFIGS_FILE_PATH,
|
|
30
30
|
MODEL_PROVIDERS_FILE_PATH,
|
|
31
|
-
PREDEFINED_PROVIDERS,
|
|
32
31
|
)
|
|
33
32
|
from data_designer.config.utils.info import InfoType, InterfaceInfo
|
|
34
33
|
from data_designer.engine.analysis.dataset_profiler import DataDesignerDatasetProfiler, DatasetProfilerConfig
|
|
@@ -317,7 +316,8 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
317
316
|
|
|
318
317
|
Args:
|
|
319
318
|
run_config: A RunConfig instance containing runtime settings such as
|
|
320
|
-
early shutdown behavior
|
|
319
|
+
early shutdown behavior, batch sizing via `buffer_size`, and non-inference worker
|
|
320
|
+
concurrency via `non_inference_max_parallel_workers`. Import RunConfig from
|
|
321
321
|
data_designer.essentials.
|
|
322
322
|
|
|
323
323
|
Example:
|
|
@@ -334,8 +334,11 @@ class DataDesigner(DataDesignerInterface[DatasetCreationResults]):
|
|
|
334
334
|
def _resolve_model_providers(self, model_providers: list[ModelProvider] | None) -> list[ModelProvider]:
|
|
335
335
|
if model_providers is None:
|
|
336
336
|
model_providers = get_default_providers()
|
|
337
|
-
|
|
338
|
-
|
|
337
|
+
# Check which providers have missing API keys (from YAML file or env vars)
|
|
338
|
+
providers_with_missing_keys = get_providers_with_missing_api_keys(model_providers)
|
|
339
|
+
|
|
340
|
+
if len(providers_with_missing_keys) == len(model_providers):
|
|
341
|
+
# All providers have missing API keys
|
|
339
342
|
logger.warning(
|
|
340
343
|
"🚨 You are trying to use a default model provider but your API keys are missing."
|
|
341
344
|
"\n\t\t\tSet the API key for the default providers you intend to use and re-initialize the Data Designer object."
|