data-designer 0.1.4__tar.gz → 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer-0.2.0/.github/workflows/build-docs.yml +68 -0
- data_designer-0.2.0/.github/workflows/check-colab-notebooks.yml +55 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/.gitignore +1 -1
- {data_designer-0.1.4 → data_designer-0.2.0}/.pre-commit-config.yaml +1 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/Makefile +7 -1
- {data_designer-0.1.4 → data_designer-0.2.0}/PKG-INFO +9 -9
- {data_designer-0.1.4 → data_designer-0.2.0}/README.md +8 -8
- data_designer-0.2.0/docs/assets/recipes/code_generation/text_to_python.py +318 -0
- data_designer-0.2.0/docs/assets/recipes/code_generation/text_to_sql.py +323 -0
- data_designer-0.2.0/docs/assets/recipes/qa_and_chat/multi_turn_chat.py +204 -0
- data_designer-0.2.0/docs/assets/recipes/qa_and_chat/product_info_qa.py +224 -0
- data_designer-0.2.0/docs/code_reference/analysis.md +31 -0
- data_designer-0.2.0/docs/code_reference/processors.md +6 -0
- data_designer-0.2.0/docs/colab_notebooks/1-the-basics.ipynb +540 -0
- data_designer-0.2.0/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +570 -0
- data_designer-0.2.0/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +468 -0
- data_designer-0.2.0/docs/colab_notebooks/4-providing-images-as-context.ipynb +531 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/concepts/columns.md +16 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/concepts/models/configure-model-settings-with-the-cli.md +4 -3
- data_designer-0.2.0/docs/concepts/models/custom-model-settings.md +229 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/concepts/models/default-model-settings.md +10 -4
- data_designer-0.2.0/docs/concepts/models/inference-parameters.md +148 -0
- data_designer-0.2.0/docs/concepts/models/model-configs.md +123 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/concepts/models/model-providers.md +5 -1
- data_designer-0.2.0/docs/concepts/person_sampling.md +185 -0
- data_designer-0.2.0/docs/concepts/processors.md +153 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/css/style.css +0 -5
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/js/toc-toggle.js +7 -4
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/1-the-basics.py +9 -7
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py +9 -7
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/3-seeding-with-a-dataset.py +9 -7
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/4-providing-images-as-context.py +5 -11
- data_designer-0.2.0/docs/overrides/main.html +31 -0
- data_designer-0.2.0/docs/plugins/available.md +3 -0
- data_designer-0.2.0/docs/plugins/example.md +306 -0
- data_designer-0.2.0/docs/plugins/overview.md +45 -0
- data_designer-0.2.0/docs/recipes/cards.md +81 -0
- data_designer-0.2.0/docs/recipes/code_generation/text_to_python.md +5 -0
- data_designer-0.2.0/docs/recipes/code_generation/text_to_sql.md +7 -0
- data_designer-0.2.0/docs/recipes/qa_and_chat/multi_turn_chat.md +5 -0
- data_designer-0.2.0/docs/recipes/qa_and_chat/product_info_qa.md +5 -0
- data_designer-0.2.0/docs/scripts/generate_colab_notebooks.py +186 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/mkdocs.yml +32 -5
- {data_designer-0.1.4 → data_designer-0.2.0}/pyproject.toml +11 -16
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/_version.py +2 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/README.md +15 -1
- data_designer-0.2.0/src/data_designer/cli/commands/download.py +56 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/commands/list.py +4 -18
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/controllers/__init__.py +2 -1
- data_designer-0.2.0/src/data_designer/cli/controllers/download_controller.py +217 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/controllers/model_controller.py +4 -3
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/forms/field.py +65 -19
- data_designer-0.2.0/src/data_designer/cli/forms/model_builder.py +332 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/main.py +11 -1
- data_designer-0.2.0/src/data_designer/cli/repositories/persona_repository.py +88 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/services/__init__.py +2 -1
- data_designer-0.2.0/src/data_designer/cli/services/download_service.py +97 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/ui.py +131 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/utils.py +34 -0
- data_designer-0.2.0/src/data_designer/config/analysis/column_profilers.py +157 -0
- data_designer-0.2.0/src/data_designer/config/analysis/column_statistics.py +418 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/analysis/dataset_profiler.py +23 -5
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/analysis/utils/reporting.py +3 -3
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/base.py +3 -3
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/column_configs.py +27 -6
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/column_types.py +24 -17
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/config_builder.py +34 -26
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/data_designer_config.py +7 -7
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/datastore.py +6 -6
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/default_model_settings.py +27 -34
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/exports.py +14 -1
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/models.py +155 -29
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/preview_results.py +5 -4
- data_designer-0.2.0/src/data_designer/config/processors.py +146 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/sampler_constraints.py +1 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/sampler_params.py +31 -31
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/seed.py +1 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/code_lang.py +4 -5
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/constants.py +31 -8
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/io_helpers.py +5 -5
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/misc.py +1 -4
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/numerical_helpers.py +2 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/type_helpers.py +3 -3
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/validation.py +39 -9
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/visualization.py +62 -15
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/validator_params.py +4 -8
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/column_profilers/base.py +0 -7
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +2 -3
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/column_statistics.py +16 -16
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/dataset_profiler.py +25 -4
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +71 -49
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/utils/judge_score_processing.py +5 -5
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/generators/base.py +34 -0
- data_designer-0.2.0/src/data_designer/engine/column_generators/generators/embedding.py +45 -0
- data_designer-0.1.4/src/data_designer/engine/column_generators/generators/llm_generators.py → data_designer-0.2.0/src/data_designer/engine/column_generators/generators/llm_completion.py +17 -49
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/registry.py +4 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +5 -6
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/configurable_task.py +2 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/artifact_storage.py +14 -5
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/column_wise_builder.py +12 -8
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/utils/concurrency.py +6 -6
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/facade.py +66 -9
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/litellm_overrides.py +5 -6
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/parsers/errors.py +2 -4
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/parsers/parser.py +2 -3
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/parsers/postprocessors.py +3 -4
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/parsers/types.py +4 -4
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/registry.py +20 -11
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/usage.py +7 -9
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/ginja/ast.py +1 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/processors/drop_columns.py +1 -1
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/processors/registry.py +3 -0
- data_designer-0.2.0/src/data_designer/engine/processing/processors/schema_transform.py +53 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/utils.py +40 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/registry/base.py +12 -12
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/constraints.py +1 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/data_sources/base.py +14 -14
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/phone_number.py +1 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/people_gen.py +3 -7
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/validators/base.py +2 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/interface/data_designer.py +12 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/interface/results.py +36 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/logging.py +2 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/plugin_manager.py +3 -3
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/plugins/plugin.py +3 -3
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/plugins/registry.py +2 -2
- data_designer-0.2.0/tests/cli/commands/test_download_command.py +71 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/conftest.py +5 -5
- data_designer-0.2.0/tests/cli/controllers/test_download_controller.py +398 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/controllers/test_model_controller.py +2 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/forms/test_field.py +142 -12
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/forms/test_model_builder.py +211 -45
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/repositories/test_model_repository.py +3 -1
- data_designer-0.2.0/tests/cli/repositories/test_persona_repository.py +171 -0
- data_designer-0.2.0/tests/cli/services/test_download_service.py +270 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/services/test_model_service.py +7 -3
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/test_cli_utils.py +73 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/analysis/conftest.py +6 -6
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/analysis/test_column_statistics.py +12 -12
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_columns.py +37 -9
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_config_builder.py +7 -7
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_default_model_settings.py +36 -11
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_models.py +165 -31
- data_designer-0.2.0/tests/config/test_processors.py +139 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_type_helpers.py +8 -8
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_validation.py +37 -3
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_visualization.py +16 -1
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/conftest.py +2 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/column_profilers/test_base.py +0 -11
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +19 -19
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/conftest.py +5 -5
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/test_column_statistics_calculator.py +4 -4
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/utils/test_column_statistics_calculations.py +92 -48
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/utils/test_judge_score_processing.py +35 -35
- data_designer-0.2.0/tests/engine/column_generators/generators/test_embedding.py +49 -0
- data_designer-0.1.4/tests/engine/column_generators/generators/test_llm_generators.py → data_designer-0.2.0/tests/engine/column_generators/generators/test_llm_completion_generators.py +2 -19
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/test_registry.py +1 -1
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/utils/test_judge_score_factory.py +22 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/test_column_wise_builder.py +5 -1
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/conftest.py +15 -3
- data_designer-0.2.0/tests/engine/models/test_facade.py +224 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/test_model_registry.py +44 -34
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/test_usage.py +12 -12
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/processors/test_drop_columns.py +4 -2
- data_designer-0.2.0/tests/engine/processing/processors/test_schema_transform.py +137 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/test_utils.py +17 -0
- data_designer-0.2.0/tests/engine/resources/__init__.py +2 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/test_configurable_task.py +6 -4
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/essentials/test_init.py +12 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/interface/test_data_designer.py +3 -1
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/plugins/test_plugin_registry.py +1 -2
- {data_designer-0.1.4 → data_designer-0.2.0}/uv.lock +100 -0
- data_designer-0.1.4/.github/workflows/build-docs.yml +0 -39
- data_designer-0.1.4/docs/concepts/models/model-configs.md +0 -244
- data_designer-0.1.4/docs/concepts/person_sampling.md +0 -36
- data_designer-0.1.4/docs/concepts/plugins.md +0 -0
- data_designer-0.1.4/docs/overrides/main.html +0 -18
- data_designer-0.1.4/src/data_designer/cli/forms/model_builder.py +0 -125
- data_designer-0.1.4/src/data_designer/config/analysis/column_profilers.py +0 -89
- data_designer-0.1.4/src/data_designer/config/analysis/column_statistics.py +0 -274
- data_designer-0.1.4/src/data_designer/config/processors.py +0 -41
- data_designer-0.1.4/tests/config/test_processors.py +0 -66
- data_designer-0.1.4/tests/engine/models/test_facade.py +0 -174
- {data_designer-0.1.4 → data_designer-0.2.0}/.github/workflows/build-notebooks.yml +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/.github/workflows/ci.yml +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/.github/workflows/dco-assistant.yml +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/.github/workflows/pack-tutorials.yml +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/.github/workflows/semantic-pull-requests.yml +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/AGENTS.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/CLAUDE.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/CODE_OF_CONDUCT.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/CONTRIBUTING.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/DCO +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/LICENSE +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/VERSIONING.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/CONTRIBUTING.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/assets/palette-favicon.png +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/code_reference/column_configs.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/code_reference/config_builder.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/code_reference/data_designer_config.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/code_reference/models.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/code_reference/sampler_params.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/code_reference/validator_params.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/concepts/validators.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/css/mkdocstrings.css +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/index.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/installation.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/README.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/_README.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/notebook_source/_pyproject.toml +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/docs/quick-start.md +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/scripts/update_license_headers.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/commands/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/commands/models.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/commands/providers.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/commands/reset.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/controllers/provider_controller.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/forms/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/forms/builder.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/forms/form.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/forms/provider_builder.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/repositories/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/repositories/base.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/repositories/model_repository.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/repositories/provider_repository.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/services/model_service.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/cli/services/provider_service.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/__init__.py +0 -0
- {data_designer-0.1.4/src/data_designer/engine → data_designer-0.2.0/src/data_designer/config/analysis}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/analysis/utils/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/dataset_builders.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/interface.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/config/utils/info.py +0 -0
- {data_designer-0.1.4/src/data_designer/engine/column_generators → data_designer-0.2.0/src/data_designer/engine}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/analysis/errors.py +0 -0
- {data_designer-0.1.4/src/data_designer/engine/column_generators/generators → data_designer-0.2.0/src/data_designer/engine/column_generators}/__init__.py +0 -0
- {data_designer-0.1.4/src/data_designer/engine/dataset_builders/utils → data_designer-0.2.0/src/data_designer/engine/column_generators/generators}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
- {data_designer-0.1.4/src/data_designer/engine/models → data_designer-0.2.0/src/data_designer/engine/dataset_builders/utils}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/model_provider.py +0 -0
- {data_designer-0.1.4/src/data_designer/engine/models/parsers → data_designer-0.2.0/src/data_designer/engine/models}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/errors.py +0 -0
- {data_designer-0.1.4/src/data_designer/engine/processing/ginja → data_designer-0.2.0/src/data_designer/engine/models/parsers}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/recipes/base.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/models/utils.py +0 -0
- {data_designer-0.1.4/src/data_designer/engine/processing/gsonschema → data_designer-0.2.0/src/data_designer/engine/processing/ginja}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/ginja/environment.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/ginja/record.py +0 -0
- {data_designer-0.1.4/src/data_designer/engine/sampling_gen/entities → data_designer-0.2.0/src/data_designer/engine/processing/gsonschema}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/processing/processors/base.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/registry/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/resources/managed_storage.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/resources/resource_provider.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/resources/seed_dataset_data_store.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/column.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
- {data_designer-0.1.4/src/data_designer/interface → data_designer-0.2.0/src/data_designer/engine/sampling_gen/entities}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/generator.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/schema.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/sampling_gen/utils.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/secret_resolver.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/validators/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/validators/local_callable.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/validators/python.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/validators/remote.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/engine/validators/sql.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/essentials/__init__.py +0 -0
- {data_designer-0.1.4/tests/config/utils → data_designer-0.2.0/src/data_designer/interface}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/interface/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/plugins/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/src/data_designer/plugins/errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/commands/test_list_command.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/commands/test_models_command.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/commands/test_providers_command.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/commands/test_reset_command.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/controllers/test_provider_controller.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/forms/test_form.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/forms/test_provider_builder.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/repositories/test_provider_repository.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/cli/services/test_provider_service.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/analysis/test_dataset_profiler_results.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/analysis/utils/test_reporting.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_data_designer_config.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_datastore.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_sampler_constraints.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_sampler_params.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_seed.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/test_validator_params.py +0 -0
- {data_designer-0.1.4/tests/engine/column_generators/generators → data_designer-0.2.0/tests/config/utils}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_code_lang.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_info.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_io_helpers.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/config/utils/test_misc.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/test_dataset_profiler.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/analysis/test_errors.py +0 -0
- {data_designer-0.1.4/tests/engine/processing → data_designer-0.2.0/tests/engine/column_generators/generators}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/generators/test_expression.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/generators/test_samplers.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/generators/test_validation.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/conftest.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/parsers/test_parser.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/parsers/test_parsers_types.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/parsers/test_postprocessors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/recipes/test_recipe_base.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/recipes/test_response_recipes.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/stub_secrets.json +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/test_litellm_overrides.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/test_model_errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/models/test_model_utils.py +0 -0
- {data_designer-0.1.4/tests/engine/processing/ginja → data_designer-0.2.0/tests/engine/processing}/__init__.py +0 -0
- {data_designer-0.1.4/tests/engine/processing/gsonschema → data_designer-0.2.0/tests/engine/processing/ginja}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/ginja/test_ast.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/ginja/test_environment.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/ginja/test_exceptions.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/ginja/test_record.py +0 -0
- {data_designer-0.1.4/tests/engine/processing/processors → data_designer-0.2.0/tests/engine/processing/gsonschema}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/gsonschema/test_types.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/gsonschema/test_validators.py +0 -0
- {data_designer-0.1.4/tests/engine/registry → data_designer-0.2.0/tests/engine/processing/processors}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/processing/processors/test_registry.py +0 -0
- {data_designer-0.1.4/tests/engine/resources → data_designer-0.2.0/tests/engine/registry}/__init__.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/registry/conftest.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/registry/test_base.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/registry/test_data_designer_registry.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/registry/test_errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/resources/conftest.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/resources/test_managed_storage.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/resources/test_resource_provider.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/conftest.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/entities/test_person.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_column.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_constraints.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_generator.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_people_gen.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_schema.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/sampling_gen/test_utils.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/test_engine_errors.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/test_model_provider.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/test_secret_resolver.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/validators/test_local_callable.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/validators/test_python.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/validators/test_remote.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/engine/validators/test_sql.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/interface/test_results.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/plugins/test_plugin.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/test_logging.py +0 -0
- {data_designer-0.1.4 → data_designer-0.2.0}/tests/test_plugin_manager.py +0 -0
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Builds the documentation and deploys it with mike.
# Runs when a release is published, or manually via workflow_dispatch.
name: Build docs
on:
  workflow_dispatch:
  release:
    types:
      - published

jobs:
  # Reusable workflow; the deploy job downloads its "notebooks" artifact.
  build-notebooks:
    uses: ./.github/workflows/build-notebooks.yml
    secrets: inherit
  deploy:
    needs: build-notebooks
    runs-on: ubuntu-latest
    permissions:
      # mike is invoked with --push, so the job needs write access to the repo.
      contents: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      - name: Install uv
        uses: astral-sh/setup-uv@v6
        with:
          version: "0.9.5"
      - name: Set up Python
        run: uv python install
      - name: Install dependencies for docs
        run: uv sync --group docs
      - name: Download artifact from previous step
        uses: actions/download-artifact@v5
        with:
          name: notebooks
          path: docs/notebooks
      - name: Find the latest existing release tag
        id: get_release
        run: |
          if [ "$EVENT_NAME" == "release" ]; then
            LATEST_TAG="$RELEASE_TAG"
          else
            echo "::notice::Running manually via workflow_dispatch. Fetching latest release tag..."

            gh auth status || echo "GitHub CLI is not authenticated, relying on GITHUB_TOKEN."

            # -q .tagName prints only the tag name. stderr is discarded and a
            # failure is tolerated so the empty check below reports the error.
            LATEST_TAG=$(gh release view --json tagName -q .tagName 2>/dev/null || true)

            if [ -z "$LATEST_TAG" ]; then
              echo "::error::Could not find the latest published release tag. Ensure a release exists."
              exit 1
            fi
          fi

          echo "Latest release tag found: $LATEST_TAG"
          echo "LATEST_TAG=$LATEST_TAG" >> "$GITHUB_ENV"
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          # Passed through the environment instead of being spliced into the script text.
          EVENT_NAME: ${{ github.event_name }}
          RELEASE_TAG: ${{ github.event.release.tag_name }}
      - name: Extract version from release tag
        run: |
          # Remove the 'v' prefix and any suffix after a space
          VERSION=$(echo "$LATEST_TAG" | sed 's/^v//' | sed 's/ .*$//')
          echo "::notice::Extracted version: $VERSION"
          echo "VERSION=$VERSION" >> "$GITHUB_ENV"
      - name: Setup doc deploy
        run: |
          git fetch origin gh-pages --depth=1
          git config --global user.name "github-actions[bot]"
          git config --global user.email "41898282+github-actions[bot]@users.noreply.github.com"
      - name: Build and deploy docs
        run: uv run mike deploy --push --update-aliases "$VERSION" latest
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
name: Check Colab notebooks
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [ main ]
|
|
6
|
+
paths:
|
|
7
|
+
- 'docs/notebook_source/*.py'
|
|
8
|
+
pull_request:
|
|
9
|
+
branches: [ main ]
|
|
10
|
+
paths:
|
|
11
|
+
- 'docs/notebook_source/*.py'
|
|
12
|
+
workflow_dispatch:
|
|
13
|
+
|
|
14
|
+
jobs:
|
|
15
|
+
check-colab-notebooks:
|
|
16
|
+
name: Check Colab Notebooks
|
|
17
|
+
runs-on: ubuntu-latest
|
|
18
|
+
|
|
19
|
+
steps:
|
|
20
|
+
- name: Checkout code
|
|
21
|
+
uses: actions/checkout@v4
|
|
22
|
+
|
|
23
|
+
- name: Install uv
|
|
24
|
+
uses: astral-sh/setup-uv@v5
|
|
25
|
+
with:
|
|
26
|
+
version: "latest"
|
|
27
|
+
python-version: "3.11"
|
|
28
|
+
enable-cache: true
|
|
29
|
+
|
|
30
|
+
- name: Install dependencies
|
|
31
|
+
run: |
|
|
32
|
+
uv sync --group notebooks --group docs
|
|
33
|
+
|
|
34
|
+
- name: Generate Colab notebooks
|
|
35
|
+
run: |
|
|
36
|
+
make generate-colab-notebooks
|
|
37
|
+
|
|
38
|
+
- name: Check for differences
|
|
39
|
+
run: |
|
|
40
|
+
# Get the diff, filtering out cell ID changes (which are randomly generated)
|
|
41
|
+
# Filter out: file markers (--- and +++), and "id" lines
|
|
42
|
+
MEANINGFUL_DIFF=$(git diff docs/colab_notebooks/ | grep -E '^[+-]' | grep -v '^---' | grep -v '^+++' | grep -vE '^[+-]\s*"id": "[0-9a-fA-F]+",?$' || true)
|
|
43
|
+
|
|
44
|
+
if [ -z "$MEANINGFUL_DIFF" ]; then
|
|
45
|
+
echo "✅ Colab notebooks are up-to-date (ignoring cell ID changes)"
|
|
46
|
+
else
|
|
47
|
+
echo "❌ Colab notebooks are out of sync with source files"
|
|
48
|
+
echo ""
|
|
49
|
+
echo "The generated notebooks differ from the committed ones."
|
|
50
|
+
echo "Please run 'make generate-colab-notebooks' locally and commit the changes."
|
|
51
|
+
echo ""
|
|
52
|
+
echo "Differences found:"
|
|
53
|
+
echo "$MEANINGFUL_DIFF"
|
|
54
|
+
exit 1
|
|
55
|
+
fi
|
|
@@ -36,6 +36,7 @@ help:
|
|
|
36
36
|
@echo "🛠️ Utilities:"
|
|
37
37
|
@echo " clean - Remove coverage reports and cache files"
|
|
38
38
|
@echo " convert-execute-notebooks - Convert notebooks from .py to .ipynb using jupytext"
|
|
39
|
+
@echo " generate-colab-notebooks - Generate Colab-compatible notebooks"
|
|
39
40
|
@echo " serve-docs-locally - Serve documentation locally"
|
|
40
41
|
@echo " check-license-headers - Check if all files have license headers"
|
|
41
42
|
@echo " update-license-headers - Add license headers to all files"
|
|
@@ -95,6 +96,11 @@ convert-execute-notebooks:
|
|
|
95
96
|
rm docs/notebook_source/*.csv
|
|
96
97
|
@echo "✅ Notebooks created in docs/notebooks/"
|
|
97
98
|
|
|
99
|
+
generate-colab-notebooks:
|
|
100
|
+
@echo "📓 Generating Colab-compatible notebooks..."
|
|
101
|
+
uv run --group notebooks python docs/scripts/generate_colab_notebooks.py
|
|
102
|
+
@echo "✅ Colab notebooks created in docs/colab_notebooks/"
|
|
103
|
+
|
|
98
104
|
serve-docs-locally:
|
|
99
105
|
@echo "📝 Building and serving docs..."
|
|
100
106
|
uv sync --group docs
|
|
@@ -125,4 +131,4 @@ install-dev-notebooks:
|
|
|
125
131
|
$(call install-pre-commit-hooks)
|
|
126
132
|
@echo "✅ Dev + notebooks installation complete!"
|
|
127
133
|
|
|
128
|
-
.PHONY: clean coverage format format-check lint lint-fix test check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks
|
|
134
|
+
.PHONY: clean coverage format format-check lint lint-fix test check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks generate-colab-notebooks
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: data-designer
|
|
3
|
-
Version: 0.1.4
|
|
3
|
+
Version: 0.2.0
|
|
4
4
|
Summary: General framework for synthetic data generation
|
|
5
5
|
License-Expression: Apache-2.0
|
|
6
6
|
License-File: LICENSE
|
|
@@ -144,12 +144,12 @@ preview.display_sample_record()
|
|
|
144
144
|
|
|
145
145
|
### 📚 Learn more
|
|
146
146
|
|
|
147
|
-
- **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/quick-start/)** – Detailed walkthrough with more examples
|
|
148
|
-
- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/)** – Step-by-step interactive tutorials
|
|
149
|
-
- **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
|
|
150
|
-
- **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
|
|
151
|
-
- **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/concepts/models/model-configs/)** – Configure custom models and providers
|
|
152
|
-
- **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
|
|
147
|
+
- **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/latest/quick-start/)** – Detailed walkthrough with more examples
|
|
148
|
+
- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/)** – Step-by-step interactive tutorials
|
|
149
|
+
- **[Column Types](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
|
|
150
|
+
- **[Validators](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
|
|
151
|
+
- **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/models/model-configs/)** – Configure custom models and providers
|
|
152
|
+
- **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
|
|
153
153
|
|
|
154
154
|
### 🔧 Configure models via CLI
|
|
155
155
|
|
|
@@ -161,7 +161,7 @@ data-designer config list # View current settings
|
|
|
161
161
|
|
|
162
162
|
### 🤝 Get involved
|
|
163
163
|
|
|
164
|
-
- **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING)** – Help improve Data Designer
|
|
164
|
+
- **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/latest/CONTRIBUTING)** – Help improve Data Designer
|
|
165
165
|
- **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or make a feature request
|
|
166
166
|
|
|
167
167
|
---
|
|
@@ -178,7 +178,7 @@ If you use NeMo Data Designer in your research, please cite it using the followi
|
|
|
178
178
|
|
|
179
179
|
```bibtex
|
|
180
180
|
@misc{nemo-data-designer,
|
|
181
|
-
author = {The NeMo Data Designer Team},
|
|
181
|
+
author = {The NeMo Data Designer Team, NVIDIA},
|
|
182
182
|
title = {NeMo Data Designer: A framework for generating synthetic data from scratch or based on your own seed data},
|
|
183
183
|
howpublished = {\url{https://github.com/NVIDIA-NeMo/DataDesigner}},
|
|
184
184
|
year = {2025},
|
|
@@ -95,12 +95,12 @@ preview.display_sample_record()
|
|
|
95
95
|
|
|
96
96
|
### 📚 Learn more
|
|
97
97
|
|
|
98
|
-
- **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/quick-start/)** – Detailed walkthrough with more examples
|
|
99
|
-
- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/)** – Step-by-step interactive tutorials
|
|
100
|
-
- **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
|
|
101
|
-
- **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
|
|
102
|
-
- **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/concepts/models/model-configs/)** – Configure custom models and providers
|
|
103
|
-
- **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
|
|
98
|
+
- **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/latest/quick-start/)** – Detailed walkthrough with more examples
|
|
99
|
+
- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/latest/notebooks/)** – Step-by-step interactive tutorials
|
|
100
|
+
- **[Column Types](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
|
|
101
|
+
- **[Validators](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
|
|
102
|
+
- **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/models/model-configs/)** – Configure custom models and providers
|
|
103
|
+
- **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/latest/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
|
|
104
104
|
|
|
105
105
|
### 🔧 Configure models via CLI
|
|
106
106
|
|
|
@@ -112,7 +112,7 @@ data-designer config list # View current settings
|
|
|
112
112
|
|
|
113
113
|
### 🤝 Get involved
|
|
114
114
|
|
|
115
|
-
- **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING)** – Help improve Data Designer
|
|
115
|
+
- **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/latest/CONTRIBUTING)** – Help improve Data Designer
|
|
116
116
|
- **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or make a feature request
|
|
117
117
|
|
|
118
118
|
---
|
|
@@ -129,7 +129,7 @@ If you use NeMo Data Designer in your research, please cite it using the followi
|
|
|
129
129
|
|
|
130
130
|
```bibtex
|
|
131
131
|
@misc{nemo-data-designer,
|
|
132
|
-
author = {The NeMo Data Designer Team},
|
|
132
|
+
author = {The NeMo Data Designer Team, NVIDIA},
|
|
133
133
|
title = {NeMo Data Designer: A framework for generating synthetic data from scratch or based on your own seed data},
|
|
134
134
|
howpublished = {\url{https://github.com/NVIDIA-NeMo/DataDesigner}},
|
|
135
135
|
year = {2025},
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from data_designer.essentials import (
|
|
4
|
+
CategorySamplerParams,
|
|
5
|
+
CodeLang,
|
|
6
|
+
CodeValidatorParams,
|
|
7
|
+
DataDesigner,
|
|
8
|
+
DataDesignerConfigBuilder,
|
|
9
|
+
LLMCodeColumnConfig,
|
|
10
|
+
LLMJudgeColumnConfig,
|
|
11
|
+
LLMTextColumnConfig,
|
|
12
|
+
SamplerColumnConfig,
|
|
13
|
+
SamplerType,
|
|
14
|
+
Score,
|
|
15
|
+
SubcategorySamplerParams,
|
|
16
|
+
ValidationColumnConfig,
|
|
17
|
+
ValidatorType,
|
|
18
|
+
)
|
|
19
|
+
from data_designer.interface.results import DatasetCreationResults
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def build_config(model_alias: str) -> DataDesignerConfigBuilder:
    """Build the configuration for the text-to-Python recipe.

    Columns are added in this order:

    * sampler columns ``industry_sector``, ``topic`` (subcategory of the
      sector), ``code_complexity``, ``code_concept`` (subcategory of the
      complexity) and ``instruction_phrase``;
    * ``instruction``: LLM text column prompted with the sampled values;
    * ``code_implementation``: LLM code column prompted with ``instruction``;
    * ``code_judge_result``: LLM judge column using
      ``TEXT_TO_PYTHON_JUDGE_TEMPLATE`` and ``python_scoring``;
    * ``code_validity_result``: code validation of ``code_implementation``.

    Args:
        model_alias: Model alias passed to every LLM-backed column.

    Returns:
        The config builder with all columns added.
    """
    config_builder = DataDesignerConfigBuilder()

    config_builder.add_column(
        SamplerColumnConfig(
            name="industry_sector",
            sampler_type=SamplerType.CATEGORY,
            params=CategorySamplerParams(
                values=[
                    "Healthcare",
                    "Finance",
                    "Technology",
                ],
            ),
        ),
    )

    config_builder.add_column(
        SamplerColumnConfig(
            name="topic",
            sampler_type=SamplerType.SUBCATEGORY,
            params=SubcategorySamplerParams(
                category="industry_sector",
                values={
                    "Healthcare": [
                        "Electronic Health Records (EHR) Systems",
                        "Telemedicine Platforms",
                        "AI-Powered Diagnostic Tools",
                    ],
                    "Finance": [
                        "Fraud Detection Software",
                        "Automated Trading Systems",
                        "Personal Finance Apps",
                    ],
                    "Technology": [
                        "Cloud Computing Platforms",
                        "Artificial Intelligence and Machine Learning Platforms",
                        "DevOps and CI/CD Tools",
                    ],
                },
            ),
        ),
    )

    config_builder.add_column(
        SamplerColumnConfig(
            name="code_complexity",
            sampler_type=SamplerType.CATEGORY,
            params=CategorySamplerParams(
                values=[
                    "Beginner",
                    "Intermediate",
                    "Advanced",
                ],
            ),
        ),
    )

    config_builder.add_column(
        SamplerColumnConfig(
            name="code_concept",
            sampler_type=SamplerType.SUBCATEGORY,
            params=SubcategorySamplerParams(
                category="code_complexity",
                values={
                    "Beginner": [
                        "Variables",
                        "Data Types",
                        "Functions",
                        "Loops",
                        "Classes",
                    ],
                    "Intermediate": [
                        "List Comprehensions",
                        "Object-oriented programming",
                        "Lambda Functions",
                        "Web frameworks",
                        "Pandas",
                    ],
                    "Advanced": [
                        "Multithreading",
                        "Context Managers",
                        "Generators",
                    ],
                },
            ),
        ),
    )

    config_builder.add_column(
        SamplerColumnConfig(
            name="instruction_phrase",
            sampler_type=SamplerType.CATEGORY,
            params=CategorySamplerParams(
                values=[
                    "Write a function that",
                    "Create a class that",
                    "Implement a script",
                    "Can you create a function",
                    "Develop a module that",
                ],
            ),
        ),
    )

    # Natural-language task, conditioned on every sampled column above.
    config_builder.add_column(
        LLMTextColumnConfig(
            name="instruction",
            model_alias=model_alias,
            system_prompt=("You are an expert at generating clear and specific programming tasks."),
            prompt=(
                "Generate an instruction to create Python code that solves a specific problem.\n"
                "Each instruction should begin with one of the following phrases: {{ instruction_phrase }}.\n\n"
                "Important Guidelines:\n"
                "* Industry Relevance: Ensure the instruction pertains to the {{ industry_sector }} sector and {{ topic }} topic.\n"
                "* Code Complexity: Tailor the instruction to the {{ code_complexity }} level. Utilize relevant {{ code_concept }} where appropriate to match the complexity level.\n"
                "* Clarity and Specificity: Make the problem statement clear and unambiguous. Provide sufficient context to understand the requirements without being overly verbose.\n"
                "* Response Formatting: Do not include any markers such as ### Response ### in the instruction.\n"
            ),
        )
    )

    # Python implementation of the generated instruction.
    config_builder.add_column(
        LLMCodeColumnConfig(
            name="code_implementation",
            model_alias=model_alias,
            code_lang=CodeLang.PYTHON,
            system_prompt=(
                "You are an expert Python programmer who writes clean, efficient, and well-documented code."
            ),
            prompt=(
                "Write Python code for the following instruction:\n"
                "Instruction: {{ instruction }}\n\n"
                "Important Guidelines:\n"
                "* Code Quality: Your code should be clean, complete, self-contained, and accurate.\n"
                "* Code Validity: Please ensure that your Python code is executable and does not contain any errors.\n"
                "* Packages: Remember to import any necessary libraries, and to use all libraries you import.\n"
                "* Complexity & Concepts: The code should be written at a {{ code_complexity }} level, making use of concepts such as {{ code_concept }}.\n"
            ),
        )
    )

    # The judge prompt and rubric are module-level names defined further down
    # in this file; they are resolved when this function is called.
    config_builder.add_column(
        LLMJudgeColumnConfig(
            name="code_judge_result",
            model_alias=model_alias,
            prompt=TEXT_TO_PYTHON_JUDGE_TEMPLATE,
            scores=python_scoring,
        )
    )

    config_builder.add_column(
        ValidationColumnConfig(
            name="code_validity_result",
            validator_type=ValidatorType.CODE,
            target_columns=["code_implementation"],
            validator_params=CodeValidatorParams(
                code_lang=CodeLang.PYTHON,
            ),
            batch_size=100,
        )
    )

    return config_builder
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def create_dataset(
    config_builder: DataDesignerConfigBuilder,
    num_records: int,
    artifact_path: Path | str | None = None,
) -> DatasetCreationResults:
    """Create a dataset from ``config_builder``.

    Args:
        config_builder: Configuration describing the columns to generate.
        num_records: Number of records requested from ``DataDesigner.create``.
        artifact_path: Forwarded to the ``DataDesigner`` constructor.

    Returns:
        Whatever ``DataDesigner.create`` returns for this configuration.
    """
    designer = DataDesigner(artifact_path=artifact_path)
    return designer.create(config_builder, num_records=num_records)
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
TEXT_TO_PYTHON_JUDGE_TEMPLATE = """\
|
|
236
|
+
You are an expert in Python programming, with specialized knowledge in software engineering, data science, and algorithmic problem-solving.
|
|
237
|
+
|
|
238
|
+
You think about potential flaws and errors in the code. You are a tough critic, but a fair one.
|
|
239
|
+
|
|
240
|
+
Take a deep breath and use the Python Code Quality Rubric below to score the **Generated Python Code** based on the INSTRUCTIONS.
|
|
241
|
+
|
|
242
|
+
#### INSTRUCTIONS
|
|
243
|
+
The Generated Python Code should be a valid response to the Natural Language Prompt below
|
|
244
|
+
|
|
245
|
+
Natural Language Prompt:
|
|
246
|
+
{{ instruction }}
|
|
247
|
+
|
|
248
|
+
Generated Python Code
|
|
249
|
+
{{ code_implementation }}
|
|
250
|
+
"""
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
python_scoring = [
|
|
254
|
+
Score(
|
|
255
|
+
name="Relevance",
|
|
256
|
+
description="Adherence to INSTRUCTIONS and CONTEXT",
|
|
257
|
+
options={
|
|
258
|
+
4: "Perfectly meets all specified requirements.",
|
|
259
|
+
3: "Meets most requirements with minor deviations.",
|
|
260
|
+
2: "Moderate deviation from the instructions.",
|
|
261
|
+
1: "Significant deviations from the instructions.",
|
|
262
|
+
0: "Does not adhere to the instructions.",
|
|
263
|
+
},
|
|
264
|
+
),
|
|
265
|
+
Score(
|
|
266
|
+
name="Pythonic",
|
|
267
|
+
description="Pythonic Code and Best Practices (Does the code follow Python conventions and best practices?)",
|
|
268
|
+
options={
|
|
269
|
+
4: "The code exemplifies Pythonic principles, making excellent use of Python-specific constructs, standard library modules and programming idioms; follows all relevant PEPs.",
|
|
270
|
+
3: "The code closely follows Python conventions and adheres to many best practices; good use of Python-specific constructs, standard library modules and programming idioms.",
|
|
271
|
+
2: "The code generally follows Python conventions but has room for better alignment with Pythonic practices.",
|
|
272
|
+
1: "The code loosely follows Python conventions, with several deviations from best practices.",
|
|
273
|
+
0: "The code does not follow Python conventions or best practices, using non-Pythonic approaches.",
|
|
274
|
+
},
|
|
275
|
+
),
|
|
276
|
+
Score(
|
|
277
|
+
name="Readability",
|
|
278
|
+
description="Readability and Maintainability (Is the Python code easy to understand and maintain?)",
|
|
279
|
+
options={
|
|
280
|
+
4: (
|
|
281
|
+
"The code is excellently formatted, follows PEP 8 guidelines, is elegantly concise and clear, uses meaningful variable names, "
|
|
282
|
+
"ensuring high readability and ease of maintenance; organizes complex logic well. Docstrings are given in a Google Docstring format."
|
|
283
|
+
),
|
|
284
|
+
3: "The code is well-formatted in the sense of code-as-documentation, making it relatively easy to understand and maintain; uses descriptive names and organizes logic clearly.",
|
|
285
|
+
2: "The code is somewhat readable with basic formatting and some comments, but improvements are needed; needs better use of descriptive names and organization.",
|
|
286
|
+
1: "The code has minimal formatting, making it hard to understand; lacks meaningful names and organization.",
|
|
287
|
+
0: "The code is unreadable, with no attempt at formatting or description.",
|
|
288
|
+
},
|
|
289
|
+
),
|
|
290
|
+
Score(
|
|
291
|
+
name="Efficiency",
|
|
292
|
+
description="Efficiency and Performance (Is the code optimized for performance?)",
|
|
293
|
+
options={
|
|
294
|
+
4: "The solution is highly efficient, using appropriate data structures and algorithms; avoids unnecessary computations and optimizes for both time and space complexity.",
|
|
295
|
+
3: "The solution is efficient, with good use of Python's built-in functions and libraries; minor areas for optimization.",
|
|
296
|
+
2: "The solution is moderately efficient, but misses some opportunities for optimization; uses some inefficient patterns.",
|
|
297
|
+
1: "The solution shows poor efficiency, with notable performance issues; lacks effective optimization techniques.",
|
|
298
|
+
0: "The solution is highly inefficient; overlooks fundamental optimization practices, resulting in significant performance issues.",
|
|
299
|
+
},
|
|
300
|
+
),
|
|
301
|
+
]
|
|
302
|
+
|
|
303
|
+
|
|
304
|
+
if __name__ == "__main__":
    from argparse import ArgumentParser

    # Command-line options: model alias, record count and artifact location.
    cli = ArgumentParser()
    cli.add_argument("--model-alias", type=str, default="openai-text")
    cli.add_argument("--num-records", type=int, default=5)
    cli.add_argument("--artifact-path", type=str, default=None)
    options = cli.parse_args()

    # Build the recipe configuration and generate the dataset in one go.
    results = create_dataset(
        build_config(model_alias=options.model_alias),
        num_records=options.num_records,
        artifact_path=options.artifact_path,
    )

    print(f"Dataset saved to: {results.artifact_storage.final_dataset_path}")

    results.load_analysis().to_report()
|