data-designer 0.1.0__tar.gz → 0.1.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer-0.1.2/.github/workflows/pack-tutorials.yml +74 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/CONTRIBUTING.md +62 -68
- {data_designer-0.1.0 → data_designer-0.1.2}/PKG-INFO +27 -13
- {data_designer-0.1.0 → data_designer-0.1.2}/README.md +24 -10
- data_designer-0.1.2/docs/concepts/person_sampling.md +36 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/index.md +2 -2
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/models/default-model-settings.md +1 -2
- data_designer-0.1.0/docs/notebooks/intro.md → data_designer-0.1.2/docs/notebooks/README.md +11 -19
- data_designer-0.1.2/docs/notebooks/pyproject.toml +9 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/mkdocs.yml +2 -3
- {data_designer-0.1.0 → data_designer-0.1.2}/pyproject.toml +2 -2
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/_version.py +2 -2
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/column_configs.py +29 -4
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/datastore.py +70 -34
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/default_model_settings.py +12 -8
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/sampler_params.py +16 -2
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/resources/seed_dataset_data_store.py +20 -2
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/interface/data_designer.py +24 -3
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/test_columns.py +120 -1
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/test_datastore.py +66 -16
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/test_default_model_settings.py +8 -2
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/analysis/column_profilers/test_base.py +12 -4
- data_designer-0.1.0/docs/concepts/persons.md +0 -240
- {data_designer-0.1.0 → data_designer-0.1.2}/.github/workflows/build-docs.yml +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/.github/workflows/ci.yml +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/.github/workflows/dco-assistant.yml +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/.github/workflows/semantic-pull-requests.yml +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/.gitignore +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/.pre-commit-config.yaml +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/AGENTS.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/CLAUDE.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/CODE_OF_CONDUCT.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/DCO +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/LICENSE +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/Makefile +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/VERSIONING.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/CONTRIBUTING.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/assets/palette-favicon.png +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/code_reference/column_configs.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/code_reference/config_builder.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/code_reference/data_designer_config.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/code_reference/sampler_params.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/code_reference/validator_params.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/concepts/columns.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/concepts/plugins.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/concepts/validators.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/css/mkdocstrings.css +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/css/style.css +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/installation.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/js/toc-toggle.js +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/models/configure-model-settings-with-the-cli.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/models/model-configs.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/models/model-providers.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/notebooks/.gitignore +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/notebooks/1-the-basics.ipynb +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/notebooks/2-structured-outputs-and-jinja-expressions.ipynb +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/notebooks/3-seeding-with-a-dataset.ipynb +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/docs/quick-start.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/scripts/update_license_headers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/README.md +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/commands/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/commands/list.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/commands/models.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/commands/providers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/commands/reset.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/controllers/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/controllers/model_controller.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/controllers/provider_controller.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/forms/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/forms/builder.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/forms/field.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/forms/form.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/forms/model_builder.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/forms/provider_builder.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/main.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/repositories/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/repositories/base.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/repositories/model_repository.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/repositories/provider_repository.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/services/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/services/model_service.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/services/provider_service.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/ui.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/cli/utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/analysis/column_profilers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/analysis/column_statistics.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/analysis/dataset_profiler.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/analysis/utils/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/analysis/utils/reporting.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/base.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/column_types.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/config_builder.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/data_designer_config.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/dataset_builders.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/interface.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/models.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/preview_results.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/processors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/sampler_constraints.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/seed.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/utils/code_lang.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/utils/constants.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/utils/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/utils/info.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/utils/io_helpers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/utils/misc.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/utils/numerical_helpers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/utils/type_helpers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/utils/validation.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/utils/visualization.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/config/validator_params.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/analysis/column_profilers/base.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/analysis/column_statistics.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/analysis/dataset_profiler.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/analysis/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/column_generators/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/column_generators/generators/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/column_generators/generators/base.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/column_generators/generators/llm_generators.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/column_generators/registry.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/configurable_task.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/dataset_builders/column_wise_builder.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/dataset_builders/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/dataset_builders/utils/concurrency.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/model_provider.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/facade.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/litellm_overrides.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/parsers/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/parsers/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/parsers/parser.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/parsers/postprocessors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/parsers/types.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/recipes/base.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/registry.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/usage.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/models/utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/ginja/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/ginja/ast.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/ginja/environment.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/ginja/record.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/gsonschema/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/processors/base.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/processors/drop_columns.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/processors/registry.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/processing/utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/registry/base.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/registry/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/resources/managed_storage.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/resources/resource_provider.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/column.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/constraints.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/generator.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/people_gen.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/schema.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/sampling_gen/utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/secret_resolver.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/validators/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/validators/base.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/validators/local_callable.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/validators/python.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/validators/remote.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/engine/validators/sql.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/essentials/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/interface/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/interface/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/interface/results.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/logging.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/plugin_manager.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/plugins/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/plugins/errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/plugins/plugin.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/src/data_designer/plugins/registry.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/commands/test_list_command.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/commands/test_models_command.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/commands/test_providers_command.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/commands/test_reset_command.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/conftest.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/controllers/test_model_controller.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/controllers/test_provider_controller.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/forms/test_field.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/forms/test_form.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/forms/test_model_builder.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/forms/test_provider_builder.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/repositories/test_model_repository.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/repositories/test_provider_repository.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/services/test_model_service.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/services/test_provider_service.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/cli/test_cli_utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/analysis/conftest.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/analysis/test_column_statistics.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/analysis/test_dataset_profiler_results.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/analysis/utils/test_reporting.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/test_config_builder.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/test_data_designer_config.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/test_models.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/test_processors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/test_sampler_constraints.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/test_sampler_params.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/test_seed.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/test_validator_params.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/utils/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/utils/test_code_lang.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/utils/test_info.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/utils/test_io_helpers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/utils/test_misc.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/utils/test_type_helpers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/utils/test_validation.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/config/utils/test_visualization.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/conftest.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/analysis/conftest.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/analysis/test_dataset_profiler.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/analysis/test_errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/column_generators/generators/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/column_generators/generators/test_expression.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/column_generators/generators/test_llm_generators.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/column_generators/generators/test_samplers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/column_generators/generators/test_validation.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/column_generators/test_registry.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/conftest.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/dataset_builders/test_column_wise_builder.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/conftest.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/parsers/test_parser.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/parsers/test_parsers_types.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/parsers/test_postprocessors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/recipes/test_recipe_base.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/recipes/test_response_recipes.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/stub_secrets.json +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/test_facade.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/test_litellm_overrides.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/test_model_errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/test_model_registry.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/test_model_utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/models/test_usage.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/ginja/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/ginja/test_ast.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/ginja/test_environment.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/ginja/test_exceptions.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/ginja/test_record.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/gsonschema/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/gsonschema/test_types.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/gsonschema/test_validators.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/processors/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/processors/test_drop_columns.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/processors/test_registry.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/processing/test_utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/registry/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/registry/conftest.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/registry/test_base.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/registry/test_data_designer_registry.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/registry/test_errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/resources/__init__.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/resources/conftest.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/resources/test_managed_storage.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/resources/test_resource_provider.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/conftest.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/entities/test_person.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/test_column.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/test_constraints.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/test_generator.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/test_people_gen.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/test_schema.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/sampling_gen/test_utils.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/test_configurable_task.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/test_engine_errors.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/test_model_provider.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/test_secret_resolver.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/validators/test_local_callable.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/validators/test_python.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/validators/test_remote.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/engine/validators/test_sql.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/essentials/test_init.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/interface/test_data_designer.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/interface/test_results.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/plugins/test_plugin.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/plugins/test_plugin_registry.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/test_logging.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/tests/test_plugin_manager.py +0 -0
- {data_designer-0.1.0 → data_designer-0.1.2}/uv.lock +0 -0
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
name: Pack Tutorials
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
workflow_dispatch:
|
|
5
|
+
release:
|
|
6
|
+
types: [published]
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
zip_and_upload:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
permissions:
|
|
12
|
+
contents: write
|
|
13
|
+
|
|
14
|
+
steps:
|
|
15
|
+
- name: Checkout repository
|
|
16
|
+
uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Set up environment variables
|
|
19
|
+
id: env_setup
|
|
20
|
+
run: |
|
|
21
|
+
echo "SOURCE_FOLDER_PATH=docs/notebooks" >> $GITHUB_ENV
|
|
22
|
+
echo "TARGET_FOLDER_NAME=data_designer_tutorial" >> $GITHUB_ENV
|
|
23
|
+
echo "ZIP_FILE_NAME=data_designer_tutorial.zip" >> $GITHUB_ENV
|
|
24
|
+
|
|
25
|
+
- name: Check if source folder exists
|
|
26
|
+
run: |
|
|
27
|
+
if [ ! -d "${{ env.SOURCE_FOLDER_PATH }}" ]; then
|
|
28
|
+
echo "::error::Source folder '${{ env.SOURCE_FOLDER_PATH }}' not found. Check the input value."
|
|
29
|
+
exit 1
|
|
30
|
+
fi
|
|
31
|
+
|
|
32
|
+
- name: Rename source folder
|
|
33
|
+
run: mv ${{ env.SOURCE_FOLDER_PATH }} ${{ env.TARGET_FOLDER_NAME }}
|
|
34
|
+
|
|
35
|
+
- name: Zip the target folder
|
|
36
|
+
run: |
|
|
37
|
+
zip -r ${{ env.ZIP_FILE_NAME }} ${{ env.TARGET_FOLDER_NAME }}
|
|
38
|
+
echo "Successfully created ${{ env.ZIP_FILE_NAME }}"
|
|
39
|
+
|
|
40
|
+
- name: Find the latest existing release tag
|
|
41
|
+
id: get_release
|
|
42
|
+
run: |
|
|
43
|
+
if [ "${{ github.event_name }}" == "release" ]; then
|
|
44
|
+
LATEST_TAG="${{ github.event.release.tag_name }}"
|
|
45
|
+
else
|
|
46
|
+
echo "::notice::Running manually via workflow_dispatch. Fetching latest release tag..."
|
|
47
|
+
|
|
48
|
+
gh auth status || echo "GitHub CLI is not authenticated, relying on GITHUB_TOKEN."
|
|
49
|
+
|
|
50
|
+
# Command substitution $(...) strips the trailing newline, leaving a clean tag string
|
|
51
|
+
LATEST_TAG=$(gh release view --json tagName -q .tagName 2>/dev/null)
|
|
52
|
+
|
|
53
|
+
if [ -z "$LATEST_TAG" ]; then
|
|
54
|
+
echo "::error::Could not find the latest published release tag. Ensure a release exists."
|
|
55
|
+
exit 1
|
|
56
|
+
fi
|
|
57
|
+
fi
|
|
58
|
+
|
|
59
|
+
echo "Latest release tag found: $LATEST_TAG"
|
|
60
|
+
echo "tag=$LATEST_TAG" >> $GITHUB_OUTPUT
|
|
61
|
+
env:
|
|
62
|
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
|
63
|
+
|
|
64
|
+
- name: Upload zip file as release asset
|
|
65
|
+
uses: softprops/action-gh-release@v2
|
|
66
|
+
with:
|
|
67
|
+
tag_name: ${{ steps.get_release.outputs.tag }}
|
|
68
|
+
files: ${{ env.ZIP_FILE_NAME }}
|
|
69
|
+
draft: false
|
|
70
|
+
prerelease: false
|
|
71
|
+
|
|
72
|
+
- name: Cleanup
|
|
73
|
+
if: always()
|
|
74
|
+
run: rm -f ${{ env.ZIP_FILE_NAME }}
|
|
@@ -12,7 +12,6 @@ This guide will help you get started with the contribution process.
|
|
|
12
12
|
- [Ways to Contribute](#ways-to-contribute)
|
|
13
13
|
- [Feature Requests](#feature-requests)
|
|
14
14
|
- [Development Guide](#development-guide)
|
|
15
|
-
- [Code Quality Standards](#code-quality-standards)
|
|
16
15
|
- [Submitting Changes](#submitting-changes)
|
|
17
16
|
- [Code of Conduct](#code-of-conduct)
|
|
18
17
|
- [Signing off on your work](#signing-off-on-your-work)
|
|
@@ -88,94 +87,97 @@ Data Designer uses [`uv`](https://github.com/astral-sh/uv) for dependency manage
|
|
|
88
87
|
### Initial Setup
|
|
89
88
|
0. **Create or find an issue**
|
|
90
89
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
90
|
+
Before starting work, ensure there's an issue tracking your contribution:
|
|
91
|
+
|
|
92
|
+
- For bug fixes: Search [existing issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues) or [create a new one](https://github.com/NVIDIA-NeMo/DataDesigner/issues/new)
|
|
93
|
+
- For new features: Open a [feature request](#feature-requests) to discuss the approach first
|
|
94
|
+
- Comment on the issue to let maintainers know you're working on it
|
|
95
95
|
|
|
96
96
|
1. **Fork and clone the repository**
|
|
97
97
|
|
|
98
|
-
|
|
98
|
+
Start by [forking the Data Designer repository](https://github.com/NVIDIA-NeMo/DataDesigner/fork), then clone your fork and add the upstream remote:
|
|
99
99
|
|
|
100
|
-
|
|
101
|
-
|
|
100
|
+
```bash
|
|
101
|
+
git clone https://github.com/YOUR_GITHUB_USERNAME/DataDesigner.git
|
|
102
102
|
|
|
103
|
-
|
|
103
|
+
cd DataDesigner
|
|
104
104
|
|
|
105
|
-
|
|
106
|
-
|
|
105
|
+
git remote add upstream https://github.com/NVIDIA-NeMo/DataDesigner.git
|
|
106
|
+
```
|
|
107
107
|
|
|
108
108
|
2. **Install dependencies**
|
|
109
109
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
110
|
+
```bash
|
|
111
|
+
# Install project with dev dependencies
|
|
112
|
+
make install-dev
|
|
113
113
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
114
|
+
# Or, if you use Jupyter / IPython for development
|
|
115
|
+
make install-dev-notebooks
|
|
116
|
+
```
|
|
117
117
|
|
|
118
118
|
3. **Verify your setup**
|
|
119
119
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
120
|
+
```bash
|
|
121
|
+
make test && make check-all
|
|
122
|
+
```
|
|
123
123
|
|
|
124
|
-
|
|
124
|
+
If no errors are reported, you're ready to develop 🚀
|
|
125
125
|
|
|
126
126
|
### Making Changes
|
|
127
127
|
|
|
128
128
|
1. **Create a feature branch**
|
|
129
129
|
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
130
|
+
```bash
|
|
131
|
+
git checkout main
|
|
132
|
+
git pull upstream main
|
|
133
|
+
git checkout -b <username>/<type-of-change>/<issue-number>-<short-description>
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
Example types of change:
|
|
137
|
+
|
|
138
|
+
- `feat` for new features
|
|
139
|
+
- `fix` for bug fixes
|
|
140
|
+
- `docs` for documentation updates
|
|
141
|
+
- `test` for testing changes
|
|
142
|
+
- `refactor` for code refactoring
|
|
143
|
+
- `chore` for chore tasks
|
|
144
|
+
- `style` for style changes
|
|
145
|
+
- `perf` for performance improvements
|
|
146
|
+
|
|
147
|
+
Example branch name:
|
|
148
|
+
|
|
149
|
+
- `johnnygreco/feat/123-add-xyz-generator` for a new feature by @johnnygreco, addressing issue #123
|
|
148
150
|
|
|
149
151
|
2. **Develop your changes**
|
|
150
152
|
|
|
151
|
-
|
|
153
|
+
Please follow the patterns and conventions used throughout the codebase, as well as those outlined in [AGENTS.md](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/AGENTS.md).
|
|
152
154
|
|
|
153
155
|
3. **Test and validate**
|
|
154
156
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
157
|
+
```bash
|
|
158
|
+
make check-all-fix # Format code and fix linting issues
|
|
159
|
+
make test # Run all tests
|
|
160
|
+
make coverage # Check test coverage (must be >90%)
|
|
161
|
+
```
|
|
160
162
|
|
|
161
|
-
|
|
163
|
+
**Writing tests**: Place tests in [tests/](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/tests/) mirroring the source structure. Use fixtures from [tests/conftest.py](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/tests/conftest.py), mock external services with `unittest.mock` or `pytest-httpx`, and test both success and failure cases. See [AGENTS.md](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/AGENTS.md) for patterns and examples.
|
|
162
164
|
|
|
163
165
|
4. **Commit your work**
|
|
164
166
|
|
|
165
|
-
|
|
167
|
+
Write clear, descriptive commit messages, optionally including a brief summary (50 characters or less) and referencing issue numbers when applicable (e.g., "Fixes #123").
|
|
166
168
|
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
169
|
+
```bash
|
|
170
|
+
git commit -m "Add XYZ generator for synthetic data" -m "Fixes #123"
|
|
171
|
+
```
|
|
170
172
|
|
|
171
173
|
5. **Stay up to date**
|
|
172
174
|
|
|
173
|
-
|
|
175
|
+
Regularly sync your branch with upstream changes:
|
|
174
176
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
177
|
+
```bash
|
|
178
|
+
git fetch upstream
|
|
179
|
+
git merge upstream/main
|
|
180
|
+
```
|
|
179
181
|
|
|
180
182
|
## Submitting Changes
|
|
181
183
|
|
|
@@ -194,9 +196,9 @@ Ensure your changes meet the following criteria:
|
|
|
194
196
|
|
|
195
197
|
1. **Push your changes** to your fork:
|
|
196
198
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
199
|
+
```bash
|
|
200
|
+
git push origin <username>/<type-of-change>/<issue-number>-<short-description>
|
|
201
|
+
```
|
|
200
202
|
|
|
201
203
|
2. **Open a pull request** on GitHub from your fork to the main repository
|
|
202
204
|
|
|
@@ -213,7 +215,7 @@ Ensure your changes meet the following criteria:
|
|
|
213
215
|
## Code of Conduct
|
|
214
216
|
Data Designer follows the Contributor Covenant Code of Conduct. We are committed to providing a welcoming and inclusive environment for all contributors.
|
|
215
217
|
|
|
216
|
-
**Please read our complete [Code of Conduct](CODE_OF_CONDUCT.md)** for full details on our standards and expectations.
|
|
218
|
+
**Please read our complete [Code of Conduct](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/CODE_OF_CONDUCT.md)** for full details on our standards and expectations.
|
|
217
219
|
|
|
218
220
|
### License File Headers
|
|
219
221
|
All code files that are added to this repository must include the appropriate NVIDIA copyright header:
|
|
@@ -225,17 +227,9 @@ All code files that are added to this repository must include the appropriate NV
|
|
|
225
227
|
|
|
226
228
|
Use `make update-license-headers` to add headers automatically.
|
|
227
229
|
|
|
228
|
-
## Getting Help
|
|
229
|
-
Need help with your contribution?
|
|
230
|
-
|
|
231
|
-
- **Documentation**: Check the [documentation](docs/) and [AGENTS.md](AGENTS.md) for additional information
|
|
232
|
-
- **Issues**: Browse [existing issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues) for similar questions
|
|
233
|
-
- **Contact**: Reach out to the core maintainers at [data-designer@nvidia.com](mailto:data-designer@nvidia.com)
|
|
234
|
-
|
|
235
|
-
|
|
236
230
|
## Signing off on your work
|
|
237
231
|
|
|
238
|
-
When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content and that the content you contribute may be provided under the project license. All contributors are asked to sign the Data Designer [Developer Certificate of Origin (DCO)](DCO) when submitting their first pull request. The process is automated by a bot that will comment on the pull request. Our DCO is the same as the Linux Foundation requires its contributors to sign.
|
|
232
|
+
When contributing to this project, you must agree that you have authored 100% of the content, that you have the necessary rights to the content, and that the content you contribute may be provided under the project license. All contributors are asked to sign the Data Designer [Developer Certificate of Origin (DCO)](https://github.com/NVIDIA-NeMo/DataDesigner/blob/main/DCO) when submitting their first pull request. The process is automated by a bot that will comment on the pull request. Our DCO is the same one that the Linux Foundation requires its contributors to sign.
|
|
239
233
|
|
|
240
234
|
---
|
|
241
235
|
|
|
@@ -1,18 +1,18 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: data-designer
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 0.1.2
|
|
4
4
|
Summary: General framework for synthetic data generation
|
|
5
|
+
License-Expression: Apache-2.0
|
|
5
6
|
License-File: LICENSE
|
|
6
7
|
Classifier: Development Status :: 4 - Beta
|
|
7
8
|
Classifier: Intended Audience :: Developers
|
|
8
9
|
Classifier: Intended Audience :: Science/Research
|
|
9
|
-
Classifier: License ::
|
|
10
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
10
11
|
Classifier: Programming Language :: Python :: 3.10
|
|
11
12
|
Classifier: Programming Language :: Python :: 3.11
|
|
12
13
|
Classifier: Programming Language :: Python :: 3.12
|
|
13
14
|
Classifier: Programming Language :: Python :: 3.13
|
|
14
15
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
-
Classifier: Topic :: Scientific/Engineering :: Human Machine Interfaces
|
|
16
16
|
Classifier: Topic :: Software Development
|
|
17
17
|
Requires-Python: >=3.10
|
|
18
18
|
Requires-Dist: anyascii<1.0,>=0.3.3
|
|
@@ -51,7 +51,7 @@ Description-Content-Type: text/markdown
|
|
|
51
51
|
|
|
52
52
|
[](https://github.com/NVIDIA-NeMo/DataDesigner/actions/workflows/ci.yml)
|
|
53
53
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
54
|
-
[](https://www.python.org/downloads/) [](https://docs.nvidia.com/nemo/microservices/latest/index.html) [](https://nvidia-nemo.github.io/DataDesigner/)
|
|
55
55
|
|
|
56
56
|
**Generate high-quality synthetic datasets from scratch or using your own seed data.**
|
|
57
57
|
|
|
@@ -97,8 +97,7 @@ export NVIDIA_API_KEY="your-api-key-here"
|
|
|
97
97
|
export OPENAI_API_KEY="your-openai-api-key-here"
|
|
98
98
|
```
|
|
99
99
|
|
|
100
|
-
### 3.
|
|
101
|
-
|
|
100
|
+
### 3. Start generating data!
|
|
102
101
|
```python
|
|
103
102
|
from data_designer.essentials import (
|
|
104
103
|
CategorySamplerParams,
|
|
@@ -139,18 +138,18 @@ preview = data_designer.preview(config_builder=config_builder)
|
|
|
139
138
|
preview.display_sample_record()
|
|
140
139
|
```
|
|
141
140
|
|
|
142
|
-
**That's it!** You've created a dataset.
|
|
143
|
-
|
|
144
141
|
---
|
|
145
142
|
|
|
146
143
|
## What's next?
|
|
147
144
|
|
|
148
145
|
### 📚 Learn more
|
|
149
146
|
|
|
150
|
-
- **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner)** – Detailed walkthrough with more examples
|
|
151
|
-
- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/
|
|
147
|
+
- **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/quick-start/)** – Detailed walkthrough with more examples
|
|
148
|
+
- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/)** – Step-by-step interactive tutorials
|
|
152
149
|
- **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
|
|
150
|
+
- **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
|
|
153
151
|
- **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/models/model-configs/)** – Configure custom models and providers
|
|
152
|
+
- **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
|
|
154
153
|
|
|
155
154
|
### 🔧 Configure models via CLI
|
|
156
155
|
|
|
@@ -162,12 +161,27 @@ data-designer config list # View current settings
|
|
|
162
161
|
|
|
163
162
|
### 🤝 Get involved
|
|
164
163
|
|
|
165
|
-
- **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING
|
|
166
|
-
- **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or request
|
|
167
|
-
- **[GitHub Discussions](https://github.com/NVIDIA-NeMo/DataDesigner/discussions)** – Ask questions and share ideas
|
|
164
|
+
- **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING)** – Help improve Data Designer
|
|
165
|
+
- **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or make a feature request
|
|
168
166
|
|
|
169
167
|
---
|
|
170
168
|
|
|
171
169
|
## License
|
|
172
170
|
|
|
173
171
|
Apache License 2.0 – see [LICENSE](LICENSE) for details.
|
|
172
|
+
|
|
173
|
+
---
|
|
174
|
+
|
|
175
|
+
## Citation
|
|
176
|
+
|
|
177
|
+
If you use NeMo Data Designer in your research, please cite it using the following BibTeX entry:
|
|
178
|
+
|
|
179
|
+
```bibtex
|
|
180
|
+
@misc{nemo-data-designer,
|
|
181
|
+
author = {The NeMo Data Designer Team},
|
|
182
|
+
title = {NeMo Data Designer: A framework for generating synthetic data from scratch or based on your own seed data},
|
|
183
|
+
howpublished = {\url{https://github.com/NVIDIA-NeMo/DataDesigner}},
|
|
184
|
+
year = {2025},
|
|
185
|
+
note = {GitHub Repository},
|
|
186
|
+
}
|
|
187
|
+
```
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://github.com/NVIDIA-NeMo/DataDesigner/actions/workflows/ci.yml)
|
|
4
4
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
5
|
-
[](https://www.python.org/downloads/) [](https://docs.nvidia.com/nemo/microservices/latest/index.html) [](https://nvidia-nemo.github.io/DataDesigner/)
|
|
6
6
|
|
|
7
7
|
**Generate high-quality synthetic datasets from scratch or using your own seed data.**
|
|
8
8
|
|
|
@@ -48,8 +48,7 @@ export NVIDIA_API_KEY="your-api-key-here"
|
|
|
48
48
|
export OPENAI_API_KEY="your-openai-api-key-here"
|
|
49
49
|
```
|
|
50
50
|
|
|
51
|
-
### 3.
|
|
52
|
-
|
|
51
|
+
### 3. Start generating data!
|
|
53
52
|
```python
|
|
54
53
|
from data_designer.essentials import (
|
|
55
54
|
CategorySamplerParams,
|
|
@@ -90,18 +89,18 @@ preview = data_designer.preview(config_builder=config_builder)
|
|
|
90
89
|
preview.display_sample_record()
|
|
91
90
|
```
|
|
92
91
|
|
|
93
|
-
**That's it!** You've created a dataset.
|
|
94
|
-
|
|
95
92
|
---
|
|
96
93
|
|
|
97
94
|
## What's next?
|
|
98
95
|
|
|
99
96
|
### 📚 Learn more
|
|
100
97
|
|
|
101
|
-
- **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner)** – Detailed walkthrough with more examples
|
|
102
|
-
- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/
|
|
98
|
+
- **[Quick Start Guide](https://nvidia-nemo.github.io/DataDesigner/quick-start/)** – Detailed walkthrough with more examples
|
|
99
|
+
- **[Tutorial Notebooks](https://nvidia-nemo.github.io/DataDesigner/notebooks/)** – Step-by-step interactive tutorials
|
|
103
100
|
- **[Column Types](https://nvidia-nemo.github.io/DataDesigner/concepts/columns/)** – Explore samplers, LLM columns, validators, and more
|
|
101
|
+
- **[Validators](https://nvidia-nemo.github.io/DataDesigner/concepts/validators/)** – Learn how to validate generated data with Python, SQL, and remote validators
|
|
104
102
|
- **[Model Configuration](https://nvidia-nemo.github.io/DataDesigner/models/model-configs/)** – Configure custom models and providers
|
|
103
|
+
- **[Person Sampling](https://nvidia-nemo.github.io/DataDesigner/concepts/person_sampling/)** – Learn how to sample realistic person data with demographic attributes
|
|
105
104
|
|
|
106
105
|
### 🔧 Configure models via CLI
|
|
107
106
|
|
|
@@ -113,12 +112,27 @@ data-designer config list # View current settings
|
|
|
113
112
|
|
|
114
113
|
### 🤝 Get involved
|
|
115
114
|
|
|
116
|
-
- **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING
|
|
117
|
-
- **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or request
|
|
118
|
-
- **[GitHub Discussions](https://github.com/NVIDIA-NeMo/DataDesigner/discussions)** – Ask questions and share ideas
|
|
115
|
+
- **[Contributing Guide](https://nvidia-nemo.github.io/DataDesigner/CONTRIBUTING)** – Help improve Data Designer
|
|
116
|
+
- **[GitHub Issues](https://github.com/NVIDIA-NeMo/DataDesigner/issues)** – Report bugs or make a feature request
|
|
119
117
|
|
|
120
118
|
---
|
|
121
119
|
|
|
122
120
|
## License
|
|
123
121
|
|
|
124
122
|
Apache License 2.0 – see [LICENSE](LICENSE) for details.
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## Citation
|
|
127
|
+
|
|
128
|
+
If you use NeMo Data Designer in your research, please cite it using the following BibTeX entry:
|
|
129
|
+
|
|
130
|
+
```bibtex
|
|
131
|
+
@misc{nemo-data-designer,
|
|
132
|
+
author = {The NeMo Data Designer Team},
|
|
133
|
+
title = {NeMo Data Designer: A framework for generating synthetic data from scratch or based on your own seed data},
|
|
134
|
+
howpublished = {\url{https://github.com/NVIDIA-NeMo/DataDesigner}},
|
|
135
|
+
year = {2025},
|
|
136
|
+
note = {GitHub Repository},
|
|
137
|
+
}
|
|
138
|
+
```
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# Person Sampling in Data Designer
|
|
2
|
+
|
|
3
|
+
Person sampling in Data Designer allows you to generate synthetic person data for your datasets using the Faker library.
|
|
4
|
+
|
|
5
|
+
## Faker-Based Sampling
|
|
6
|
+
|
|
7
|
+
### What It Does
|
|
8
|
+
Uses the Faker library to generate random personal information. The data is basic and not demographically accurate, but is useful for quick testing, prototyping, or when realistic demographic distributions are not relevant for your use case.
|
|
9
|
+
|
|
10
|
+
### Features
|
|
11
|
+
- Gives you access to person attributes that Faker exposes
|
|
12
|
+
- Quick to set up with no additional downloads
|
|
13
|
+
- Generates random names, emails, addresses, phone numbers, etc.
|
|
14
|
+
- Supports [all Faker-supported locales](https://faker.readthedocs.io/en/master/locales.html)
|
|
15
|
+
- **Not demographically grounded** - data patterns don't reflect real-world demographics
|
|
16
|
+
|
|
17
|
+
### Usage Example
|
|
18
|
+
```python
|
|
19
|
+
from data_designer.essentials import (
|
|
20
|
+
SamplerColumnConfig,
|
|
21
|
+
SamplerType,
|
|
22
|
+
PersonFromFakerSamplerParams,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
config_builder.add_column(
|
|
26
|
+
SamplerColumnConfig(
|
|
27
|
+
name="customer",
|
|
28
|
+
sampler_type=SamplerType.PERSON_FROM_FAKER,
|
|
29
|
+
params=PersonFromFakerSamplerParams(
|
|
30
|
+
locale="en_US",
|
|
31
|
+
age_range=[25, 65],
|
|
32
|
+
sex="Female",
|
|
33
|
+
),
|
|
34
|
+
)
|
|
35
|
+
)
|
|
36
|
+
```
|
|
@@ -34,11 +34,11 @@ Data Designer helps you create datasets through an intuitive, **iterative** proc
|
|
|
34
34
|
3. **🔁 Preview** your results and iterate
|
|
35
35
|
- Generate a preview dataset stored in memory for fast iteration
|
|
36
36
|
- Inspect sample records and analysis results to refine your configuration
|
|
37
|
-
- Try for yourself by running the [tutorial notebooks](notebooks/
|
|
37
|
+
- Try for yourself by running the [tutorial notebooks](notebooks/README.md)
|
|
38
38
|
4. **🖼️ Create** your dataset
|
|
39
39
|
- Generate your full dataset and save results to disk
|
|
40
40
|
- Access the generated dataset and associated artifacts for downstream use
|
|
41
|
-
- Give it a try by running the [tutorial notebooks](notebooks/
|
|
41
|
+
- Give it a try by running the [tutorial notebooks](notebooks/README.md)
|
|
42
42
|
|
|
43
43
|
## Library and Microservice
|
|
44
44
|
|
|
@@ -92,5 +92,4 @@ Both methods operate on the same files, ensuring consistency across your entire
|
|
|
92
92
|
|
|
93
93
|
- **[Configure Model Settings With the CLI](configure-model-settings-with-the-cli.md)**: Learn how to use the CLI to manage model settings.
|
|
94
94
|
- **[Quick Start Guide](../quick-start.md)**: Get started with a simple example
|
|
95
|
-
- **[Model
|
|
96
|
-
- **[Column Configurations](../code_reference/column_configs.md)**: Learn about all column types
|
|
95
|
+
- **[Model Configurations](model-configs.md)**: Learn about model configurations
|
|
@@ -6,21 +6,15 @@ Welcome to the Data Designer tutorial series! These hands-on notebooks will guid
|
|
|
6
6
|
|
|
7
7
|
### Local Setup Best Practices
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
To run
|
|
9
|
+
First, download the tutorial [from the release assets](https://github.com/NVIDIA-NeMo/DataDesigner/releases/latest/download/data_designer_tutorial.zip).
|
|
10
|
+
To run the tutorial notebooks locally, we recommend using a virtual environment to manage dependencies:
|
|
11
11
|
|
|
12
12
|
=== "uv (Recommended)"
|
|
13
13
|
|
|
14
14
|
```bash
|
|
15
|
-
#
|
|
16
|
-
|
|
17
|
-
cd
|
|
18
|
-
|
|
19
|
-
# Initialize a new uv project
|
|
20
|
-
uv init
|
|
21
|
-
|
|
22
|
-
# Add data-designer and jupyter
|
|
23
|
-
uv add data-designer jupyter
|
|
15
|
+
# Extract tutorial notebooks
|
|
16
|
+
unzip data_designer_tutorial.zip
|
|
17
|
+
cd data_designer_tutorial
|
|
24
18
|
|
|
25
19
|
# Launch Jupyter
|
|
26
20
|
uv run jupyter notebook
|
|
@@ -29,15 +23,13 @@ To run them locally, we recommend using a virtual environment to manage dependen
|
|
|
29
23
|
=== "pip + venv"
|
|
30
24
|
|
|
31
25
|
```bash
|
|
32
|
-
#
|
|
33
|
-
|
|
34
|
-
cd
|
|
26
|
+
# Extract tutorial notebooks
|
|
27
|
+
unzip data_designer_tutorial.zip
|
|
28
|
+
cd data_designer_tutorial
|
|
35
29
|
|
|
36
|
-
# Create and
|
|
30
|
+
# Create Python virtual environment and install required packages
|
|
37
31
|
python -m venv venv
|
|
38
32
|
source venv/bin/activate
|
|
39
|
-
|
|
40
|
-
# Install data-designer and jupyter
|
|
41
33
|
pip install data-designer jupyter
|
|
42
34
|
|
|
43
35
|
# Launch Jupyter
|
|
@@ -108,7 +100,7 @@ Understanding these concepts will help you make the most of the tutorials:
|
|
|
108
100
|
|
|
109
101
|
- **[Columns](../concepts/columns.md)** - Learn about different column types (Sampler, LLM, Expression, Validation, etc.)
|
|
110
102
|
- **[Validators](../concepts/validators.md)** - Understand how to validate generated data with Python, SQL, and remote validators
|
|
111
|
-
- **[
|
|
103
|
+
- **[Person Sampling](../concepts/person_sampling.md)** - Learn how to sample realistic person data with demographic attributes
|
|
112
104
|
|
|
113
105
|
### Code Reference
|
|
114
106
|
|
|
@@ -117,4 +109,4 @@ Quick reference guides for the main configuration objects:
|
|
|
117
109
|
- **[column_configs](../code_reference/column_configs.md)** - All column configuration types
|
|
118
110
|
- **[config_builder](../code_reference/config_builder.md)** - The `DataDesignerConfigBuilder` API
|
|
119
111
|
- **[data_designer_config](../code_reference/data_designer_config.md)** - Main configuration schema
|
|
120
|
-
- **[validator_params](../code_reference/validator_params.md)** - Validator configuration options
|
|
112
|
+
- **[validator_params](../code_reference/validator_params.md)** - Validator configuration options
|
|
@@ -10,15 +10,14 @@ nav:
|
|
|
10
10
|
- Concepts:
|
|
11
11
|
- Columns: concepts/columns.md
|
|
12
12
|
- Validators: concepts/validators.md
|
|
13
|
-
-
|
|
14
|
-
# - Plugins: concepts/plugins.md
|
|
13
|
+
- Person Sampling: concepts/person_sampling.md
|
|
15
14
|
- Models:
|
|
16
15
|
- Default Model Settings: models/default-model-settings.md
|
|
17
16
|
- Configure with the CLI: models/configure-model-settings-with-the-cli.md
|
|
18
17
|
- Model Providers: models/model-providers.md
|
|
19
18
|
- Model Configs: models/model-configs.md
|
|
20
19
|
- Tutorials:
|
|
21
|
-
- Overview: notebooks/
|
|
20
|
+
- Overview: notebooks/README.md
|
|
22
21
|
- The Basics: notebooks/1-the-basics.ipynb
|
|
23
22
|
- Structured Outputs and Jinja Expressions: notebooks/2-structured-outputs-and-jinja-expressions.ipynb
|
|
24
23
|
- Seeding with an External Dataset: notebooks/3-seeding-with-a-dataset.ipynb
|
|
@@ -4,15 +4,15 @@ dynamic = ["version"]
|
|
|
4
4
|
description = "General framework for synthetic data generation"
|
|
5
5
|
readme = "README.md"
|
|
6
6
|
requires-python = ">=3.10"
|
|
7
|
+
license = "Apache-2.0"
|
|
7
8
|
|
|
8
9
|
classifiers = [
|
|
9
10
|
"Development Status :: 4 - Beta",
|
|
10
11
|
"Intended Audience :: Developers",
|
|
11
12
|
"Intended Audience :: Science/Research",
|
|
12
13
|
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
13
|
-
"Topic :: Scientific/Engineering :: Human Machine Interfaces",
|
|
14
14
|
"Topic :: Software Development",
|
|
15
|
-
"License ::
|
|
15
|
+
"License :: OSI Approved :: Apache Software License",
|
|
16
16
|
"Programming Language :: Python :: 3.10",
|
|
17
17
|
"Programming Language :: Python :: 3.11",
|
|
18
18
|
"Programming Language :: Python :: 3.12",
|
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.1.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 1, 0)
|
|
31
|
+
__version__ = version = '0.1.2'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 1, 2)
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|