data-designer-engine 0.4.0__tar.gz → 0.4.0rc2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/PKG-INFO +1 -1
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/_version.py +2 -2
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/llm_completion.py +4 -7
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/column_wise_builder.py +5 -24
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/facade.py +26 -23
- data_designer_engine-0.4.0rc2/src/data_designer/engine/models/utils.py +38 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_llm_completion_generators.py +12 -15
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/test_column_wise_builder.py +0 -1
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_facade.py +29 -65
- data_designer_engine-0.4.0rc2/tests/engine/models/test_model_utils.py +36 -0
- data_designer_engine-0.4.0/src/data_designer/engine/dataset_builders/utils/progress_tracker.py +0 -122
- data_designer_engine-0.4.0/src/data_designer/engine/models/utils.py +0 -101
- data_designer_engine-0.4.0/tests/engine/dataset_builders/utils/test_progress_tracker.py +0 -290
- data_designer_engine-0.4.0/tests/engine/models/test_model_utils.py +0 -23
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/.gitignore +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/README.md +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/pyproject.toml +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/column_profilers/base.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/column_statistics.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/dataset_profiler.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/base.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/embedding.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/registry.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/utils/generator_classification.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/compiler.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/configurable_task.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/concurrency.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/model_provider.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/factory.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/litellm_overrides.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/parser.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/postprocessors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/types.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/recipes/base.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/registry.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/telemetry.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/usage.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/ast.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/environment.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/record.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/processors/base.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/processors/drop_columns.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/processors/registry.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/processors/schema_transform.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/utils.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/registry/base.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/registry/errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/managed_storage.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/resource_provider.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/seed_reader.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/column.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/constraints.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/generator.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/people_gen.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/schema.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/utils.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/secret_resolver.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/testing/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/testing/stubs.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/testing/utils.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validation.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/base.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/local_callable.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/python.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/remote.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/sql.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/conftest.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/column_profilers/test_base.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/conftest.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_dataset_profiler.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_embedding.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_expression.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_samplers.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_validation.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/test_registry.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/utils/test_generator_classification.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/conftest.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/conftest.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/parsers/test_parser.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/parsers/test_parsers_types.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/parsers/test_postprocessors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/recipes/test_recipe_base.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/recipes/test_response_recipes.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/stub_secrets.json +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_litellm_overrides.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_model_errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_model_registry.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_usage.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/test_ast.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/test_environment.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/test_exceptions.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/test_record.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/test_types.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/test_validators.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/processors/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/processors/test_drop_columns.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/processors/test_registry.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/processors/test_schema_transform.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/test_utils.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/registry/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/registry/conftest.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/registry/test_base.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/registry/test_data_designer_registry.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/registry/test_errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/__init__.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/conftest.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_managed_storage.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_resource_provider.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_seed_reader.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/conftest.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/entities/test_person.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_column.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_constraints.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_generator.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_people_gen.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_schema.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_utils.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_compiler.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_configurable_task.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_dataset_metadata.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_engine_errors.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_model_provider.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_secret_resolver.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_validation.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/validators/test_local_callable.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/validators/test_python.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/validators/test_remote.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/validators/test_sql.py +0 -0
- {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/test_plugin_manager.py +0 -0
{data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/_version.py
RENAMED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.4.0'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 4, 0)
|
|
31
|
+
__version__ = version = '0.4.0rc2'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 4, 0, 'rc2')
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -12,7 +12,7 @@ from data_designer.config.column_configs import (
|
|
|
12
12
|
LLMStructuredColumnConfig,
|
|
13
13
|
LLMTextColumnConfig,
|
|
14
14
|
)
|
|
15
|
-
from data_designer.config.utils.constants import
|
|
15
|
+
from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX
|
|
16
16
|
from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModel, GenerationStrategy
|
|
17
17
|
from data_designer.engine.column_generators.utils.prompt_renderer import (
|
|
18
18
|
PromptType,
|
|
@@ -66,7 +66,7 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
|
|
|
66
66
|
for context in self.config.multi_modal_context:
|
|
67
67
|
multi_modal_context.extend(context.get_contexts(deserialized_record))
|
|
68
68
|
|
|
69
|
-
response, trace = self.model.generate(
|
|
69
|
+
response, reasoning_trace = self.model.generate(
|
|
70
70
|
prompt=self.prompt_renderer.render(
|
|
71
71
|
record=deserialized_record,
|
|
72
72
|
prompt_template=self.config.prompt,
|
|
@@ -87,11 +87,8 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
|
|
|
87
87
|
serialized_output = self.response_recipe.serialize_output(response)
|
|
88
88
|
data[self.config.name] = self._process_serialized_output(serialized_output)
|
|
89
89
|
|
|
90
|
-
|
|
91
|
-
self.config.
|
|
92
|
-
)
|
|
93
|
-
if should_save_trace:
|
|
94
|
-
data[self.config.name + TRACE_COLUMN_POSTFIX] = [message.to_dict() for message in trace]
|
|
90
|
+
if reasoning_trace:
|
|
91
|
+
data[self.config.name + REASONING_TRACE_COLUMN_POSTFIX] = reasoning_trace
|
|
95
92
|
|
|
96
93
|
return data
|
|
97
94
|
|
|
@@ -34,7 +34,6 @@ from data_designer.engine.dataset_builders.multi_column_configs import MultiColu
|
|
|
34
34
|
from data_designer.engine.dataset_builders.utils.concurrency import ConcurrentThreadExecutor
|
|
35
35
|
from data_designer.engine.dataset_builders.utils.config_compiler import compile_dataset_builder_column_configs
|
|
36
36
|
from data_designer.engine.dataset_builders.utils.dataset_batch_manager import DatasetBatchManager
|
|
37
|
-
from data_designer.engine.dataset_builders.utils.progress_tracker import ProgressTracker
|
|
38
37
|
from data_designer.engine.models.telemetry import InferenceEvent, NemoSourceEnum, TaskStatusEnum, TelemetryHandler
|
|
39
38
|
from data_designer.engine.processing.processors.base import Processor
|
|
40
39
|
from data_designer.engine.processing.processors.drop_columns import DropColumnsProcessor
|
|
@@ -222,18 +221,16 @@ class ColumnWiseDatasetBuilder:
|
|
|
222
221
|
"generator so concurrency through threads is not supported."
|
|
223
222
|
)
|
|
224
223
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
224
|
+
logger.info(
|
|
225
|
+
f"🐙 Processing {generator.config.column_type} column '{generator.config.name}' "
|
|
226
|
+
f"with {max_workers} concurrent workers"
|
|
228
227
|
)
|
|
229
|
-
progress_tracker.log_start(max_workers)
|
|
230
|
-
|
|
231
228
|
settings = self._resource_provider.run_config
|
|
232
229
|
with ConcurrentThreadExecutor(
|
|
233
230
|
max_workers=max_workers,
|
|
234
231
|
column_name=generator.config.name,
|
|
235
|
-
result_callback=self.
|
|
236
|
-
error_callback=self.
|
|
232
|
+
result_callback=self._worker_result_callback,
|
|
233
|
+
error_callback=self._worker_error_callback,
|
|
237
234
|
shutdown_error_rate=settings.shutdown_error_rate,
|
|
238
235
|
shutdown_error_window=settings.shutdown_error_window,
|
|
239
236
|
disable_early_shutdown=settings.disable_early_shutdown,
|
|
@@ -241,26 +238,10 @@ class ColumnWiseDatasetBuilder:
|
|
|
241
238
|
for i, record in self.batch_manager.iter_current_batch():
|
|
242
239
|
executor.submit(lambda record: generator.generate(record), record, context={"index": i})
|
|
243
240
|
|
|
244
|
-
progress_tracker.log_final()
|
|
245
|
-
|
|
246
241
|
if len(self._records_to_drop) > 0:
|
|
247
242
|
self.batch_manager.drop_records(self._records_to_drop)
|
|
248
243
|
self._records_to_drop.clear()
|
|
249
244
|
|
|
250
|
-
def _make_result_callback(self, progress_tracker: ProgressTracker) -> Callable[[dict], None]:
|
|
251
|
-
def callback(result: dict, *, context: dict | None = None) -> None:
|
|
252
|
-
self._worker_result_callback(result, context=context)
|
|
253
|
-
progress_tracker.record_success()
|
|
254
|
-
|
|
255
|
-
return callback
|
|
256
|
-
|
|
257
|
-
def _make_error_callback(self, progress_tracker: ProgressTracker) -> Callable[[Exception], None]:
|
|
258
|
-
def callback(exc: Exception, *, context: dict | None = None) -> None:
|
|
259
|
-
self._worker_error_callback(exc, context=context)
|
|
260
|
-
progress_tracker.record_failure()
|
|
261
|
-
|
|
262
|
-
return callback
|
|
263
|
-
|
|
264
245
|
def _write_processed_batch(self, dataframe: pd.DataFrame) -> None:
|
|
265
246
|
self.batch_manager.update_records(dataframe.to_dict(orient="records"))
|
|
266
247
|
self.batch_manager.write()
|
|
@@ -18,7 +18,7 @@ from data_designer.engine.models.errors import (
|
|
|
18
18
|
from data_designer.engine.models.litellm_overrides import CustomRouter, LiteLLMRouterDefaultKwargs
|
|
19
19
|
from data_designer.engine.models.parsers.errors import ParserException
|
|
20
20
|
from data_designer.engine.models.usage import ModelUsageStats, RequestUsageStats, TokenUsageStats
|
|
21
|
-
from data_designer.engine.models.utils import
|
|
21
|
+
from data_designer.engine.models.utils import prompt_to_messages, str_to_message
|
|
22
22
|
from data_designer.engine.secret_resolver import SecretResolver
|
|
23
23
|
from data_designer.lazy_heavy_imports import litellm
|
|
24
24
|
|
|
@@ -67,17 +67,16 @@ class ModelFacade:
|
|
|
67
67
|
return self._usage_stats
|
|
68
68
|
|
|
69
69
|
def completion(
|
|
70
|
-
self, messages: list[
|
|
70
|
+
self, messages: list[dict[str, str]], skip_usage_tracking: bool = False, **kwargs
|
|
71
71
|
) -> litellm.ModelResponse:
|
|
72
|
-
message_payloads = [message.to_dict() for message in messages]
|
|
73
72
|
logger.debug(
|
|
74
73
|
f"Prompting model {self.model_name!r}...",
|
|
75
|
-
extra={"model": self.model_name, "messages":
|
|
74
|
+
extra={"model": self.model_name, "messages": messages},
|
|
76
75
|
)
|
|
77
76
|
response = None
|
|
78
77
|
kwargs = self.consolidate_kwargs(**kwargs)
|
|
79
78
|
try:
|
|
80
|
-
response = self._router.completion(model=self.model_name, messages=
|
|
79
|
+
response = self._router.completion(model=self.model_name, messages=messages, **kwargs)
|
|
81
80
|
logger.debug(
|
|
82
81
|
f"Received completion from model {self.model_name!r}",
|
|
83
82
|
extra={
|
|
@@ -150,7 +149,7 @@ class ModelFacade:
|
|
|
150
149
|
skip_usage_tracking: bool = False,
|
|
151
150
|
purpose: str | None = None,
|
|
152
151
|
**kwargs,
|
|
153
|
-
) -> tuple[Any,
|
|
152
|
+
) -> tuple[Any, str | None]:
|
|
154
153
|
"""Generate a parsed output with correction steps.
|
|
155
154
|
|
|
156
155
|
This generation call will attempt to generate an output which is
|
|
@@ -183,12 +182,6 @@ class ModelFacade:
|
|
|
183
182
|
It is expected to be used by the @catch_llm_exceptions decorator.
|
|
184
183
|
**kwargs: Additional arguments to pass to the model.
|
|
185
184
|
|
|
186
|
-
Returns:
|
|
187
|
-
A tuple containing:
|
|
188
|
-
- The parsed output object from the parser.
|
|
189
|
-
- The full trace of ChatMessage entries in the conversation, including any
|
|
190
|
-
corrections and reasoning traces. Callers can decide whether to store this.
|
|
191
|
-
|
|
192
185
|
Raises:
|
|
193
186
|
GenerationValidationFailureError: If the maximum number of retries or
|
|
194
187
|
correction steps are met and the last response failures on
|
|
@@ -197,17 +190,29 @@ class ModelFacade:
|
|
|
197
190
|
output_obj = None
|
|
198
191
|
curr_num_correction_steps = 0
|
|
199
192
|
curr_num_restarts = 0
|
|
193
|
+
curr_generation_attempt = 0
|
|
194
|
+
max_generation_attempts = (max_correction_steps + 1) * (max_conversation_restarts + 1)
|
|
200
195
|
|
|
201
196
|
starting_messages = prompt_to_messages(
|
|
202
197
|
user_prompt=prompt, system_prompt=system_prompt, multi_modal_context=multi_modal_context
|
|
203
198
|
)
|
|
204
|
-
messages
|
|
199
|
+
messages = deepcopy(starting_messages)
|
|
205
200
|
|
|
206
201
|
while True:
|
|
202
|
+
curr_generation_attempt += 1
|
|
203
|
+
logger.debug(
|
|
204
|
+
f"Starting generation attempt {curr_generation_attempt} of {max_generation_attempts} attempts."
|
|
205
|
+
)
|
|
206
|
+
|
|
207
207
|
completion_response = self.completion(messages, skip_usage_tracking=skip_usage_tracking, **kwargs)
|
|
208
208
|
response = completion_response.choices[0].message.content or ""
|
|
209
209
|
reasoning_trace = getattr(completion_response.choices[0].message, "reasoning_content", None)
|
|
210
|
-
|
|
210
|
+
|
|
211
|
+
if reasoning_trace:
|
|
212
|
+
## There are generally some extra newlines with how these get parsed.
|
|
213
|
+
response = response.strip()
|
|
214
|
+
reasoning_trace = reasoning_trace.strip()
|
|
215
|
+
|
|
211
216
|
curr_num_correction_steps += 1
|
|
212
217
|
|
|
213
218
|
try:
|
|
@@ -218,23 +223,21 @@ class ModelFacade:
|
|
|
218
223
|
raise GenerationValidationFailureError(
|
|
219
224
|
"Unsuccessful generation attempt. No retries were attempted."
|
|
220
225
|
) from exc
|
|
221
|
-
|
|
222
226
|
if curr_num_correction_steps <= max_correction_steps:
|
|
223
|
-
|
|
224
|
-
messages
|
|
225
|
-
|
|
227
|
+
## Add turns to loop-back errors for correction
|
|
228
|
+
messages += [
|
|
229
|
+
str_to_message(content=response, role="assistant"),
|
|
230
|
+
str_to_message(content=str(get_exception_primary_cause(exc)), role="user"),
|
|
231
|
+
]
|
|
226
232
|
elif curr_num_restarts < max_conversation_restarts:
|
|
227
233
|
curr_num_correction_steps = 0
|
|
228
234
|
curr_num_restarts += 1
|
|
229
235
|
messages = deepcopy(starting_messages)
|
|
230
|
-
|
|
231
236
|
else:
|
|
232
237
|
raise GenerationValidationFailureError(
|
|
233
|
-
f"Unsuccessful generation despite {
|
|
234
|
-
f"and {max_conversation_restarts} conversation restarts."
|
|
238
|
+
f"Unsuccessful generation attempt despite {max_generation_attempts} attempts."
|
|
235
239
|
) from exc
|
|
236
|
-
|
|
237
|
-
return output_obj, messages
|
|
240
|
+
return output_obj, reasoning_trace
|
|
238
241
|
|
|
239
242
|
def _get_litellm_deployment(self, model_config: ModelConfig) -> litellm.DeploymentTypedDict:
|
|
240
243
|
provider = self._model_provider_registry.get_provider(model_config.provider)
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def prompt_to_messages(
|
|
10
|
+
*,
|
|
11
|
+
user_prompt: str,
|
|
12
|
+
system_prompt: str | None = None,
|
|
13
|
+
multi_modal_context: list[dict[str, Any]] | None = None,
|
|
14
|
+
) -> list[dict[str, str | list[dict]]]:
|
|
15
|
+
"""Convert a user and system prompt into Messages format.
|
|
16
|
+
|
|
17
|
+
Args:
|
|
18
|
+
user_prompt (str): A user prompt.
|
|
19
|
+
system_prompt (str, optional): An optional system prompt.
|
|
20
|
+
"""
|
|
21
|
+
user_content = user_prompt
|
|
22
|
+
if multi_modal_context and len(multi_modal_context) > 0:
|
|
23
|
+
user_content = []
|
|
24
|
+
for context in multi_modal_context:
|
|
25
|
+
user_content.append(context)
|
|
26
|
+
user_content.append({"type": "text", "text": user_prompt})
|
|
27
|
+
return (
|
|
28
|
+
[
|
|
29
|
+
str_to_message(content=system_prompt, role="system"),
|
|
30
|
+
str_to_message(content=user_content, role="user"),
|
|
31
|
+
]
|
|
32
|
+
if system_prompt
|
|
33
|
+
else [str_to_message(content=user_content, role="user")]
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def str_to_message(content: str | list[dict], role: str = "user") -> dict[str, str | list[dict]]:
|
|
38
|
+
return {"content": content, "role": role}
|
|
@@ -12,15 +12,14 @@ from data_designer.config.column_configs import (
|
|
|
12
12
|
LLMTextColumnConfig,
|
|
13
13
|
)
|
|
14
14
|
from data_designer.config.run_config import RunConfig
|
|
15
|
-
from data_designer.config.utils.constants import TRACE_COLUMN_POSTFIX
|
|
16
15
|
from data_designer.engine.column_generators.generators.base import GenerationStrategy
|
|
17
16
|
from data_designer.engine.column_generators.generators.llm_completion import (
|
|
17
|
+
REASONING_TRACE_COLUMN_POSTFIX,
|
|
18
18
|
LLMCodeCellGenerator,
|
|
19
19
|
LLMJudgeCellGenerator,
|
|
20
20
|
LLMStructuredCellGenerator,
|
|
21
21
|
LLMTextCellGenerator,
|
|
22
22
|
)
|
|
23
|
-
from data_designer.engine.models.utils import ChatMessage
|
|
24
23
|
|
|
25
24
|
|
|
26
25
|
def _create_generator_with_mocks(config_class=LLMTextColumnConfig, **config_kwargs):
|
|
@@ -68,14 +67,14 @@ def _create_generator_with_mocks(config_class=LLMTextColumnConfig, **config_kwar
|
|
|
68
67
|
)
|
|
69
68
|
|
|
70
69
|
|
|
71
|
-
def _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, output="test_output"):
|
|
70
|
+
def _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, output="test_output", reasoning=None):
|
|
72
71
|
"""Helper function to setup common generate method mocks."""
|
|
73
72
|
mock_prompt_renderer.render.side_effect = ["rendered_user_prompt", "rendered_system_prompt"]
|
|
74
73
|
mock_response_recipe.serialize_output.return_value = {"result": output}
|
|
75
|
-
mock_model.generate.return_value = ({"result": output},
|
|
74
|
+
mock_model.generate.return_value = ({"result": output}, reasoning)
|
|
76
75
|
|
|
77
76
|
|
|
78
|
-
def test_generate_method()
|
|
77
|
+
def test_generate_method():
|
|
79
78
|
generator, _, mock_model, _, _, mock_prompt_renderer, mock_response_recipe = _create_generator_with_mocks()
|
|
80
79
|
|
|
81
80
|
# Test basic generation
|
|
@@ -88,19 +87,16 @@ def test_generate_method() -> None:
|
|
|
88
87
|
assert mock_model.generate.call_args[1]["max_correction_steps"] == 2
|
|
89
88
|
assert mock_model.generate.call_args[1]["max_conversation_restarts"] == 7
|
|
90
89
|
assert result["test_column"] == {"result": "test_output"}
|
|
91
|
-
assert "test_column" +
|
|
90
|
+
assert "test_column" + REASONING_TRACE_COLUMN_POSTFIX not in result
|
|
92
91
|
|
|
93
|
-
# Test with
|
|
92
|
+
# Test with reasoning trace
|
|
94
93
|
mock_model.reset_mock()
|
|
95
94
|
mock_prompt_renderer.reset_mock()
|
|
96
|
-
|
|
97
|
-
mock_prompt_renderer.render.side_effect = ["rendered_user_prompt", "rendered_system_prompt"]
|
|
98
|
-
mock_response_recipe.serialize_output.return_value = {"result": "test_output"}
|
|
99
|
-
mock_model.generate.return_value = ({"result": "test_output"}, [ChatMessage.as_user("x")])
|
|
95
|
+
_setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, reasoning="reasoning_trace")
|
|
100
96
|
result = generator.generate(data)
|
|
101
97
|
|
|
102
98
|
assert result["test_column"] == {"result": "test_output"}
|
|
103
|
-
assert result["test_column" +
|
|
99
|
+
assert result["test_column" + REASONING_TRACE_COLUMN_POSTFIX] == "reasoning_trace"
|
|
104
100
|
|
|
105
101
|
# Test multi-modal context is None
|
|
106
102
|
call_args = mock_model.generate.call_args
|
|
@@ -239,7 +235,7 @@ def test_generate_with_errors(error_type, error_message):
|
|
|
239
235
|
|
|
240
236
|
if error_type == "serialization":
|
|
241
237
|
mock_response_recipe.serialize_output.side_effect = Exception(error_message)
|
|
242
|
-
mock_model.generate.return_value = ({"result": "test_output"},
|
|
238
|
+
mock_model.generate.return_value = ({"result": "test_output"}, None)
|
|
243
239
|
elif error_type == "model":
|
|
244
240
|
mock_model.generate.side_effect = Exception(error_message)
|
|
245
241
|
elif error_type == "prompt_render":
|
|
@@ -253,12 +249,13 @@ def test_generate_with_errors(error_type, error_message):
|
|
|
253
249
|
|
|
254
250
|
def test_generate_with_complex_data():
|
|
255
251
|
generator, _, mock_model, _, _, mock_prompt_renderer, mock_response_recipe = _create_generator_with_mocks()
|
|
256
|
-
_setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, "complex_output")
|
|
252
|
+
_setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, "complex_output", "complex_reasoning")
|
|
257
253
|
|
|
258
254
|
data = {"input": "test_input", "nested": {"key": "value"}, "list": [1, 2, 3], "json_string": '{"key": "value"}'}
|
|
259
255
|
result = generator.generate(data)
|
|
260
256
|
|
|
261
257
|
assert result["test_column"] == {"result": "complex_output"}
|
|
258
|
+
assert result["test_column" + REASONING_TRACE_COLUMN_POSTFIX] == "complex_reasoning"
|
|
262
259
|
assert result["input"] == "test_input"
|
|
263
260
|
assert result["nested"] == {"key": "value"}
|
|
264
261
|
assert result["list"] == [1, 2, 3]
|
|
@@ -344,7 +341,7 @@ def test_generator_output_type_handling(
|
|
|
344
341
|
mock_response_recipe.serialize_output.return_value = serialized_output
|
|
345
342
|
stub_resource_provider.model_registry.get_model.return_value.generate.return_value = (
|
|
346
343
|
{"result": "raw_output"},
|
|
347
|
-
|
|
344
|
+
None,
|
|
348
345
|
)
|
|
349
346
|
|
|
350
347
|
data = {"input": "test_input"}
|
|
@@ -378,7 +378,6 @@ def test_fan_out_with_threads_uses_early_shutdown_settings_from_resource_provide
|
|
|
378
378
|
mock_generator.config.column_type = "llm_text"
|
|
379
379
|
|
|
380
380
|
builder.batch_manager = Mock()
|
|
381
|
-
builder.batch_manager.num_records_batch = 10
|
|
382
381
|
builder.batch_manager.iter_current_batch.return_value = []
|
|
383
382
|
|
|
384
383
|
builder._fan_out_with_threads(mock_generator, max_workers=4)
|
{data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_facade.py
RENAMED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
|
-
from
|
|
4
|
+
from collections import namedtuple
|
|
5
5
|
from unittest.mock import patch
|
|
6
6
|
|
|
7
7
|
import pytest
|
|
@@ -10,33 +10,14 @@ from litellm.types.utils import Choices, EmbeddingResponse, Message, ModelRespon
|
|
|
10
10
|
from data_designer.engine.models.errors import ModelGenerationValidationFailureError
|
|
11
11
|
from data_designer.engine.models.facade import ModelFacade
|
|
12
12
|
from data_designer.engine.models.parsers.errors import ParserException
|
|
13
|
-
from data_designer.engine.models.utils import ChatMessage
|
|
14
13
|
|
|
14
|
+
MockMessage = namedtuple("MockMessage", ["content"])
|
|
15
|
+
MockChoice = namedtuple("MockChoice", ["message"])
|
|
16
|
+
MockCompletion = namedtuple("MockCompletion", ["choices"])
|
|
15
17
|
|
|
16
|
-
class FakeMessage:
|
|
17
|
-
"""Unified fake message class for mocking LLM completion responses."""
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
content: str | None,
|
|
22
|
-
reasoning_content: str | None = None,
|
|
23
|
-
) -> None:
|
|
24
|
-
self.content = content
|
|
25
|
-
self.reasoning_content = reasoning_content
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
class FakeChoice:
|
|
29
|
-
def __init__(self, message: FakeMessage) -> None:
|
|
30
|
-
self.message = message
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
class FakeResponse:
|
|
34
|
-
def __init__(self, message: FakeMessage) -> None:
|
|
35
|
-
self.choices = [FakeChoice(message)]
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def mock_oai_response_object(response_text: str) -> FakeResponse:
|
|
39
|
-
return FakeResponse(FakeMessage(content=response_text))
|
|
19
|
+
def mock_oai_response_object(response_text: str) -> MockCompletion:
|
|
20
|
+
return MockCompletion(choices=[MockChoice(message=MockMessage(content=response_text))])
|
|
40
21
|
|
|
41
22
|
|
|
42
23
|
@pytest.fixture
|
|
@@ -49,8 +30,8 @@ def stub_model_facade(stub_model_configs, stub_secrets_resolver, stub_model_prov
|
|
|
49
30
|
|
|
50
31
|
|
|
51
32
|
@pytest.fixture
|
|
52
|
-
def stub_completion_messages()
|
|
53
|
-
return [
|
|
33
|
+
def stub_completion_messages():
|
|
34
|
+
return [{"role": "user", "content": "test"}]
|
|
54
35
|
|
|
55
36
|
|
|
56
37
|
@pytest.fixture
|
|
@@ -112,29 +93,17 @@ def test_generate(
|
|
|
112
93
|
@pytest.mark.parametrize(
|
|
113
94
|
"system_prompt,expected_messages",
|
|
114
95
|
[
|
|
115
|
-
("", [
|
|
116
|
-
("hello!", [
|
|
96
|
+
("", [{"role": "user", "content": "does not matter"}]),
|
|
97
|
+
("hello!", [{"content": "hello!", "role": "system"}, {"role": "user", "content": "does not matter"}]),
|
|
117
98
|
],
|
|
118
99
|
)
|
|
119
100
|
@patch("data_designer.engine.models.facade.ModelFacade.completion", autospec=True)
|
|
120
|
-
def test_generate_with_system_prompt(
|
|
121
|
-
mock_completion
|
|
122
|
-
stub_model_facade: ModelFacade,
|
|
123
|
-
system_prompt: str,
|
|
124
|
-
expected_messages: list[ChatMessage],
|
|
125
|
-
) -> None:
|
|
126
|
-
# Capture messages at call time since they get mutated after the call
|
|
127
|
-
captured_messages = []
|
|
128
|
-
|
|
129
|
-
def capture_and_return(*args: Any, **kwargs: Any) -> ModelResponse:
|
|
130
|
-
captured_messages.append(list(args[1])) # Copy the messages list
|
|
131
|
-
return ModelResponse(choices=Choices(message=Message(content="Hello!")))
|
|
132
|
-
|
|
133
|
-
mock_completion.side_effect = capture_and_return
|
|
101
|
+
def test_generate_with_system_prompt(mock_completion, stub_model_facade, system_prompt, expected_messages):
|
|
102
|
+
mock_completion.return_value = ModelResponse(choices=Choices(message=Message(content="Hello!")))
|
|
134
103
|
|
|
135
104
|
stub_model_facade.generate(prompt="does not matter", system_prompt=system_prompt, parser=lambda x: x)
|
|
136
105
|
assert mock_completion.call_count == 1
|
|
137
|
-
assert
|
|
106
|
+
assert mock_completion.call_args[0][1] == expected_messages
|
|
138
107
|
|
|
139
108
|
|
|
140
109
|
def test_model_alias_property(stub_model_facade, stub_model_configs):
|
|
@@ -182,31 +151,26 @@ def test_consolidate_kwargs(stub_model_configs, stub_model_facade):
|
|
|
182
151
|
)
|
|
183
152
|
@patch("data_designer.engine.models.facade.CustomRouter.completion", autospec=True)
|
|
184
153
|
def test_completion_success(
|
|
185
|
-
mock_router_completion
|
|
186
|
-
stub_completion_messages
|
|
187
|
-
stub_model_configs
|
|
188
|
-
stub_model_facade
|
|
189
|
-
stub_expected_completion_response
|
|
190
|
-
skip_usage_tracking
|
|
191
|
-
)
|
|
154
|
+
mock_router_completion,
|
|
155
|
+
stub_completion_messages,
|
|
156
|
+
stub_model_configs,
|
|
157
|
+
stub_model_facade,
|
|
158
|
+
stub_expected_completion_response,
|
|
159
|
+
skip_usage_tracking,
|
|
160
|
+
):
|
|
192
161
|
mock_router_completion.side_effect = lambda self, model, messages, **kwargs: stub_expected_completion_response
|
|
193
162
|
result = stub_model_facade.completion(stub_completion_messages, skip_usage_tracking=skip_usage_tracking)
|
|
194
|
-
expected_messages = [message.to_dict() for message in stub_completion_messages]
|
|
195
163
|
assert result == stub_expected_completion_response
|
|
196
164
|
assert mock_router_completion.call_count == 1
|
|
197
165
|
assert mock_router_completion.call_args[1] == {
|
|
198
166
|
"model": "stub-model-text",
|
|
199
|
-
"messages":
|
|
167
|
+
"messages": stub_completion_messages,
|
|
200
168
|
**stub_model_configs[0].inference_parameters.generate_kwargs,
|
|
201
169
|
}
|
|
202
170
|
|
|
203
171
|
|
|
204
172
|
@patch("data_designer.engine.models.facade.CustomRouter.completion", autospec=True)
|
|
205
|
-
def test_completion_with_exception(
|
|
206
|
-
mock_router_completion: Any,
|
|
207
|
-
stub_completion_messages: list[ChatMessage],
|
|
208
|
-
stub_model_facade: ModelFacade,
|
|
209
|
-
) -> None:
|
|
173
|
+
def test_completion_with_exception(mock_router_completion, stub_completion_messages, stub_model_facade):
|
|
210
174
|
mock_router_completion.side_effect = Exception("Router error")
|
|
211
175
|
|
|
212
176
|
with pytest.raises(Exception, match="Router error"):
|
|
@@ -215,15 +179,15 @@ def test_completion_with_exception(
|
|
|
215
179
|
|
|
216
180
|
@patch("data_designer.engine.models.facade.CustomRouter.completion", autospec=True)
|
|
217
181
|
def test_completion_with_kwargs(
|
|
218
|
-
mock_router_completion
|
|
219
|
-
stub_completion_messages
|
|
220
|
-
stub_model_configs
|
|
221
|
-
stub_model_facade
|
|
222
|
-
stub_expected_completion_response
|
|
223
|
-
)
|
|
182
|
+
mock_router_completion,
|
|
183
|
+
stub_completion_messages,
|
|
184
|
+
stub_model_configs,
|
|
185
|
+
stub_model_facade,
|
|
186
|
+
stub_expected_completion_response,
|
|
187
|
+
):
|
|
224
188
|
captured_kwargs = {}
|
|
225
189
|
|
|
226
|
-
def mock_completion(self
|
|
190
|
+
def mock_completion(self, model, messages, **kwargs):
|
|
227
191
|
captured_kwargs.update(kwargs)
|
|
228
192
|
return stub_expected_completion_response
|
|
229
193
|
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from data_designer.engine.models.utils import prompt_to_messages, str_to_message
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_str_to_message():
|
|
8
|
+
assert str_to_message("hello") == {"content": "hello", "role": "user"}
|
|
9
|
+
assert str_to_message("hello", role="system") == {"content": "hello", "role": "system"}
|
|
10
|
+
assert str_to_message([{"type": "text", "text": "hello"}]) == {
|
|
11
|
+
"content": [{"type": "text", "text": "hello"}],
|
|
12
|
+
"role": "user",
|
|
13
|
+
}
|
|
14
|
+
assert str_to_message([{"type": "text", "text": "hello"}], role="system") == {
|
|
15
|
+
"content": [{"type": "text", "text": "hello"}],
|
|
16
|
+
"role": "system",
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def test_prompt_to_messages():
|
|
21
|
+
stub_system_prompt = "some system prompt"
|
|
22
|
+
mult_modal_context = {"type": "image_url", "image_url": {"url": "http://example.com/image.png"}}
|
|
23
|
+
assert prompt_to_messages(user_prompt="hello") == [{"content": "hello", "role": "user"}]
|
|
24
|
+
assert prompt_to_messages(user_prompt="hello", system_prompt=stub_system_prompt) == [
|
|
25
|
+
{"content": stub_system_prompt, "role": "system"},
|
|
26
|
+
{"content": "hello", "role": "user"},
|
|
27
|
+
]
|
|
28
|
+
assert prompt_to_messages(user_prompt="hello", multi_modal_context=[mult_modal_context]) == [
|
|
29
|
+
{"content": [mult_modal_context, {"type": "text", "text": "hello"}], "role": "user"}
|
|
30
|
+
]
|
|
31
|
+
assert prompt_to_messages(
|
|
32
|
+
user_prompt="hello", system_prompt=stub_system_prompt, multi_modal_context=[mult_modal_context]
|
|
33
|
+
) == [
|
|
34
|
+
{"content": stub_system_prompt, "role": "system"},
|
|
35
|
+
{"content": [mult_modal_context, {"type": "text", "text": "hello"}], "role": "user"},
|
|
36
|
+
]
|