data-designer-engine 0.4.0rc2__tar.gz → 0.4.0rc3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/PKG-INFO +1 -1
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/_version.py +2 -2
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/llm_completion.py +7 -4
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/column_wise_builder.py +24 -5
- data_designer_engine-0.4.0rc3/src/data_designer/engine/dataset_builders/utils/progress_tracker.py +122 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/facade.py +23 -26
- data_designer_engine-0.4.0rc3/src/data_designer/engine/models/utils.py +101 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_llm_completion_generators.py +15 -12
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/test_column_wise_builder.py +1 -0
- data_designer_engine-0.4.0rc3/tests/engine/dataset_builders/utils/test_progress_tracker.py +290 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/test_facade.py +65 -29
- data_designer_engine-0.4.0rc3/tests/engine/models/test_model_utils.py +23 -0
- data_designer_engine-0.4.0rc2/src/data_designer/engine/models/utils.py +0 -38
- data_designer_engine-0.4.0rc2/tests/engine/models/test_model_utils.py +0 -36
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/.gitignore +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/README.md +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/pyproject.toml +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/column_profilers/base.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/column_statistics.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/dataset_profiler.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/base.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/embedding.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/registry.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/utils/generator_classification.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/compiler.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/configurable_task.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/utils/concurrency.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/model_provider.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/factory.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/litellm_overrides.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/parsers/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/parsers/errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/parsers/parser.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/parsers/postprocessors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/parsers/types.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/recipes/base.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/registry.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/telemetry.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/usage.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/ginja/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/ginja/ast.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/ginja/environment.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/ginja/record.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/gsonschema/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/processors/base.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/processors/drop_columns.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/processors/registry.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/processors/schema_transform.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/utils.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/registry/base.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/registry/errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/resources/managed_storage.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/resources/resource_provider.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/resources/seed_reader.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/column.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/constraints.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/generator.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/people_gen.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/schema.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/utils.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/secret_resolver.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/testing/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/testing/stubs.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/testing/utils.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validation.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validators/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validators/base.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validators/local_callable.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validators/python.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validators/remote.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validators/sql.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/conftest.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/column_profilers/test_base.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/conftest.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/test_dataset_profiler.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/test_errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_embedding.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_expression.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_samplers.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_validation.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/test_registry.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/utils/test_generator_classification.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/conftest.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/conftest.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/parsers/test_parser.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/parsers/test_parsers_types.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/parsers/test_postprocessors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/recipes/test_recipe_base.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/recipes/test_response_recipes.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/stub_secrets.json +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/test_litellm_overrides.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/test_model_errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/test_model_registry.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/models/test_usage.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/ginja/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/ginja/test_ast.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/ginja/test_environment.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/ginja/test_exceptions.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/ginja/test_record.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/gsonschema/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/gsonschema/test_types.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/gsonschema/test_validators.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/processors/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/processors/test_drop_columns.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/processors/test_registry.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/processors/test_schema_transform.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/processing/test_utils.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/registry/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/registry/conftest.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/registry/test_base.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/registry/test_data_designer_registry.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/registry/test_errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/resources/__init__.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/resources/conftest.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/resources/test_managed_storage.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/resources/test_resource_provider.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/resources/test_seed_reader.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/conftest.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/entities/test_person.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_column.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_constraints.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_generator.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_people_gen.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_schema.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_utils.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/test_compiler.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/test_configurable_task.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/test_dataset_metadata.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/test_engine_errors.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/test_model_provider.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/test_secret_resolver.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/test_validation.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/validators/test_local_callable.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/validators/test_python.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/validators/test_remote.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/engine/validators/test_sql.py +0 -0
- {data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/tests/test_plugin_manager.py +0 -0
{data_designer_engine-0.4.0rc2 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/_version.py
RENAMED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.4.0rc2'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 4, 0, 'rc2')
|
|
31
|
+
__version__ = version = '0.4.0rc3'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 4, 0, 'rc3')
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -12,7 +12,7 @@ from data_designer.config.column_configs import (
|
|
|
12
12
|
LLMStructuredColumnConfig,
|
|
13
13
|
LLMTextColumnConfig,
|
|
14
14
|
)
|
|
15
|
-
from data_designer.config.utils.constants import
|
|
15
|
+
from data_designer.config.utils.constants import TRACE_COLUMN_POSTFIX
|
|
16
16
|
from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModel, GenerationStrategy
|
|
17
17
|
from data_designer.engine.column_generators.utils.prompt_renderer import (
|
|
18
18
|
PromptType,
|
|
@@ -66,7 +66,7 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
|
|
|
66
66
|
for context in self.config.multi_modal_context:
|
|
67
67
|
multi_modal_context.extend(context.get_contexts(deserialized_record))
|
|
68
68
|
|
|
69
|
-
response,
|
|
69
|
+
response, trace = self.model.generate(
|
|
70
70
|
prompt=self.prompt_renderer.render(
|
|
71
71
|
record=deserialized_record,
|
|
72
72
|
prompt_template=self.config.prompt,
|
|
@@ -87,8 +87,11 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
|
|
|
87
87
|
serialized_output = self.response_recipe.serialize_output(response)
|
|
88
88
|
data[self.config.name] = self._process_serialized_output(serialized_output)
|
|
89
89
|
|
|
90
|
-
|
|
91
|
-
|
|
90
|
+
should_save_trace = (
|
|
91
|
+
self.config.with_trace or self.resource_provider.run_config.debug_override_save_all_column_traces
|
|
92
|
+
)
|
|
93
|
+
if should_save_trace:
|
|
94
|
+
data[self.config.name + TRACE_COLUMN_POSTFIX] = [message.to_dict() for message in trace]
|
|
92
95
|
|
|
93
96
|
return data
|
|
94
97
|
|
|
@@ -34,6 +34,7 @@ from data_designer.engine.dataset_builders.multi_column_configs import MultiColu
|
|
|
34
34
|
from data_designer.engine.dataset_builders.utils.concurrency import ConcurrentThreadExecutor
|
|
35
35
|
from data_designer.engine.dataset_builders.utils.config_compiler import compile_dataset_builder_column_configs
|
|
36
36
|
from data_designer.engine.dataset_builders.utils.dataset_batch_manager import DatasetBatchManager
|
|
37
|
+
from data_designer.engine.dataset_builders.utils.progress_tracker import ProgressTracker
|
|
37
38
|
from data_designer.engine.models.telemetry import InferenceEvent, NemoSourceEnum, TaskStatusEnum, TelemetryHandler
|
|
38
39
|
from data_designer.engine.processing.processors.base import Processor
|
|
39
40
|
from data_designer.engine.processing.processors.drop_columns import DropColumnsProcessor
|
|
@@ -221,16 +222,18 @@ class ColumnWiseDatasetBuilder:
|
|
|
221
222
|
"generator so concurrency through threads is not supported."
|
|
222
223
|
)
|
|
223
224
|
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
f"
|
|
225
|
+
progress_tracker = ProgressTracker(
|
|
226
|
+
total_records=self.batch_manager.num_records_batch,
|
|
227
|
+
label=f"{generator.config.column_type} column '{generator.config.name}'",
|
|
227
228
|
)
|
|
229
|
+
progress_tracker.log_start(max_workers)
|
|
230
|
+
|
|
228
231
|
settings = self._resource_provider.run_config
|
|
229
232
|
with ConcurrentThreadExecutor(
|
|
230
233
|
max_workers=max_workers,
|
|
231
234
|
column_name=generator.config.name,
|
|
232
|
-
result_callback=self.
|
|
233
|
-
error_callback=self.
|
|
235
|
+
result_callback=self._make_result_callback(progress_tracker),
|
|
236
|
+
error_callback=self._make_error_callback(progress_tracker),
|
|
234
237
|
shutdown_error_rate=settings.shutdown_error_rate,
|
|
235
238
|
shutdown_error_window=settings.shutdown_error_window,
|
|
236
239
|
disable_early_shutdown=settings.disable_early_shutdown,
|
|
@@ -238,10 +241,26 @@ class ColumnWiseDatasetBuilder:
|
|
|
238
241
|
for i, record in self.batch_manager.iter_current_batch():
|
|
239
242
|
executor.submit(lambda record: generator.generate(record), record, context={"index": i})
|
|
240
243
|
|
|
244
|
+
progress_tracker.log_final()
|
|
245
|
+
|
|
241
246
|
if len(self._records_to_drop) > 0:
|
|
242
247
|
self.batch_manager.drop_records(self._records_to_drop)
|
|
243
248
|
self._records_to_drop.clear()
|
|
244
249
|
|
|
250
|
+
def _make_result_callback(self, progress_tracker: ProgressTracker) -> Callable[[dict], None]:
    """Build the per-record success callback handed to the thread executor.

    The returned closure forwards each worker result (and its optional
    ``context``) to ``self._worker_result_callback`` and then records one
    success on ``progress_tracker``.

    Args:
        progress_tracker: Tracker whose ``record_success`` is invoked after the
            builder's own result handling has returned.

    Returns:
        A callable accepting ``result`` positionally and ``context`` as a
        keyword-only argument.
    """

    def on_result(result: dict, *, context: dict | None = None) -> None:
        # Builder bookkeeping runs first; the success is only counted if it returns normally.
        self._worker_result_callback(result, context=context)
        progress_tracker.record_success()

    return on_result
|
|
256
|
+
|
|
257
|
+
def _make_error_callback(self, progress_tracker: ProgressTracker) -> Callable[[Exception], None]:
    """Build the per-record failure callback handed to the thread executor.

    The returned closure forwards each worker exception (and its optional
    ``context``) to ``self._worker_error_callback`` and then records one
    failure on ``progress_tracker``.

    Args:
        progress_tracker: Tracker whose ``record_failure`` is invoked after the
            builder's own error handling has returned.

    Returns:
        A callable accepting ``exc`` positionally and ``context`` as a
        keyword-only argument.
    """

    def on_error(exc: Exception, *, context: dict | None = None) -> None:
        # Builder bookkeeping runs first; the failure is only counted if it returns normally.
        self._worker_error_callback(exc, context=context)
        progress_tracker.record_failure()

    return on_error
|
|
263
|
+
|
|
245
264
|
def _write_processed_batch(self, dataframe: pd.DataFrame) -> None:
|
|
246
265
|
self.batch_manager.update_records(dataframe.to_dict(orient="records"))
|
|
247
266
|
self.batch_manager.write()
|
data_designer_engine-0.4.0rc3/src/data_designer/engine/dataset_builders/utils/progress_tracker.py
ADDED
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import logging
|
|
7
|
+
import time
|
|
8
|
+
from threading import Lock
|
|
9
|
+
|
|
10
|
+
from data_designer.logging import RandomEmoji
|
|
11
|
+
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ProgressTracker:
    """
    Thread-safe progress tracker for monitoring concurrent task completion.

    Tracks completed, successful, and failed task counts and logs progress
    at configurable intervals. Designed for use with ConcurrentThreadExecutor
    to provide visibility into long-running batch operations.

    All counter updates and all progress log lines are produced while holding
    ``self.lock``, so every logged line reflects a consistent snapshot of the
    counters.

    Example usage:
        tracker = ProgressTracker(total_records=100, label="LLM_TEXT column 'response'")
        tracker.log_start(max_workers=8)

        # In callbacks from ConcurrentThreadExecutor:
        tracker.record_success()  # or tracker.record_failure()

        # After executor completes:
        tracker.log_final()
    """

    def __init__(self, total_records: int, label: str, log_interval_percent: int = 10):
        """
        Initialize the progress tracker.

        Args:
            total_records: Total number of records to process.
            label: Human-readable label for log messages (e.g., "LLM_TEXT column 'response'").
            log_interval_percent: How often to log progress as a percentage (default 10%).
                Values below 1 are treated as 1.
        """
        self.total_records = total_records
        self.label = label

        self.completed = 0
        self.success = 0
        self.failed = 0

        # Convert the percentage into an absolute record count; never less than one
        # record so that tiny (or empty) batches still get a valid interval.
        interval_fraction = max(1, log_interval_percent) / 100.0
        self.log_interval = max(1, int(total_records * interval_fraction)) if total_records > 0 else 1
        self.next_log_at = self.log_interval

        self.start_time = time.perf_counter()
        self.lock = Lock()
        self._random_emoji = RandomEmoji()

    def log_start(self, max_workers: int) -> None:
        """Log the start of processing with worker count and interval information."""
        logger.info(
            "🐙 Processing %s with %d concurrent workers",
            self.label,
            max_workers,
        )
        logger.info(
            "🧭 %s will report progress every %d record(s).",
            self.label,
            self.log_interval,
        )

    def record_success(self) -> None:
        """Record a successful task completion and log progress if at interval."""
        self._record_completion(success=True)

    def record_failure(self) -> None:
        """Record a failed task completion and log progress if at interval."""
        self._record_completion(success=False)

    def log_final(self) -> None:
        """Log final progress summary (skipped when nothing was completed)."""
        with self.lock:
            if self.completed > 0:
                self._log_progress_unlocked()

    def _record_completion(self, *, success: bool) -> None:
        """Update the counters for one finished task and emit an interim log line if due.

        The counters are updated and the log line is written inside a single
        critical section. Releasing the lock between the two steps would let
        other threads advance the counters first, so the logged line could
        describe a later state than the one that crossed the threshold
        (including a "100%" interim line that is meant to be left to
        ``log_final``).
        """
        with self.lock:
            self.completed += 1
            if success:
                self.success += 1
            else:
                self.failed += 1

            # Interim lines stop short of the last record; the final summary is log_final's job.
            if self.next_log_at <= self.completed < self.total_records:
                # Skip past every threshold already reached so one completion logs at most once.
                while self.next_log_at <= self.completed:
                    self.next_log_at += self.log_interval
                self._log_progress_unlocked()

    def _log_progress_unlocked(self) -> None:
        """Log current progress. Must be called while holding the lock.

        "Unlocked" refers to this method not acquiring ``self.lock`` itself.
        """
        elapsed = time.perf_counter() - self.start_time
        rate = self.completed / elapsed if elapsed > 0 else 0.0
        remaining = max(0, self.total_records - self.completed)
        eta = f"{(remaining / rate):.1f}s" if rate > 0 else "unknown"
        # An empty batch is reported as fully complete rather than dividing by zero.
        percent = (self.completed / self.total_records) * 100 if self.total_records else 100.0

        logger.info(
            "  |-- %s %s progress: %d/%d (%.0f%%) complete, %d ok, %d failed, %.2f rec/s, eta %s",
            self._random_emoji.progress(percent),
            self.label,
            self.completed,
            self.total_records,
            percent,
            self.success,
            self.failed,
            rate,
            eta,
        )
|
|
@@ -18,7 +18,7 @@ from data_designer.engine.models.errors import (
|
|
|
18
18
|
from data_designer.engine.models.litellm_overrides import CustomRouter, LiteLLMRouterDefaultKwargs
|
|
19
19
|
from data_designer.engine.models.parsers.errors import ParserException
|
|
20
20
|
from data_designer.engine.models.usage import ModelUsageStats, RequestUsageStats, TokenUsageStats
|
|
21
|
-
from data_designer.engine.models.utils import
|
|
21
|
+
from data_designer.engine.models.utils import ChatMessage, prompt_to_messages
|
|
22
22
|
from data_designer.engine.secret_resolver import SecretResolver
|
|
23
23
|
from data_designer.lazy_heavy_imports import litellm
|
|
24
24
|
|
|
@@ -67,16 +67,17 @@ class ModelFacade:
|
|
|
67
67
|
return self._usage_stats
|
|
68
68
|
|
|
69
69
|
def completion(
|
|
70
|
-
self, messages: list[
|
|
70
|
+
self, messages: list[ChatMessage], skip_usage_tracking: bool = False, **kwargs
|
|
71
71
|
) -> litellm.ModelResponse:
|
|
72
|
+
message_payloads = [message.to_dict() for message in messages]
|
|
72
73
|
logger.debug(
|
|
73
74
|
f"Prompting model {self.model_name!r}...",
|
|
74
|
-
extra={"model": self.model_name, "messages":
|
|
75
|
+
extra={"model": self.model_name, "messages": message_payloads},
|
|
75
76
|
)
|
|
76
77
|
response = None
|
|
77
78
|
kwargs = self.consolidate_kwargs(**kwargs)
|
|
78
79
|
try:
|
|
79
|
-
response = self._router.completion(model=self.model_name, messages=
|
|
80
|
+
response = self._router.completion(model=self.model_name, messages=message_payloads, **kwargs)
|
|
80
81
|
logger.debug(
|
|
81
82
|
f"Received completion from model {self.model_name!r}",
|
|
82
83
|
extra={
|
|
@@ -149,7 +150,7 @@ class ModelFacade:
|
|
|
149
150
|
skip_usage_tracking: bool = False,
|
|
150
151
|
purpose: str | None = None,
|
|
151
152
|
**kwargs,
|
|
152
|
-
) -> tuple[Any,
|
|
153
|
+
) -> tuple[Any, list[ChatMessage]]:
|
|
153
154
|
"""Generate a parsed output with correction steps.
|
|
154
155
|
|
|
155
156
|
This generation call will attempt to generate an output which is
|
|
@@ -182,6 +183,12 @@ class ModelFacade:
|
|
|
182
183
|
It is expected to be used by the @catch_llm_exceptions decorator.
|
|
183
184
|
**kwargs: Additional arguments to pass to the model.
|
|
184
185
|
|
|
186
|
+
Returns:
|
|
187
|
+
A tuple containing:
|
|
188
|
+
- The parsed output object from the parser.
|
|
189
|
+
- The full trace of ChatMessage entries in the conversation, including any
|
|
190
|
+
corrections and reasoning traces. Callers can decide whether to store this.
|
|
191
|
+
|
|
185
192
|
Raises:
|
|
186
193
|
GenerationValidationFailureError: If the maximum number of retries or
|
|
187
194
|
correction steps are met and the last response failures on
|
|
@@ -190,29 +197,17 @@ class ModelFacade:
|
|
|
190
197
|
output_obj = None
|
|
191
198
|
curr_num_correction_steps = 0
|
|
192
199
|
curr_num_restarts = 0
|
|
193
|
-
curr_generation_attempt = 0
|
|
194
|
-
max_generation_attempts = (max_correction_steps + 1) * (max_conversation_restarts + 1)
|
|
195
200
|
|
|
196
201
|
starting_messages = prompt_to_messages(
|
|
197
202
|
user_prompt=prompt, system_prompt=system_prompt, multi_modal_context=multi_modal_context
|
|
198
203
|
)
|
|
199
|
-
messages = deepcopy(starting_messages)
|
|
204
|
+
messages: list[ChatMessage] = deepcopy(starting_messages)
|
|
200
205
|
|
|
201
206
|
while True:
|
|
202
|
-
curr_generation_attempt += 1
|
|
203
|
-
logger.debug(
|
|
204
|
-
f"Starting generation attempt {curr_generation_attempt} of {max_generation_attempts} attempts."
|
|
205
|
-
)
|
|
206
|
-
|
|
207
207
|
completion_response = self.completion(messages, skip_usage_tracking=skip_usage_tracking, **kwargs)
|
|
208
208
|
response = completion_response.choices[0].message.content or ""
|
|
209
209
|
reasoning_trace = getattr(completion_response.choices[0].message, "reasoning_content", None)
|
|
210
|
-
|
|
211
|
-
if reasoning_trace:
|
|
212
|
-
## There are generally some extra newlines with how these get parsed.
|
|
213
|
-
response = response.strip()
|
|
214
|
-
reasoning_trace = reasoning_trace.strip()
|
|
215
|
-
|
|
210
|
+
messages.append(ChatMessage.as_assistant(content=response, reasoning_content=reasoning_trace or None))
|
|
216
211
|
curr_num_correction_steps += 1
|
|
217
212
|
|
|
218
213
|
try:
|
|
@@ -223,21 +218,23 @@ class ModelFacade:
|
|
|
223
218
|
raise GenerationValidationFailureError(
|
|
224
219
|
"Unsuccessful generation attempt. No retries were attempted."
|
|
225
220
|
) from exc
|
|
221
|
+
|
|
226
222
|
if curr_num_correction_steps <= max_correction_steps:
|
|
227
|
-
|
|
228
|
-
messages
|
|
229
|
-
|
|
230
|
-
str_to_message(content=str(get_exception_primary_cause(exc)), role="user"),
|
|
231
|
-
]
|
|
223
|
+
# Add user message with error for correction
|
|
224
|
+
messages.append(ChatMessage.as_user(content=str(get_exception_primary_cause(exc))))
|
|
225
|
+
|
|
232
226
|
elif curr_num_restarts < max_conversation_restarts:
|
|
233
227
|
curr_num_correction_steps = 0
|
|
234
228
|
curr_num_restarts += 1
|
|
235
229
|
messages = deepcopy(starting_messages)
|
|
230
|
+
|
|
236
231
|
else:
|
|
237
232
|
raise GenerationValidationFailureError(
|
|
238
|
-
f"Unsuccessful generation
|
|
233
|
+
f"Unsuccessful generation despite {max_correction_steps} correction steps "
|
|
234
|
+
f"and {max_conversation_restarts} conversation restarts."
|
|
239
235
|
) from exc
|
|
240
|
-
|
|
236
|
+
|
|
237
|
+
return output_obj, messages
|
|
241
238
|
|
|
242
239
|
def _get_litellm_deployment(self, model_config: ModelConfig) -> litellm.DeploymentTypedDict:
|
|
243
240
|
provider = self._model_provider_registry.get_provider(model_config.provider)
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from typing import Any, Literal
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class ChatMessage:
    """One message in a conversation with an LLM.

    Covers the four roles used in chat-style APIs: system instructions, user
    prompts, assistant replies (optionally carrying reasoning text and tool
    calls), and tool responses.

    Attributes:
        role: Who sent the message: 'user', 'assistant', 'system', or 'tool'.
        content: Message body. Either plain text or a list of content blocks
            for multimodal messages (e.g., text + images).
        reasoning_content: Optional reasoning/thinking text attached to an
            assistant message.
        tool_calls: Tool calls requested by the assistant; each entry is
            expected to carry 'id', 'type', and 'function' keys.
        tool_call_id: ID tying a tool response back to the tool call it
            answers. Expected on messages with role='tool'.
    """

    role: Literal["user", "assistant", "system", "tool"]
    content: str | list[dict[str, Any]] = ""
    reasoning_content: str | None = None
    # default_factory gives every instance its own list instead of a shared one.
    tool_calls: list[dict[str, Any]] = field(default_factory=list)
    tool_call_id: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Serialize the message into the dictionary shape used for API calls.

        Returns:
            A dict that always has 'role' and 'content'. The optional fields
            ('reasoning_content', 'tool_calls', 'tool_call_id') are added only
            when they hold a truthy value.
        """
        payload: dict[str, Any] = {"role": self.role, "content": self.content}
        # Listed in the order the keys should appear in the payload.
        optional_fields = (
            ("reasoning_content", self.reasoning_content),
            ("tool_calls", self.tool_calls),
            ("tool_call_id", self.tool_call_id),
        )
        payload.update({key: value for key, value in optional_fields if value})
        return payload

    @classmethod
    def as_user(cls, content: str | list[dict[str, Any]]) -> ChatMessage:
        """Build a message with role='user'."""
        return cls(role="user", content=content)

    @classmethod
    def as_assistant(
        cls,
        content: str = "",
        reasoning_content: str | None = None,
        tool_calls: list[dict[str, Any]] | None = None,
    ) -> ChatMessage:
        """Build a message with role='assistant'.

        A missing or empty ``tool_calls`` is stored as a fresh empty list.
        """
        calls = tool_calls if tool_calls else []
        return cls(
            role="assistant",
            content=content,
            reasoning_content=reasoning_content,
            tool_calls=calls,
        )

    @classmethod
    def as_system(cls, content: str) -> ChatMessage:
        """Build a message with role='system'."""
        return cls(role="system", content=content)

    @classmethod
    def as_tool(cls, content: str, tool_call_id: str) -> ChatMessage:
        """Build a message with role='tool' answering the call ``tool_call_id``."""
        return cls(role="tool", content=content, tool_call_id=tool_call_id)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def prompt_to_messages(
    *,
    user_prompt: str,
    system_prompt: str | None = None,
    multi_modal_context: list[dict[str, Any]] | None = None,
) -> list[ChatMessage]:
    """Turn a user prompt (plus optional extras) into the opening ChatMessage list.

    Args:
        user_prompt (str): A user prompt.
        system_prompt (str, optional): An optional system prompt. When empty or
            None, no system message is emitted.
        multi_modal_context (list[dict], optional): Content blocks placed ahead
            of the user prompt. When given, the user message content becomes a
            list: these blocks followed by a ``{"type": "text"}`` block holding
            the prompt.

    Returns:
        The system message (if any) followed by the user message.
    """
    if multi_modal_context:
        # Multimodal form: context blocks first, the prompt text as the last block.
        user_content: str | list[dict[str, Any]] = [*multi_modal_context, {"type": "text", "text": user_prompt}]
    else:
        user_content = user_prompt

    conversation: list[ChatMessage] = []
    if system_prompt:
        conversation.append(ChatMessage.as_system(system_prompt))
    conversation.append(ChatMessage.as_user(user_content))
    return conversation
|
|
@@ -12,14 +12,15 @@ from data_designer.config.column_configs import (
|
|
|
12
12
|
LLMTextColumnConfig,
|
|
13
13
|
)
|
|
14
14
|
from data_designer.config.run_config import RunConfig
|
|
15
|
+
from data_designer.config.utils.constants import TRACE_COLUMN_POSTFIX
|
|
15
16
|
from data_designer.engine.column_generators.generators.base import GenerationStrategy
|
|
16
17
|
from data_designer.engine.column_generators.generators.llm_completion import (
|
|
17
|
-
REASONING_TRACE_COLUMN_POSTFIX,
|
|
18
18
|
LLMCodeCellGenerator,
|
|
19
19
|
LLMJudgeCellGenerator,
|
|
20
20
|
LLMStructuredCellGenerator,
|
|
21
21
|
LLMTextCellGenerator,
|
|
22
22
|
)
|
|
23
|
+
from data_designer.engine.models.utils import ChatMessage
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
def _create_generator_with_mocks(config_class=LLMTextColumnConfig, **config_kwargs):
|
|
@@ -67,14 +68,14 @@ def _create_generator_with_mocks(config_class=LLMTextColumnConfig, **config_kwar
|
|
|
67
68
|
)
|
|
68
69
|
|
|
69
70
|
|
|
70
|
-
def _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, output="test_output"
|
|
71
|
+
def _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, output="test_output"):
|
|
71
72
|
"""Helper function to setup common generate method mocks."""
|
|
72
73
|
mock_prompt_renderer.render.side_effect = ["rendered_user_prompt", "rendered_system_prompt"]
|
|
73
74
|
mock_response_recipe.serialize_output.return_value = {"result": output}
|
|
74
|
-
mock_model.generate.return_value = ({"result": output},
|
|
75
|
+
mock_model.generate.return_value = ({"result": output}, [])
|
|
75
76
|
|
|
76
77
|
|
|
77
|
-
def test_generate_method():
|
|
78
|
+
def test_generate_method() -> None:
|
|
78
79
|
generator, _, mock_model, _, _, mock_prompt_renderer, mock_response_recipe = _create_generator_with_mocks()
|
|
79
80
|
|
|
80
81
|
# Test basic generation
|
|
@@ -87,16 +88,19 @@ def test_generate_method():
|
|
|
87
88
|
assert mock_model.generate.call_args[1]["max_correction_steps"] == 2
|
|
88
89
|
assert mock_model.generate.call_args[1]["max_conversation_restarts"] == 7
|
|
89
90
|
assert result["test_column"] == {"result": "test_output"}
|
|
90
|
-
assert "test_column" +
|
|
91
|
+
assert "test_column" + TRACE_COLUMN_POSTFIX not in result
|
|
91
92
|
|
|
92
|
-
# Test with
|
|
93
|
+
# Test with full trace enabled
|
|
93
94
|
mock_model.reset_mock()
|
|
94
95
|
mock_prompt_renderer.reset_mock()
|
|
95
|
-
|
|
96
|
+
generator.resource_provider.run_config.debug_override_save_all_column_traces = True
|
|
97
|
+
mock_prompt_renderer.render.side_effect = ["rendered_user_prompt", "rendered_system_prompt"]
|
|
98
|
+
mock_response_recipe.serialize_output.return_value = {"result": "test_output"}
|
|
99
|
+
mock_model.generate.return_value = ({"result": "test_output"}, [ChatMessage.as_user("x")])
|
|
96
100
|
result = generator.generate(data)
|
|
97
101
|
|
|
98
102
|
assert result["test_column"] == {"result": "test_output"}
|
|
99
|
-
assert result["test_column" +
|
|
103
|
+
assert result["test_column" + TRACE_COLUMN_POSTFIX] == [{"role": "user", "content": "x"}]
|
|
100
104
|
|
|
101
105
|
# Test multi-modal context is None
|
|
102
106
|
call_args = mock_model.generate.call_args
|
|
@@ -235,7 +239,7 @@ def test_generate_with_errors(error_type, error_message):
|
|
|
235
239
|
|
|
236
240
|
if error_type == "serialization":
|
|
237
241
|
mock_response_recipe.serialize_output.side_effect = Exception(error_message)
|
|
238
|
-
mock_model.generate.return_value = ({"result": "test_output"},
|
|
242
|
+
mock_model.generate.return_value = ({"result": "test_output"}, [])
|
|
239
243
|
elif error_type == "model":
|
|
240
244
|
mock_model.generate.side_effect = Exception(error_message)
|
|
241
245
|
elif error_type == "prompt_render":
|
|
@@ -249,13 +253,12 @@ def test_generate_with_errors(error_type, error_message):
|
|
|
249
253
|
|
|
250
254
|
def test_generate_with_complex_data():
|
|
251
255
|
generator, _, mock_model, _, _, mock_prompt_renderer, mock_response_recipe = _create_generator_with_mocks()
|
|
252
|
-
_setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, "complex_output"
|
|
256
|
+
_setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, "complex_output")
|
|
253
257
|
|
|
254
258
|
data = {"input": "test_input", "nested": {"key": "value"}, "list": [1, 2, 3], "json_string": '{"key": "value"}'}
|
|
255
259
|
result = generator.generate(data)
|
|
256
260
|
|
|
257
261
|
assert result["test_column"] == {"result": "complex_output"}
|
|
258
|
-
assert result["test_column" + REASONING_TRACE_COLUMN_POSTFIX] == "complex_reasoning"
|
|
259
262
|
assert result["input"] == "test_input"
|
|
260
263
|
assert result["nested"] == {"key": "value"}
|
|
261
264
|
assert result["list"] == [1, 2, 3]
|
|
@@ -341,7 +344,7 @@ def test_generator_output_type_handling(
|
|
|
341
344
|
mock_response_recipe.serialize_output.return_value = serialized_output
|
|
342
345
|
stub_resource_provider.model_registry.get_model.return_value.generate.return_value = (
|
|
343
346
|
{"result": "raw_output"},
|
|
344
|
-
|
|
347
|
+
[],
|
|
345
348
|
)
|
|
346
349
|
|
|
347
350
|
data = {"input": "test_input"}
|
|
@@ -378,6 +378,7 @@ def test_fan_out_with_threads_uses_early_shutdown_settings_from_resource_provide
|
|
|
378
378
|
mock_generator.config.column_type = "llm_text"
|
|
379
379
|
|
|
380
380
|
builder.batch_manager = Mock()
|
|
381
|
+
builder.batch_manager.num_records_batch = 10
|
|
381
382
|
builder.batch_manager.iter_current_batch.return_value = []
|
|
382
383
|
|
|
383
384
|
builder._fan_out_with_threads(mock_generator, max_workers=4)
|