data-designer-engine 0.4.0__tar.gz → 0.4.0rc2__tar.gz

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (219)
  1. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/PKG-INFO +1 -1
  2. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/_version.py +2 -2
  3. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/llm_completion.py +4 -7
  4. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/column_wise_builder.py +5 -24
  5. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/facade.py +26 -23
  6. data_designer_engine-0.4.0rc2/src/data_designer/engine/models/utils.py +38 -0
  7. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_llm_completion_generators.py +12 -15
  8. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/test_column_wise_builder.py +0 -1
  9. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_facade.py +29 -65
  10. data_designer_engine-0.4.0rc2/tests/engine/models/test_model_utils.py +36 -0
  11. data_designer_engine-0.4.0/src/data_designer/engine/dataset_builders/utils/progress_tracker.py +0 -122
  12. data_designer_engine-0.4.0/src/data_designer/engine/models/utils.py +0 -101
  13. data_designer_engine-0.4.0/tests/engine/dataset_builders/utils/test_progress_tracker.py +0 -290
  14. data_designer_engine-0.4.0/tests/engine/models/test_model_utils.py +0 -23
  15. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/.gitignore +0 -0
  16. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/README.md +0 -0
  17. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/pyproject.toml +0 -0
  18. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/__init__.py +0 -0
  19. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/column_profilers/base.py +0 -0
  20. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -0
  21. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
  22. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/column_statistics.py +0 -0
  23. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/dataset_profiler.py +0 -0
  24. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/errors.py +0 -0
  25. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -0
  26. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -0
  27. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/__init__.py +0 -0
  28. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/__init__.py +0 -0
  29. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/base.py +0 -0
  30. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/embedding.py +0 -0
  31. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
  32. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
  33. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
  34. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
  35. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/registry.py +0 -0
  36. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
  37. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/utils/generator_classification.py +0 -0
  38. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -0
  39. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
  40. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/compiler.py +0 -0
  41. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/configurable_task.py +0 -0
  42. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -0
  43. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/errors.py +0 -0
  44. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
  45. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -0
  46. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/concurrency.py +0 -0
  47. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
  48. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
  49. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
  50. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
  51. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/errors.py +0 -0
  52. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/model_provider.py +0 -0
  53. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/__init__.py +0 -0
  54. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/errors.py +0 -0
  55. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/factory.py +0 -0
  56. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/litellm_overrides.py +0 -0
  57. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/__init__.py +0 -0
  58. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/errors.py +0 -0
  59. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/parser.py +0 -0
  60. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/postprocessors.py +0 -0
  61. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
  62. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/types.py +0 -0
  63. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/recipes/base.py +0 -0
  64. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
  65. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/registry.py +0 -0
  66. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/telemetry.py +0 -0
  67. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/usage.py +0 -0
  68. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/__init__.py +0 -0
  69. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/ast.py +0 -0
  70. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/environment.py +0 -0
  71. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
  72. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/record.py +0 -0
  73. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/__init__.py +0 -0
  74. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
  75. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
  76. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
  77. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
  78. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/processors/base.py +0 -0
  79. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/processors/drop_columns.py +0 -0
  80. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/processors/registry.py +0 -0
  81. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/processors/schema_transform.py +0 -0
  82. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/utils.py +0 -0
  83. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/registry/base.py +0 -0
  84. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
  85. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/registry/errors.py +0 -0
  86. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
  87. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
  88. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/managed_storage.py +0 -0
  89. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/resource_provider.py +0 -0
  90. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/seed_reader.py +0 -0
  91. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/column.py +0 -0
  92. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/constraints.py +0 -0
  93. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -0
  94. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
  95. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
  96. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -0
  97. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
  98. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -0
  99. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
  100. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
  101. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
  102. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
  103. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -0
  104. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/errors.py +0 -0
  105. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/generator.py +0 -0
  106. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
  107. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/people_gen.py +0 -0
  108. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
  109. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/schema.py +0 -0
  110. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
  111. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/utils.py +0 -0
  112. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/secret_resolver.py +0 -0
  113. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/testing/__init__.py +0 -0
  114. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/testing/stubs.py +0 -0
  115. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/testing/utils.py +0 -0
  116. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validation.py +0 -0
  117. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/__init__.py +0 -0
  118. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/base.py +0 -0
  119. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/local_callable.py +0 -0
  120. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/python.py +0 -0
  121. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/remote.py +0 -0
  122. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/sql.py +0 -0
  123. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/conftest.py +0 -0
  124. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/column_profilers/test_base.py +0 -0
  125. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
  126. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/conftest.py +0 -0
  127. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
  128. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
  129. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
  130. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
  131. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_dataset_profiler.py +0 -0
  132. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_errors.py +0 -0
  133. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
  134. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
  135. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/__init__.py +0 -0
  136. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
  137. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_embedding.py +0 -0
  138. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_expression.py +0 -0
  139. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_samplers.py +0 -0
  140. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
  141. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_validation.py +0 -0
  142. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/test_registry.py +0 -0
  143. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
  144. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/utils/test_generator_classification.py +0 -0
  145. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
  146. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
  147. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/conftest.py +0 -0
  148. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
  149. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
  150. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
  151. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
  152. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
  153. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
  154. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/conftest.py +0 -0
  155. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/parsers/test_parser.py +0 -0
  156. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/parsers/test_parsers_types.py +0 -0
  157. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/parsers/test_postprocessors.py +0 -0
  158. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
  159. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/recipes/test_recipe_base.py +0 -0
  160. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/recipes/test_response_recipes.py +0 -0
  161. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/stub_secrets.json +0 -0
  162. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_litellm_overrides.py +0 -0
  163. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_model_errors.py +0 -0
  164. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_model_registry.py +0 -0
  165. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_usage.py +0 -0
  166. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/__init__.py +0 -0
  167. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/__init__.py +0 -0
  168. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/test_ast.py +0 -0
  169. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/test_environment.py +0 -0
  170. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/test_exceptions.py +0 -0
  171. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/test_record.py +0 -0
  172. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/__init__.py +0 -0
  173. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
  174. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
  175. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/test_types.py +0 -0
  176. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/test_validators.py +0 -0
  177. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/processors/__init__.py +0 -0
  178. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/processors/test_drop_columns.py +0 -0
  179. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/processors/test_registry.py +0 -0
  180. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/processors/test_schema_transform.py +0 -0
  181. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/processing/test_utils.py +0 -0
  182. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/registry/__init__.py +0 -0
  183. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/registry/conftest.py +0 -0
  184. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/registry/test_base.py +0 -0
  185. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/registry/test_data_designer_registry.py +0 -0
  186. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/registry/test_errors.py +0 -0
  187. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/__init__.py +0 -0
  188. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/conftest.py +0 -0
  189. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
  190. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
  191. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_managed_storage.py +0 -0
  192. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_resource_provider.py +0 -0
  193. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_seed_reader.py +0 -0
  194. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/conftest.py +0 -0
  195. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
  196. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
  197. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
  198. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
  199. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/entities/test_person.py +0 -0
  200. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
  201. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_column.py +0 -0
  202. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_constraints.py +0 -0
  203. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_generator.py +0 -0
  204. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
  205. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_people_gen.py +0 -0
  206. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_schema.py +0 -0
  207. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_utils.py +0 -0
  208. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_compiler.py +0 -0
  209. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_configurable_task.py +0 -0
  210. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_dataset_metadata.py +0 -0
  211. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_engine_errors.py +0 -0
  212. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_model_provider.py +0 -0
  213. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_secret_resolver.py +0 -0
  214. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/test_validation.py +0 -0
  215. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/validators/test_local_callable.py +0 -0
  216. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/validators/test_python.py +0 -0
  217. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/validators/test_remote.py +0 -0
  218. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/engine/validators/test_sql.py +0 -0
  219. {data_designer_engine-0.4.0 → data_designer_engine-0.4.0rc2}/tests/test_plugin_manager.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer-engine
3
- Version: 0.4.0
3
+ Version: 0.4.0rc2
4
4
  Summary: Generation engine for DataDesigner synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  Classifier: Development Status :: 4 - Beta
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.4.0'
32
- __version_tuple__ = version_tuple = (0, 4, 0)
31
+ __version__ = version = '0.4.0rc2'
32
+ __version_tuple__ = version_tuple = (0, 4, 0, 'rc2')
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -12,7 +12,7 @@ from data_designer.config.column_configs import (
12
12
  LLMStructuredColumnConfig,
13
13
  LLMTextColumnConfig,
14
14
  )
15
- from data_designer.config.utils.constants import TRACE_COLUMN_POSTFIX
15
+ from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX
16
16
  from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModel, GenerationStrategy
17
17
  from data_designer.engine.column_generators.utils.prompt_renderer import (
18
18
  PromptType,
@@ -66,7 +66,7 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
66
66
  for context in self.config.multi_modal_context:
67
67
  multi_modal_context.extend(context.get_contexts(deserialized_record))
68
68
 
69
- response, trace = self.model.generate(
69
+ response, reasoning_trace = self.model.generate(
70
70
  prompt=self.prompt_renderer.render(
71
71
  record=deserialized_record,
72
72
  prompt_template=self.config.prompt,
@@ -87,11 +87,8 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
87
87
  serialized_output = self.response_recipe.serialize_output(response)
88
88
  data[self.config.name] = self._process_serialized_output(serialized_output)
89
89
 
90
- should_save_trace = (
91
- self.config.with_trace or self.resource_provider.run_config.debug_override_save_all_column_traces
92
- )
93
- if should_save_trace:
94
- data[self.config.name + TRACE_COLUMN_POSTFIX] = [message.to_dict() for message in trace]
90
+ if reasoning_trace:
91
+ data[self.config.name + REASONING_TRACE_COLUMN_POSTFIX] = reasoning_trace
95
92
 
96
93
  return data
97
94
 
@@ -34,7 +34,6 @@ from data_designer.engine.dataset_builders.multi_column_configs import MultiColu
34
34
  from data_designer.engine.dataset_builders.utils.concurrency import ConcurrentThreadExecutor
35
35
  from data_designer.engine.dataset_builders.utils.config_compiler import compile_dataset_builder_column_configs
36
36
  from data_designer.engine.dataset_builders.utils.dataset_batch_manager import DatasetBatchManager
37
- from data_designer.engine.dataset_builders.utils.progress_tracker import ProgressTracker
38
37
  from data_designer.engine.models.telemetry import InferenceEvent, NemoSourceEnum, TaskStatusEnum, TelemetryHandler
39
38
  from data_designer.engine.processing.processors.base import Processor
40
39
  from data_designer.engine.processing.processors.drop_columns import DropColumnsProcessor
@@ -222,18 +221,16 @@ class ColumnWiseDatasetBuilder:
222
221
  "generator so concurrency through threads is not supported."
223
222
  )
224
223
 
225
- progress_tracker = ProgressTracker(
226
- total_records=self.batch_manager.num_records_batch,
227
- label=f"{generator.config.column_type} column '{generator.config.name}'",
224
+ logger.info(
225
+ f"🐙 Processing {generator.config.column_type} column '{generator.config.name}' "
226
+ f"with {max_workers} concurrent workers"
228
227
  )
229
- progress_tracker.log_start(max_workers)
230
-
231
228
  settings = self._resource_provider.run_config
232
229
  with ConcurrentThreadExecutor(
233
230
  max_workers=max_workers,
234
231
  column_name=generator.config.name,
235
- result_callback=self._make_result_callback(progress_tracker),
236
- error_callback=self._make_error_callback(progress_tracker),
232
+ result_callback=self._worker_result_callback,
233
+ error_callback=self._worker_error_callback,
237
234
  shutdown_error_rate=settings.shutdown_error_rate,
238
235
  shutdown_error_window=settings.shutdown_error_window,
239
236
  disable_early_shutdown=settings.disable_early_shutdown,
@@ -241,26 +238,10 @@ class ColumnWiseDatasetBuilder:
241
238
  for i, record in self.batch_manager.iter_current_batch():
242
239
  executor.submit(lambda record: generator.generate(record), record, context={"index": i})
243
240
 
244
- progress_tracker.log_final()
245
-
246
241
  if len(self._records_to_drop) > 0:
247
242
  self.batch_manager.drop_records(self._records_to_drop)
248
243
  self._records_to_drop.clear()
249
244
 
250
- def _make_result_callback(self, progress_tracker: ProgressTracker) -> Callable[[dict], None]:
251
- def callback(result: dict, *, context: dict | None = None) -> None:
252
- self._worker_result_callback(result, context=context)
253
- progress_tracker.record_success()
254
-
255
- return callback
256
-
257
- def _make_error_callback(self, progress_tracker: ProgressTracker) -> Callable[[Exception], None]:
258
- def callback(exc: Exception, *, context: dict | None = None) -> None:
259
- self._worker_error_callback(exc, context=context)
260
- progress_tracker.record_failure()
261
-
262
- return callback
263
-
264
245
  def _write_processed_batch(self, dataframe: pd.DataFrame) -> None:
265
246
  self.batch_manager.update_records(dataframe.to_dict(orient="records"))
266
247
  self.batch_manager.write()
@@ -18,7 +18,7 @@ from data_designer.engine.models.errors import (
18
18
  from data_designer.engine.models.litellm_overrides import CustomRouter, LiteLLMRouterDefaultKwargs
19
19
  from data_designer.engine.models.parsers.errors import ParserException
20
20
  from data_designer.engine.models.usage import ModelUsageStats, RequestUsageStats, TokenUsageStats
21
- from data_designer.engine.models.utils import ChatMessage, prompt_to_messages
21
+ from data_designer.engine.models.utils import prompt_to_messages, str_to_message
22
22
  from data_designer.engine.secret_resolver import SecretResolver
23
23
  from data_designer.lazy_heavy_imports import litellm
24
24
 
@@ -67,17 +67,16 @@ class ModelFacade:
67
67
  return self._usage_stats
68
68
 
69
69
  def completion(
70
- self, messages: list[ChatMessage], skip_usage_tracking: bool = False, **kwargs
70
+ self, messages: list[dict[str, str]], skip_usage_tracking: bool = False, **kwargs
71
71
  ) -> litellm.ModelResponse:
72
- message_payloads = [message.to_dict() for message in messages]
73
72
  logger.debug(
74
73
  f"Prompting model {self.model_name!r}...",
75
- extra={"model": self.model_name, "messages": message_payloads},
74
+ extra={"model": self.model_name, "messages": messages},
76
75
  )
77
76
  response = None
78
77
  kwargs = self.consolidate_kwargs(**kwargs)
79
78
  try:
80
- response = self._router.completion(model=self.model_name, messages=message_payloads, **kwargs)
79
+ response = self._router.completion(model=self.model_name, messages=messages, **kwargs)
81
80
  logger.debug(
82
81
  f"Received completion from model {self.model_name!r}",
83
82
  extra={
@@ -150,7 +149,7 @@ class ModelFacade:
150
149
  skip_usage_tracking: bool = False,
151
150
  purpose: str | None = None,
152
151
  **kwargs,
153
- ) -> tuple[Any, list[ChatMessage]]:
152
+ ) -> tuple[Any, str | None]:
154
153
  """Generate a parsed output with correction steps.
155
154
 
156
155
  This generation call will attempt to generate an output which is
@@ -183,12 +182,6 @@ class ModelFacade:
183
182
  It is expected to be used by the @catch_llm_exceptions decorator.
184
183
  **kwargs: Additional arguments to pass to the model.
185
184
 
186
- Returns:
187
- A tuple containing:
188
- - The parsed output object from the parser.
189
- - The full trace of ChatMessage entries in the conversation, including any
190
- corrections and reasoning traces. Callers can decide whether to store this.
191
-
192
185
  Raises:
193
186
  GenerationValidationFailureError: If the maximum number of retries or
194
187
  correction steps are met and the last response failures on
@@ -197,17 +190,29 @@ class ModelFacade:
197
190
  output_obj = None
198
191
  curr_num_correction_steps = 0
199
192
  curr_num_restarts = 0
193
+ curr_generation_attempt = 0
194
+ max_generation_attempts = (max_correction_steps + 1) * (max_conversation_restarts + 1)
200
195
 
201
196
  starting_messages = prompt_to_messages(
202
197
  user_prompt=prompt, system_prompt=system_prompt, multi_modal_context=multi_modal_context
203
198
  )
204
- messages: list[ChatMessage] = deepcopy(starting_messages)
199
+ messages = deepcopy(starting_messages)
205
200
 
206
201
  while True:
202
+ curr_generation_attempt += 1
203
+ logger.debug(
204
+ f"Starting generation attempt {curr_generation_attempt} of {max_generation_attempts} attempts."
205
+ )
206
+
207
207
  completion_response = self.completion(messages, skip_usage_tracking=skip_usage_tracking, **kwargs)
208
208
  response = completion_response.choices[0].message.content or ""
209
209
  reasoning_trace = getattr(completion_response.choices[0].message, "reasoning_content", None)
210
- messages.append(ChatMessage.as_assistant(content=response, reasoning_content=reasoning_trace or None))
210
+
211
+ if reasoning_trace:
212
+ ## There are generally some extra newlines with how these get parsed.
213
+ response = response.strip()
214
+ reasoning_trace = reasoning_trace.strip()
215
+
211
216
  curr_num_correction_steps += 1
212
217
 
213
218
  try:
@@ -218,23 +223,21 @@ class ModelFacade:
218
223
  raise GenerationValidationFailureError(
219
224
  "Unsuccessful generation attempt. No retries were attempted."
220
225
  ) from exc
221
-
222
226
  if curr_num_correction_steps <= max_correction_steps:
223
- # Add user message with error for correction
224
- messages.append(ChatMessage.as_user(content=str(get_exception_primary_cause(exc))))
225
-
227
+ ## Add turns to loop-back errors for correction
228
+ messages += [
229
+ str_to_message(content=response, role="assistant"),
230
+ str_to_message(content=str(get_exception_primary_cause(exc)), role="user"),
231
+ ]
226
232
  elif curr_num_restarts < max_conversation_restarts:
227
233
  curr_num_correction_steps = 0
228
234
  curr_num_restarts += 1
229
235
  messages = deepcopy(starting_messages)
230
-
231
236
  else:
232
237
  raise GenerationValidationFailureError(
233
- f"Unsuccessful generation despite {max_correction_steps} correction steps "
234
- f"and {max_conversation_restarts} conversation restarts."
238
+ f"Unsuccessful generation attempt despite {max_generation_attempts} attempts."
235
239
  ) from exc
236
-
237
- return output_obj, messages
240
+ return output_obj, reasoning_trace
238
241
 
239
242
  def _get_litellm_deployment(self, model_config: ModelConfig) -> litellm.DeploymentTypedDict:
240
243
  provider = self._model_provider_registry.get_provider(model_config.provider)
@@ -0,0 +1,38 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from __future__ import annotations
5
+
6
+ from typing import Any
7
+
8
+
9
+ def prompt_to_messages(
10
+ *,
11
+ user_prompt: str,
12
+ system_prompt: str | None = None,
13
+ multi_modal_context: list[dict[str, Any]] | None = None,
14
+ ) -> list[dict[str, str | list[dict]]]:
15
+ """Convert a user and system prompt into Messages format.
16
+
17
+ Args:
18
+ user_prompt (str): A user prompt.
19
+ system_prompt (str, optional): An optional system prompt.
20
+ """
21
+ user_content = user_prompt
22
+ if multi_modal_context and len(multi_modal_context) > 0:
23
+ user_content = []
24
+ for context in multi_modal_context:
25
+ user_content.append(context)
26
+ user_content.append({"type": "text", "text": user_prompt})
27
+ return (
28
+ [
29
+ str_to_message(content=system_prompt, role="system"),
30
+ str_to_message(content=user_content, role="user"),
31
+ ]
32
+ if system_prompt
33
+ else [str_to_message(content=user_content, role="user")]
34
+ )
35
+
36
+
37
+ def str_to_message(content: str | list[dict], role: str = "user") -> dict[str, str | list[dict]]:
38
+ return {"content": content, "role": role}
@@ -12,15 +12,14 @@ from data_designer.config.column_configs import (
12
12
  LLMTextColumnConfig,
13
13
  )
14
14
  from data_designer.config.run_config import RunConfig
15
- from data_designer.config.utils.constants import TRACE_COLUMN_POSTFIX
16
15
  from data_designer.engine.column_generators.generators.base import GenerationStrategy
17
16
  from data_designer.engine.column_generators.generators.llm_completion import (
17
+ REASONING_TRACE_COLUMN_POSTFIX,
18
18
  LLMCodeCellGenerator,
19
19
  LLMJudgeCellGenerator,
20
20
  LLMStructuredCellGenerator,
21
21
  LLMTextCellGenerator,
22
22
  )
23
- from data_designer.engine.models.utils import ChatMessage
24
23
 
25
24
 
26
25
  def _create_generator_with_mocks(config_class=LLMTextColumnConfig, **config_kwargs):
@@ -68,14 +67,14 @@ def _create_generator_with_mocks(config_class=LLMTextColumnConfig, **config_kwar
68
67
  )
69
68
 
70
69
 
71
- def _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, output="test_output"):
70
+ def _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, output="test_output", reasoning=None):
72
71
  """Helper function to setup common generate method mocks."""
73
72
  mock_prompt_renderer.render.side_effect = ["rendered_user_prompt", "rendered_system_prompt"]
74
73
  mock_response_recipe.serialize_output.return_value = {"result": output}
75
- mock_model.generate.return_value = ({"result": output}, [])
74
+ mock_model.generate.return_value = ({"result": output}, reasoning)
76
75
 
77
76
 
78
- def test_generate_method() -> None:
77
+ def test_generate_method():
79
78
  generator, _, mock_model, _, _, mock_prompt_renderer, mock_response_recipe = _create_generator_with_mocks()
80
79
 
81
80
  # Test basic generation
@@ -88,19 +87,16 @@ def test_generate_method() -> None:
88
87
  assert mock_model.generate.call_args[1]["max_correction_steps"] == 2
89
88
  assert mock_model.generate.call_args[1]["max_conversation_restarts"] == 7
90
89
  assert result["test_column"] == {"result": "test_output"}
91
- assert "test_column" + TRACE_COLUMN_POSTFIX not in result
90
+ assert "test_column" + REASONING_TRACE_COLUMN_POSTFIX not in result
92
91
 
93
- # Test with full trace enabled
92
+ # Test with reasoning trace
94
93
  mock_model.reset_mock()
95
94
  mock_prompt_renderer.reset_mock()
96
- generator.resource_provider.run_config.debug_override_save_all_column_traces = True
97
- mock_prompt_renderer.render.side_effect = ["rendered_user_prompt", "rendered_system_prompt"]
98
- mock_response_recipe.serialize_output.return_value = {"result": "test_output"}
99
- mock_model.generate.return_value = ({"result": "test_output"}, [ChatMessage.as_user("x")])
95
+ _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, reasoning="reasoning_trace")
100
96
  result = generator.generate(data)
101
97
 
102
98
  assert result["test_column"] == {"result": "test_output"}
103
- assert result["test_column" + TRACE_COLUMN_POSTFIX] == [{"role": "user", "content": "x"}]
99
+ assert result["test_column" + REASONING_TRACE_COLUMN_POSTFIX] == "reasoning_trace"
104
100
 
105
101
  # Test multi-modal context is None
106
102
  call_args = mock_model.generate.call_args
@@ -239,7 +235,7 @@ def test_generate_with_errors(error_type, error_message):
239
235
 
240
236
  if error_type == "serialization":
241
237
  mock_response_recipe.serialize_output.side_effect = Exception(error_message)
242
- mock_model.generate.return_value = ({"result": "test_output"}, [])
238
+ mock_model.generate.return_value = ({"result": "test_output"}, None)
243
239
  elif error_type == "model":
244
240
  mock_model.generate.side_effect = Exception(error_message)
245
241
  elif error_type == "prompt_render":
@@ -253,12 +249,13 @@ def test_generate_with_errors(error_type, error_message):
253
249
 
254
250
  def test_generate_with_complex_data():
255
251
  generator, _, mock_model, _, _, mock_prompt_renderer, mock_response_recipe = _create_generator_with_mocks()
256
- _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, "complex_output")
252
+ _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, "complex_output", "complex_reasoning")
257
253
 
258
254
  data = {"input": "test_input", "nested": {"key": "value"}, "list": [1, 2, 3], "json_string": '{"key": "value"}'}
259
255
  result = generator.generate(data)
260
256
 
261
257
  assert result["test_column"] == {"result": "complex_output"}
258
+ assert result["test_column" + REASONING_TRACE_COLUMN_POSTFIX] == "complex_reasoning"
262
259
  assert result["input"] == "test_input"
263
260
  assert result["nested"] == {"key": "value"}
264
261
  assert result["list"] == [1, 2, 3]
@@ -344,7 +341,7 @@ def test_generator_output_type_handling(
344
341
  mock_response_recipe.serialize_output.return_value = serialized_output
345
342
  stub_resource_provider.model_registry.get_model.return_value.generate.return_value = (
346
343
  {"result": "raw_output"},
347
- [],
344
+ None,
348
345
  )
349
346
 
350
347
  data = {"input": "test_input"}
@@ -378,7 +378,6 @@ def test_fan_out_with_threads_uses_early_shutdown_settings_from_resource_provide
378
378
  mock_generator.config.column_type = "llm_text"
379
379
 
380
380
  builder.batch_manager = Mock()
381
- builder.batch_manager.num_records_batch = 10
382
381
  builder.batch_manager.iter_current_batch.return_value = []
383
382
 
384
383
  builder._fan_out_with_threads(mock_generator, max_workers=4)
@@ -1,7 +1,7 @@
1
1
  # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
- from typing import Any
4
+ from collections import namedtuple
5
5
  from unittest.mock import patch
6
6
 
7
7
  import pytest
@@ -10,33 +10,14 @@ from litellm.types.utils import Choices, EmbeddingResponse, Message, ModelRespon
10
10
  from data_designer.engine.models.errors import ModelGenerationValidationFailureError
11
11
  from data_designer.engine.models.facade import ModelFacade
12
12
  from data_designer.engine.models.parsers.errors import ParserException
13
- from data_designer.engine.models.utils import ChatMessage
14
13
 
14
+ MockMessage = namedtuple("MockMessage", ["content"])
15
+ MockChoice = namedtuple("MockChoice", ["message"])
16
+ MockCompletion = namedtuple("MockCompletion", ["choices"])
15
17
 
16
- class FakeMessage:
17
- """Unified fake message class for mocking LLM completion responses."""
18
18
 
19
- def __init__(
20
- self,
21
- content: str | None,
22
- reasoning_content: str | None = None,
23
- ) -> None:
24
- self.content = content
25
- self.reasoning_content = reasoning_content
26
-
27
-
28
- class FakeChoice:
29
- def __init__(self, message: FakeMessage) -> None:
30
- self.message = message
31
-
32
-
33
- class FakeResponse:
34
- def __init__(self, message: FakeMessage) -> None:
35
- self.choices = [FakeChoice(message)]
36
-
37
-
38
- def mock_oai_response_object(response_text: str) -> FakeResponse:
39
- return FakeResponse(FakeMessage(content=response_text))
19
+ def mock_oai_response_object(response_text: str) -> MockCompletion:
20
+ return MockCompletion(choices=[MockChoice(message=MockMessage(content=response_text))])
40
21
 
41
22
 
42
23
  @pytest.fixture
@@ -49,8 +30,8 @@ def stub_model_facade(stub_model_configs, stub_secrets_resolver, stub_model_prov
49
30
 
50
31
 
51
32
  @pytest.fixture
52
- def stub_completion_messages() -> list[ChatMessage]:
53
- return [ChatMessage.as_user("test")]
33
+ def stub_completion_messages():
34
+ return [{"role": "user", "content": "test"}]
54
35
 
55
36
 
56
37
  @pytest.fixture
@@ -112,29 +93,17 @@ def test_generate(
112
93
  @pytest.mark.parametrize(
113
94
  "system_prompt,expected_messages",
114
95
  [
115
- ("", [ChatMessage.as_user("does not matter")]),
116
- ("hello!", [ChatMessage.as_system("hello!"), ChatMessage.as_user("does not matter")]),
96
+ ("", [{"role": "user", "content": "does not matter"}]),
97
+ ("hello!", [{"content": "hello!", "role": "system"}, {"role": "user", "content": "does not matter"}]),
117
98
  ],
118
99
  )
119
100
  @patch("data_designer.engine.models.facade.ModelFacade.completion", autospec=True)
120
- def test_generate_with_system_prompt(
121
- mock_completion: Any,
122
- stub_model_facade: ModelFacade,
123
- system_prompt: str,
124
- expected_messages: list[ChatMessage],
125
- ) -> None:
126
- # Capture messages at call time since they get mutated after the call
127
- captured_messages = []
128
-
129
- def capture_and_return(*args: Any, **kwargs: Any) -> ModelResponse:
130
- captured_messages.append(list(args[1])) # Copy the messages list
131
- return ModelResponse(choices=Choices(message=Message(content="Hello!")))
132
-
133
- mock_completion.side_effect = capture_and_return
101
+ def test_generate_with_system_prompt(mock_completion, stub_model_facade, system_prompt, expected_messages):
102
+ mock_completion.return_value = ModelResponse(choices=Choices(message=Message(content="Hello!")))
134
103
 
135
104
  stub_model_facade.generate(prompt="does not matter", system_prompt=system_prompt, parser=lambda x: x)
136
105
  assert mock_completion.call_count == 1
137
- assert captured_messages[0] == expected_messages
106
+ assert mock_completion.call_args[0][1] == expected_messages
138
107
 
139
108
 
140
109
  def test_model_alias_property(stub_model_facade, stub_model_configs):
@@ -182,31 +151,26 @@ def test_consolidate_kwargs(stub_model_configs, stub_model_facade):
182
151
  )
183
152
  @patch("data_designer.engine.models.facade.CustomRouter.completion", autospec=True)
184
153
  def test_completion_success(
185
- mock_router_completion: Any,
186
- stub_completion_messages: list[ChatMessage],
187
- stub_model_configs: Any,
188
- stub_model_facade: ModelFacade,
189
- stub_expected_completion_response: ModelResponse,
190
- skip_usage_tracking: bool,
191
- ) -> None:
154
+ mock_router_completion,
155
+ stub_completion_messages,
156
+ stub_model_configs,
157
+ stub_model_facade,
158
+ stub_expected_completion_response,
159
+ skip_usage_tracking,
160
+ ):
192
161
  mock_router_completion.side_effect = lambda self, model, messages, **kwargs: stub_expected_completion_response
193
162
  result = stub_model_facade.completion(stub_completion_messages, skip_usage_tracking=skip_usage_tracking)
194
- expected_messages = [message.to_dict() for message in stub_completion_messages]
195
163
  assert result == stub_expected_completion_response
196
164
  assert mock_router_completion.call_count == 1
197
165
  assert mock_router_completion.call_args[1] == {
198
166
  "model": "stub-model-text",
199
- "messages": expected_messages,
167
+ "messages": stub_completion_messages,
200
168
  **stub_model_configs[0].inference_parameters.generate_kwargs,
201
169
  }
202
170
 
203
171
 
204
172
  @patch("data_designer.engine.models.facade.CustomRouter.completion", autospec=True)
205
- def test_completion_with_exception(
206
- mock_router_completion: Any,
207
- stub_completion_messages: list[ChatMessage],
208
- stub_model_facade: ModelFacade,
209
- ) -> None:
173
+ def test_completion_with_exception(mock_router_completion, stub_completion_messages, stub_model_facade):
210
174
  mock_router_completion.side_effect = Exception("Router error")
211
175
 
212
176
  with pytest.raises(Exception, match="Router error"):
@@ -215,15 +179,15 @@ def test_completion_with_exception(
215
179
 
216
180
  @patch("data_designer.engine.models.facade.CustomRouter.completion", autospec=True)
217
181
  def test_completion_with_kwargs(
218
- mock_router_completion: Any,
219
- stub_completion_messages: list[ChatMessage],
220
- stub_model_configs: Any,
221
- stub_model_facade: ModelFacade,
222
- stub_expected_completion_response: ModelResponse,
223
- ) -> None:
182
+ mock_router_completion,
183
+ stub_completion_messages,
184
+ stub_model_configs,
185
+ stub_model_facade,
186
+ stub_expected_completion_response,
187
+ ):
224
188
  captured_kwargs = {}
225
189
 
226
- def mock_completion(self: Any, model: str, messages: list[dict[str, Any]], **kwargs: Any) -> ModelResponse:
190
+ def mock_completion(self, model, messages, **kwargs):
227
191
  captured_kwargs.update(kwargs)
228
192
  return stub_expected_completion_response
229
193
 
@@ -0,0 +1,36 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from data_designer.engine.models.utils import prompt_to_messages, str_to_message
5
+
6
+
7
+ def test_str_to_message():
8
+ assert str_to_message("hello") == {"content": "hello", "role": "user"}
9
+ assert str_to_message("hello", role="system") == {"content": "hello", "role": "system"}
10
+ assert str_to_message([{"type": "text", "text": "hello"}]) == {
11
+ "content": [{"type": "text", "text": "hello"}],
12
+ "role": "user",
13
+ }
14
+ assert str_to_message([{"type": "text", "text": "hello"}], role="system") == {
15
+ "content": [{"type": "text", "text": "hello"}],
16
+ "role": "system",
17
+ }
18
+
19
+
20
+ def test_prompt_to_messages():
21
+ stub_system_prompt = "some system prompt"
22
+ mult_modal_context = {"type": "image_url", "image_url": {"url": "http://example.com/image.png"}}
23
+ assert prompt_to_messages(user_prompt="hello") == [{"content": "hello", "role": "user"}]
24
+ assert prompt_to_messages(user_prompt="hello", system_prompt=stub_system_prompt) == [
25
+ {"content": stub_system_prompt, "role": "system"},
26
+ {"content": "hello", "role": "user"},
27
+ ]
28
+ assert prompt_to_messages(user_prompt="hello", multi_modal_context=[mult_modal_context]) == [
29
+ {"content": [mult_modal_context, {"type": "text", "text": "hello"}], "role": "user"}
30
+ ]
31
+ assert prompt_to_messages(
32
+ user_prompt="hello", system_prompt=stub_system_prompt, multi_modal_context=[mult_modal_context]
33
+ ) == [
34
+ {"content": stub_system_prompt, "role": "system"},
35
+ {"content": [mult_modal_context, {"type": "text", "text": "hello"}], "role": "user"},
36
+ ]