data-designer-engine 0.4.0rc1__tar.gz → 0.4.0rc3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (219)
  1. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/PKG-INFO +1 -1
  2. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/_version.py +2 -2
  3. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/llm_completion.py +10 -7
  4. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/column_wise_builder.py +24 -5
  5. data_designer_engine-0.4.0rc3/src/data_designer/engine/dataset_builders/utils/progress_tracker.py +122 -0
  6. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/facade.py +23 -26
  7. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/registry.py +5 -0
  8. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/telemetry.py +8 -5
  9. data_designer_engine-0.4.0rc3/src/data_designer/engine/models/utils.py +101 -0
  10. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/processors/schema_transform.py +27 -5
  11. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_llm_completion_generators.py +15 -12
  12. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/test_column_wise_builder.py +1 -0
  13. data_designer_engine-0.4.0rc3/tests/engine/dataset_builders/utils/test_progress_tracker.py +290 -0
  14. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/test_facade.py +65 -29
  15. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/test_model_registry.py +48 -0
  16. data_designer_engine-0.4.0rc3/tests/engine/models/test_model_utils.py +23 -0
  17. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/processors/test_schema_transform.py +60 -2
  18. data_designer_engine-0.4.0rc1/src/data_designer/engine/models/utils.py +0 -38
  19. data_designer_engine-0.4.0rc1/tests/engine/models/test_model_utils.py +0 -36
  20. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/.gitignore +0 -0
  21. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/README.md +0 -0
  22. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/pyproject.toml +0 -0
  23. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/__init__.py +0 -0
  24. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/column_profilers/base.py +0 -0
  25. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -0
  26. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
  27. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/column_statistics.py +0 -0
  28. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/dataset_profiler.py +0 -0
  29. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/errors.py +0 -0
  30. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -0
  31. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -0
  32. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/__init__.py +0 -0
  33. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/__init__.py +0 -0
  34. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/base.py +0 -0
  35. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/embedding.py +0 -0
  36. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
  37. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
  38. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
  39. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
  40. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/registry.py +0 -0
  41. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
  42. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/utils/generator_classification.py +0 -0
  43. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -0
  44. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
  45. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/compiler.py +0 -0
  46. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/configurable_task.py +0 -0
  47. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -0
  48. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/errors.py +0 -0
  49. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
  50. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -0
  51. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/utils/concurrency.py +0 -0
  52. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
  53. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
  54. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
  55. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
  56. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/errors.py +0 -0
  57. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/model_provider.py +0 -0
  58. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/__init__.py +0 -0
  59. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/errors.py +0 -0
  60. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/factory.py +0 -0
  61. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/litellm_overrides.py +0 -0
  62. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/parsers/__init__.py +0 -0
  63. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/parsers/errors.py +0 -0
  64. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/parsers/parser.py +0 -0
  65. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/parsers/postprocessors.py +0 -0
  66. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
  67. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/parsers/types.py +0 -0
  68. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/recipes/base.py +0 -0
  69. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
  70. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/models/usage.py +0 -0
  71. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/ginja/__init__.py +0 -0
  72. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/ginja/ast.py +0 -0
  73. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/ginja/environment.py +0 -0
  74. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
  75. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/ginja/record.py +0 -0
  76. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/gsonschema/__init__.py +0 -0
  77. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
  78. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
  79. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
  80. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
  81. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/processors/base.py +0 -0
  82. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/processors/drop_columns.py +0 -0
  83. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/processors/registry.py +0 -0
  84. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/processing/utils.py +0 -0
  85. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/registry/base.py +0 -0
  86. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
  87. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/registry/errors.py +0 -0
  88. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
  89. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
  90. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/resources/managed_storage.py +0 -0
  91. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/resources/resource_provider.py +0 -0
  92. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/resources/seed_reader.py +0 -0
  93. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/column.py +0 -0
  94. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/constraints.py +0 -0
  95. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -0
  96. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
  97. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
  98. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -0
  99. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
  100. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -0
  101. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
  102. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
  103. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
  104. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
  105. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -0
  106. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/errors.py +0 -0
  107. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/generator.py +0 -0
  108. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
  109. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/people_gen.py +0 -0
  110. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
  111. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/schema.py +0 -0
  112. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
  113. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/sampling_gen/utils.py +0 -0
  114. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/secret_resolver.py +0 -0
  115. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/testing/__init__.py +0 -0
  116. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/testing/stubs.py +0 -0
  117. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/testing/utils.py +0 -0
  118. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validation.py +0 -0
  119. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validators/__init__.py +0 -0
  120. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validators/base.py +0 -0
  121. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validators/local_callable.py +0 -0
  122. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validators/python.py +0 -0
  123. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validators/remote.py +0 -0
  124. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/src/data_designer/engine/validators/sql.py +0 -0
  125. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/conftest.py +0 -0
  126. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/column_profilers/test_base.py +0 -0
  127. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
  128. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/conftest.py +0 -0
  129. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
  130. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
  131. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
  132. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
  133. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/test_dataset_profiler.py +0 -0
  134. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/test_errors.py +0 -0
  135. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
  136. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
  137. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/__init__.py +0 -0
  138. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
  139. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_embedding.py +0 -0
  140. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_expression.py +0 -0
  141. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_samplers.py +0 -0
  142. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
  143. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/generators/test_validation.py +0 -0
  144. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/test_registry.py +0 -0
  145. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
  146. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/utils/test_generator_classification.py +0 -0
  147. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
  148. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
  149. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/conftest.py +0 -0
  150. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
  151. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
  152. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
  153. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
  154. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
  155. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
  156. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/conftest.py +0 -0
  157. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/parsers/test_parser.py +0 -0
  158. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/parsers/test_parsers_types.py +0 -0
  159. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/parsers/test_postprocessors.py +0 -0
  160. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
  161. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/recipes/test_recipe_base.py +0 -0
  162. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/recipes/test_response_recipes.py +0 -0
  163. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/stub_secrets.json +0 -0
  164. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/test_litellm_overrides.py +0 -0
  165. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/test_model_errors.py +0 -0
  166. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/models/test_usage.py +0 -0
  167. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/__init__.py +0 -0
  168. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/ginja/__init__.py +0 -0
  169. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/ginja/test_ast.py +0 -0
  170. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/ginja/test_environment.py +0 -0
  171. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/ginja/test_exceptions.py +0 -0
  172. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/ginja/test_record.py +0 -0
  173. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/gsonschema/__init__.py +0 -0
  174. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
  175. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
  176. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/gsonschema/test_types.py +0 -0
  177. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/gsonschema/test_validators.py +0 -0
  178. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/processors/__init__.py +0 -0
  179. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/processors/test_drop_columns.py +0 -0
  180. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/processors/test_registry.py +0 -0
  181. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/processing/test_utils.py +0 -0
  182. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/registry/__init__.py +0 -0
  183. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/registry/conftest.py +0 -0
  184. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/registry/test_base.py +0 -0
  185. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/registry/test_data_designer_registry.py +0 -0
  186. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/registry/test_errors.py +0 -0
  187. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/resources/__init__.py +0 -0
  188. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/resources/conftest.py +0 -0
  189. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
  190. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
  191. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/resources/test_managed_storage.py +0 -0
  192. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/resources/test_resource_provider.py +0 -0
  193. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/resources/test_seed_reader.py +0 -0
  194. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/conftest.py +0 -0
  195. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
  196. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
  197. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
  198. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
  199. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/entities/test_person.py +0 -0
  200. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
  201. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_column.py +0 -0
  202. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_constraints.py +0 -0
  203. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_generator.py +0 -0
  204. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
  205. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_people_gen.py +0 -0
  206. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_schema.py +0 -0
  207. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/sampling_gen/test_utils.py +0 -0
  208. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/test_compiler.py +0 -0
  209. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/test_configurable_task.py +0 -0
  210. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/test_dataset_metadata.py +0 -0
  211. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/test_engine_errors.py +0 -0
  212. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/test_model_provider.py +0 -0
  213. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/test_secret_resolver.py +0 -0
  214. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/test_validation.py +0 -0
  215. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/validators/test_local_callable.py +0 -0
  216. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/validators/test_python.py +0 -0
  217. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/validators/test_remote.py +0 -0
  218. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/engine/validators/test_sql.py +0 -0
  219. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc3}/tests/test_plugin_manager.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer-engine
3
- Version: 0.4.0rc1
3
+ Version: 0.4.0rc3
4
4
  Summary: Generation engine for DataDesigner synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  Classifier: Development Status :: 4 - Beta
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.4.0rc1'
32
- __version_tuple__ = version_tuple = (0, 4, 0, 'rc1')
31
+ __version__ = version = '0.4.0rc3'
32
+ __version_tuple__ = version_tuple = (0, 4, 0, 'rc3')
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -12,7 +12,7 @@ from data_designer.config.column_configs import (
12
12
  LLMStructuredColumnConfig,
13
13
  LLMTextColumnConfig,
14
14
  )
15
- from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX
15
+ from data_designer.config.utils.constants import TRACE_COLUMN_POSTFIX
16
16
  from data_designer.engine.column_generators.generators.base import ColumnGeneratorWithModel, GenerationStrategy
17
17
  from data_designer.engine.column_generators.utils.prompt_renderer import (
18
18
  PromptType,
@@ -62,11 +62,11 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
62
62
 
63
63
  multi_modal_context = None
64
64
  if self.config.multi_modal_context is not None and len(self.config.multi_modal_context) > 0:
65
- multi_modal_context = [
66
- context.get_context(deserialized_record) for context in self.config.multi_modal_context
67
- ]
65
+ multi_modal_context = []
66
+ for context in self.config.multi_modal_context:
67
+ multi_modal_context.extend(context.get_contexts(deserialized_record))
68
68
 
69
- response, reasoning_trace = self.model.generate(
69
+ response, trace = self.model.generate(
70
70
  prompt=self.prompt_renderer.render(
71
71
  record=deserialized_record,
72
72
  prompt_template=self.config.prompt,
@@ -87,8 +87,11 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
87
87
  serialized_output = self.response_recipe.serialize_output(response)
88
88
  data[self.config.name] = self._process_serialized_output(serialized_output)
89
89
 
90
- if reasoning_trace:
91
- data[self.config.name + REASONING_TRACE_COLUMN_POSTFIX] = reasoning_trace
90
+ should_save_trace = (
91
+ self.config.with_trace or self.resource_provider.run_config.debug_override_save_all_column_traces
92
+ )
93
+ if should_save_trace:
94
+ data[self.config.name + TRACE_COLUMN_POSTFIX] = [message.to_dict() for message in trace]
92
95
 
93
96
  return data
94
97
 
@@ -34,6 +34,7 @@ from data_designer.engine.dataset_builders.multi_column_configs import MultiColu
34
34
  from data_designer.engine.dataset_builders.utils.concurrency import ConcurrentThreadExecutor
35
35
  from data_designer.engine.dataset_builders.utils.config_compiler import compile_dataset_builder_column_configs
36
36
  from data_designer.engine.dataset_builders.utils.dataset_batch_manager import DatasetBatchManager
37
+ from data_designer.engine.dataset_builders.utils.progress_tracker import ProgressTracker
37
38
  from data_designer.engine.models.telemetry import InferenceEvent, NemoSourceEnum, TaskStatusEnum, TelemetryHandler
38
39
  from data_designer.engine.processing.processors.base import Processor
39
40
  from data_designer.engine.processing.processors.drop_columns import DropColumnsProcessor
@@ -221,16 +222,18 @@ class ColumnWiseDatasetBuilder:
221
222
  "generator so concurrency through threads is not supported."
222
223
  )
223
224
 
224
- logger.info(
225
- f"🐙 Processing {generator.config.column_type} column '{generator.config.name}' "
226
- f"with {max_workers} concurrent workers"
225
+ progress_tracker = ProgressTracker(
226
+ total_records=self.batch_manager.num_records_batch,
227
+ label=f"{generator.config.column_type} column '{generator.config.name}'",
227
228
  )
229
+ progress_tracker.log_start(max_workers)
230
+
228
231
  settings = self._resource_provider.run_config
229
232
  with ConcurrentThreadExecutor(
230
233
  max_workers=max_workers,
231
234
  column_name=generator.config.name,
232
- result_callback=self._worker_result_callback,
233
- error_callback=self._worker_error_callback,
235
+ result_callback=self._make_result_callback(progress_tracker),
236
+ error_callback=self._make_error_callback(progress_tracker),
234
237
  shutdown_error_rate=settings.shutdown_error_rate,
235
238
  shutdown_error_window=settings.shutdown_error_window,
236
239
  disable_early_shutdown=settings.disable_early_shutdown,
@@ -238,10 +241,26 @@ class ColumnWiseDatasetBuilder:
238
241
  for i, record in self.batch_manager.iter_current_batch():
239
242
  executor.submit(lambda record: generator.generate(record), record, context={"index": i})
240
243
 
244
+ progress_tracker.log_final()
245
+
241
246
  if len(self._records_to_drop) > 0:
242
247
  self.batch_manager.drop_records(self._records_to_drop)
243
248
  self._records_to_drop.clear()
244
249
 
250
+ def _make_result_callback(self, progress_tracker: ProgressTracker) -> Callable[[dict], None]:
251
+ def callback(result: dict, *, context: dict | None = None) -> None:
252
+ self._worker_result_callback(result, context=context)
253
+ progress_tracker.record_success()
254
+
255
+ return callback
256
+
257
+ def _make_error_callback(self, progress_tracker: ProgressTracker) -> Callable[[Exception], None]:
258
+ def callback(exc: Exception, *, context: dict | None = None) -> None:
259
+ self._worker_error_callback(exc, context=context)
260
+ progress_tracker.record_failure()
261
+
262
+ return callback
263
+
245
264
  def _write_processed_batch(self, dataframe: pd.DataFrame) -> None:
246
265
  self.batch_manager.update_records(dataframe.to_dict(orient="records"))
247
266
  self.batch_manager.write()
@@ -0,0 +1,122 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from __future__ import annotations
5
+
6
+ import logging
7
+ import time
8
+ from threading import Lock
9
+
10
+ from data_designer.logging import RandomEmoji
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
+ class ProgressTracker:
16
+ """
17
+ Thread-safe progress tracker for monitoring concurrent task completion.
18
+
19
+ Tracks completed, successful, and failed task counts and logs progress
20
+ at configurable intervals. Designed for use with ConcurrentThreadExecutor
21
+ to provide visibility into long-running batch operations.
22
+
23
+ Example usage:
24
+ tracker = ProgressTracker(total_records=100, label="LLM_TEXT column 'response'")
25
+ tracker.log_start(max_workers=8)
26
+
27
+ # In callbacks from ConcurrentThreadExecutor:
28
+ tracker.record_success() # or tracker.record_failure()
29
+
30
+ # After executor completes:
31
+ tracker.log_final()
32
+ """
33
+
34
+ def __init__(self, total_records: int, label: str, log_interval_percent: int = 10):
35
+ """
36
+ Initialize the progress tracker.
37
+
38
+ Args:
39
+ total_records: Total number of records to process.
40
+ label: Human-readable label for log messages (e.g., "LLM_TEXT column 'response'").
41
+ log_interval_percent: How often to log progress as a percentage (default 10%).
42
+ """
43
+ self.total_records = total_records
44
+ self.label = label
45
+
46
+ self.completed = 0
47
+ self.success = 0
48
+ self.failed = 0
49
+
50
+ interval_fraction = max(1, log_interval_percent) / 100.0
51
+ self.log_interval = max(1, int(total_records * interval_fraction)) if total_records > 0 else 1
52
+ self.next_log_at = self.log_interval
53
+
54
+ self.start_time = time.perf_counter()
55
+ self.lock = Lock()
56
+ self._random_emoji = RandomEmoji()
57
+
58
+ def log_start(self, max_workers: int) -> None:
59
+ """Log the start of processing with worker count and interval information."""
60
+ logger.info(
61
+ "🐙 Processing %s with %d concurrent workers",
62
+ self.label,
63
+ max_workers,
64
+ )
65
+ logger.info(
66
+ "🧭 %s will report progress every %d record(s).",
67
+ self.label,
68
+ self.log_interval,
69
+ )
70
+
71
+ def record_success(self) -> None:
72
+ """Record a successful task completion and log progress if at interval."""
73
+ self._record_completion(success=True)
74
+
75
+ def record_failure(self) -> None:
76
+ """Record a failed task completion and log progress if at interval."""
77
+ self._record_completion(success=False)
78
+
79
+ def log_final(self) -> None:
80
+ """Log final progress summary."""
81
+ with self.lock:
82
+ if self.completed > 0:
83
+ self._log_progress_unlocked()
84
+
85
+ def _record_completion(self, *, success: bool) -> None:
86
+ should_log = False
87
+ with self.lock:
88
+ self.completed += 1
89
+ if success:
90
+ self.success += 1
91
+ else:
92
+ self.failed += 1
93
+
94
+ if self.completed >= self.next_log_at and self.completed < self.total_records:
95
+ should_log = True
96
+ while self.next_log_at <= self.completed:
97
+ self.next_log_at += self.log_interval
98
+
99
+ if should_log:
100
+ with self.lock:
101
+ self._log_progress_unlocked()
102
+
103
+ def _log_progress_unlocked(self) -> None:
104
+ """Log current progress. Must be called while holding the lock."""
105
+ elapsed = time.perf_counter() - self.start_time
106
+ rate = self.completed / elapsed if elapsed > 0 else 0.0
107
+ remaining = max(0, self.total_records - self.completed)
108
+ eta = f"{(remaining / rate):.1f}s" if rate > 0 else "unknown"
109
+ percent = (self.completed / self.total_records) * 100 if self.total_records else 100.0
110
+
111
+ logger.info(
112
+ " |-- %s %s progress: %d/%d (%.0f%%) complete, %d ok, %d failed, %.2f rec/s, eta %s",
113
+ self._random_emoji.progress(percent),
114
+ self.label,
115
+ self.completed,
116
+ self.total_records,
117
+ percent,
118
+ self.success,
119
+ self.failed,
120
+ rate,
121
+ eta,
122
+ )
@@ -18,7 +18,7 @@ from data_designer.engine.models.errors import (
18
18
  from data_designer.engine.models.litellm_overrides import CustomRouter, LiteLLMRouterDefaultKwargs
19
19
  from data_designer.engine.models.parsers.errors import ParserException
20
20
  from data_designer.engine.models.usage import ModelUsageStats, RequestUsageStats, TokenUsageStats
21
- from data_designer.engine.models.utils import prompt_to_messages, str_to_message
21
+ from data_designer.engine.models.utils import ChatMessage, prompt_to_messages
22
22
  from data_designer.engine.secret_resolver import SecretResolver
23
23
  from data_designer.lazy_heavy_imports import litellm
24
24
 
@@ -67,16 +67,17 @@ class ModelFacade:
67
67
  return self._usage_stats
68
68
 
69
69
  def completion(
70
- self, messages: list[dict[str, str]], skip_usage_tracking: bool = False, **kwargs
70
+ self, messages: list[ChatMessage], skip_usage_tracking: bool = False, **kwargs
71
71
  ) -> litellm.ModelResponse:
72
+ message_payloads = [message.to_dict() for message in messages]
72
73
  logger.debug(
73
74
  f"Prompting model {self.model_name!r}...",
74
- extra={"model": self.model_name, "messages": messages},
75
+ extra={"model": self.model_name, "messages": message_payloads},
75
76
  )
76
77
  response = None
77
78
  kwargs = self.consolidate_kwargs(**kwargs)
78
79
  try:
79
- response = self._router.completion(model=self.model_name, messages=messages, **kwargs)
80
+ response = self._router.completion(model=self.model_name, messages=message_payloads, **kwargs)
80
81
  logger.debug(
81
82
  f"Received completion from model {self.model_name!r}",
82
83
  extra={
@@ -149,7 +150,7 @@ class ModelFacade:
149
150
  skip_usage_tracking: bool = False,
150
151
  purpose: str | None = None,
151
152
  **kwargs,
152
- ) -> tuple[Any, str | None]:
153
+ ) -> tuple[Any, list[ChatMessage]]:
153
154
  """Generate a parsed output with correction steps.
154
155
 
155
156
  This generation call will attempt to generate an output which is
@@ -182,6 +183,12 @@ class ModelFacade:
182
183
  It is expected to be used by the @catch_llm_exceptions decorator.
183
184
  **kwargs: Additional arguments to pass to the model.
184
185
 
186
+ Returns:
187
+ A tuple containing:
188
+ - The parsed output object from the parser.
189
+ - The full trace of ChatMessage entries in the conversation, including any
190
+ corrections and reasoning traces. Callers can decide whether to store this.
191
+
185
192
  Raises:
186
193
  GenerationValidationFailureError: If the maximum number of retries or
187
194
  correction steps are met and the last response failures on
@@ -190,29 +197,17 @@ class ModelFacade:
190
197
  output_obj = None
191
198
  curr_num_correction_steps = 0
192
199
  curr_num_restarts = 0
193
- curr_generation_attempt = 0
194
- max_generation_attempts = (max_correction_steps + 1) * (max_conversation_restarts + 1)
195
200
 
196
201
  starting_messages = prompt_to_messages(
197
202
  user_prompt=prompt, system_prompt=system_prompt, multi_modal_context=multi_modal_context
198
203
  )
199
- messages = deepcopy(starting_messages)
204
+ messages: list[ChatMessage] = deepcopy(starting_messages)
200
205
 
201
206
  while True:
202
- curr_generation_attempt += 1
203
- logger.debug(
204
- f"Starting generation attempt {curr_generation_attempt} of {max_generation_attempts} attempts."
205
- )
206
-
207
207
  completion_response = self.completion(messages, skip_usage_tracking=skip_usage_tracking, **kwargs)
208
208
  response = completion_response.choices[0].message.content or ""
209
209
  reasoning_trace = getattr(completion_response.choices[0].message, "reasoning_content", None)
210
-
211
- if reasoning_trace:
212
- ## There are generally some extra newlines with how these get parsed.
213
- response = response.strip()
214
- reasoning_trace = reasoning_trace.strip()
215
-
210
+ messages.append(ChatMessage.as_assistant(content=response, reasoning_content=reasoning_trace or None))
216
211
  curr_num_correction_steps += 1
217
212
 
218
213
  try:
@@ -223,21 +218,23 @@ class ModelFacade:
223
218
  raise GenerationValidationFailureError(
224
219
  "Unsuccessful generation attempt. No retries were attempted."
225
220
  ) from exc
221
+
226
222
  if curr_num_correction_steps <= max_correction_steps:
227
- ## Add turns to loop-back errors for correction
228
- messages += [
229
- str_to_message(content=response, role="assistant"),
230
- str_to_message(content=str(get_exception_primary_cause(exc)), role="user"),
231
- ]
223
+ # Add user message with error for correction
224
+ messages.append(ChatMessage.as_user(content=str(get_exception_primary_cause(exc))))
225
+
232
226
  elif curr_num_restarts < max_conversation_restarts:
233
227
  curr_num_correction_steps = 0
234
228
  curr_num_restarts += 1
235
229
  messages = deepcopy(starting_messages)
230
+
236
231
  else:
237
232
  raise GenerationValidationFailureError(
238
- f"Unsuccessful generation attempt despite {max_generation_attempts} attempts."
233
+ f"Unsuccessful generation despite {max_correction_steps} correction steps "
234
+ f"and {max_conversation_restarts} conversation restarts."
239
235
  ) from exc
240
- return output_obj, reasoning_trace
236
+
237
+ return output_obj, messages
241
238
 
242
239
  def _get_litellm_deployment(self, model_config: ModelConfig) -> litellm.DeploymentTypedDict:
243
240
  provider = self._model_provider_registry.get_provider(model_config.provider)
@@ -107,6 +107,11 @@ class ModelRegistry:
107
107
  def run_health_check(self, model_aliases: list[str]) -> None:
108
108
  logger.info("🩺 Running health checks for models...")
109
109
  for model_alias in model_aliases:
110
+ model_config = self.get_model_config(model_alias=model_alias)
111
+ if model_config.skip_health_check:
112
+ logger.info(f" |-- ⏭️ Skipping health check for model alias {model_alias!r} (skip_health_check=True)")
113
+ continue
114
+
110
115
  model = self.get_model(model_alias=model_alias)
111
116
  logger.info(
112
117
  f" |-- 👀 Checking {model.model_name!r} in provider named {model.model_provider_name!r} for model alias {model.model_alias!r}..."
@@ -8,6 +8,7 @@ Environment variables:
8
8
  - NEMO_TELEMETRY_ENABLED: Whether telemetry is enabled.
9
9
  - NEMO_DEPLOYMENT_TYPE: The deployment type the event came from.
10
10
  - NEMO_TELEMETRY_ENDPOINT: The endpoint to send the telemetry events to.
11
+ - NEMO_SESSION_PREFIX: Optional prefix to add to session IDs.
11
12
  """
12
13
 
13
14
  from __future__ import annotations
@@ -18,15 +19,12 @@ import platform
18
19
  from dataclasses import dataclass
19
20
  from datetime import datetime, timezone
20
21
  from enum import Enum
21
- from typing import TYPE_CHECKING, Any, ClassVar
22
+ from typing import Any, ClassVar
22
23
 
23
24
  from pydantic import BaseModel, Field
24
25
 
25
26
  from data_designer.lazy_heavy_imports import httpx
26
27
 
27
- if TYPE_CHECKING:
28
- import httpx
29
-
30
28
  TELEMETRY_ENABLED = os.getenv("NEMO_TELEMETRY_ENABLED", "true").lower() in ("1", "true", "yes")
31
29
  CLIENT_ID = "184482118588404"
32
30
  NEMO_TELEMETRY_VERSION = "nemo-telemetry/1.0"
@@ -35,6 +33,7 @@ NEMO_TELEMETRY_ENDPOINT = os.getenv(
35
33
  "NEMO_TELEMETRY_ENDPOINT", "https://events.telemetry.data.nvidia.com/v1.1/events/json"
36
34
  ).lower()
37
35
  CPU_ARCHITECTURE = platform.uname().machine
36
+ SESSION_PREFIX = os.getenv("NEMO_SESSION_PREFIX")
38
37
 
39
38
 
40
39
  class NemoSourceEnum(str, Enum):
@@ -231,7 +230,11 @@ class TelemetryHandler:
231
230
  self._timer_task: asyncio.Task | None = None
232
231
  self._running = False
233
232
  self._source_client_version = source_client_version
234
- self._session_id = session_id
233
+ # Apply session prefix if environment variable is set
234
+ if SESSION_PREFIX:
235
+ self._session_id = f"{SESSION_PREFIX}{session_id}"
236
+ else:
237
+ self._session_id = session_id
235
238
 
236
239
  async def astart(self) -> None:
237
240
  if self._running:
@@ -0,0 +1,101 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from __future__ import annotations
5
+
6
+ from dataclasses import dataclass, field
7
+ from typing import Any, Literal
8
+
9
+
10
+ @dataclass
11
+ class ChatMessage:
12
+ """A chat message in an LLM conversation.
13
+
14
+ This dataclass represents messages exchanged in a conversation with an LLM,
15
+ supporting various message types including user prompts, assistant responses,
16
+ system instructions, and tool interactions.
17
+
18
+ Attributes:
19
+ role: The role of the message sender. One of 'user', 'assistant', 'system', or 'tool'.
20
+ content: The message content. Can be a string or a list of content blocks
21
+ for multimodal messages (e.g., text + images).
22
+ reasoning_content: Optional reasoning/thinking content from the assistant,
23
+ typically from extended thinking or chain-of-thought models.
24
+ tool_calls: Optional list of tool calls requested by the assistant.
25
+ Each tool call contains 'id', 'type', and 'function' keys.
26
+ tool_call_id: Optional ID linking a tool response to its corresponding
27
+ tool call. Required for messages with role='tool'.
28
+ """
29
+
30
+ role: Literal["user", "assistant", "system", "tool"]
31
+ content: str | list[dict[str, Any]] = ""
32
+ reasoning_content: str | None = None
33
+ tool_calls: list[dict[str, Any]] = field(default_factory=list)
34
+ tool_call_id: str | None = None
35
+
36
+ def to_dict(self) -> dict[str, Any]:
37
+ """Convert the message to a dictionary format for API calls.
38
+
39
+ Returns:
40
+ A dictionary containing the message fields. Only includes non-empty
41
+ optional fields to keep the output clean.
42
+ """
43
+ result: dict[str, Any] = {"role": self.role, "content": self.content}
44
+ if self.reasoning_content:
45
+ result["reasoning_content"] = self.reasoning_content
46
+ if self.tool_calls:
47
+ result["tool_calls"] = self.tool_calls
48
+ if self.tool_call_id:
49
+ result["tool_call_id"] = self.tool_call_id
50
+ return result
51
+
52
+ @classmethod
53
+ def as_user(cls, content: str | list[dict[str, Any]]) -> ChatMessage:
54
+ """Create a user message."""
55
+ return cls(role="user", content=content)
56
+
57
+ @classmethod
58
+ def as_assistant(
59
+ cls,
60
+ content: str = "",
61
+ reasoning_content: str | None = None,
62
+ tool_calls: list[dict[str, Any]] | None = None,
63
+ ) -> ChatMessage:
64
+ """Create an assistant message."""
65
+ return cls(
66
+ role="assistant",
67
+ content=content,
68
+ reasoning_content=reasoning_content,
69
+ tool_calls=tool_calls or [],
70
+ )
71
+
72
+ @classmethod
73
+ def as_system(cls, content: str) -> ChatMessage:
74
+ """Create a system message."""
75
+ return cls(role="system", content=content)
76
+
77
+ @classmethod
78
+ def as_tool(cls, content: str, tool_call_id: str) -> ChatMessage:
79
+ """Create a tool response message."""
80
+ return cls(role="tool", content=content, tool_call_id=tool_call_id)
81
+
82
+
83
+ def prompt_to_messages(
84
+ *,
85
+ user_prompt: str,
86
+ system_prompt: str | None = None,
87
+ multi_modal_context: list[dict[str, Any]] | None = None,
88
+ ) -> list[ChatMessage]:
89
+ """Convert a user and system prompt into ChatMessage list.
90
+
91
+ Args:
92
+ user_prompt (str): A user prompt.
93
+ system_prompt (str, optional): An optional system prompt.
94
+ """
95
+ user_content: str | list[dict[str, Any]] = user_prompt
96
+ if multi_modal_context:
97
+ user_content = [*multi_modal_context, {"type": "text", "text": user_prompt}]
98
+
99
+ if system_prompt:
100
+ return [ChatMessage.as_system(system_prompt), ChatMessage.as_user(user_content)]
101
+ return [ChatMessage.as_user(user_content)]
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
 
6
6
  import json
7
7
  import logging
8
- from typing import TYPE_CHECKING
8
+ from typing import TYPE_CHECKING, Any
9
9
 
10
10
  from data_designer.config.processors import SchemaTransformProcessorConfig
11
11
  from data_designer.engine.dataset_builders.artifact_storage import BatchStage
@@ -20,6 +20,26 @@ if TYPE_CHECKING:
20
20
  logger = logging.getLogger(__name__)
21
21
 
22
22
 
23
+ def _json_escape_record(record: dict[str, Any]) -> dict[str, Any]:
24
+ """Escape record values for safe insertion into a JSON template."""
25
+
26
+ def escape_for_json_string(s: str) -> str:
27
+ """Use json.dumps to escape, then strip the surrounding quotes."""
28
+ return json.dumps(s)[1:-1]
29
+
30
+ escaped = {}
31
+ for key, value in record.items():
32
+ if isinstance(value, str):
33
+ escaped[key] = escape_for_json_string(value)
34
+ elif isinstance(value, (dict, list)):
35
+ escaped[key] = escape_for_json_string(json.dumps(value))
36
+ elif value is None:
37
+ escaped[key] = "null"
38
+ else:
39
+ escaped[key] = str(value)
40
+ return escaped
41
+
42
+
23
43
  class SchemaTransformProcessor(WithJinja2UserTemplateRendering, Processor[SchemaTransformProcessorConfig]):
24
44
  @property
25
45
  def template_as_str(self) -> str:
@@ -27,10 +47,12 @@ class SchemaTransformProcessor(WithJinja2UserTemplateRendering, Processor[Schema
27
47
 
28
48
  def process(self, data: pd.DataFrame, *, current_batch_number: int | None = None) -> pd.DataFrame:
29
49
  self.prepare_jinja2_template_renderer(self.template_as_str, data.columns.to_list())
30
- formatted_records = [
31
- json.loads(self.render_template(deserialize_json_values(record)).replace("\n", "\\n"))
32
- for record in data.to_dict(orient="records")
33
- ]
50
+ formatted_records = []
51
+ for record in data.to_dict(orient="records"):
52
+ deserialized = deserialize_json_values(record)
53
+ escaped = _json_escape_record(deserialized)
54
+ rendered = self.render_template(escaped)
55
+ formatted_records.append(json.loads(rendered))
34
56
  formatted_data = pd.DataFrame(formatted_records)
35
57
  if current_batch_number is not None:
36
58
  self.artifact_storage.write_batch_to_parquet_file(
@@ -12,14 +12,15 @@ from data_designer.config.column_configs import (
12
12
  LLMTextColumnConfig,
13
13
  )
14
14
  from data_designer.config.run_config import RunConfig
15
+ from data_designer.config.utils.constants import TRACE_COLUMN_POSTFIX
15
16
  from data_designer.engine.column_generators.generators.base import GenerationStrategy
16
17
  from data_designer.engine.column_generators.generators.llm_completion import (
17
- REASONING_TRACE_COLUMN_POSTFIX,
18
18
  LLMCodeCellGenerator,
19
19
  LLMJudgeCellGenerator,
20
20
  LLMStructuredCellGenerator,
21
21
  LLMTextCellGenerator,
22
22
  )
23
+ from data_designer.engine.models.utils import ChatMessage
23
24
 
24
25
 
25
26
  def _create_generator_with_mocks(config_class=LLMTextColumnConfig, **config_kwargs):
@@ -67,14 +68,14 @@ def _create_generator_with_mocks(config_class=LLMTextColumnConfig, **config_kwar
67
68
  )
68
69
 
69
70
 
70
- def _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, output="test_output", reasoning=None):
71
+ def _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, output="test_output"):
71
72
  """Helper function to setup common generate method mocks."""
72
73
  mock_prompt_renderer.render.side_effect = ["rendered_user_prompt", "rendered_system_prompt"]
73
74
  mock_response_recipe.serialize_output.return_value = {"result": output}
74
- mock_model.generate.return_value = ({"result": output}, reasoning)
75
+ mock_model.generate.return_value = ({"result": output}, [])
75
76
 
76
77
 
77
- def test_generate_method():
78
+ def test_generate_method() -> None:
78
79
  generator, _, mock_model, _, _, mock_prompt_renderer, mock_response_recipe = _create_generator_with_mocks()
79
80
 
80
81
  # Test basic generation
@@ -87,16 +88,19 @@ def test_generate_method():
87
88
  assert mock_model.generate.call_args[1]["max_correction_steps"] == 2
88
89
  assert mock_model.generate.call_args[1]["max_conversation_restarts"] == 7
89
90
  assert result["test_column"] == {"result": "test_output"}
90
- assert "test_column" + REASONING_TRACE_COLUMN_POSTFIX not in result
91
+ assert "test_column" + TRACE_COLUMN_POSTFIX not in result
91
92
 
92
- # Test with reasoning trace
93
+ # Test with full trace enabled
93
94
  mock_model.reset_mock()
94
95
  mock_prompt_renderer.reset_mock()
95
- _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, reasoning="reasoning_trace")
96
+ generator.resource_provider.run_config.debug_override_save_all_column_traces = True
97
+ mock_prompt_renderer.render.side_effect = ["rendered_user_prompt", "rendered_system_prompt"]
98
+ mock_response_recipe.serialize_output.return_value = {"result": "test_output"}
99
+ mock_model.generate.return_value = ({"result": "test_output"}, [ChatMessage.as_user("x")])
96
100
  result = generator.generate(data)
97
101
 
98
102
  assert result["test_column"] == {"result": "test_output"}
99
- assert result["test_column" + REASONING_TRACE_COLUMN_POSTFIX] == "reasoning_trace"
103
+ assert result["test_column" + TRACE_COLUMN_POSTFIX] == [{"role": "user", "content": "x"}]
100
104
 
101
105
  # Test multi-modal context is None
102
106
  call_args = mock_model.generate.call_args
@@ -235,7 +239,7 @@ def test_generate_with_errors(error_type, error_message):
235
239
 
236
240
  if error_type == "serialization":
237
241
  mock_response_recipe.serialize_output.side_effect = Exception(error_message)
238
- mock_model.generate.return_value = ({"result": "test_output"}, None)
242
+ mock_model.generate.return_value = ({"result": "test_output"}, [])
239
243
  elif error_type == "model":
240
244
  mock_model.generate.side_effect = Exception(error_message)
241
245
  elif error_type == "prompt_render":
@@ -249,13 +253,12 @@ def test_generate_with_errors(error_type, error_message):
249
253
 
250
254
  def test_generate_with_complex_data():
251
255
  generator, _, mock_model, _, _, mock_prompt_renderer, mock_response_recipe = _create_generator_with_mocks()
252
- _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, "complex_output", "complex_reasoning")
256
+ _setup_generate_mocks(mock_prompt_renderer, mock_response_recipe, mock_model, "complex_output")
253
257
 
254
258
  data = {"input": "test_input", "nested": {"key": "value"}, "list": [1, 2, 3], "json_string": '{"key": "value"}'}
255
259
  result = generator.generate(data)
256
260
 
257
261
  assert result["test_column"] == {"result": "complex_output"}
258
- assert result["test_column" + REASONING_TRACE_COLUMN_POSTFIX] == "complex_reasoning"
259
262
  assert result["input"] == "test_input"
260
263
  assert result["nested"] == {"key": "value"}
261
264
  assert result["list"] == [1, 2, 3]
@@ -341,7 +344,7 @@ def test_generator_output_type_handling(
341
344
  mock_response_recipe.serialize_output.return_value = serialized_output
342
345
  stub_resource_provider.model_registry.get_model.return_value.generate.return_value = (
343
346
  {"result": "raw_output"},
344
- None,
347
+ [],
345
348
  )
346
349
 
347
350
  data = {"input": "test_input"}