data-designer-engine 0.4.0rc1__tar.gz → 0.4.0rc2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215)
  1. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/PKG-INFO +1 -1
  2. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/_version.py +2 -2
  3. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/llm_completion.py +3 -3
  4. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/registry.py +5 -0
  5. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/telemetry.py +8 -5
  6. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/processors/schema_transform.py +27 -5
  7. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_model_registry.py +48 -0
  8. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_model_utils.py +2 -2
  9. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/processors/test_schema_transform.py +60 -2
  10. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/.gitignore +0 -0
  11. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/README.md +0 -0
  12. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/pyproject.toml +0 -0
  13. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/__init__.py +0 -0
  14. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/column_profilers/base.py +0 -0
  15. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/column_profilers/judge_score_profiler.py +0 -0
  16. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/column_profilers/registry.py +0 -0
  17. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/column_statistics.py +0 -0
  18. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/dataset_profiler.py +0 -0
  19. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/errors.py +0 -0
  20. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/utils/column_statistics_calculations.py +0 -0
  21. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/analysis/utils/judge_score_processing.py +0 -0
  22. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/__init__.py +0 -0
  23. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/__init__.py +0 -0
  24. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/base.py +0 -0
  25. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/embedding.py +0 -0
  26. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/expression.py +0 -0
  27. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/samplers.py +0 -0
  28. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/seed_dataset.py +0 -0
  29. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/generators/validation.py +0 -0
  30. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/registry.py +0 -0
  31. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/utils/errors.py +0 -0
  32. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/utils/generator_classification.py +0 -0
  33. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/utils/judge_score_factory.py +0 -0
  34. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/column_generators/utils/prompt_renderer.py +0 -0
  35. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/compiler.py +0 -0
  36. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/configurable_task.py +0 -0
  37. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/artifact_storage.py +0 -0
  38. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/column_wise_builder.py +0 -0
  39. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/errors.py +0 -0
  40. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/multi_column_configs.py +0 -0
  41. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/__init__.py +0 -0
  42. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/concurrency.py +0 -0
  43. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/config_compiler.py +0 -0
  44. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/dag.py +0 -0
  45. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/dataset_batch_manager.py +0 -0
  46. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/dataset_builders/utils/errors.py +0 -0
  47. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/errors.py +0 -0
  48. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/model_provider.py +0 -0
  49. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/__init__.py +0 -0
  50. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/errors.py +0 -0
  51. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/facade.py +0 -0
  52. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/factory.py +0 -0
  53. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/litellm_overrides.py +0 -0
  54. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/__init__.py +0 -0
  55. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/errors.py +0 -0
  56. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/parser.py +0 -0
  57. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/postprocessors.py +0 -0
  58. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/tag_parsers.py +0 -0
  59. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/parsers/types.py +0 -0
  60. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/recipes/base.py +0 -0
  61. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/recipes/response_recipes.py +0 -0
  62. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/usage.py +0 -0
  63. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/models/utils.py +1 -1
  64. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/__init__.py +0 -0
  65. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/ast.py +0 -0
  66. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/environment.py +0 -0
  67. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/exceptions.py +0 -0
  68. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/ginja/record.py +0 -0
  69. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/__init__.py +0 -0
  70. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/exceptions.py +0 -0
  71. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/schema_transformers.py +0 -0
  72. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/types.py +0 -0
  73. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/gsonschema/validators.py +0 -0
  74. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/processors/base.py +0 -0
  75. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/processors/drop_columns.py +0 -0
  76. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/processors/registry.py +0 -0
  77. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/processing/utils.py +0 -0
  78. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/registry/base.py +0 -0
  79. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/registry/data_designer_registry.py +0 -0
  80. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/registry/errors.py +0 -0
  81. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/managed_dataset_generator.py +0 -0
  82. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/managed_dataset_repository.py +0 -0
  83. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/managed_storage.py +0 -0
  84. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/resource_provider.py +0 -0
  85. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/resources/seed_reader.py +0 -0
  86. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/column.py +0 -0
  87. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/constraints.py +0 -0
  88. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/data_sources/base.py +0 -0
  89. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/data_sources/errors.py +0 -0
  90. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/data_sources/sources.py +0 -0
  91. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/__init__.py +0 -0
  92. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/assets/zip_area_code_map.parquet +0 -0
  93. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/dataset_based_person_fields.py +0 -0
  94. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/email_address_utils.py +0 -0
  95. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/errors.py +0 -0
  96. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/national_id_utils.py +0 -0
  97. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/person.py +0 -0
  98. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/entities/phone_number.py +0 -0
  99. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/errors.py +0 -0
  100. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/generator.py +0 -0
  101. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/jinja_utils.py +0 -0
  102. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/people_gen.py +0 -0
  103. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/person_constants.py +0 -0
  104. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/schema.py +0 -0
  105. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/schema_builder.py +0 -0
  106. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/sampling_gen/utils.py +0 -0
  107. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/secret_resolver.py +0 -0
  108. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/testing/__init__.py +0 -0
  109. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/testing/stubs.py +0 -0
  110. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/testing/utils.py +0 -0
  111. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validation.py +0 -0
  112. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/__init__.py +0 -0
  113. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/base.py +0 -0
  114. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/local_callable.py +0 -0
  115. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/python.py +0 -0
  116. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/remote.py +0 -0
  117. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/src/data_designer/engine/validators/sql.py +0 -0
  118. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/conftest.py +0 -0
  119. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/column_profilers/test_base.py +0 -0
  120. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/column_profilers/test_judge_score_profiler.py +0 -0
  121. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/conftest.py +0 -0
  122. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_column_statistics_calculator.py +0 -0
  123. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_data/artifacts/dataset/column_configs.json +0 -0
  124. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_data/artifacts/dataset/dataset.json +0 -0
  125. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_data/artifacts/dataset/metadata.json +0 -0
  126. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_dataset_profiler.py +0 -0
  127. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/test_errors.py +0 -0
  128. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/utils/test_column_statistics_calculations.py +0 -0
  129. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/analysis/utils/test_judge_score_processing.py +0 -0
  130. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/__init__.py +0 -0
  131. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_column_generator_base.py +0 -0
  132. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_embedding.py +0 -0
  133. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_expression.py +0 -0
  134. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_llm_completion_generators.py +0 -0
  135. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_samplers.py +0 -0
  136. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_seed_dataset.py +0 -0
  137. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/generators/test_validation.py +0 -0
  138. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/test_registry.py +0 -0
  139. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/utils/test_column_generator_errors.py +0 -0
  140. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/utils/test_generator_classification.py +0 -0
  141. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/utils/test_judge_score_factory.py +0 -0
  142. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/column_generators/utils/test_prompt_renderer.py +0 -0
  143. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/conftest.py +0 -0
  144. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/test_artifact_storage.py +0 -0
  145. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/test_column_wise_builder.py +0 -0
  146. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/test_multi_column_configs.py +0 -0
  147. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/utils/test_concurrency.py +0 -0
  148. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/utils/test_config_compiler.py +0 -0
  149. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/utils/test_dag.py +0 -0
  150. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/dataset_builders/utils/test_dataset_batch_manager.py +0 -0
  151. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/conftest.py +0 -0
  152. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/parsers/test_parser.py +0 -0
  153. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/parsers/test_parsers_types.py +0 -0
  154. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/parsers/test_postprocessors.py +0 -0
  155. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/parsers/test_tag_parsers.py +0 -0
  156. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/recipes/test_recipe_base.py +0 -0
  157. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/recipes/test_response_recipes.py +0 -0
  158. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/stub_secrets.json +0 -0
  159. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_facade.py +0 -0
  160. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_litellm_overrides.py +0 -0
  161. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_model_errors.py +0 -0
  162. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/models/test_usage.py +0 -0
  163. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/__init__.py +0 -0
  164. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/__init__.py +0 -0
  165. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/test_ast.py +0 -0
  166. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/test_environment.py +0 -0
  167. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/test_exceptions.py +0 -0
  168. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/ginja/test_record.py +0 -0
  169. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/__init__.py +0 -0
  170. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/test_exceptions.py +0 -0
  171. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/test_schema_transformers.py +0 -0
  172. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/test_types.py +0 -0
  173. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/gsonschema/test_validators.py +0 -0
  174. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/processors/__init__.py +0 -0
  175. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/processors/test_drop_columns.py +0 -0
  176. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/processors/test_registry.py +0 -0
  177. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/processing/test_utils.py +0 -0
  178. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/registry/__init__.py +0 -0
  179. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/registry/conftest.py +0 -0
  180. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/registry/test_base.py +0 -0
  181. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/registry/test_data_designer_registry.py +0 -0
  182. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/registry/test_errors.py +0 -0
  183. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/resources/__init__.py +0 -0
  184. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/resources/conftest.py +0 -0
  185. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_managed_dataset_generator.py +0 -0
  186. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_managed_dataset_repository.py +0 -0
  187. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_managed_storage.py +0 -0
  188. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_resource_provider.py +0 -0
  189. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/resources/test_seed_reader.py +0 -0
  190. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/conftest.py +0 -0
  191. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/data_sources/test_sampler_errors.py +0 -0
  192. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/data_sources/test_sources.py +0 -0
  193. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/entities/test_email_address_utils.py +0 -0
  194. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/entities/test_national_id_utils.py +0 -0
  195. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/entities/test_person.py +0 -0
  196. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/entities/test_phone_number.py +0 -0
  197. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_column.py +0 -0
  198. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_constraints.py +0 -0
  199. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_generator.py +0 -0
  200. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_jinja_utils.py +0 -0
  201. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_people_gen.py +0 -0
  202. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_schema.py +0 -0
  203. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/sampling_gen/test_utils.py +0 -0
  204. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/test_compiler.py +0 -0
  205. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/test_configurable_task.py +0 -0
  206. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/test_dataset_metadata.py +0 -0
  207. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/test_engine_errors.py +0 -0
  208. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/test_model_provider.py +0 -0
  209. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/test_secret_resolver.py +0 -0
  210. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/test_validation.py +0 -0
  211. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/validators/test_local_callable.py +0 -0
  212. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/validators/test_python.py +0 -0
  213. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/validators/test_remote.py +0 -0
  214. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/engine/validators/test_sql.py +0 -0
  215. {data_designer_engine-0.4.0rc1 → data_designer_engine-0.4.0rc2}/tests/test_plugin_manager.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer-engine
3
- Version: 0.4.0rc1
3
+ Version: 0.4.0rc2
4
4
  Summary: Generation engine for DataDesigner synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  Classifier: Development Status :: 4 - Beta
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.4.0rc1'
32
- __version_tuple__ = version_tuple = (0, 4, 0, 'rc1')
31
+ __version__ = version = '0.4.0rc2'
32
+ __version_tuple__ = version_tuple = (0, 4, 0, 'rc2')
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -62,9 +62,9 @@ class ColumnGeneratorWithModelChatCompletion(ColumnGeneratorWithModel[TaskConfig
62
62
 
63
63
  multi_modal_context = None
64
64
  if self.config.multi_modal_context is not None and len(self.config.multi_modal_context) > 0:
65
- multi_modal_context = [
66
- context.get_context(deserialized_record) for context in self.config.multi_modal_context
67
- ]
65
+ multi_modal_context = []
66
+ for context in self.config.multi_modal_context:
67
+ multi_modal_context.extend(context.get_contexts(deserialized_record))
68
68
 
69
69
  response, reasoning_trace = self.model.generate(
70
70
  prompt=self.prompt_renderer.render(
@@ -107,6 +107,11 @@ class ModelRegistry:
107
107
  def run_health_check(self, model_aliases: list[str]) -> None:
108
108
  logger.info("🩺 Running health checks for models...")
109
109
  for model_alias in model_aliases:
110
+ model_config = self.get_model_config(model_alias=model_alias)
111
+ if model_config.skip_health_check:
112
+ logger.info(f" |-- ⏭️ Skipping health check for model alias {model_alias!r} (skip_health_check=True)")
113
+ continue
114
+
110
115
  model = self.get_model(model_alias=model_alias)
111
116
  logger.info(
112
117
  f" |-- 👀 Checking {model.model_name!r} in provider named {model.model_provider_name!r} for model alias {model.model_alias!r}..."
@@ -8,6 +8,7 @@ Environment variables:
8
8
  - NEMO_TELEMETRY_ENABLED: Whether telemetry is enabled.
9
9
  - NEMO_DEPLOYMENT_TYPE: The deployment type the event came from.
10
10
  - NEMO_TELEMETRY_ENDPOINT: The endpoint to send the telemetry events to.
11
+ - NEMO_SESSION_PREFIX: Optional prefix to add to session IDs.
11
12
  """
12
13
 
13
14
  from __future__ import annotations
@@ -18,15 +19,12 @@ import platform
18
19
  from dataclasses import dataclass
19
20
  from datetime import datetime, timezone
20
21
  from enum import Enum
21
- from typing import TYPE_CHECKING, Any, ClassVar
22
+ from typing import Any, ClassVar
22
23
 
23
24
  from pydantic import BaseModel, Field
24
25
 
25
26
  from data_designer.lazy_heavy_imports import httpx
26
27
 
27
- if TYPE_CHECKING:
28
- import httpx
29
-
30
28
  TELEMETRY_ENABLED = os.getenv("NEMO_TELEMETRY_ENABLED", "true").lower() in ("1", "true", "yes")
31
29
  CLIENT_ID = "184482118588404"
32
30
  NEMO_TELEMETRY_VERSION = "nemo-telemetry/1.0"
@@ -35,6 +33,7 @@ NEMO_TELEMETRY_ENDPOINT = os.getenv(
35
33
  "NEMO_TELEMETRY_ENDPOINT", "https://events.telemetry.data.nvidia.com/v1.1/events/json"
36
34
  ).lower()
37
35
  CPU_ARCHITECTURE = platform.uname().machine
36
+ SESSION_PREFIX = os.getenv("NEMO_SESSION_PREFIX")
38
37
 
39
38
 
40
39
  class NemoSourceEnum(str, Enum):
@@ -231,7 +230,11 @@ class TelemetryHandler:
231
230
  self._timer_task: asyncio.Task | None = None
232
231
  self._running = False
233
232
  self._source_client_version = source_client_version
234
- self._session_id = session_id
233
+ # Apply session prefix if environment variable is set
234
+ if SESSION_PREFIX:
235
+ self._session_id = f"{SESSION_PREFIX}{session_id}"
236
+ else:
237
+ self._session_id = session_id
235
238
 
236
239
  async def astart(self) -> None:
237
240
  if self._running:
@@ -5,7 +5,7 @@ from __future__ import annotations
5
5
 
6
6
  import json
7
7
  import logging
8
- from typing import TYPE_CHECKING
8
+ from typing import TYPE_CHECKING, Any
9
9
 
10
10
  from data_designer.config.processors import SchemaTransformProcessorConfig
11
11
  from data_designer.engine.dataset_builders.artifact_storage import BatchStage
@@ -20,6 +20,26 @@ if TYPE_CHECKING:
20
20
  logger = logging.getLogger(__name__)
21
21
 
22
22
 
23
+ def _json_escape_record(record: dict[str, Any]) -> dict[str, Any]:
24
+ """Escape record values for safe insertion into a JSON template."""
25
+
26
+ def escape_for_json_string(s: str) -> str:
27
+ """Use json.dumps to escape, then strip the surrounding quotes."""
28
+ return json.dumps(s)[1:-1]
29
+
30
+ escaped = {}
31
+ for key, value in record.items():
32
+ if isinstance(value, str):
33
+ escaped[key] = escape_for_json_string(value)
34
+ elif isinstance(value, (dict, list)):
35
+ escaped[key] = escape_for_json_string(json.dumps(value))
36
+ elif value is None:
37
+ escaped[key] = "null"
38
+ else:
39
+ escaped[key] = str(value)
40
+ return escaped
41
+
42
+
23
43
  class SchemaTransformProcessor(WithJinja2UserTemplateRendering, Processor[SchemaTransformProcessorConfig]):
24
44
  @property
25
45
  def template_as_str(self) -> str:
@@ -27,10 +47,12 @@ class SchemaTransformProcessor(WithJinja2UserTemplateRendering, Processor[Schema
27
47
 
28
48
  def process(self, data: pd.DataFrame, *, current_batch_number: int | None = None) -> pd.DataFrame:
29
49
  self.prepare_jinja2_template_renderer(self.template_as_str, data.columns.to_list())
30
- formatted_records = [
31
- json.loads(self.render_template(deserialize_json_values(record)).replace("\n", "\\n"))
32
- for record in data.to_dict(orient="records")
33
- ]
50
+ formatted_records = []
51
+ for record in data.to_dict(orient="records"):
52
+ deserialized = deserialize_json_values(record)
53
+ escaped = _json_escape_record(deserialized)
54
+ rendered = self.render_template(escaped)
55
+ formatted_records.append(json.loads(rendered))
34
56
  formatted_data = pd.DataFrame(formatted_records)
35
57
  if current_batch_number is not None:
36
58
  self.artifact_storage.write_batch_to_parquet_file(
@@ -313,6 +313,54 @@ def test_run_health_check_embedding_authentication_error(
313
313
  mock_generate_text_embeddings.assert_called_once()
314
314
 
315
315
 
316
@patch("data_designer.engine.models.facade.ModelFacade.completion", autospec=True)
def test_run_health_check_skip_health_check_flag(
    mock_completion,
    stub_secrets_resolver,
    stub_model_provider_registry,
):
    """Models configured with ``skip_health_check=True`` are left out of the health check."""
    # (alias, model id, extra kwargs). The last entry passes no flag at all so
    # that the config's default value for skip_health_check is exercised.
    model_specs = [
        ("skip-model", "skip-model-id", {"skip_health_check": True}),
        ("check-model", "check-model-id", {"skip_health_check": False}),
        ("default-model", "default-model-id", {}),
    ]

    # Build a fresh registry from the three configs above.
    registry = create_model_registry(
        model_configs=[
            ModelConfig(
                alias=alias,
                model=model_id,
                provider="stub-model-provider",
                inference_parameters=ChatCompletionInferenceParams(),
                **extra_kwargs,
            )
            for alias, model_id, extra_kwargs in model_specs
        ],
        secret_resolver=stub_secrets_resolver,
        model_provider_registry=stub_model_provider_registry,
    )

    registry.run_health_check([alias for alias, _, _ in model_specs])

    # Two completions expected: check-model and default-model (skip-model is skipped).
    assert mock_completion.call_count == 2

    # With autospec=True the first positional argument of each call is the facade instance.
    checked_aliases = {recorded.args[0].model_alias for recorded in mock_completion.call_args_list}
    assert checked_aliases == {"check-model", "default-model"}
362
+
363
+
316
364
  @pytest.mark.parametrize(
317
365
  "alias,expected_result,expected_error",
318
366
  [
@@ -26,11 +26,11 @@ def test_prompt_to_messages():
26
26
  {"content": "hello", "role": "user"},
27
27
  ]
28
28
  assert prompt_to_messages(user_prompt="hello", multi_modal_context=[mult_modal_context]) == [
29
- {"content": [{"type": "text", "text": "hello"}, mult_modal_context], "role": "user"}
29
+ {"content": [mult_modal_context, {"type": "text", "text": "hello"}], "role": "user"}
30
30
  ]
31
31
  assert prompt_to_messages(
32
32
  user_prompt="hello", system_prompt=stub_system_prompt, multi_modal_context=[mult_modal_context]
33
33
  ) == [
34
34
  {"content": stub_system_prompt, "role": "system"},
35
- {"content": [{"type": "text", "text": "hello"}, mult_modal_context], "role": "user"},
35
+ {"content": [mult_modal_context, {"type": "text", "text": "hello"}], "role": "user"},
36
36
  ]
@@ -129,7 +129,65 @@ def test_process_with_json_serialized_values(stub_processor: SchemaTransformProc
129
129
  assert written_dataframe is not None
130
130
  assert len(written_dataframe) == 2
131
131
 
132
- # Verify that nested JSON values are properly deserialized in template rendering
132
+ # Verify that nested JSON values are properly serialized as JSON strings in template rendering
133
133
  first_output = written_dataframe.iloc[0].to_dict()
134
134
  assert first_output["text"] == "hello"
135
- assert first_output["value"] == "{'nested': 'value1'}"
135
+ # Nested JSON should be properly serialized as JSON string (not Python repr)
136
+ assert first_output["value"] == '{"nested": "value1"}'
137
+
138
+
139
def test_process_with_special_characters_in_llm_output(stub_processor: SchemaTransformProcessor) -> None:
    """Check that special characters in column values survive processing unchanged.

    Regression test for GitHub issue #227: SchemaTransformProcessor failed with
    JSONDecodeError when LLM-generated content contained quotes, backslashes, or newlines.
    """
    tricky_values = [
        'He said "Hello"',
        "Line1\nLine2",
        "Path: C:\\Users\\test",
        "Tab\there",
    ]
    df_with_special_chars = pd.DataFrame({"col1": tricky_values, "col2": [1, 2, 3, 4]})

    # The call itself must not raise JSONDecodeError.
    stub_processor.process(df_with_special_chars, current_batch_number=0)
    write_call = stub_processor.artifact_storage.write_batch_to_parquet_file.call_args
    written_dataframe: pd.DataFrame = write_call.kwargs["dataframe"]

    # Every input row must have produced an output row.
    assert written_dataframe is not None
    assert len(written_dataframe) == 4

    # Each output "text" must equal the corresponding raw input string.
    output_rows = written_dataframe.to_dict(orient="records")
    for output_row, expected_text in zip(output_rows, tricky_values):
        assert output_row["text"] == expected_text
173
+
174
+
175
def test_process_with_mixed_special_characters(stub_processor: SchemaTransformProcessor) -> None:
    """A single value mixing double quotes, an apostrophe and a backslash round-trips intact."""
    mixed_text = 'She replied: "I\'m not sure about that\\nLet me think..."'
    df_complex = pd.DataFrame({"col1": [mixed_text], "col2": [42]})

    stub_processor.process(df_complex, current_batch_number=0)
    write_call = stub_processor.artifact_storage.write_batch_to_parquet_file.call_args
    written_dataframe: pd.DataFrame = write_call.kwargs["dataframe"]

    assert len(written_dataframe) == 1
    only_row = written_dataframe.iloc[0].to_dict()
    assert only_row["text"] == mixed_text
@@ -21,9 +21,9 @@ def prompt_to_messages(
21
21
  user_content = user_prompt
22
22
  if multi_modal_context and len(multi_modal_context) > 0:
23
23
  user_content = []
24
- user_content.append({"type": "text", "text": user_prompt})
25
24
  for context in multi_modal_context:
26
25
  user_content.append(context)
26
+ user_content.append({"type": "text", "text": user_prompt})
27
27
  return (
28
28
  [
29
29
  str_to_message(content=system_prompt, role="system"),