data-designer-config 0.4.0rc2__tar.gz → 0.4.0rc3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/PKG-INFO +1 -1
  2. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/_version.py +2 -2
  3. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/column_configs.py +13 -7
  4. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/run_config.py +5 -0
  5. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/utils/code_lang.py +13 -2
  6. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/utils/constants.py +1 -1
  7. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/logging.py +15 -0
  8. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/test_columns.py +1 -1
  9. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/utils/test_code_lang.py +1 -1
  10. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/test_logging.py +51 -0
  11. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/.gitignore +0 -0
  12. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/README.md +0 -0
  13. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/pyproject.toml +0 -0
  14. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/__init__.py +0 -0
  15. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/analysis/__init__.py +0 -0
  16. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/analysis/column_profilers.py +0 -0
  17. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/analysis/column_statistics.py +0 -0
  18. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/analysis/dataset_profiler.py +0 -0
  19. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/analysis/utils/errors.py +0 -0
  20. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/analysis/utils/reporting.py +0 -0
  21. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/base.py +0 -0
  22. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/column_types.py +0 -0
  23. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/config_builder.py +0 -0
  24. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/data_designer_config.py +0 -0
  25. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/dataset_builders.py +0 -0
  26. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/dataset_metadata.py +0 -0
  27. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/default_model_settings.py +0 -0
  28. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/errors.py +0 -0
  29. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/interface.py +0 -0
  30. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/models.py +0 -0
  31. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/preview_results.py +0 -0
  32. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/processors.py +0 -0
  33. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/sampler_constraints.py +0 -0
  34. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/sampler_params.py +0 -0
  35. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/seed.py +0 -0
  36. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/seed_source.py +0 -0
  37. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/seed_source_types.py +0 -0
  38. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/testing/__init__.py +0 -0
  39. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/testing/fixtures.py +0 -0
  40. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/utils/errors.py +0 -0
  41. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/utils/info.py +0 -0
  42. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/utils/io_helpers.py +0 -0
  43. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/utils/misc.py +0 -0
  44. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/utils/numerical_helpers.py +0 -0
  45. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/utils/type_helpers.py +0 -0
  46. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/utils/visualization.py +0 -0
  47. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/config/validator_params.py +0 -0
  48. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/errors.py +0 -0
  49. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/lazy_heavy_imports.py +0 -0
  50. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/plugin_manager.py +0 -0
  51. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/plugins/__init__.py +0 -0
  52. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/plugins/errors.py +0 -0
  53. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/plugins/plugin.py +0 -0
  54. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/src/data_designer/plugins/registry.py +0 -0
  55. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/analysis/conftest.py +0 -0
  56. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/analysis/test_column_statistics.py +0 -0
  57. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/analysis/test_dataset_profiler_results.py +0 -0
  58. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/analysis/utils/test_reporting.py +0 -0
  59. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/test_config_builder.py +0 -0
  60. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/test_data_designer_config.py +0 -0
  61. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/test_default_model_settings.py +0 -0
  62. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/test_models.py +0 -0
  63. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/test_processors.py +0 -0
  64. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/test_sampler_constraints.py +0 -0
  65. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/test_sampler_params.py +0 -0
  66. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/test_seed.py +0 -0
  67. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/test_seed_source.py +0 -0
  68. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/test_validator_params.py +0 -0
  69. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/utils/__init__.py +0 -0
  70. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/utils/test_info.py +0 -0
  71. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/utils/test_io_helpers.py +0 -0
  72. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/utils/test_misc.py +0 -0
  73. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/utils/test_type_helpers.py +0 -0
  74. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/config/utils/test_visualization.py +0 -0
  75. {data_designer_config-0.4.0rc2 → data_designer_config-0.4.0rc3}/tests/conftest.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer-config
3
- Version: 0.4.0rc2
3
+ Version: 0.4.0rc3
4
4
  Summary: Configuration layer for DataDesigner synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  Classifier: Development Status :: 4 - Beta
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.4.0rc2'
32
- __version_tuple__ = version_tuple = (0, 4, 0, 'rc2')
31
+ __version__ = version = '0.4.0rc3'
32
+ __version_tuple__ = version_tuple = (0, 4, 0, 'rc3')
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -14,7 +14,7 @@ from data_designer.config.errors import InvalidConfigError
14
14
  from data_designer.config.models import ImageContext
15
15
  from data_designer.config.sampler_params import SamplerParamsT, SamplerType
16
16
  from data_designer.config.utils.code_lang import CodeLang
17
- from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX
17
+ from data_designer.config.utils.constants import TRACE_COLUMN_POSTFIX
18
18
  from data_designer.config.utils.misc import assert_valid_jinja2_template, extract_keywords_from_jinja2_template
19
19
  from data_designer.config.validator_params import ValidatorParamsT, ValidatorType
20
20
 
@@ -143,8 +143,8 @@ class LLMTextColumnConfig(SingleColumnConfig):
143
143
 
144
144
  LLM text columns generate free-form text content using language models via LiteLLM.
145
145
  Prompts support Jinja2 templating to reference values from other columns, enabling
146
- context-aware generation. The generated text can optionally include reasoning traces
147
- when models support extended thinking.
146
+ context-aware generation. The generated text can optionally include message traces
147
+ capturing the full conversation history.
148
148
 
149
149
  Attributes:
150
150
  prompt: Prompt template for text generation. Supports Jinja2 syntax to
@@ -159,6 +159,10 @@ class LLMTextColumnConfig(SingleColumnConfig):
159
159
  `LLMStructuredColumnConfig` for structured output, `LLMCodeColumnConfig` for code.
160
160
  multi_modal_context: Optional list of image contexts for multi-modal generation.
161
161
  Enables vision-capable models to generate text based on image inputs.
162
+ with_trace: If True, creates a `{column_name}__trace` column containing the full
163
+ ordered message history (system/user/assistant) for the generation.
164
+ Can be overridden globally via `RunConfig.debug_override_save_all_column_traces`.
165
+ Defaults to False.
162
166
  column_type: Discriminator field, always "llm-text" for this configuration type.
163
167
  """
164
168
 
@@ -166,6 +170,7 @@ class LLMTextColumnConfig(SingleColumnConfig):
166
170
  model_alias: str
167
171
  system_prompt: str | None = None
168
172
  multi_modal_context: list[ImageContext] | None = None
173
+ with_trace: bool = False
169
174
  column_type: Literal["llm-text"] = "llm-text"
170
175
 
171
176
  @staticmethod
@@ -186,14 +191,15 @@ class LLMTextColumnConfig(SingleColumnConfig):
186
191
 
187
192
  @property
188
193
  def side_effect_columns(self) -> list[str]:
189
- """Returns the reasoning trace column, which may be generated alongside the main column.
194
+ """Returns the trace column, which may be generated alongside the main column.
190
195
 
191
- Reasoning traces are only returned if the served model parses and returns reasoning content.
196
+ Traces are generated when `with_trace=True` on the column config or
197
+ when `RunConfig.debug_override_save_all_column_traces=True` globally.
192
198
 
193
199
  Returns:
194
- List containing the reasoning trace column name.
200
+ List containing the trace column name.
195
201
  """
196
- return [f"{self.name}{REASONING_TRACE_COLUMN_POSTFIX}"]
202
+ return [f"{self.name}{TRACE_COLUMN_POSTFIX}"]
197
203
 
198
204
  @model_validator(mode="after")
199
205
  def assert_prompt_valid_jinja(self) -> Self:
@@ -33,6 +33,10 @@ class RunConfig(ConfigBase):
33
33
  max_conversation_correction_steps: Maximum number of correction rounds permitted within a
34
34
  single conversation when generation tasks call `ModelFacade.generate(...)`. Must be >= 0.
35
35
  Default is 0.
36
+ debug_override_save_all_column_traces: If True, overrides per-column `with_trace` settings
37
+ and includes `__trace` columns for ALL LLM generations, containing the full ordered
38
+ message history (system/user/assistant) for the final generation attempt.
39
+ Useful for debugging. Default is False.
36
40
  """
37
41
 
38
42
  disable_early_shutdown: bool = False
@@ -42,6 +46,7 @@ class RunConfig(ConfigBase):
42
46
  non_inference_max_parallel_workers: int = Field(default=4, ge=1)
43
47
  max_conversation_restarts: int = Field(default=5, ge=0)
44
48
  max_conversation_correction_steps: int = Field(default=0, ge=0)
49
+ debug_override_save_all_column_traces: bool = False
45
50
 
46
51
  @model_validator(mode="after")
47
52
  def normalize_shutdown_settings(self) -> Self:
@@ -7,9 +7,14 @@ from enum import Enum
7
7
 
8
8
 
9
9
  class CodeLang(str, Enum):
10
+ BASH = "bash"
11
+ C = "c"
12
+ COBOL = "cobol"
13
+ CPP = "cpp"
14
+ CSHARP = "csharp"
10
15
  GO = "go"
11
- JAVASCRIPT = "javascript"
12
16
  JAVA = "java"
17
+ JAVASCRIPT = "javascript"
13
18
  KOTLIN = "kotlin"
14
19
  PYTHON = "python"
15
20
  RUBY = "ruby"
@@ -63,15 +68,21 @@ def code_lang_to_syntax_lexer(code_lang: CodeLang | str) -> str:
63
68
  Reference: https://pygments.org/docs/lexers/
64
69
  """
65
70
  code_lang_to_lexer = {
71
+ CodeLang.BASH: "bash",
72
+ CodeLang.C: "c",
73
+ CodeLang.COBOL: "cobol",
74
+ CodeLang.CPP: "cpp",
75
+ CodeLang.CSHARP: "csharp",
66
76
  CodeLang.GO: "golang",
67
- CodeLang.JAVASCRIPT: "javascript",
68
77
  CodeLang.JAVA: "java",
78
+ CodeLang.JAVASCRIPT: "javascript",
69
79
  CodeLang.KOTLIN: "kotlin",
70
80
  CodeLang.PYTHON: "python",
71
81
  CodeLang.RUBY: "ruby",
72
82
  CodeLang.RUST: "rust",
73
83
  CodeLang.SCALA: "scala",
74
84
  CodeLang.SWIFT: "swift",
85
+ CodeLang.TYPESCRIPT: "typescript",
75
86
  CodeLang.SQL_SQLITE: "sql",
76
87
  CodeLang.SQL_ANSI: "sql",
77
88
  CodeLang.SQL_TSQL: "tsql",
@@ -166,7 +166,7 @@ MIN_TEMPERATURE = 0.0
166
166
  MAX_TOP_P = 1.0
167
167
  MIN_TOP_P = 0.0
168
168
  MIN_MAX_TOKENS = 1
169
- REASONING_TRACE_COLUMN_POSTFIX = "__reasoning_trace"
169
+ TRACE_COLUMN_POSTFIX = "__trace"
170
170
 
171
171
  AVAILABLE_LOCALES = [
172
172
  "ar_AA",
@@ -50,6 +50,14 @@ class LoggingConfig:
50
50
  class RandomEmoji:
51
51
  """A generator for various themed emoji collections."""
52
52
 
53
+ def __init__(self) -> None:
54
+ self._progress_style = random.choice(_PROGRESS_STYLES)
55
+
56
+ def progress(self, percent: float) -> str:
57
+ """Get a progress emoji based on completion percentage (0-100)."""
58
+ phase_idx = min(int(percent / 25), len(self._progress_style) - 1)
59
+ return self._progress_style[phase_idx]
60
+
53
61
  @staticmethod
54
62
  def cooking() -> str:
55
63
  """Get a random cooking or food preparation emoji."""
@@ -163,3 +171,10 @@ def _make_stream_formatter() -> logging.Formatter:
163
171
 
164
172
 
165
173
  _DEFAULT_NOISY_LOGGERS = ["httpx", "matplotlib"]
174
+
175
+
176
+ _PROGRESS_STYLES: list[list[str]] = [
177
+ ["🌑", "🌘", "🌗", "🌖", "🌕"], # Moon phases
178
+ ["🌧️", "🌦️", "⛅", "🌤️", "☀️"], # Weather (storm to sun)
179
+ ["🥚", "🐣", "🐥", "🐤", "🐔"], # Hatching (egg to chicken)
180
+ ]
@@ -85,7 +85,7 @@ def test_llm_text_column_config():
85
85
  assert llm_text_column_config.system_prompt == stub_system_prompt
86
86
  assert llm_text_column_config.column_type == DataDesignerColumnType.LLM_TEXT
87
87
  assert set(llm_text_column_config.required_columns) == {"some_column", "some_other_column"}
88
- assert llm_text_column_config.side_effect_columns == ["test_llm_text__reasoning_trace"]
88
+ assert llm_text_column_config.side_effect_columns == ["test_llm_text__trace"]
89
89
 
90
90
  # invalid prompt
91
91
  with pytest.raises(
@@ -26,7 +26,7 @@ def test_parse_dialect():
26
26
 
27
27
 
28
28
  def test_supported_values():
29
- assert len(CodeLang.supported_values()) == 16
29
+ assert len(CodeLang.supported_values()) == 21
30
30
 
31
31
 
32
32
  def test_code_lang_to_syntax_lexer():
@@ -208,3 +208,54 @@ def test_random_emoji_randomness():
208
208
  emojis = [RandomEmoji.magic() for _ in range(100)]
209
209
  # If we get 100 samples, we should get at least 2 different emojis
210
210
  assert len(set(emojis)) > 1
211
+
212
+
213
+ def test_random_emoji_progress_returns_valid_emoji() -> None:
214
+ emoji_gen = RandomEmoji()
215
+ emoji = emoji_gen.progress(50.0)
216
+ assert emoji is not None
217
+ assert len(emoji) > 0
218
+
219
+
220
+ def test_random_emoji_progress_is_deterministic() -> None:
221
+ emoji_gen = RandomEmoji()
222
+ # Same percentage should always return the same emoji for a given instance
223
+ assert emoji_gen.progress(0.0) == emoji_gen.progress(0.0)
224
+ assert emoji_gen.progress(50.0) == emoji_gen.progress(50.0)
225
+ assert emoji_gen.progress(100.0) == emoji_gen.progress(100.0)
226
+
227
+
228
+ def test_random_emoji_progress_phases_are_distinct() -> None:
229
+ emoji_gen = RandomEmoji()
230
+ # Each 25% phase should return a different emoji
231
+ phase_emojis = [
232
+ emoji_gen.progress(0.0), # phase 0
233
+ emoji_gen.progress(25.0), # phase 1
234
+ emoji_gen.progress(50.0), # phase 2
235
+ emoji_gen.progress(75.0), # phase 3
236
+ emoji_gen.progress(100.0), # phase 4
237
+ ]
238
+ # All 5 phases should have distinct emojis
239
+ assert len(set(phase_emojis)) == 5
240
+
241
+
242
+ def test_random_emoji_progress_phase_boundaries() -> None:
243
+ emoji_gen = RandomEmoji()
244
+ # Values within the same phase should return the same emoji
245
+ assert emoji_gen.progress(0.0) == emoji_gen.progress(24.9)
246
+ assert emoji_gen.progress(25.0) == emoji_gen.progress(49.9)
247
+ assert emoji_gen.progress(50.0) == emoji_gen.progress(74.9)
248
+ assert emoji_gen.progress(75.0) == emoji_gen.progress(99.9)
249
+ # Phase transitions should return different emojis
250
+ assert emoji_gen.progress(24.9) != emoji_gen.progress(25.0)
251
+ assert emoji_gen.progress(49.9) != emoji_gen.progress(50.0)
252
+ assert emoji_gen.progress(74.9) != emoji_gen.progress(75.0)
253
+ assert emoji_gen.progress(99.9) != emoji_gen.progress(100.0)
254
+
255
+
256
+ def test_random_emoji_progress_clamps_over_100() -> None:
257
+ emoji_gen = RandomEmoji()
258
+ emoji_100 = emoji_gen.progress(100.0)
259
+ emoji_over = emoji_gen.progress(150.0)
260
+ # Both should return the same final emoji
261
+ assert emoji_100 == emoji_over