data-designer-config 0.4.0rc2__py3-none-any.whl → 0.4.0rc3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.4.0rc2'
32
- __version_tuple__ = version_tuple = (0, 4, 0, 'rc2')
31
+ __version__ = version = '0.4.0rc3'
32
+ __version_tuple__ = version_tuple = (0, 4, 0, 'rc3')
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -14,7 +14,7 @@ from data_designer.config.errors import InvalidConfigError
14
14
  from data_designer.config.models import ImageContext
15
15
  from data_designer.config.sampler_params import SamplerParamsT, SamplerType
16
16
  from data_designer.config.utils.code_lang import CodeLang
17
- from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX
17
+ from data_designer.config.utils.constants import TRACE_COLUMN_POSTFIX
18
18
  from data_designer.config.utils.misc import assert_valid_jinja2_template, extract_keywords_from_jinja2_template
19
19
  from data_designer.config.validator_params import ValidatorParamsT, ValidatorType
20
20
 
@@ -143,8 +143,8 @@ class LLMTextColumnConfig(SingleColumnConfig):
143
143
 
144
144
  LLM text columns generate free-form text content using language models via LiteLLM.
145
145
  Prompts support Jinja2 templating to reference values from other columns, enabling
146
- context-aware generation. The generated text can optionally include reasoning traces
147
- when models support extended thinking.
146
+ context-aware generation. The generated text can optionally include message traces
147
+ capturing the full conversation history.
148
148
 
149
149
  Attributes:
150
150
  prompt: Prompt template for text generation. Supports Jinja2 syntax to
@@ -159,6 +159,10 @@ class LLMTextColumnConfig(SingleColumnConfig):
159
159
  `LLMStructuredColumnConfig` for structured output, `LLMCodeColumnConfig` for code.
160
160
  multi_modal_context: Optional list of image contexts for multi-modal generation.
161
161
  Enables vision-capable models to generate text based on image inputs.
162
+ with_trace: If True, creates a `{column_name}__trace` column containing the full
163
+ ordered message history (system/user/assistant) for the generation.
164
+ Can be overridden globally via `RunConfig.debug_override_save_all_column_traces`.
165
+ Defaults to False.
162
166
  column_type: Discriminator field, always "llm-text" for this configuration type.
163
167
  """
164
168
 
@@ -166,6 +170,7 @@ class LLMTextColumnConfig(SingleColumnConfig):
166
170
  model_alias: str
167
171
  system_prompt: str | None = None
168
172
  multi_modal_context: list[ImageContext] | None = None
173
+ with_trace: bool = False
169
174
  column_type: Literal["llm-text"] = "llm-text"
170
175
 
171
176
  @staticmethod
@@ -186,14 +191,15 @@ class LLMTextColumnConfig(SingleColumnConfig):
186
191
 
187
192
  @property
188
193
  def side_effect_columns(self) -> list[str]:
189
- """Returns the reasoning trace column, which may be generated alongside the main column.
194
+ """Returns the trace column, which may be generated alongside the main column.
190
195
 
191
- Reasoning traces are only returned if the served model parses and returns reasoning content.
196
+ Traces are generated when `with_trace=True` on the column config or
197
+ when `RunConfig.debug_override_save_all_column_traces=True` globally.
192
198
 
193
199
  Returns:
194
- List containing the reasoning trace column name.
200
+ List containing the trace column name.
195
201
  """
196
- return [f"{self.name}{REASONING_TRACE_COLUMN_POSTFIX}"]
202
+ return [f"{self.name}{TRACE_COLUMN_POSTFIX}"]
197
203
 
198
204
  @model_validator(mode="after")
199
205
  def assert_prompt_valid_jinja(self) -> Self:
@@ -33,6 +33,10 @@ class RunConfig(ConfigBase):
33
33
  max_conversation_correction_steps: Maximum number of correction rounds permitted within a
34
34
  single conversation when generation tasks call `ModelFacade.generate(...)`. Must be >= 0.
35
35
  Default is 0.
36
+ debug_override_save_all_column_traces: If True, overrides per-column `with_trace` settings
37
+ and includes `__trace` columns for ALL LLM generations, containing the full ordered
38
+ message history (system/user/assistant) for the final generation attempt.
39
+ Useful for debugging. Default is False.
36
40
  """
37
41
 
38
42
  disable_early_shutdown: bool = False
@@ -42,6 +46,7 @@ class RunConfig(ConfigBase):
42
46
  non_inference_max_parallel_workers: int = Field(default=4, ge=1)
43
47
  max_conversation_restarts: int = Field(default=5, ge=0)
44
48
  max_conversation_correction_steps: int = Field(default=0, ge=0)
49
+ debug_override_save_all_column_traces: bool = False
45
50
 
46
51
  @model_validator(mode="after")
47
52
  def normalize_shutdown_settings(self) -> Self:
@@ -7,9 +7,14 @@ from enum import Enum
7
7
 
8
8
 
9
9
  class CodeLang(str, Enum):
10
+ BASH = "bash"
11
+ C = "c"
12
+ COBOL = "cobol"
13
+ CPP = "cpp"
14
+ CSHARP = "csharp"
10
15
  GO = "go"
11
- JAVASCRIPT = "javascript"
12
16
  JAVA = "java"
17
+ JAVASCRIPT = "javascript"
13
18
  KOTLIN = "kotlin"
14
19
  PYTHON = "python"
15
20
  RUBY = "ruby"
@@ -63,15 +68,21 @@ def code_lang_to_syntax_lexer(code_lang: CodeLang | str) -> str:
63
68
  Reference: https://pygments.org/docs/lexers/
64
69
  """
65
70
  code_lang_to_lexer = {
71
+ CodeLang.BASH: "bash",
72
+ CodeLang.C: "c",
73
+ CodeLang.COBOL: "cobol",
74
+ CodeLang.CPP: "cpp",
75
+ CodeLang.CSHARP: "csharp",
66
76
  CodeLang.GO: "golang",
67
- CodeLang.JAVASCRIPT: "javascript",
68
77
  CodeLang.JAVA: "java",
78
+ CodeLang.JAVASCRIPT: "javascript",
69
79
  CodeLang.KOTLIN: "kotlin",
70
80
  CodeLang.PYTHON: "python",
71
81
  CodeLang.RUBY: "ruby",
72
82
  CodeLang.RUST: "rust",
73
83
  CodeLang.SCALA: "scala",
74
84
  CodeLang.SWIFT: "swift",
85
+ CodeLang.TYPESCRIPT: "typescript",
75
86
  CodeLang.SQL_SQLITE: "sql",
76
87
  CodeLang.SQL_ANSI: "sql",
77
88
  CodeLang.SQL_TSQL: "tsql",
@@ -166,7 +166,7 @@ MIN_TEMPERATURE = 0.0
166
166
  MAX_TOP_P = 1.0
167
167
  MIN_TOP_P = 0.0
168
168
  MIN_MAX_TOKENS = 1
169
- REASONING_TRACE_COLUMN_POSTFIX = "__reasoning_trace"
169
+ TRACE_COLUMN_POSTFIX = "__trace"
170
170
 
171
171
  AVAILABLE_LOCALES = [
172
172
  "ar_AA",
data_designer/logging.py CHANGED
@@ -50,6 +50,14 @@ class LoggingConfig:
50
50
  class RandomEmoji:
51
51
  """A generator for various themed emoji collections."""
52
52
 
53
+ def __init__(self) -> None:
54
+ self._progress_style = random.choice(_PROGRESS_STYLES)
55
+
56
+ def progress(self, percent: float) -> str:
57
+ """Get a progress emoji based on completion percentage (0-100)."""
58
+ phase_idx = min(int(percent / 25), len(self._progress_style) - 1)
59
+ return self._progress_style[phase_idx]
60
+
53
61
  @staticmethod
54
62
  def cooking() -> str:
55
63
  """Get a random cooking or food preparation emoji."""
@@ -163,3 +171,10 @@ def _make_stream_formatter() -> logging.Formatter:
163
171
 
164
172
 
165
173
  _DEFAULT_NOISY_LOGGERS = ["httpx", "matplotlib"]
174
+
175
+
176
+ _PROGRESS_STYLES: list[list[str]] = [
177
+ ["🌑", "🌘", "🌗", "🌖", "🌕"], # Moon phases
178
+ ["🌧️", "🌦️", "⛅", "🌤️", "☀️"], # Weather (storm to sun)
179
+ ["🥚", "🐣", "🐥", "🐤", "🐔"], # Hatching (egg to chicken)
180
+ ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer-config
3
- Version: 0.4.0rc2
3
+ Version: 0.4.0rc3
4
4
  Summary: Configuration layer for DataDesigner synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  Classifier: Development Status :: 4 - Beta
@@ -1,11 +1,11 @@
1
1
  data_designer/errors.py,sha256=r1pBvmvRBAsPmb7oF_veubhkxZ2uPo9cGEDwykLziX4,220
2
2
  data_designer/lazy_heavy_imports.py,sha256=5X04vUj9TYbKgfDmY2qvhzRf5-IZWKOanIpi3_u7fmM,1662
3
- data_designer/logging.py,sha256=_x-tDj34ClrgSnU57Dh0mZdDxrnA73vgs09KooUvbEA,5444
3
+ data_designer/logging.py,sha256=Xq2cRwxmDJ-r8_s9NWnk15efLRsrKm5iVScHy6HkjiE,6044
4
4
  data_designer/plugin_manager.py,sha256=C2ZkZiXlcMRiaxfrrho5Shz6DKdExVeBha7ch-d4CnU,2695
5
5
  data_designer/config/__init__.py,sha256=MWzRZhXA41sTpc0sL_xq2baA3kSlV37alT6g8RlP8dU,4919
6
- data_designer/config/_version.py,sha256=FvItxCBzPigrdVpFPfL1gQeV1-km5r7nCNGUzrYebTU,714
6
+ data_designer/config/_version.py,sha256=DTeqqEr2nu3d0J3qZZfx7FpCHf1Ai3akZDbCReLKmgY,714
7
7
  data_designer/config/base.py,sha256=IGj6sy_GnKzC94uu2rdxe12EqR_AmGJ6O3rl2MxOv6g,2449
8
- data_designer/config/column_configs.py,sha256=JBYIeqmIiwdZzA-NXTw4qghs2Y30RZPQ_-koqPVcQ9g,20384
8
+ data_designer/config/column_configs.py,sha256=QEHXbxljbGEfOEnzNsiR3_CRpaCukQsayBbHQyhMhbc,20720
9
9
  data_designer/config/column_types.py,sha256=xGXuu0EBy3Y5Jd74f2VM6x5jHq72GmK9leA6qOnAz8c,5423
10
10
  data_designer/config/config_builder.py,sha256=vuPibkodbJxbCXdaI1tt1Uyo1SVCnAOfLBAW1AmhajI,24707
11
11
  data_designer/config/data_designer_config.py,sha256=qOojviug05vHR2S4800sjd4OmxhSVi6kB8SAFXLlPog,1891
@@ -17,7 +17,7 @@ data_designer/config/interface.py,sha256=ikmpm_KwencTpM-yg0auo7XMgcmMSa67S75Iqdp
17
17
  data_designer/config/models.py,sha256=_NctRk4brgBeb5q5V7r_hXE5OORlLh6SCVZP0eu2LGo,16721
18
18
  data_designer/config/preview_results.py,sha256=WnPlDcHElIHNfjV_P-nLu_Dpul8D3Eyb5qyi3E173Gs,1744
19
19
  data_designer/config/processors.py,sha256=lnyUZA1EhO9NWjjVFFioYxSgeYpoAaM1J7UzwOYkvms,6028
20
- data_designer/config/run_config.py,sha256=oJ163DpHXu9PzST5Hn9px-bIP9DYjIkCO7UGB93J7bI,2663
20
+ data_designer/config/run_config.py,sha256=m_rrqEmNHR533AYJ_OR5yq0a9Pegy9vPGZgyfD4x9cI,3052
21
21
  data_designer/config/sampler_constraints.py,sha256=tQI1XLF5bS4TnyKMLo0nArvefnXI8dWCzov38r4qNCQ,1197
22
22
  data_designer/config/sampler_params.py,sha256=Gio-53vjSYOdPhF2CEq4HSWCXCaZMy4WpGPbuFVcWOM,27965
23
23
  data_designer/config/seed.py,sha256=eShSqOcSUzfCEZBnqY-rB0qZpRGxjeOE3fSaJAwacec,4668
@@ -32,8 +32,8 @@ data_designer/config/analysis/utils/errors.py,sha256=pvmdQ_YuIlWW4NFw-cX_rOoQf-G
32
32
  data_designer/config/analysis/utils/reporting.py,sha256=teTzd1OHtpI4vbIinGOGsKXyNldO3F5eqbNdAztF0_s,7066
33
33
  data_designer/config/testing/__init__.py,sha256=vxFrIOqDoDfOx-MWjC5lb_hvmB4kRKvh1QdTv--QYFM,222
34
34
  data_designer/config/testing/fixtures.py,sha256=J1bcWjerAIoVUIZBVPbUcuvEa2laj_kspVcLS7UZMbo,10876
35
- data_designer/config/utils/code_lang.py,sha256=EqMJh1GL5ysUZIoyqx_6vmqenUKHm4J-RQtKXiA4EPg,2354
36
- data_designer/config/utils/constants.py,sha256=eqDQ57b8B0v5qRSO0He45LEjSxtfxlsPtHRvBu1xkw0,8973
35
+ data_designer/config/utils/code_lang.py,sha256=nUeWjuzSYBVF5gwOiUE2-EsYCEDzRZaw31RIivt7GPI,2638
36
+ data_designer/config/utils/constants.py,sha256=lprfeF_bIzGJ_oGrZBhvHEbLVgrGfFtVbCdWJHf_6B8,8953
37
37
  data_designer/config/utils/errors.py,sha256=HCjer0YrF0bMn5j8gmgWaLb0395LAr_hxMD1ftOsOc8,520
38
38
  data_designer/config/utils/info.py,sha256=yOa4U8kI_CY4OfCKZxCm2okU8klAiThvyjKM5tG-F0A,3469
39
39
  data_designer/config/utils/io_helpers.py,sha256=kzvOR7QgqijkqU-O2enIlpCWwHvzc3oRaEl4Lsjh1Do,8466
@@ -45,6 +45,6 @@ data_designer/plugins/__init__.py,sha256=qe1alcTEtnMSMdzknjb57vvjqKgFE5cEHXxBj8t
45
45
  data_designer/plugins/errors.py,sha256=d7FMed3ueQvZHwuhwyPLzF4E34bO1mdj3aBVEw6p34o,386
46
46
  data_designer/plugins/plugin.py,sha256=TVyyOaQBWAt0FQwUmtihTZ9MDJD85HwggrQ3L9CviPQ,5367
47
47
  data_designer/plugins/registry.py,sha256=Cnt33Q25o9bS2v2YDbV3QPM57VNrtIBKAb4ERQRE_dY,3053
48
- data_designer_config-0.4.0rc2.dist-info/METADATA,sha256=sEtB9CR6HT9TCz7nptAitc_pH9N62tDBddnEFMw9MYM,2286
49
- data_designer_config-0.4.0rc2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
50
- data_designer_config-0.4.0rc2.dist-info/RECORD,,
48
+ data_designer_config-0.4.0rc3.dist-info/METADATA,sha256=7FQwABHHNXZOEomFztSQi4heVz8ioAouK_vIhreW0Tw,2286
49
+ data_designer_config-0.4.0rc3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
50
+ data_designer_config-0.4.0rc3.dist-info/RECORD,,