data-designer-config 0.4.0__py3-none-any.whl → 0.4.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
28
28
  commit_id: COMMIT_ID
29
29
  __commit_id__: COMMIT_ID
30
30
 
31
- __version__ = version = '0.4.0'
32
- __version_tuple__ = version_tuple = (0, 4, 0)
31
+ __version__ = version = '0.4.0rc1'
32
+ __version_tuple__ = version_tuple = (0, 4, 0, 'rc1')
33
33
 
34
34
  __commit_id__ = commit_id = None
@@ -14,7 +14,7 @@ from data_designer.config.errors import InvalidConfigError
14
14
  from data_designer.config.models import ImageContext
15
15
  from data_designer.config.sampler_params import SamplerParamsT, SamplerType
16
16
  from data_designer.config.utils.code_lang import CodeLang
17
- from data_designer.config.utils.constants import TRACE_COLUMN_POSTFIX
17
+ from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX
18
18
  from data_designer.config.utils.misc import assert_valid_jinja2_template, extract_keywords_from_jinja2_template
19
19
  from data_designer.config.validator_params import ValidatorParamsT, ValidatorType
20
20
 
@@ -143,8 +143,8 @@ class LLMTextColumnConfig(SingleColumnConfig):
143
143
 
144
144
  LLM text columns generate free-form text content using language models via LiteLLM.
145
145
  Prompts support Jinja2 templating to reference values from other columns, enabling
146
- context-aware generation. The generated text can optionally include message traces
147
- capturing the full conversation history.
146
+ context-aware generation. The generated text can optionally include reasoning traces
147
+ when models support extended thinking.
148
148
 
149
149
  Attributes:
150
150
  prompt: Prompt template for text generation. Supports Jinja2 syntax to
@@ -159,10 +159,6 @@ class LLMTextColumnConfig(SingleColumnConfig):
159
159
  `LLMStructuredColumnConfig` for structured output, `LLMCodeColumnConfig` for code.
160
160
  multi_modal_context: Optional list of image contexts for multi-modal generation.
161
161
  Enables vision-capable models to generate text based on image inputs.
162
- with_trace: If True, creates a `{column_name}__trace` column containing the full
163
- ordered message history (system/user/assistant) for the generation.
164
- Can be overridden globally via `RunConfig.debug_override_save_all_column_traces`.
165
- Defaults to False.
166
162
  column_type: Discriminator field, always "llm-text" for this configuration type.
167
163
  """
168
164
 
@@ -170,7 +166,6 @@ class LLMTextColumnConfig(SingleColumnConfig):
170
166
  model_alias: str
171
167
  system_prompt: str | None = None
172
168
  multi_modal_context: list[ImageContext] | None = None
173
- with_trace: bool = False
174
169
  column_type: Literal["llm-text"] = "llm-text"
175
170
 
176
171
  @staticmethod
@@ -191,15 +186,14 @@ class LLMTextColumnConfig(SingleColumnConfig):
191
186
 
192
187
  @property
193
188
  def side_effect_columns(self) -> list[str]:
194
- """Returns the trace column, which may be generated alongside the main column.
189
+ """Returns the reasoning trace column, which may be generated alongside the main column.
195
190
 
196
- Traces are generated when `with_trace=True` on the column config or
197
- when `RunConfig.debug_override_save_all_column_traces=True` globally.
191
+ Reasoning traces are only returned if the served model parses and returns reasoning content.
198
192
 
199
193
  Returns:
200
- List containing the trace column name.
194
+ List containing the reasoning trace column name.
201
195
  """
202
- return [f"{self.name}{TRACE_COLUMN_POSTFIX}"]
196
+ return [f"{self.name}{REASONING_TRACE_COLUMN_POSTFIX}"]
203
197
 
204
198
  @model_validator(mode="after")
205
199
  def assert_prompt_valid_jinja(self) -> Self:
@@ -3,7 +3,6 @@
3
3
 
4
4
  from __future__ import annotations
5
5
 
6
- import json
7
6
  import logging
8
7
  from abc import ABC, abstractmethod
9
8
  from enum import Enum
@@ -66,7 +65,7 @@ class ModalityContext(ABC, BaseModel):
66
65
  data_type: ModalityDataType
67
66
 
68
67
  @abstractmethod
69
- def get_contexts(self, record: dict) -> list[dict[str, Any]]: ...
68
+ def get_context(self, record: dict) -> dict[str, Any]: ...
70
69
 
71
70
 
72
71
  class ImageContext(ModalityContext):
@@ -82,53 +81,25 @@ class ImageContext(ModalityContext):
82
81
  modality: Modality = Modality.IMAGE
83
82
  image_format: ImageFormat | None = None
84
83
 
85
- def get_contexts(self, record: dict) -> list[dict[str, Any]]:
86
- """Get the contexts for the image modality.
84
+ def get_context(self, record: dict) -> dict[str, Any]:
85
+ """Get the context for the image modality.
87
86
 
88
87
  Args:
89
- record: The record containing the image data. The data can be:
90
- - A JSON serialized list of strings
91
- - A list of strings
92
- - A single string
88
+ record: The record containing the image data.
93
89
 
94
90
  Returns:
95
- A list of image contexts.
91
+ The context for the image modality.
96
92
  """
97
- raw_value = record[self.column_name]
98
-
99
- # Normalize to list of strings
100
- if isinstance(raw_value, str):
101
- # Try to parse as JSON first
102
- try:
103
- parsed_value = json.loads(raw_value)
104
- if isinstance(parsed_value, list):
105
- context_values = parsed_value
106
- else:
107
- context_values = [raw_value]
108
- except (json.JSONDecodeError, TypeError):
109
- context_values = [raw_value]
110
- elif isinstance(raw_value, list):
111
- context_values = raw_value
112
- elif hasattr(raw_value, "__iter__") and not isinstance(raw_value, (str, bytes, dict)):
113
- # Handle array-like objects (numpy arrays, pandas Series, etc.)
114
- context_values = list(raw_value)
93
+ context = dict(type="image_url")
94
+ context_value = record[self.column_name]
95
+ if self.data_type == ModalityDataType.URL:
96
+ context["image_url"] = context_value
115
97
  else:
116
- context_values = [raw_value]
117
-
118
- # Build context list
119
- contexts = []
120
- for context_value in context_values:
121
- context = dict(type="image_url")
122
- if self.data_type == ModalityDataType.URL:
123
- context["image_url"] = context_value
124
- else:
125
- context["image_url"] = {
126
- "url": f"data:image/{self.image_format.value};base64,{context_value}",
127
- "format": self.image_format.value,
128
- }
129
- contexts.append(context)
130
-
131
- return contexts
98
+ context["image_url"] = {
99
+ "url": f"data:image/{self.image_format.value};base64,{context_value}",
100
+ "format": self.image_format.value,
101
+ }
102
+ return context
132
103
 
133
104
  @model_validator(mode="after")
134
105
  def _validate_image_format(self) -> Self:
@@ -428,14 +399,12 @@ class ModelConfig(ConfigBase):
428
399
  inference_parameters: Inference parameters for the model (temperature, top_p, max_tokens, etc.).
429
400
  The generation_type is determined by the type of inference_parameters.
430
401
  provider: Optional model provider name if using custom providers.
431
- skip_health_check: Whether to skip the health check for this model. Defaults to False.
432
402
  """
433
403
 
434
404
  alias: str
435
405
  model: str
436
406
  inference_parameters: InferenceParamsT = Field(default_factory=ChatCompletionInferenceParams)
437
407
  provider: str | None = None
438
- skip_health_check: bool = False
439
408
 
440
409
  @property
441
410
  def generation_type(self) -> GenerationType:
@@ -33,10 +33,6 @@ class RunConfig(ConfigBase):
33
33
  max_conversation_correction_steps: Maximum number of correction rounds permitted within a
34
34
  single conversation when generation tasks call `ModelFacade.generate(...)`. Must be >= 0.
35
35
  Default is 0.
36
- debug_override_save_all_column_traces: If True, overrides per-column `with_trace` settings
37
- and includes `__trace` columns for ALL LLM generations, containing the full ordered
38
- message history (system/user/assistant) for the final generation attempt.
39
- Useful for debugging. Default is False.
40
36
  """
41
37
 
42
38
  disable_early_shutdown: bool = False
@@ -46,7 +42,6 @@ class RunConfig(ConfigBase):
46
42
  non_inference_max_parallel_workers: int = Field(default=4, ge=1)
47
43
  max_conversation_restarts: int = Field(default=5, ge=0)
48
44
  max_conversation_correction_steps: int = Field(default=0, ge=0)
49
- debug_override_save_all_column_traces: bool = False
50
45
 
51
46
  @model_validator(mode="after")
52
47
  def normalize_shutdown_settings(self) -> Self:
@@ -7,14 +7,9 @@ from enum import Enum
7
7
 
8
8
 
9
9
  class CodeLang(str, Enum):
10
- BASH = "bash"
11
- C = "c"
12
- COBOL = "cobol"
13
- CPP = "cpp"
14
- CSHARP = "csharp"
15
10
  GO = "go"
16
- JAVA = "java"
17
11
  JAVASCRIPT = "javascript"
12
+ JAVA = "java"
18
13
  KOTLIN = "kotlin"
19
14
  PYTHON = "python"
20
15
  RUBY = "ruby"
@@ -68,21 +63,15 @@ def code_lang_to_syntax_lexer(code_lang: CodeLang | str) -> str:
68
63
  Reference: https://pygments.org/docs/lexers/
69
64
  """
70
65
  code_lang_to_lexer = {
71
- CodeLang.BASH: "bash",
72
- CodeLang.C: "c",
73
- CodeLang.COBOL: "cobol",
74
- CodeLang.CPP: "cpp",
75
- CodeLang.CSHARP: "csharp",
76
66
  CodeLang.GO: "golang",
77
- CodeLang.JAVA: "java",
78
67
  CodeLang.JAVASCRIPT: "javascript",
68
+ CodeLang.JAVA: "java",
79
69
  CodeLang.KOTLIN: "kotlin",
80
70
  CodeLang.PYTHON: "python",
81
71
  CodeLang.RUBY: "ruby",
82
72
  CodeLang.RUST: "rust",
83
73
  CodeLang.SCALA: "scala",
84
74
  CodeLang.SWIFT: "swift",
85
- CodeLang.TYPESCRIPT: "typescript",
86
75
  CodeLang.SQL_SQLITE: "sql",
87
76
  CodeLang.SQL_ANSI: "sql",
88
77
  CodeLang.SQL_TSQL: "tsql",
@@ -166,7 +166,7 @@ MIN_TEMPERATURE = 0.0
166
166
  MAX_TOP_P = 1.0
167
167
  MIN_TOP_P = 0.0
168
168
  MIN_MAX_TOKENS = 1
169
- TRACE_COLUMN_POSTFIX = "__trace"
169
+ REASONING_TRACE_COLUMN_POSTFIX = "__reasoning_trace"
170
170
 
171
171
  AVAILABLE_LOCALES = [
172
172
  "ar_AA",
data_designer/logging.py CHANGED
@@ -50,14 +50,6 @@ class LoggingConfig:
50
50
  class RandomEmoji:
51
51
  """A generator for various themed emoji collections."""
52
52
 
53
- def __init__(self) -> None:
54
- self._progress_style = random.choice(_PROGRESS_STYLES)
55
-
56
- def progress(self, percent: float) -> str:
57
- """Get a progress emoji based on completion percentage (0-100)."""
58
- phase_idx = min(int(percent / 25), len(self._progress_style) - 1)
59
- return self._progress_style[phase_idx]
60
-
61
53
  @staticmethod
62
54
  def cooking() -> str:
63
55
  """Get a random cooking or food preparation emoji."""
@@ -171,10 +163,3 @@ def _make_stream_formatter() -> logging.Formatter:
171
163
 
172
164
 
173
165
  _DEFAULT_NOISY_LOGGERS = ["httpx", "matplotlib"]
174
-
175
-
176
- _PROGRESS_STYLES: list[list[str]] = [
177
- ["🌑", "🌘", "🌗", "🌖", "🌕"], # Moon phases
178
- ["🌧️", "🌦️", "⛅", "🌤️", "☀️"], # Weather (storm to sun)
179
- ["🥚", "🐣", "🐥", "🐤", "🐔"], # Hatching (egg to chicken)
180
- ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer-config
3
- Version: 0.4.0
3
+ Version: 0.4.0rc1
4
4
  Summary: Configuration layer for DataDesigner synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  Classifier: Development Status :: 4 - Beta
@@ -1,11 +1,11 @@
1
1
  data_designer/errors.py,sha256=r1pBvmvRBAsPmb7oF_veubhkxZ2uPo9cGEDwykLziX4,220
2
2
  data_designer/lazy_heavy_imports.py,sha256=5X04vUj9TYbKgfDmY2qvhzRf5-IZWKOanIpi3_u7fmM,1662
3
- data_designer/logging.py,sha256=Xq2cRwxmDJ-r8_s9NWnk15efLRsrKm5iVScHy6HkjiE,6044
3
+ data_designer/logging.py,sha256=_x-tDj34ClrgSnU57Dh0mZdDxrnA73vgs09KooUvbEA,5444
4
4
  data_designer/plugin_manager.py,sha256=C2ZkZiXlcMRiaxfrrho5Shz6DKdExVeBha7ch-d4CnU,2695
5
5
  data_designer/config/__init__.py,sha256=MWzRZhXA41sTpc0sL_xq2baA3kSlV37alT6g8RlP8dU,4919
6
- data_designer/config/_version.py,sha256=2_0GUP7yBCXRus-qiJKxQD62z172WSs1sQ6DVpPsbmM,704
6
+ data_designer/config/_version.py,sha256=yib4WPM_pEWXdpIHBdFnf29aurTH5f4xrnwVlv7cijo,714
7
7
  data_designer/config/base.py,sha256=IGj6sy_GnKzC94uu2rdxe12EqR_AmGJ6O3rl2MxOv6g,2449
8
- data_designer/config/column_configs.py,sha256=QEHXbxljbGEfOEnzNsiR3_CRpaCukQsayBbHQyhMhbc,20720
8
+ data_designer/config/column_configs.py,sha256=JBYIeqmIiwdZzA-NXTw4qghs2Y30RZPQ_-koqPVcQ9g,20384
9
9
  data_designer/config/column_types.py,sha256=xGXuu0EBy3Y5Jd74f2VM6x5jHq72GmK9leA6qOnAz8c,5423
10
10
  data_designer/config/config_builder.py,sha256=vuPibkodbJxbCXdaI1tt1Uyo1SVCnAOfLBAW1AmhajI,24707
11
11
  data_designer/config/data_designer_config.py,sha256=qOojviug05vHR2S4800sjd4OmxhSVi6kB8SAFXLlPog,1891
@@ -14,10 +14,10 @@ data_designer/config/dataset_metadata.py,sha256=UTlEgnHWgjwPuc7bP95T7gaKmcr7pIhF
14
14
  data_designer/config/default_model_settings.py,sha256=c-llH2otfG0tMCMsxoz3ZcS1nFxIQQPfRedFXAydDbc,4868
15
15
  data_designer/config/errors.py,sha256=JhvUYecfLmP0gZjQzqA3OmfaSs9TRlC5E-ubnV_-3gs,560
16
16
  data_designer/config/interface.py,sha256=ikmpm_KwencTpM-yg0auo7XMgcmMSa67S75IqdpFLfk,1676
17
- data_designer/config/models.py,sha256=_NctRk4brgBeb5q5V7r_hXE5OORlLh6SCVZP0eu2LGo,16721
17
+ data_designer/config/models.py,sha256=OekrXEVnI9WdHzEVk-8fO0NtxLZtjKVtCL03RY8qwYs,15457
18
18
  data_designer/config/preview_results.py,sha256=WnPlDcHElIHNfjV_P-nLu_Dpul8D3Eyb5qyi3E173Gs,1744
19
19
  data_designer/config/processors.py,sha256=lnyUZA1EhO9NWjjVFFioYxSgeYpoAaM1J7UzwOYkvms,6028
20
- data_designer/config/run_config.py,sha256=m_rrqEmNHR533AYJ_OR5yq0a9Pegy9vPGZgyfD4x9cI,3052
20
+ data_designer/config/run_config.py,sha256=oJ163DpHXu9PzST5Hn9px-bIP9DYjIkCO7UGB93J7bI,2663
21
21
  data_designer/config/sampler_constraints.py,sha256=tQI1XLF5bS4TnyKMLo0nArvefnXI8dWCzov38r4qNCQ,1197
22
22
  data_designer/config/sampler_params.py,sha256=Gio-53vjSYOdPhF2CEq4HSWCXCaZMy4WpGPbuFVcWOM,27965
23
23
  data_designer/config/seed.py,sha256=eShSqOcSUzfCEZBnqY-rB0qZpRGxjeOE3fSaJAwacec,4668
@@ -32,8 +32,8 @@ data_designer/config/analysis/utils/errors.py,sha256=pvmdQ_YuIlWW4NFw-cX_rOoQf-G
32
32
  data_designer/config/analysis/utils/reporting.py,sha256=teTzd1OHtpI4vbIinGOGsKXyNldO3F5eqbNdAztF0_s,7066
33
33
  data_designer/config/testing/__init__.py,sha256=vxFrIOqDoDfOx-MWjC5lb_hvmB4kRKvh1QdTv--QYFM,222
34
34
  data_designer/config/testing/fixtures.py,sha256=J1bcWjerAIoVUIZBVPbUcuvEa2laj_kspVcLS7UZMbo,10876
35
- data_designer/config/utils/code_lang.py,sha256=nUeWjuzSYBVF5gwOiUE2-EsYCEDzRZaw31RIivt7GPI,2638
36
- data_designer/config/utils/constants.py,sha256=lprfeF_bIzGJ_oGrZBhvHEbLVgrGfFtVbCdWJHf_6B8,8953
35
+ data_designer/config/utils/code_lang.py,sha256=EqMJh1GL5ysUZIoyqx_6vmqenUKHm4J-RQtKXiA4EPg,2354
36
+ data_designer/config/utils/constants.py,sha256=eqDQ57b8B0v5qRSO0He45LEjSxtfxlsPtHRvBu1xkw0,8973
37
37
  data_designer/config/utils/errors.py,sha256=HCjer0YrF0bMn5j8gmgWaLb0395LAr_hxMD1ftOsOc8,520
38
38
  data_designer/config/utils/info.py,sha256=yOa4U8kI_CY4OfCKZxCm2okU8klAiThvyjKM5tG-F0A,3469
39
39
  data_designer/config/utils/io_helpers.py,sha256=kzvOR7QgqijkqU-O2enIlpCWwHvzc3oRaEl4Lsjh1Do,8466
@@ -45,6 +45,6 @@ data_designer/plugins/__init__.py,sha256=qe1alcTEtnMSMdzknjb57vvjqKgFE5cEHXxBj8t
45
45
  data_designer/plugins/errors.py,sha256=d7FMed3ueQvZHwuhwyPLzF4E34bO1mdj3aBVEw6p34o,386
46
46
  data_designer/plugins/plugin.py,sha256=TVyyOaQBWAt0FQwUmtihTZ9MDJD85HwggrQ3L9CviPQ,5367
47
47
  data_designer/plugins/registry.py,sha256=Cnt33Q25o9bS2v2YDbV3QPM57VNrtIBKAb4ERQRE_dY,3053
48
- data_designer_config-0.4.0.dist-info/METADATA,sha256=l06rdZe6t1jKhqvgkH0ZYSTXX-UUVsjQ-ZIfwD_mwvA,2283
49
- data_designer_config-0.4.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
50
- data_designer_config-0.4.0.dist-info/RECORD,,
48
+ data_designer_config-0.4.0rc1.dist-info/METADATA,sha256=EvLUDu0U0oK7FuylbCr44meSR8H5Ty-bt-EdUIagmi4,2286
49
+ data_designer_config-0.4.0rc1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
50
+ data_designer_config-0.4.0rc1.dist-info/RECORD,,