data-designer-config 0.4.0__tar.gz → 0.4.0rc1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/PKG-INFO +1 -1
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/_version.py +2 -2
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/column_configs.py +7 -13
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/models.py +14 -45
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/run_config.py +0 -5
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/utils/code_lang.py +2 -13
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/utils/constants.py +1 -1
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/logging.py +0 -15
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_columns.py +1 -1
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_models.py +9 -147
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/utils/test_code_lang.py +1 -1
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/test_logging.py +0 -51
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/.gitignore +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/README.md +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/pyproject.toml +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/__init__.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/analysis/__init__.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/analysis/column_profilers.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/analysis/column_statistics.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/analysis/dataset_profiler.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/analysis/utils/errors.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/analysis/utils/reporting.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/base.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/column_types.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/config_builder.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/data_designer_config.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/dataset_builders.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/dataset_metadata.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/default_model_settings.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/errors.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/interface.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/preview_results.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/processors.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/sampler_constraints.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/sampler_params.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/seed.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/seed_source.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/seed_source_types.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/testing/__init__.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/testing/fixtures.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/utils/errors.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/utils/info.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/utils/io_helpers.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/utils/misc.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/utils/numerical_helpers.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/utils/type_helpers.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/utils/visualization.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/validator_params.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/errors.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/lazy_heavy_imports.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/plugin_manager.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/plugins/__init__.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/plugins/errors.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/plugins/plugin.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/plugins/registry.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/analysis/conftest.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/analysis/test_column_statistics.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/analysis/test_dataset_profiler_results.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/analysis/utils/test_reporting.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_config_builder.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_data_designer_config.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_default_model_settings.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_processors.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_sampler_constraints.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_sampler_params.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_seed.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_seed_source.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_validator_params.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/utils/__init__.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/utils/test_info.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/utils/test_io_helpers.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/utils/test_misc.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/utils/test_type_helpers.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/utils/test_visualization.py +0 -0
- {data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/conftest.py +0 -0
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/_version.py
RENAMED
|
@@ -28,7 +28,7 @@ version_tuple: VERSION_TUPLE
|
|
|
28
28
|
commit_id: COMMIT_ID
|
|
29
29
|
__commit_id__: COMMIT_ID
|
|
30
30
|
|
|
31
|
-
__version__ = version = '0.4.
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 4, 0)
|
|
31
|
+
__version__ = version = '0.4.0rc1'
|
|
32
|
+
__version_tuple__ = version_tuple = (0, 4, 0, 'rc1')
|
|
33
33
|
|
|
34
34
|
__commit_id__ = commit_id = None
|
|
@@ -14,7 +14,7 @@ from data_designer.config.errors import InvalidConfigError
|
|
|
14
14
|
from data_designer.config.models import ImageContext
|
|
15
15
|
from data_designer.config.sampler_params import SamplerParamsT, SamplerType
|
|
16
16
|
from data_designer.config.utils.code_lang import CodeLang
|
|
17
|
-
from data_designer.config.utils.constants import
|
|
17
|
+
from data_designer.config.utils.constants import REASONING_TRACE_COLUMN_POSTFIX
|
|
18
18
|
from data_designer.config.utils.misc import assert_valid_jinja2_template, extract_keywords_from_jinja2_template
|
|
19
19
|
from data_designer.config.validator_params import ValidatorParamsT, ValidatorType
|
|
20
20
|
|
|
@@ -143,8 +143,8 @@ class LLMTextColumnConfig(SingleColumnConfig):
|
|
|
143
143
|
|
|
144
144
|
LLM text columns generate free-form text content using language models via LiteLLM.
|
|
145
145
|
Prompts support Jinja2 templating to reference values from other columns, enabling
|
|
146
|
-
context-aware generation. The generated text can optionally include
|
|
147
|
-
|
|
146
|
+
context-aware generation. The generated text can optionally include reasoning traces
|
|
147
|
+
when models support extended thinking.
|
|
148
148
|
|
|
149
149
|
Attributes:
|
|
150
150
|
prompt: Prompt template for text generation. Supports Jinja2 syntax to
|
|
@@ -159,10 +159,6 @@ class LLMTextColumnConfig(SingleColumnConfig):
|
|
|
159
159
|
`LLMStructuredColumnConfig` for structured output, `LLMCodeColumnConfig` for code.
|
|
160
160
|
multi_modal_context: Optional list of image contexts for multi-modal generation.
|
|
161
161
|
Enables vision-capable models to generate text based on image inputs.
|
|
162
|
-
with_trace: If True, creates a `{column_name}__trace` column containing the full
|
|
163
|
-
ordered message history (system/user/assistant) for the generation.
|
|
164
|
-
Can be overridden globally via `RunConfig.debug_override_save_all_column_traces`.
|
|
165
|
-
Defaults to False.
|
|
166
162
|
column_type: Discriminator field, always "llm-text" for this configuration type.
|
|
167
163
|
"""
|
|
168
164
|
|
|
@@ -170,7 +166,6 @@ class LLMTextColumnConfig(SingleColumnConfig):
|
|
|
170
166
|
model_alias: str
|
|
171
167
|
system_prompt: str | None = None
|
|
172
168
|
multi_modal_context: list[ImageContext] | None = None
|
|
173
|
-
with_trace: bool = False
|
|
174
169
|
column_type: Literal["llm-text"] = "llm-text"
|
|
175
170
|
|
|
176
171
|
@staticmethod
|
|
@@ -191,15 +186,14 @@ class LLMTextColumnConfig(SingleColumnConfig):
|
|
|
191
186
|
|
|
192
187
|
@property
|
|
193
188
|
def side_effect_columns(self) -> list[str]:
|
|
194
|
-
"""Returns the trace column, which may be generated alongside the main column.
|
|
189
|
+
"""Returns the reasoning trace column, which may be generated alongside the main column.
|
|
195
190
|
|
|
196
|
-
|
|
197
|
-
when `RunConfig.debug_override_save_all_column_traces=True` globally.
|
|
191
|
+
Reasoning traces are only returned if the served model parses and returns reasoning content.
|
|
198
192
|
|
|
199
193
|
Returns:
|
|
200
|
-
List containing the trace column name.
|
|
194
|
+
List containing the reasoning trace column name.
|
|
201
195
|
"""
|
|
202
|
-
return [f"{self.name}{
|
|
196
|
+
return [f"{self.name}{REASONING_TRACE_COLUMN_POSTFIX}"]
|
|
203
197
|
|
|
204
198
|
@model_validator(mode="after")
|
|
205
199
|
def assert_prompt_valid_jinja(self) -> Self:
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/models.py
RENAMED
|
@@ -3,7 +3,6 @@
|
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
|
-
import json
|
|
7
6
|
import logging
|
|
8
7
|
from abc import ABC, abstractmethod
|
|
9
8
|
from enum import Enum
|
|
@@ -66,7 +65,7 @@ class ModalityContext(ABC, BaseModel):
|
|
|
66
65
|
data_type: ModalityDataType
|
|
67
66
|
|
|
68
67
|
@abstractmethod
|
|
69
|
-
def
|
|
68
|
+
def get_context(self, record: dict) -> dict[str, Any]: ...
|
|
70
69
|
|
|
71
70
|
|
|
72
71
|
class ImageContext(ModalityContext):
|
|
@@ -82,53 +81,25 @@ class ImageContext(ModalityContext):
|
|
|
82
81
|
modality: Modality = Modality.IMAGE
|
|
83
82
|
image_format: ImageFormat | None = None
|
|
84
83
|
|
|
85
|
-
def
|
|
86
|
-
"""Get the
|
|
84
|
+
def get_context(self, record: dict) -> dict[str, Any]:
|
|
85
|
+
"""Get the context for the image modality.
|
|
87
86
|
|
|
88
87
|
Args:
|
|
89
|
-
record: The record containing the image data.
|
|
90
|
-
- A JSON serialized list of strings
|
|
91
|
-
- A list of strings
|
|
92
|
-
- A single string
|
|
88
|
+
record: The record containing the image data.
|
|
93
89
|
|
|
94
90
|
Returns:
|
|
95
|
-
|
|
91
|
+
The context for the image modality.
|
|
96
92
|
"""
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
# Try to parse as JSON first
|
|
102
|
-
try:
|
|
103
|
-
parsed_value = json.loads(raw_value)
|
|
104
|
-
if isinstance(parsed_value, list):
|
|
105
|
-
context_values = parsed_value
|
|
106
|
-
else:
|
|
107
|
-
context_values = [raw_value]
|
|
108
|
-
except (json.JSONDecodeError, TypeError):
|
|
109
|
-
context_values = [raw_value]
|
|
110
|
-
elif isinstance(raw_value, list):
|
|
111
|
-
context_values = raw_value
|
|
112
|
-
elif hasattr(raw_value, "__iter__") and not isinstance(raw_value, (str, bytes, dict)):
|
|
113
|
-
# Handle array-like objects (numpy arrays, pandas Series, etc.)
|
|
114
|
-
context_values = list(raw_value)
|
|
93
|
+
context = dict(type="image_url")
|
|
94
|
+
context_value = record[self.column_name]
|
|
95
|
+
if self.data_type == ModalityDataType.URL:
|
|
96
|
+
context["image_url"] = context_value
|
|
115
97
|
else:
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
context = dict(type="image_url")
|
|
122
|
-
if self.data_type == ModalityDataType.URL:
|
|
123
|
-
context["image_url"] = context_value
|
|
124
|
-
else:
|
|
125
|
-
context["image_url"] = {
|
|
126
|
-
"url": f"data:image/{self.image_format.value};base64,{context_value}",
|
|
127
|
-
"format": self.image_format.value,
|
|
128
|
-
}
|
|
129
|
-
contexts.append(context)
|
|
130
|
-
|
|
131
|
-
return contexts
|
|
98
|
+
context["image_url"] = {
|
|
99
|
+
"url": f"data:image/{self.image_format.value};base64,{context_value}",
|
|
100
|
+
"format": self.image_format.value,
|
|
101
|
+
}
|
|
102
|
+
return context
|
|
132
103
|
|
|
133
104
|
@model_validator(mode="after")
|
|
134
105
|
def _validate_image_format(self) -> Self:
|
|
@@ -428,14 +399,12 @@ class ModelConfig(ConfigBase):
|
|
|
428
399
|
inference_parameters: Inference parameters for the model (temperature, top_p, max_tokens, etc.).
|
|
429
400
|
The generation_type is determined by the type of inference_parameters.
|
|
430
401
|
provider: Optional model provider name if using custom providers.
|
|
431
|
-
skip_health_check: Whether to skip the health check for this model. Defaults to False.
|
|
432
402
|
"""
|
|
433
403
|
|
|
434
404
|
alias: str
|
|
435
405
|
model: str
|
|
436
406
|
inference_parameters: InferenceParamsT = Field(default_factory=ChatCompletionInferenceParams)
|
|
437
407
|
provider: str | None = None
|
|
438
|
-
skip_health_check: bool = False
|
|
439
408
|
|
|
440
409
|
@property
|
|
441
410
|
def generation_type(self) -> GenerationType:
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/run_config.py
RENAMED
|
@@ -33,10 +33,6 @@ class RunConfig(ConfigBase):
|
|
|
33
33
|
max_conversation_correction_steps: Maximum number of correction rounds permitted within a
|
|
34
34
|
single conversation when generation tasks call `ModelFacade.generate(...)`. Must be >= 0.
|
|
35
35
|
Default is 0.
|
|
36
|
-
debug_override_save_all_column_traces: If True, overrides per-column `with_trace` settings
|
|
37
|
-
and includes `__trace` columns for ALL LLM generations, containing the full ordered
|
|
38
|
-
message history (system/user/assistant) for the final generation attempt.
|
|
39
|
-
Useful for debugging. Default is False.
|
|
40
36
|
"""
|
|
41
37
|
|
|
42
38
|
disable_early_shutdown: bool = False
|
|
@@ -46,7 +42,6 @@ class RunConfig(ConfigBase):
|
|
|
46
42
|
non_inference_max_parallel_workers: int = Field(default=4, ge=1)
|
|
47
43
|
max_conversation_restarts: int = Field(default=5, ge=0)
|
|
48
44
|
max_conversation_correction_steps: int = Field(default=0, ge=0)
|
|
49
|
-
debug_override_save_all_column_traces: bool = False
|
|
50
45
|
|
|
51
46
|
@model_validator(mode="after")
|
|
52
47
|
def normalize_shutdown_settings(self) -> Self:
|
|
@@ -7,14 +7,9 @@ from enum import Enum
|
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class CodeLang(str, Enum):
|
|
10
|
-
BASH = "bash"
|
|
11
|
-
C = "c"
|
|
12
|
-
COBOL = "cobol"
|
|
13
|
-
CPP = "cpp"
|
|
14
|
-
CSHARP = "csharp"
|
|
15
10
|
GO = "go"
|
|
16
|
-
JAVA = "java"
|
|
17
11
|
JAVASCRIPT = "javascript"
|
|
12
|
+
JAVA = "java"
|
|
18
13
|
KOTLIN = "kotlin"
|
|
19
14
|
PYTHON = "python"
|
|
20
15
|
RUBY = "ruby"
|
|
@@ -68,21 +63,15 @@ def code_lang_to_syntax_lexer(code_lang: CodeLang | str) -> str:
|
|
|
68
63
|
Reference: https://pygments.org/docs/lexers/
|
|
69
64
|
"""
|
|
70
65
|
code_lang_to_lexer = {
|
|
71
|
-
CodeLang.BASH: "bash",
|
|
72
|
-
CodeLang.C: "c",
|
|
73
|
-
CodeLang.COBOL: "cobol",
|
|
74
|
-
CodeLang.CPP: "cpp",
|
|
75
|
-
CodeLang.CSHARP: "csharp",
|
|
76
66
|
CodeLang.GO: "golang",
|
|
77
|
-
CodeLang.JAVA: "java",
|
|
78
67
|
CodeLang.JAVASCRIPT: "javascript",
|
|
68
|
+
CodeLang.JAVA: "java",
|
|
79
69
|
CodeLang.KOTLIN: "kotlin",
|
|
80
70
|
CodeLang.PYTHON: "python",
|
|
81
71
|
CodeLang.RUBY: "ruby",
|
|
82
72
|
CodeLang.RUST: "rust",
|
|
83
73
|
CodeLang.SCALA: "scala",
|
|
84
74
|
CodeLang.SWIFT: "swift",
|
|
85
|
-
CodeLang.TYPESCRIPT: "typescript",
|
|
86
75
|
CodeLang.SQL_SQLITE: "sql",
|
|
87
76
|
CodeLang.SQL_ANSI: "sql",
|
|
88
77
|
CodeLang.SQL_TSQL: "tsql",
|
|
@@ -50,14 +50,6 @@ class LoggingConfig:
|
|
|
50
50
|
class RandomEmoji:
|
|
51
51
|
"""A generator for various themed emoji collections."""
|
|
52
52
|
|
|
53
|
-
def __init__(self) -> None:
|
|
54
|
-
self._progress_style = random.choice(_PROGRESS_STYLES)
|
|
55
|
-
|
|
56
|
-
def progress(self, percent: float) -> str:
|
|
57
|
-
"""Get a progress emoji based on completion percentage (0-100)."""
|
|
58
|
-
phase_idx = min(int(percent / 25), len(self._progress_style) - 1)
|
|
59
|
-
return self._progress_style[phase_idx]
|
|
60
|
-
|
|
61
53
|
@staticmethod
|
|
62
54
|
def cooking() -> str:
|
|
63
55
|
"""Get a random cooking or food preparation emoji."""
|
|
@@ -171,10 +163,3 @@ def _make_stream_formatter() -> logging.Formatter:
|
|
|
171
163
|
|
|
172
164
|
|
|
173
165
|
_DEFAULT_NOISY_LOGGERS = ["httpx", "matplotlib"]
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
_PROGRESS_STYLES: list[list[str]] = [
|
|
177
|
-
["🌑", "🌘", "🌗", "🌖", "🌕"], # Moon phases
|
|
178
|
-
["🌧️", "🌦️", "⛅", "🌤️", "☀️"], # Weather (storm to sun)
|
|
179
|
-
["🥚", "🐣", "🐥", "🐤", "🐔"], # Hatching (egg to chicken)
|
|
180
|
-
]
|
|
@@ -85,7 +85,7 @@ def test_llm_text_column_config():
|
|
|
85
85
|
assert llm_text_column_config.system_prompt == stub_system_prompt
|
|
86
86
|
assert llm_text_column_config.column_type == DataDesignerColumnType.LLM_TEXT
|
|
87
87
|
assert set(llm_text_column_config.required_columns) == {"some_column", "some_other_column"}
|
|
88
|
-
assert llm_text_column_config.side_effect_columns == ["
|
|
88
|
+
assert llm_text_column_config.side_effect_columns == ["test_llm_text__reasoning_trace"]
|
|
89
89
|
|
|
90
90
|
# invalid prompt
|
|
91
91
|
with pytest.raises(
|
|
@@ -4,7 +4,6 @@
|
|
|
4
4
|
import json
|
|
5
5
|
import tempfile
|
|
6
6
|
from collections import Counter
|
|
7
|
-
from typing import TYPE_CHECKING
|
|
8
7
|
|
|
9
8
|
import pytest
|
|
10
9
|
import yaml
|
|
@@ -25,159 +24,22 @@ from data_designer.config.models import (
|
|
|
25
24
|
UniformDistributionParams,
|
|
26
25
|
load_model_configs,
|
|
27
26
|
)
|
|
28
|
-
from data_designer.lazy_heavy_imports import np
|
|
29
27
|
|
|
30
|
-
if TYPE_CHECKING:
|
|
31
|
-
import numpy as np
|
|
32
28
|
|
|
33
|
-
|
|
34
|
-
def test_image_context_get_contexts_single_string():
|
|
35
|
-
"""Test get_contexts with a single string value."""
|
|
36
|
-
image_context = ImageContext(
|
|
37
|
-
column_name="image_base64", data_type=ModalityDataType.BASE64, image_format=ImageFormat.PNG
|
|
38
|
-
)
|
|
39
|
-
assert image_context.get_contexts({"image_base64": "somebase64encodedimagestring"}) == [
|
|
40
|
-
{
|
|
41
|
-
"type": "image_url",
|
|
42
|
-
"image_url": {"url": "data:image/png;base64,somebase64encodedimagestring", "format": "png"},
|
|
43
|
-
}
|
|
44
|
-
]
|
|
45
|
-
|
|
46
|
-
image_context = ImageContext(column_name="image_url", data_type=ModalityDataType.URL)
|
|
47
|
-
assert image_context.get_contexts({"image_url": "https://example.com/examle_image.png"}) == [
|
|
48
|
-
{
|
|
49
|
-
"type": "image_url",
|
|
50
|
-
"image_url": "https://example.com/examle_image.png",
|
|
51
|
-
}
|
|
52
|
-
]
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
def test_image_context_get_contexts_list_of_strings():
|
|
56
|
-
"""Test get_contexts with a list of strings."""
|
|
57
|
-
image_context = ImageContext(
|
|
58
|
-
column_name="image_base64", data_type=ModalityDataType.BASE64, image_format=ImageFormat.PNG
|
|
59
|
-
)
|
|
60
|
-
assert image_context.get_contexts({"image_base64": ["image1base64", "image2base64", "image3base64"]}) == [
|
|
61
|
-
{
|
|
62
|
-
"type": "image_url",
|
|
63
|
-
"image_url": {"url": "data:image/png;base64,image1base64", "format": "png"},
|
|
64
|
-
},
|
|
65
|
-
{
|
|
66
|
-
"type": "image_url",
|
|
67
|
-
"image_url": {"url": "data:image/png;base64,image2base64", "format": "png"},
|
|
68
|
-
},
|
|
69
|
-
{
|
|
70
|
-
"type": "image_url",
|
|
71
|
-
"image_url": {"url": "data:image/png;base64,image3base64", "format": "png"},
|
|
72
|
-
},
|
|
73
|
-
]
|
|
74
|
-
|
|
75
|
-
image_context = ImageContext(column_name="image_url", data_type=ModalityDataType.URL)
|
|
76
|
-
assert image_context.get_contexts(
|
|
77
|
-
{"image_url": ["https://example.com/image1.png", "https://example.com/image2.png"]}
|
|
78
|
-
) == [
|
|
79
|
-
{
|
|
80
|
-
"type": "image_url",
|
|
81
|
-
"image_url": "https://example.com/image1.png",
|
|
82
|
-
},
|
|
83
|
-
{
|
|
84
|
-
"type": "image_url",
|
|
85
|
-
"image_url": "https://example.com/image2.png",
|
|
86
|
-
},
|
|
87
|
-
]
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def test_image_context_get_contexts_numpy_array():
|
|
91
|
-
"""Test get_contexts with numpy arrays (happens after parquet serialization)."""
|
|
92
|
-
image_context = ImageContext(
|
|
93
|
-
column_name="image_base64", data_type=ModalityDataType.BASE64, image_format=ImageFormat.PNG
|
|
94
|
-
)
|
|
95
|
-
numpy_array = np.array(["image1base64", "image2base64"])
|
|
96
|
-
assert image_context.get_contexts({"image_base64": numpy_array}) == [
|
|
97
|
-
{
|
|
98
|
-
"type": "image_url",
|
|
99
|
-
"image_url": {"url": "data:image/png;base64,image1base64", "format": "png"},
|
|
100
|
-
},
|
|
101
|
-
{
|
|
102
|
-
"type": "image_url",
|
|
103
|
-
"image_url": {"url": "data:image/png;base64,image2base64", "format": "png"},
|
|
104
|
-
},
|
|
105
|
-
]
|
|
106
|
-
|
|
107
|
-
image_context = ImageContext(column_name="image_url", data_type=ModalityDataType.URL)
|
|
108
|
-
numpy_array = np.array(["https://example.com/image1.png", "https://example.com/image2.png"])
|
|
109
|
-
assert image_context.get_contexts({"image_url": numpy_array}) == [
|
|
110
|
-
{
|
|
111
|
-
"type": "image_url",
|
|
112
|
-
"image_url": "https://example.com/image1.png",
|
|
113
|
-
},
|
|
114
|
-
{
|
|
115
|
-
"type": "image_url",
|
|
116
|
-
"image_url": "https://example.com/image2.png",
|
|
117
|
-
},
|
|
118
|
-
]
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
def test_image_context_get_contexts_json_serialized_list():
|
|
122
|
-
"""Test get_contexts with a JSON serialized list of strings."""
|
|
29
|
+
def test_image_context_get_context():
|
|
123
30
|
image_context = ImageContext(
|
|
124
31
|
column_name="image_base64", data_type=ModalityDataType.BASE64, image_format=ImageFormat.PNG
|
|
125
32
|
)
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
{
|
|
129
|
-
|
|
130
|
-
"image_url": {"url": "data:image/png;base64,image1base64", "format": "png"},
|
|
131
|
-
},
|
|
132
|
-
{
|
|
133
|
-
"type": "image_url",
|
|
134
|
-
"image_url": {"url": "data:image/png;base64,image2base64", "format": "png"},
|
|
135
|
-
},
|
|
136
|
-
]
|
|
137
|
-
|
|
138
|
-
image_context = ImageContext(column_name="image_url", data_type=ModalityDataType.URL)
|
|
139
|
-
json_str = json.dumps(["https://example.com/image1.png", "https://example.com/image2.png"])
|
|
140
|
-
assert image_context.get_contexts({"image_url": json_str}) == [
|
|
141
|
-
{
|
|
142
|
-
"type": "image_url",
|
|
143
|
-
"image_url": "https://example.com/image1.png",
|
|
144
|
-
},
|
|
145
|
-
{
|
|
146
|
-
"type": "image_url",
|
|
147
|
-
"image_url": "https://example.com/image2.png",
|
|
148
|
-
},
|
|
149
|
-
]
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
def test_image_context_get_contexts_json_string_not_list():
|
|
153
|
-
"""Test get_contexts with a JSON string that isn't a list (should treat as single string)."""
|
|
154
|
-
image_context = ImageContext(column_name="image_url", data_type=ModalityDataType.URL)
|
|
155
|
-
json_str = json.dumps({"nested": "object"})
|
|
156
|
-
# Should treat the entire JSON string as a single image URL
|
|
157
|
-
assert image_context.get_contexts({"image_url": json_str}) == [
|
|
158
|
-
{
|
|
159
|
-
"type": "image_url",
|
|
160
|
-
"image_url": json_str,
|
|
161
|
-
}
|
|
162
|
-
]
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
def test_image_context_get_contexts_invalid_json():
|
|
166
|
-
"""Test get_contexts with invalid JSON string (should treat as single string)."""
|
|
167
|
-
image_context = ImageContext(column_name="image_url", data_type=ModalityDataType.URL)
|
|
168
|
-
invalid_json = "not a valid json string"
|
|
169
|
-
assert image_context.get_contexts({"image_url": invalid_json}) == [
|
|
170
|
-
{
|
|
171
|
-
"type": "image_url",
|
|
172
|
-
"image_url": invalid_json,
|
|
173
|
-
}
|
|
174
|
-
]
|
|
175
|
-
|
|
33
|
+
assert image_context.get_context({"image_base64": "somebase64encodedimagestring"}) == {
|
|
34
|
+
"type": "image_url",
|
|
35
|
+
"image_url": {"url": "data:image/png;base64,somebase64encodedimagestring", "format": "png"},
|
|
36
|
+
}
|
|
176
37
|
|
|
177
|
-
def test_image_context_get_contexts_empty_list():
|
|
178
|
-
"""Test get_contexts with an empty list."""
|
|
179
38
|
image_context = ImageContext(column_name="image_url", data_type=ModalityDataType.URL)
|
|
180
|
-
assert image_context.
|
|
39
|
+
assert image_context.get_context({"image_url": "https://example.com/examle_image.png"}) == {
|
|
40
|
+
"type": "image_url",
|
|
41
|
+
"image_url": "https://example.com/examle_image.png",
|
|
42
|
+
}
|
|
181
43
|
|
|
182
44
|
|
|
183
45
|
def test_image_context_validate_image_format():
|
|
@@ -208,54 +208,3 @@ def test_random_emoji_randomness():
|
|
|
208
208
|
emojis = [RandomEmoji.magic() for _ in range(100)]
|
|
209
209
|
# If we get 100 samples, we should get at least 2 different emojis
|
|
210
210
|
assert len(set(emojis)) > 1
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
def test_random_emoji_progress_returns_valid_emoji() -> None:
|
|
214
|
-
emoji_gen = RandomEmoji()
|
|
215
|
-
emoji = emoji_gen.progress(50.0)
|
|
216
|
-
assert emoji is not None
|
|
217
|
-
assert len(emoji) > 0
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
def test_random_emoji_progress_is_deterministic() -> None:
|
|
221
|
-
emoji_gen = RandomEmoji()
|
|
222
|
-
# Same percentage should always return the same emoji for a given instance
|
|
223
|
-
assert emoji_gen.progress(0.0) == emoji_gen.progress(0.0)
|
|
224
|
-
assert emoji_gen.progress(50.0) == emoji_gen.progress(50.0)
|
|
225
|
-
assert emoji_gen.progress(100.0) == emoji_gen.progress(100.0)
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
def test_random_emoji_progress_phases_are_distinct() -> None:
|
|
229
|
-
emoji_gen = RandomEmoji()
|
|
230
|
-
# Each 25% phase should return a different emoji
|
|
231
|
-
phase_emojis = [
|
|
232
|
-
emoji_gen.progress(0.0), # phase 0
|
|
233
|
-
emoji_gen.progress(25.0), # phase 1
|
|
234
|
-
emoji_gen.progress(50.0), # phase 2
|
|
235
|
-
emoji_gen.progress(75.0), # phase 3
|
|
236
|
-
emoji_gen.progress(100.0), # phase 4
|
|
237
|
-
]
|
|
238
|
-
# All 5 phases should have distinct emojis
|
|
239
|
-
assert len(set(phase_emojis)) == 5
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
def test_random_emoji_progress_phase_boundaries() -> None:
|
|
243
|
-
emoji_gen = RandomEmoji()
|
|
244
|
-
# Values within the same phase should return the same emoji
|
|
245
|
-
assert emoji_gen.progress(0.0) == emoji_gen.progress(24.9)
|
|
246
|
-
assert emoji_gen.progress(25.0) == emoji_gen.progress(49.9)
|
|
247
|
-
assert emoji_gen.progress(50.0) == emoji_gen.progress(74.9)
|
|
248
|
-
assert emoji_gen.progress(75.0) == emoji_gen.progress(99.9)
|
|
249
|
-
# Phase transitions should return different emojis
|
|
250
|
-
assert emoji_gen.progress(24.9) != emoji_gen.progress(25.0)
|
|
251
|
-
assert emoji_gen.progress(49.9) != emoji_gen.progress(50.0)
|
|
252
|
-
assert emoji_gen.progress(74.9) != emoji_gen.progress(75.0)
|
|
253
|
-
assert emoji_gen.progress(99.9) != emoji_gen.progress(100.0)
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def test_random_emoji_progress_clamps_over_100() -> None:
|
|
257
|
-
emoji_gen = RandomEmoji()
|
|
258
|
-
emoji_100 = emoji_gen.progress(100.0)
|
|
259
|
-
emoji_over = emoji_gen.progress(150.0)
|
|
260
|
-
# Both should return the same final emoji
|
|
261
|
-
assert emoji_100 == emoji_over
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/base.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/errors.py
RENAMED
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/interface.py
RENAMED
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/processors.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/seed.py
RENAMED
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/seed_source.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/utils/info.py
RENAMED
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/config/utils/misc.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/lazy_heavy_imports.py
RENAMED
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/plugin_manager.py
RENAMED
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/plugins/__init__.py
RENAMED
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/plugins/errors.py
RENAMED
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/plugins/plugin.py
RENAMED
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/src/data_designer/plugins/registry.py
RENAMED
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/analysis/conftest.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_config_builder.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_processors.py
RENAMED
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_sampler_params.py
RENAMED
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_seed_source.py
RENAMED
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/test_validator_params.py
RENAMED
|
File without changes
|
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/utils/test_info.py
RENAMED
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/utils/test_io_helpers.py
RENAMED
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/utils/test_misc.py
RENAMED
|
File without changes
|
{data_designer_config-0.4.0 → data_designer_config-0.4.0rc1}/tests/config/utils/test_type_helpers.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|