data-designer-config 0.4.0rc3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data_designer/config/__init__.py +219 -142
- data_designer/config/base.py +37 -39
- data_designer/config/column_configs.py +147 -61
- data_designer/config/column_types.py +5 -1
- data_designer/config/config_builder.py +103 -3
- data_designer/config/custom_column.py +64 -0
- data_designer/config/data_designer_config.py +5 -1
- data_designer/config/exportable_config.py +59 -0
- data_designer/config/mcp.py +109 -0
- data_designer/config/run_config.py +1 -6
- data_designer/config/utils/constants.py +9 -0
- data_designer/config/utils/trace_type.py +24 -0
- data_designer/config/utils/visualization.py +6 -0
- {data_designer_config-0.4.0rc3.dist-info → data_designer_config-0.5.0rc1.dist-info}/METADATA +1 -1
- {data_designer_config-0.4.0rc3.dist-info → data_designer_config-0.5.0rc1.dist-info}/RECORD +16 -13
- data_designer/config/_version.py +0 -34
- {data_designer_config-0.4.0rc3.dist-info → data_designer_config-0.5.0rc1.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from typing import Annotated, Literal
|
|
7
|
+
|
|
8
|
+
from pydantic import Field
|
|
9
|
+
from typing_extensions import TypeAlias
|
|
10
|
+
|
|
11
|
+
from data_designer.config.base import ConfigBase
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class MCPProvider(ConfigBase):
|
|
15
|
+
"""Configuration for a remote MCP server connection.
|
|
16
|
+
|
|
17
|
+
MCPProvider is used to connect to pre-existing MCP servers via SSE (Server-Sent Events)
|
|
18
|
+
transport. For local subprocess-based MCP servers, use LocalStdioMCPProvider instead.
|
|
19
|
+
|
|
20
|
+
Attributes:
|
|
21
|
+
name (str): Unique name used to reference this MCP provider.
|
|
22
|
+
endpoint (str): SSE endpoint URL for connecting to the remote MCP server.
|
|
23
|
+
api_key (str | None): Optional API key for authentication. Defaults to None.
|
|
24
|
+
provider_type (Literal["sse"]): Transport type discriminator, always "sse".
|
|
25
|
+
|
|
26
|
+
Examples:
|
|
27
|
+
Remote SSE transport:
|
|
28
|
+
|
|
29
|
+
>>> MCPProvider(
|
|
30
|
+
... name="remote-mcp",
|
|
31
|
+
... endpoint="http://localhost:8080/sse",
|
|
32
|
+
... api_key="your-api-key",
|
|
33
|
+
... )
|
|
34
|
+
"""
|
|
35
|
+
|
|
36
|
+
provider_type: Literal["sse"] = "sse"
|
|
37
|
+
name: str
|
|
38
|
+
endpoint: str
|
|
39
|
+
api_key: str | None = None
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
class LocalStdioMCPProvider(ConfigBase):
|
|
43
|
+
"""Configuration for launching a local MCP server via stdio transport.
|
|
44
|
+
|
|
45
|
+
LocalStdioMCPProvider is used to launch MCP servers as subprocesses using stdio
|
|
46
|
+
for communication. For connecting to remote/pre-existing MCP servers, use MCPProvider instead.
|
|
47
|
+
|
|
48
|
+
Attributes:
|
|
49
|
+
name (str): Unique name used to reference this MCP provider.
|
|
50
|
+
command (str): Executable to launch the MCP server via stdio transport.
|
|
51
|
+
args (list[str]): Arguments passed to the MCP server executable. Defaults to [].
|
|
52
|
+
env (dict[str, str]): Environment variables passed to the MCP server subprocess. Defaults to {}.
|
|
53
|
+
provider_type (Literal["stdio"]): Transport type discriminator, always "stdio".
|
|
54
|
+
|
|
55
|
+
Examples:
|
|
56
|
+
Stdio (subprocess) transport:
|
|
57
|
+
|
|
58
|
+
>>> LocalStdioMCPProvider(
|
|
59
|
+
... name="demo-mcp",
|
|
60
|
+
... command="python",
|
|
61
|
+
... args=["-m", "data_designer_e2e_tests.mcp_demo_server"],
|
|
62
|
+
... env={"PYTHONPATH": "/path/to/project"},
|
|
63
|
+
... )
|
|
64
|
+
"""
|
|
65
|
+
|
|
66
|
+
provider_type: Literal["stdio"] = "stdio"
|
|
67
|
+
name: str
|
|
68
|
+
command: str
|
|
69
|
+
args: list[str] = Field(default_factory=list)
|
|
70
|
+
env: dict[str, str] = Field(default_factory=dict)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
MCPProviderT: TypeAlias = Annotated[MCPProvider | LocalStdioMCPProvider, Field(discriminator="provider_type")]
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class ToolConfig(ConfigBase):
|
|
77
|
+
"""Configuration for permitting MCP tools on an LLM column.
|
|
78
|
+
|
|
79
|
+
ToolConfig defines which tools are available for use during LLM generation.
|
|
80
|
+
It references one or more MCP providers by name and can optionally restrict
|
|
81
|
+
which tools from those providers are permitted.
|
|
82
|
+
|
|
83
|
+
Attributes:
|
|
84
|
+
tool_alias (str): User-defined alias to reference this tool configuration in column configs.
|
|
85
|
+
providers (list[str]): Names of the MCP providers to use for tool calls. Tools can be
|
|
86
|
+
drawn from multiple providers.
|
|
87
|
+
allow_tools (list[str] | None): Optional allowlist of tool names that restricts which
|
|
88
|
+
tools are permitted. If None, all tools from the specified providers are allowed.
|
|
89
|
+
Defaults to None.
|
|
90
|
+
max_tool_call_turns (int): Maximum number of tool-calling turns permitted in a single
|
|
91
|
+
generation. A turn is one iteration where the LLM requests tool calls. With parallel
|
|
92
|
+
tool calling, a single turn may execute multiple tools simultaneously. Defaults to 5.
|
|
93
|
+
timeout_sec (float | None): Timeout in seconds for MCP tool calls. Defaults to None (no timeout).
|
|
94
|
+
|
|
95
|
+
Examples:
|
|
96
|
+
>>> ToolConfig(
|
|
97
|
+
... tool_alias="search-tools",
|
|
98
|
+
... providers=["doc-search-mcp", "web-search-mcp"],
|
|
99
|
+
... allow_tools=["search_docs", "list_docs"],
|
|
100
|
+
... max_tool_call_turns=10,
|
|
101
|
+
... timeout_sec=30.0,
|
|
102
|
+
... )
|
|
103
|
+
"""
|
|
104
|
+
|
|
105
|
+
tool_alias: str
|
|
106
|
+
providers: list[str]
|
|
107
|
+
allow_tools: list[str] | None = None
|
|
108
|
+
max_tool_call_turns: int = Field(default=5, ge=1)
|
|
109
|
+
timeout_sec: float | None = Field(default=None, gt=0)
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
2
|
# SPDX-License-Identifier: Apache-2.0
|
|
3
3
|
|
|
4
4
|
from __future__ import annotations
|
|
@@ -33,10 +33,6 @@ class RunConfig(ConfigBase):
|
|
|
33
33
|
max_conversation_correction_steps: Maximum number of correction rounds permitted within a
|
|
34
34
|
single conversation when generation tasks call `ModelFacade.generate(...)`. Must be >= 0.
|
|
35
35
|
Default is 0.
|
|
36
|
-
debug_override_save_all_column_traces: If True, overrides per-column `with_trace` settings
|
|
37
|
-
and includes `__trace` columns for ALL LLM generations, containing the full ordered
|
|
38
|
-
message history (system/user/assistant) for the final generation attempt.
|
|
39
|
-
Useful for debugging. Default is False.
|
|
40
36
|
"""
|
|
41
37
|
|
|
42
38
|
disable_early_shutdown: bool = False
|
|
@@ -46,7 +42,6 @@ class RunConfig(ConfigBase):
|
|
|
46
42
|
non_inference_max_parallel_workers: int = Field(default=4, ge=1)
|
|
47
43
|
max_conversation_restarts: int = Field(default=5, ge=0)
|
|
48
44
|
max_conversation_correction_steps: int = Field(default=0, ge=0)
|
|
49
|
-
debug_override_save_all_column_traces: bool = False
|
|
50
45
|
|
|
51
46
|
@model_validator(mode="after")
|
|
52
47
|
def normalize_shutdown_settings(self) -> Self:
|
|
@@ -167,6 +167,7 @@ MAX_TOP_P = 1.0
|
|
|
167
167
|
MIN_TOP_P = 0.0
|
|
168
168
|
MIN_MAX_TOKENS = 1
|
|
169
169
|
TRACE_COLUMN_POSTFIX = "__trace"
|
|
170
|
+
REASONING_CONTENT_COLUMN_POSTFIX = "__reasoning_content"
|
|
170
171
|
|
|
171
172
|
AVAILABLE_LOCALES = [
|
|
172
173
|
"ar_AA",
|
|
@@ -276,6 +277,14 @@ MODEL_PROVIDERS_FILE_NAME = "model_providers.yaml"
|
|
|
276
277
|
|
|
277
278
|
MODEL_PROVIDERS_FILE_PATH = DATA_DESIGNER_HOME / MODEL_PROVIDERS_FILE_NAME
|
|
278
279
|
|
|
280
|
+
MCP_PROVIDERS_FILE_NAME = "mcp_providers.yaml"
|
|
281
|
+
|
|
282
|
+
MCP_PROVIDERS_FILE_PATH = DATA_DESIGNER_HOME / MCP_PROVIDERS_FILE_NAME
|
|
283
|
+
|
|
284
|
+
TOOL_CONFIGS_FILE_NAME = "tool_configs.yaml"
|
|
285
|
+
|
|
286
|
+
TOOL_CONFIGS_FILE_PATH = DATA_DESIGNER_HOME / TOOL_CONFIGS_FILE_NAME
|
|
287
|
+
|
|
279
288
|
NVIDIA_PROVIDER_NAME = "nvidia"
|
|
280
289
|
|
|
281
290
|
NVIDIA_API_KEY_ENV_VAR_NAME = "NVIDIA_API_KEY"
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
from data_designer.config.utils.type_helpers import StrEnum
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class TraceType(StrEnum):
|
|
10
|
+
"""Specifies the type of reasoning trace to capture for LLM columns.
|
|
11
|
+
|
|
12
|
+
Traces capture the conversation history during LLM generation, which is
|
|
13
|
+
useful for debugging, analysis, and understanding model behavior.
|
|
14
|
+
|
|
15
|
+
Attributes:
|
|
16
|
+
NONE: No trace is captured. This is the default.
|
|
17
|
+
LAST_MESSAGE: Only the final assistant message is captured.
|
|
18
|
+
ALL_MESSAGES: The full conversation history (system/user/assistant/tool)
|
|
19
|
+
is captured.
|
|
20
|
+
"""
|
|
21
|
+
|
|
22
|
+
NONE = "none"
|
|
23
|
+
LAST_MESSAGE = "last_message"
|
|
24
|
+
ALL_MESSAGES = "all_messages"
|
|
@@ -203,6 +203,7 @@ def display_sample_record(
|
|
|
203
203
|
+ config_builder.get_columns_of_type(DataDesignerColumnType.LLM_TEXT)
|
|
204
204
|
+ config_builder.get_columns_of_type(DataDesignerColumnType.LLM_STRUCTURED)
|
|
205
205
|
+ config_builder.get_columns_of_type(DataDesignerColumnType.EMBEDDING)
|
|
206
|
+
+ config_builder.get_columns_of_type(DataDesignerColumnType.CUSTOM)
|
|
206
207
|
)
|
|
207
208
|
if len(non_code_columns) > 0:
|
|
208
209
|
table = Table(title="Generated Columns", **table_kws)
|
|
@@ -215,6 +216,11 @@ def display_sample_record(
|
|
|
215
216
|
get_truncated_list_as_string(embd) for embd in record[col.name].get("embeddings")
|
|
216
217
|
]
|
|
217
218
|
table.add_row(col.name, convert_to_row_element(record[col.name]))
|
|
219
|
+
# Also display side_effect_columns for custom generators
|
|
220
|
+
if col.column_type == DataDesignerColumnType.CUSTOM:
|
|
221
|
+
for output_col in col.side_effect_columns:
|
|
222
|
+
if output_col in record:
|
|
223
|
+
table.add_row(output_col, convert_to_row_element(record[output_col]))
|
|
218
224
|
render_list.append(pad_console_element(table))
|
|
219
225
|
|
|
220
226
|
for col in config_builder.get_columns_of_type(DataDesignerColumnType.LLM_CODE):
|
|
@@ -2,22 +2,24 @@ data_designer/errors.py,sha256=r1pBvmvRBAsPmb7oF_veubhkxZ2uPo9cGEDwykLziX4,220
|
|
|
2
2
|
data_designer/lazy_heavy_imports.py,sha256=5X04vUj9TYbKgfDmY2qvhzRf5-IZWKOanIpi3_u7fmM,1662
|
|
3
3
|
data_designer/logging.py,sha256=Xq2cRwxmDJ-r8_s9NWnk15efLRsrKm5iVScHy6HkjiE,6044
|
|
4
4
|
data_designer/plugin_manager.py,sha256=C2ZkZiXlcMRiaxfrrho5Shz6DKdExVeBha7ch-d4CnU,2695
|
|
5
|
-
data_designer/config/__init__.py,sha256=
|
|
6
|
-
data_designer/config/
|
|
7
|
-
data_designer/config/
|
|
8
|
-
data_designer/config/
|
|
9
|
-
data_designer/config/
|
|
10
|
-
data_designer/config/
|
|
11
|
-
data_designer/config/data_designer_config.py,sha256=
|
|
5
|
+
data_designer/config/__init__.py,sha256=Joc1wgUNdtQROIW4x3si5_B1yZVVrM_wRBBQ3VREECU,10192
|
|
6
|
+
data_designer/config/base.py,sha256=uiZRsDkDx5Gt_Bd54YIzo8as0mZPCxaUrnCMlgRQ8YY,2456
|
|
7
|
+
data_designer/config/column_configs.py,sha256=ghYwuwVplAvihHu2ZFQvwYubI8r2nILUHnsPHI3GAbI,25769
|
|
8
|
+
data_designer/config/column_types.py,sha256=Su-wnzUMKZwTMxn4bKrAMrv6m7s-2AlRJV9WqMPvQOY,5566
|
|
9
|
+
data_designer/config/config_builder.py,sha256=msvu_PE0MfLtRFNBHQgO5fx7cF4fhm1rM2Vj9TowdUM,28623
|
|
10
|
+
data_designer/config/custom_column.py,sha256=Ks5DX2SMx3jTUkitarAlvR7hxS-rONAy9UYZCLcZC8c,2515
|
|
11
|
+
data_designer/config/data_designer_config.py,sha256=HnTaBYwNiD37_3aHVTDRqCjCDYVUCq7FIqKiNOT2Fdo,2170
|
|
12
12
|
data_designer/config/dataset_builders.py,sha256=jdCujJYFlKAiSkPNX2Qeyrs683GrRcCDv_m8ZZhtg64,368
|
|
13
13
|
data_designer/config/dataset_metadata.py,sha256=UTlEgnHWgjwPuc7bP95T7gaKmcr7pIhFMy9vvbUwMV4,647
|
|
14
14
|
data_designer/config/default_model_settings.py,sha256=c-llH2otfG0tMCMsxoz3ZcS1nFxIQQPfRedFXAydDbc,4868
|
|
15
15
|
data_designer/config/errors.py,sha256=JhvUYecfLmP0gZjQzqA3OmfaSs9TRlC5E-ubnV_-3gs,560
|
|
16
|
+
data_designer/config/exportable_config.py,sha256=634mOo2yB84Hq9S-oiAuJbomFgJGvSEqep2VoRZcMEg,2214
|
|
16
17
|
data_designer/config/interface.py,sha256=ikmpm_KwencTpM-yg0auo7XMgcmMSa67S75IqdpFLfk,1676
|
|
18
|
+
data_designer/config/mcp.py,sha256=GqDWPieqzMWp6Nj5ikfHSzB1_w3pfjg8NA_QrLjrv9o,4295
|
|
17
19
|
data_designer/config/models.py,sha256=_NctRk4brgBeb5q5V7r_hXE5OORlLh6SCVZP0eu2LGo,16721
|
|
18
20
|
data_designer/config/preview_results.py,sha256=WnPlDcHElIHNfjV_P-nLu_Dpul8D3Eyb5qyi3E173Gs,1744
|
|
19
21
|
data_designer/config/processors.py,sha256=lnyUZA1EhO9NWjjVFFioYxSgeYpoAaM1J7UzwOYkvms,6028
|
|
20
|
-
data_designer/config/run_config.py,sha256=
|
|
22
|
+
data_designer/config/run_config.py,sha256=FjZi9DMePGJiGyXG9-XXAfg4enb2fs0fJQR47HQLP88,2668
|
|
21
23
|
data_designer/config/sampler_constraints.py,sha256=tQI1XLF5bS4TnyKMLo0nArvefnXI8dWCzov38r4qNCQ,1197
|
|
22
24
|
data_designer/config/sampler_params.py,sha256=Gio-53vjSYOdPhF2CEq4HSWCXCaZMy4WpGPbuFVcWOM,27965
|
|
23
25
|
data_designer/config/seed.py,sha256=eShSqOcSUzfCEZBnqY-rB0qZpRGxjeOE3fSaJAwacec,4668
|
|
@@ -33,18 +35,19 @@ data_designer/config/analysis/utils/reporting.py,sha256=teTzd1OHtpI4vbIinGOGsKXy
|
|
|
33
35
|
data_designer/config/testing/__init__.py,sha256=vxFrIOqDoDfOx-MWjC5lb_hvmB4kRKvh1QdTv--QYFM,222
|
|
34
36
|
data_designer/config/testing/fixtures.py,sha256=J1bcWjerAIoVUIZBVPbUcuvEa2laj_kspVcLS7UZMbo,10876
|
|
35
37
|
data_designer/config/utils/code_lang.py,sha256=nUeWjuzSYBVF5gwOiUE2-EsYCEDzRZaw31RIivt7GPI,2638
|
|
36
|
-
data_designer/config/utils/constants.py,sha256=
|
|
38
|
+
data_designer/config/utils/constants.py,sha256=FYQgrBuX6JVrLnMjNk-TSmbcvgOofBt06RDvgTew0a4,9246
|
|
37
39
|
data_designer/config/utils/errors.py,sha256=HCjer0YrF0bMn5j8gmgWaLb0395LAr_hxMD1ftOsOc8,520
|
|
38
40
|
data_designer/config/utils/info.py,sha256=yOa4U8kI_CY4OfCKZxCm2okU8klAiThvyjKM5tG-F0A,3469
|
|
39
41
|
data_designer/config/utils/io_helpers.py,sha256=kzvOR7QgqijkqU-O2enIlpCWwHvzc3oRaEl4Lsjh1Do,8466
|
|
40
42
|
data_designer/config/utils/misc.py,sha256=7n_0txc78IoK6V39CwZY-65KtYcjh38WDl0Q1bQM-EA,2481
|
|
41
43
|
data_designer/config/utils/numerical_helpers.py,sha256=DIubKzc8q2_Bw7xRjyOGwxYulTV3dt3JxCdpH560dak,838
|
|
44
|
+
data_designer/config/utils/trace_type.py,sha256=PtUXGdM-9n77M83sJ7gaYknj1KrFfjGDVUksO7g4X3s,830
|
|
42
45
|
data_designer/config/utils/type_helpers.py,sha256=XyVup24F4Bl7uNze_yUW9oD6EzFbfsJWKhpeMN2901A,4059
|
|
43
|
-
data_designer/config/utils/visualization.py,sha256=
|
|
46
|
+
data_designer/config/utils/visualization.py,sha256=clfDqFOsny6aEHW_dyMmItVnjPv93sAX_3RIlmGQJOc,18919
|
|
44
47
|
data_designer/plugins/__init__.py,sha256=qe1alcTEtnMSMdzknjb57vvjqKgFE5cEHXxBj8tPWMI,275
|
|
45
48
|
data_designer/plugins/errors.py,sha256=d7FMed3ueQvZHwuhwyPLzF4E34bO1mdj3aBVEw6p34o,386
|
|
46
49
|
data_designer/plugins/plugin.py,sha256=TVyyOaQBWAt0FQwUmtihTZ9MDJD85HwggrQ3L9CviPQ,5367
|
|
47
50
|
data_designer/plugins/registry.py,sha256=Cnt33Q25o9bS2v2YDbV3QPM57VNrtIBKAb4ERQRE_dY,3053
|
|
48
|
-
data_designer_config-0.
|
|
49
|
-
data_designer_config-0.
|
|
50
|
-
data_designer_config-0.
|
|
51
|
+
data_designer_config-0.5.0rc1.dist-info/METADATA,sha256=Jl2IswRqm6fHQra4g44_tUGMkGCypsGskTBPWhnLQr8,2286
|
|
52
|
+
data_designer_config-0.5.0rc1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
53
|
+
data_designer_config-0.5.0rc1.dist-info/RECORD,,
|
data_designer/config/_version.py
DELETED
|
@@ -1,34 +0,0 @@
|
|
|
1
|
-
# file generated by setuptools-scm
|
|
2
|
-
# don't change, don't track in version control
|
|
3
|
-
|
|
4
|
-
__all__ = [
|
|
5
|
-
"__version__",
|
|
6
|
-
"__version_tuple__",
|
|
7
|
-
"version",
|
|
8
|
-
"version_tuple",
|
|
9
|
-
"__commit_id__",
|
|
10
|
-
"commit_id",
|
|
11
|
-
]
|
|
12
|
-
|
|
13
|
-
TYPE_CHECKING = False
|
|
14
|
-
if TYPE_CHECKING:
|
|
15
|
-
from typing import Tuple
|
|
16
|
-
from typing import Union
|
|
17
|
-
|
|
18
|
-
VERSION_TUPLE = Tuple[Union[int, str], ...]
|
|
19
|
-
COMMIT_ID = Union[str, None]
|
|
20
|
-
else:
|
|
21
|
-
VERSION_TUPLE = object
|
|
22
|
-
COMMIT_ID = object
|
|
23
|
-
|
|
24
|
-
version: str
|
|
25
|
-
__version__: str
|
|
26
|
-
__version_tuple__: VERSION_TUPLE
|
|
27
|
-
version_tuple: VERSION_TUPLE
|
|
28
|
-
commit_id: COMMIT_ID
|
|
29
|
-
__commit_id__: COMMIT_ID
|
|
30
|
-
|
|
31
|
-
__version__ = version = '0.4.0rc3'
|
|
32
|
-
__version_tuple__ = version_tuple = (0, 4, 0, 'rc3')
|
|
33
|
-
|
|
34
|
-
__commit_id__ = commit_id = None
|
|
File without changes
|