data-designer-config 0.4.0rc3__py3-none-any.whl → 0.5.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,109 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from __future__ import annotations
5
+
6
+ from typing import Annotated, Literal
7
+
8
+ from pydantic import Field
9
+ from typing_extensions import TypeAlias
10
+
11
+ from data_designer.config.base import ConfigBase
12
+
13
+
14
+ class MCPProvider(ConfigBase):
15
+ """Configuration for a remote MCP server connection.
16
+
17
+ MCPProvider is used to connect to pre-existing MCP servers via SSE (Server-Sent Events)
18
+ transport. For local subprocess-based MCP servers, use LocalStdioMCPProvider instead.
19
+
20
+ Attributes:
21
+ name (str): Unique name used to reference this MCP provider.
22
+ endpoint (str): SSE endpoint URL for connecting to the remote MCP server.
23
+ api_key (str | None): Optional API key for authentication. Defaults to None.
24
+ provider_type (Literal["sse"]): Transport type discriminator, always "sse".
25
+
26
+ Examples:
27
+ Remote SSE transport:
28
+
29
+ >>> MCPProvider(
30
+ ... name="remote-mcp",
31
+ ... endpoint="http://localhost:8080/sse",
32
+ ... api_key="your-api-key",
33
+ ... )
34
+ """
35
+
36
+ provider_type: Literal["sse"] = "sse"
37
+ name: str
38
+ endpoint: str
39
+ api_key: str | None = None
40
+
41
+
42
+ class LocalStdioMCPProvider(ConfigBase):
43
+ """Configuration for launching a local MCP server via stdio transport.
44
+
45
+ LocalStdioMCPProvider is used to launch MCP servers as subprocesses using stdio
46
+ for communication. For connecting to remote/pre-existing MCP servers, use MCPProvider instead.
47
+
48
+ Attributes:
49
+ name (str): Unique name used to reference this MCP provider.
50
+ command (str): Executable to launch the MCP server via stdio transport.
51
+ args (list[str]): Arguments passed to the MCP server executable. Defaults to [].
52
+ env (dict[str, str]): Environment variables passed to the MCP server subprocess. Defaults to {}.
53
+ provider_type (Literal["stdio"]): Transport type discriminator, always "stdio".
54
+
55
+ Examples:
56
+ Stdio (subprocess) transport:
57
+
58
+ >>> LocalStdioMCPProvider(
59
+ ... name="demo-mcp",
60
+ ... command="python",
61
+ ... args=["-m", "data_designer_e2e_tests.mcp_demo_server"],
62
+ ... env={"PYTHONPATH": "/path/to/project"},
63
+ ... )
64
+ """
65
+
66
+ provider_type: Literal["stdio"] = "stdio"
67
+ name: str
68
+ command: str
69
+ args: list[str] = Field(default_factory=list)
70
+ env: dict[str, str] = Field(default_factory=dict)
71
+
72
+
73
+ MCPProviderT: TypeAlias = Annotated[MCPProvider | LocalStdioMCPProvider, Field(discriminator="provider_type")]
74
+
75
+
76
+ class ToolConfig(ConfigBase):
77
+ """Configuration for permitting MCP tools on an LLM column.
78
+
79
+ ToolConfig defines which tools are available for use during LLM generation.
80
+ It references one or more MCP providers by name and can optionally restrict
81
+ which tools from those providers are permitted.
82
+
83
+ Attributes:
84
+ tool_alias (str): User-defined alias to reference this tool configuration in column configs.
85
+ providers (list[str]): Names of the MCP providers to use for tool calls. Tools can be
86
+ drawn from multiple providers.
87
+ allow_tools (list[str] | None): Optional allowlist of tool names that restricts which
88
+ tools are permitted. If None, all tools from the specified providers are allowed.
89
+ Defaults to None.
90
+ max_tool_call_turns (int): Maximum number of tool-calling turns permitted in a single
91
+ generation. A turn is one iteration where the LLM requests tool calls. With parallel
92
+ tool calling, a single turn may execute multiple tools simultaneously. Defaults to 5.
93
+ timeout_sec (float | None): Timeout in seconds for MCP tool calls. Defaults to None (no timeout).
94
+
95
+ Examples:
96
+ >>> ToolConfig(
97
+ ... tool_alias="search-tools",
98
+ ... providers=["doc-search-mcp", "web-search-mcp"],
99
+ ... allow_tools=["search_docs", "list_docs"],
100
+ ... max_tool_call_turns=10,
101
+ ... timeout_sec=30.0,
102
+ ... )
103
+ """
104
+
105
+ tool_alias: str
106
+ providers: list[str]
107
+ allow_tools: list[str] | None = None
108
+ max_tool_call_turns: int = Field(default=5, ge=1)
109
+ timeout_sec: float | None = Field(default=None, gt=0)
@@ -1,4 +1,4 @@
1
- # SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
2
  # SPDX-License-Identifier: Apache-2.0
3
3
 
4
4
  from __future__ import annotations
@@ -33,10 +33,6 @@ class RunConfig(ConfigBase):
33
33
  max_conversation_correction_steps: Maximum number of correction rounds permitted within a
34
34
  single conversation when generation tasks call `ModelFacade.generate(...)`. Must be >= 0.
35
35
  Default is 0.
36
- debug_override_save_all_column_traces: If True, overrides per-column `with_trace` settings
37
- and includes `__trace` columns for ALL LLM generations, containing the full ordered
38
- message history (system/user/assistant) for the final generation attempt.
39
- Useful for debugging. Default is False.
40
36
  """
41
37
 
42
38
  disable_early_shutdown: bool = False
@@ -46,7 +42,6 @@ class RunConfig(ConfigBase):
46
42
  non_inference_max_parallel_workers: int = Field(default=4, ge=1)
47
43
  max_conversation_restarts: int = Field(default=5, ge=0)
48
44
  max_conversation_correction_steps: int = Field(default=0, ge=0)
49
- debug_override_save_all_column_traces: bool = False
50
45
 
51
46
  @model_validator(mode="after")
52
47
  def normalize_shutdown_settings(self) -> Self:
@@ -167,6 +167,7 @@ MAX_TOP_P = 1.0
167
167
  MIN_TOP_P = 0.0
168
168
  MIN_MAX_TOKENS = 1
169
169
  TRACE_COLUMN_POSTFIX = "__trace"
170
+ REASONING_CONTENT_COLUMN_POSTFIX = "__reasoning_content"
170
171
 
171
172
  AVAILABLE_LOCALES = [
172
173
  "ar_AA",
@@ -276,6 +277,14 @@ MODEL_PROVIDERS_FILE_NAME = "model_providers.yaml"
276
277
 
277
278
  MODEL_PROVIDERS_FILE_PATH = DATA_DESIGNER_HOME / MODEL_PROVIDERS_FILE_NAME
278
279
 
280
+ MCP_PROVIDERS_FILE_NAME = "mcp_providers.yaml"
281
+
282
+ MCP_PROVIDERS_FILE_PATH = DATA_DESIGNER_HOME / MCP_PROVIDERS_FILE_NAME
283
+
284
+ TOOL_CONFIGS_FILE_NAME = "tool_configs.yaml"
285
+
286
+ TOOL_CONFIGS_FILE_PATH = DATA_DESIGNER_HOME / TOOL_CONFIGS_FILE_NAME
287
+
279
288
  NVIDIA_PROVIDER_NAME = "nvidia"
280
289
 
281
290
  NVIDIA_API_KEY_ENV_VAR_NAME = "NVIDIA_API_KEY"
@@ -0,0 +1,24 @@
1
+ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ from __future__ import annotations
5
+
6
+ from data_designer.config.utils.type_helpers import StrEnum
7
+
8
+
9
+ class TraceType(StrEnum):
10
+ """Specifies the type of reasoning trace to capture for LLM columns.
11
+
12
+ Traces capture the conversation history during LLM generation, which is
13
+ useful for debugging, analysis, and understanding model behavior.
14
+
15
+ Attributes:
16
+ NONE: No trace is captured. This is the default.
17
+ LAST_MESSAGE: Only the final assistant message is captured.
18
+ ALL_MESSAGES: The full conversation history (system/user/assistant/tool)
19
+ is captured.
20
+ """
21
+
22
+ NONE = "none"
23
+ LAST_MESSAGE = "last_message"
24
+ ALL_MESSAGES = "all_messages"
@@ -203,6 +203,7 @@ def display_sample_record(
203
203
  + config_builder.get_columns_of_type(DataDesignerColumnType.LLM_TEXT)
204
204
  + config_builder.get_columns_of_type(DataDesignerColumnType.LLM_STRUCTURED)
205
205
  + config_builder.get_columns_of_type(DataDesignerColumnType.EMBEDDING)
206
+ + config_builder.get_columns_of_type(DataDesignerColumnType.CUSTOM)
206
207
  )
207
208
  if len(non_code_columns) > 0:
208
209
  table = Table(title="Generated Columns", **table_kws)
@@ -215,6 +216,11 @@ def display_sample_record(
215
216
  get_truncated_list_as_string(embd) for embd in record[col.name].get("embeddings")
216
217
  ]
217
218
  table.add_row(col.name, convert_to_row_element(record[col.name]))
219
+ # Also display side_effect_columns for custom generators
220
+ if col.column_type == DataDesignerColumnType.CUSTOM:
221
+ for output_col in col.side_effect_columns:
222
+ if output_col in record:
223
+ table.add_row(output_col, convert_to_row_element(record[output_col]))
218
224
  render_list.append(pad_console_element(table))
219
225
 
220
226
  for col in config_builder.get_columns_of_type(DataDesignerColumnType.LLM_CODE):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-designer-config
3
- Version: 0.4.0rc3
3
+ Version: 0.5.0rc1
4
4
  Summary: Configuration layer for DataDesigner synthetic data generation
5
5
  License-Expression: Apache-2.0
6
6
  Classifier: Development Status :: 4 - Beta
@@ -2,22 +2,24 @@ data_designer/errors.py,sha256=r1pBvmvRBAsPmb7oF_veubhkxZ2uPo9cGEDwykLziX4,220
2
2
  data_designer/lazy_heavy_imports.py,sha256=5X04vUj9TYbKgfDmY2qvhzRf5-IZWKOanIpi3_u7fmM,1662
3
3
  data_designer/logging.py,sha256=Xq2cRwxmDJ-r8_s9NWnk15efLRsrKm5iVScHy6HkjiE,6044
4
4
  data_designer/plugin_manager.py,sha256=C2ZkZiXlcMRiaxfrrho5Shz6DKdExVeBha7ch-d4CnU,2695
5
- data_designer/config/__init__.py,sha256=MWzRZhXA41sTpc0sL_xq2baA3kSlV37alT6g8RlP8dU,4919
6
- data_designer/config/_version.py,sha256=DTeqqEr2nu3d0J3qZZfx7FpCHf1Ai3akZDbCReLKmgY,714
7
- data_designer/config/base.py,sha256=IGj6sy_GnKzC94uu2rdxe12EqR_AmGJ6O3rl2MxOv6g,2449
8
- data_designer/config/column_configs.py,sha256=QEHXbxljbGEfOEnzNsiR3_CRpaCukQsayBbHQyhMhbc,20720
9
- data_designer/config/column_types.py,sha256=xGXuu0EBy3Y5Jd74f2VM6x5jHq72GmK9leA6qOnAz8c,5423
10
- data_designer/config/config_builder.py,sha256=vuPibkodbJxbCXdaI1tt1Uyo1SVCnAOfLBAW1AmhajI,24707
11
- data_designer/config/data_designer_config.py,sha256=qOojviug05vHR2S4800sjd4OmxhSVi6kB8SAFXLlPog,1891
5
+ data_designer/config/__init__.py,sha256=Joc1wgUNdtQROIW4x3si5_B1yZVVrM_wRBBQ3VREECU,10192
6
+ data_designer/config/base.py,sha256=uiZRsDkDx5Gt_Bd54YIzo8as0mZPCxaUrnCMlgRQ8YY,2456
7
+ data_designer/config/column_configs.py,sha256=ghYwuwVplAvihHu2ZFQvwYubI8r2nILUHnsPHI3GAbI,25769
8
+ data_designer/config/column_types.py,sha256=Su-wnzUMKZwTMxn4bKrAMrv6m7s-2AlRJV9WqMPvQOY,5566
9
+ data_designer/config/config_builder.py,sha256=msvu_PE0MfLtRFNBHQgO5fx7cF4fhm1rM2Vj9TowdUM,28623
10
+ data_designer/config/custom_column.py,sha256=Ks5DX2SMx3jTUkitarAlvR7hxS-rONAy9UYZCLcZC8c,2515
11
+ data_designer/config/data_designer_config.py,sha256=HnTaBYwNiD37_3aHVTDRqCjCDYVUCq7FIqKiNOT2Fdo,2170
12
12
  data_designer/config/dataset_builders.py,sha256=jdCujJYFlKAiSkPNX2Qeyrs683GrRcCDv_m8ZZhtg64,368
13
13
  data_designer/config/dataset_metadata.py,sha256=UTlEgnHWgjwPuc7bP95T7gaKmcr7pIhFMy9vvbUwMV4,647
14
14
  data_designer/config/default_model_settings.py,sha256=c-llH2otfG0tMCMsxoz3ZcS1nFxIQQPfRedFXAydDbc,4868
15
15
  data_designer/config/errors.py,sha256=JhvUYecfLmP0gZjQzqA3OmfaSs9TRlC5E-ubnV_-3gs,560
16
+ data_designer/config/exportable_config.py,sha256=634mOo2yB84Hq9S-oiAuJbomFgJGvSEqep2VoRZcMEg,2214
16
17
  data_designer/config/interface.py,sha256=ikmpm_KwencTpM-yg0auo7XMgcmMSa67S75IqdpFLfk,1676
18
+ data_designer/config/mcp.py,sha256=GqDWPieqzMWp6Nj5ikfHSzB1_w3pfjg8NA_QrLjrv9o,4295
17
19
  data_designer/config/models.py,sha256=_NctRk4brgBeb5q5V7r_hXE5OORlLh6SCVZP0eu2LGo,16721
18
20
  data_designer/config/preview_results.py,sha256=WnPlDcHElIHNfjV_P-nLu_Dpul8D3Eyb5qyi3E173Gs,1744
19
21
  data_designer/config/processors.py,sha256=lnyUZA1EhO9NWjjVFFioYxSgeYpoAaM1J7UzwOYkvms,6028
20
- data_designer/config/run_config.py,sha256=m_rrqEmNHR533AYJ_OR5yq0a9Pegy9vPGZgyfD4x9cI,3052
22
+ data_designer/config/run_config.py,sha256=FjZi9DMePGJiGyXG9-XXAfg4enb2fs0fJQR47HQLP88,2668
21
23
  data_designer/config/sampler_constraints.py,sha256=tQI1XLF5bS4TnyKMLo0nArvefnXI8dWCzov38r4qNCQ,1197
22
24
  data_designer/config/sampler_params.py,sha256=Gio-53vjSYOdPhF2CEq4HSWCXCaZMy4WpGPbuFVcWOM,27965
23
25
  data_designer/config/seed.py,sha256=eShSqOcSUzfCEZBnqY-rB0qZpRGxjeOE3fSaJAwacec,4668
@@ -33,18 +35,19 @@ data_designer/config/analysis/utils/reporting.py,sha256=teTzd1OHtpI4vbIinGOGsKXy
33
35
  data_designer/config/testing/__init__.py,sha256=vxFrIOqDoDfOx-MWjC5lb_hvmB4kRKvh1QdTv--QYFM,222
34
36
  data_designer/config/testing/fixtures.py,sha256=J1bcWjerAIoVUIZBVPbUcuvEa2laj_kspVcLS7UZMbo,10876
35
37
  data_designer/config/utils/code_lang.py,sha256=nUeWjuzSYBVF5gwOiUE2-EsYCEDzRZaw31RIivt7GPI,2638
36
- data_designer/config/utils/constants.py,sha256=lprfeF_bIzGJ_oGrZBhvHEbLVgrGfFtVbCdWJHf_6B8,8953
38
+ data_designer/config/utils/constants.py,sha256=FYQgrBuX6JVrLnMjNk-TSmbcvgOofBt06RDvgTew0a4,9246
37
39
  data_designer/config/utils/errors.py,sha256=HCjer0YrF0bMn5j8gmgWaLb0395LAr_hxMD1ftOsOc8,520
38
40
  data_designer/config/utils/info.py,sha256=yOa4U8kI_CY4OfCKZxCm2okU8klAiThvyjKM5tG-F0A,3469
39
41
  data_designer/config/utils/io_helpers.py,sha256=kzvOR7QgqijkqU-O2enIlpCWwHvzc3oRaEl4Lsjh1Do,8466
40
42
  data_designer/config/utils/misc.py,sha256=7n_0txc78IoK6V39CwZY-65KtYcjh38WDl0Q1bQM-EA,2481
41
43
  data_designer/config/utils/numerical_helpers.py,sha256=DIubKzc8q2_Bw7xRjyOGwxYulTV3dt3JxCdpH560dak,838
44
+ data_designer/config/utils/trace_type.py,sha256=PtUXGdM-9n77M83sJ7gaYknj1KrFfjGDVUksO7g4X3s,830
42
45
  data_designer/config/utils/type_helpers.py,sha256=XyVup24F4Bl7uNze_yUW9oD6EzFbfsJWKhpeMN2901A,4059
43
- data_designer/config/utils/visualization.py,sha256=_0Mn-jva0Oz1tVTQH1mnWSARpqZ2kh1JSzJEuikyy9s,18491
46
+ data_designer/config/utils/visualization.py,sha256=clfDqFOsny6aEHW_dyMmItVnjPv93sAX_3RIlmGQJOc,18919
44
47
  data_designer/plugins/__init__.py,sha256=qe1alcTEtnMSMdzknjb57vvjqKgFE5cEHXxBj8tPWMI,275
45
48
  data_designer/plugins/errors.py,sha256=d7FMed3ueQvZHwuhwyPLzF4E34bO1mdj3aBVEw6p34o,386
46
49
  data_designer/plugins/plugin.py,sha256=TVyyOaQBWAt0FQwUmtihTZ9MDJD85HwggrQ3L9CviPQ,5367
47
50
  data_designer/plugins/registry.py,sha256=Cnt33Q25o9bS2v2YDbV3QPM57VNrtIBKAb4ERQRE_dY,3053
48
- data_designer_config-0.4.0rc3.dist-info/METADATA,sha256=7FQwABHHNXZOEomFztSQi4heVz8ioAouK_vIhreW0Tw,2286
49
- data_designer_config-0.4.0rc3.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
50
- data_designer_config-0.4.0rc3.dist-info/RECORD,,
51
+ data_designer_config-0.5.0rc1.dist-info/METADATA,sha256=Jl2IswRqm6fHQra4g44_tUGMkGCypsGskTBPWhnLQr8,2286
52
+ data_designer_config-0.5.0rc1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
53
+ data_designer_config-0.5.0rc1.dist-info/RECORD,,
@@ -1,34 +0,0 @@
1
- # file generated by setuptools-scm
2
- # don't change, don't track in version control
3
-
4
- __all__ = [
5
- "__version__",
6
- "__version_tuple__",
7
- "version",
8
- "version_tuple",
9
- "__commit_id__",
10
- "commit_id",
11
- ]
12
-
13
- TYPE_CHECKING = False
14
- if TYPE_CHECKING:
15
- from typing import Tuple
16
- from typing import Union
17
-
18
- VERSION_TUPLE = Tuple[Union[int, str], ...]
19
- COMMIT_ID = Union[str, None]
20
- else:
21
- VERSION_TUPLE = object
22
- COMMIT_ID = object
23
-
24
- version: str
25
- __version__: str
26
- __version_tuple__: VERSION_TUPLE
27
- version_tuple: VERSION_TUPLE
28
- commit_id: COMMIT_ID
29
- __commit_id__: COMMIT_ID
30
-
31
- __version__ = version = '0.4.0rc3'
32
- __version_tuple__ = version_tuple = (0, 4, 0, 'rc3')
33
-
34
- __commit_id__ = commit_id = None