datarobot-genai 0.2.37__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. datarobot_genai/core/agents/__init__.py +1 -1
  2. datarobot_genai/core/agents/base.py +5 -2
  3. datarobot_genai/core/chat/responses.py +6 -1
  4. datarobot_genai/core/utils/auth.py +188 -31
  5. datarobot_genai/crewai/__init__.py +1 -4
  6. datarobot_genai/crewai/agent.py +150 -17
  7. datarobot_genai/crewai/events.py +11 -4
  8. datarobot_genai/drmcp/__init__.py +4 -2
  9. datarobot_genai/drmcp/core/config.py +21 -1
  10. datarobot_genai/drmcp/core/mcp_instance.py +5 -49
  11. datarobot_genai/drmcp/core/routes.py +108 -13
  12. datarobot_genai/drmcp/core/tool_config.py +16 -0
  13. datarobot_genai/drmcp/core/utils.py +110 -0
  14. datarobot_genai/drmcp/test_utils/tool_base_ete.py +41 -26
  15. datarobot_genai/drmcp/tools/clients/gdrive.py +2 -0
  16. datarobot_genai/drmcp/tools/clients/microsoft_graph.py +141 -0
  17. datarobot_genai/drmcp/tools/clients/perplexity.py +173 -0
  18. datarobot_genai/drmcp/tools/clients/tavily.py +199 -0
  19. datarobot_genai/drmcp/tools/confluence/tools.py +43 -94
  20. datarobot_genai/drmcp/tools/gdrive/tools.py +44 -133
  21. datarobot_genai/drmcp/tools/jira/tools.py +19 -41
  22. datarobot_genai/drmcp/tools/microsoft_graph/tools.py +201 -32
  23. datarobot_genai/drmcp/tools/perplexity/__init__.py +0 -0
  24. datarobot_genai/drmcp/tools/perplexity/tools.py +117 -0
  25. datarobot_genai/drmcp/tools/predictive/data.py +1 -9
  26. datarobot_genai/drmcp/tools/predictive/deployment.py +0 -8
  27. datarobot_genai/drmcp/tools/predictive/deployment_info.py +91 -117
  28. datarobot_genai/drmcp/tools/predictive/model.py +0 -21
  29. datarobot_genai/drmcp/tools/predictive/predict_realtime.py +3 -0
  30. datarobot_genai/drmcp/tools/predictive/project.py +3 -19
  31. datarobot_genai/drmcp/tools/predictive/training.py +1 -19
  32. datarobot_genai/drmcp/tools/tavily/__init__.py +13 -0
  33. datarobot_genai/drmcp/tools/tavily/tools.py +141 -0
  34. datarobot_genai/langgraph/agent.py +10 -2
  35. datarobot_genai/llama_index/__init__.py +1 -1
  36. datarobot_genai/llama_index/agent.py +284 -5
  37. datarobot_genai/nat/agent.py +17 -6
  38. {datarobot_genai-0.2.37.dist-info → datarobot_genai-0.3.1.dist-info}/METADATA +3 -1
  39. {datarobot_genai-0.2.37.dist-info → datarobot_genai-0.3.1.dist-info}/RECORD +43 -40
  40. datarobot_genai/crewai/base.py +0 -159
  41. datarobot_genai/drmcp/core/tool_filter.py +0 -117
  42. datarobot_genai/llama_index/base.py +0 -299
  43. {datarobot_genai-0.2.37.dist-info → datarobot_genai-0.3.1.dist-info}/WHEEL +0 -0
  44. {datarobot_genai-0.2.37.dist-info → datarobot_genai-0.3.1.dist-info}/entry_points.txt +0 -0
  45. {datarobot_genai-0.2.37.dist-info → datarobot_genai-0.3.1.dist-info}/licenses/AUTHORS +0 -0
  46. {datarobot_genai-0.2.37.dist-info → datarobot_genai-0.3.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,117 @@
1
+ # Copyright 2026 DataRobot, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Perplexity MCP tools."""
16
+
17
+ import logging
18
+ from typing import Annotated
19
+ from typing import Literal
20
+
21
+ from fastmcp.exceptions import ToolError
22
+ from fastmcp.tools.tool import ToolResult
23
+
24
+ from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
25
+ from datarobot_genai.drmcp.tools.clients.perplexity import MAX_QUERIES
26
+ from datarobot_genai.drmcp.tools.clients.perplexity import MAX_RESULTS
27
+ from datarobot_genai.drmcp.tools.clients.perplexity import MAX_RESULTS_DEFAULT
28
+ from datarobot_genai.drmcp.tools.clients.perplexity import MAX_SEARCH_DOMAIN_FILTER
29
+ from datarobot_genai.drmcp.tools.clients.perplexity import MAX_TOKENS_PER_PAGE
30
+ from datarobot_genai.drmcp.tools.clients.perplexity import MAX_TOKENS_PER_PAGE_DEFAULT
31
+ from datarobot_genai.drmcp.tools.clients.perplexity import PerplexityClient
32
+ from datarobot_genai.drmcp.tools.clients.perplexity import get_perplexity_access_token
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
+
37
@dr_mcp_tool(tags={"perplexity", "web", "search", "websearch"})
async def perplexity_search(
    *,
    # NOTE: the first argument of ``Annotated`` is the *only* type; everything
    # after it is metadata. The union must therefore be spelled as a single
    # first argument, otherwise the list form is not part of the declared type.
    query: Annotated[
        str | list[str],
        "The search query string OR "
        f"a list of up to {MAX_QUERIES} sub-queries for multi-query research.",
    ],
    search_domain_filter: Annotated[
        list[str] | None,
        f"Up to {MAX_SEARCH_DOMAIN_FILTER} domains/URLs "
        "to allowlist or denylist (prefix with '-').",
    ] = None,
    recency: Annotated[
        Literal["day", "week", "month", "year"] | None, "Filter results by time period."
    ] = None,
    max_results: Annotated[
        int, f"Number of ranked results to return (1-{MAX_RESULTS})."
    ] = MAX_RESULTS_DEFAULT,
    max_tokens_per_page: Annotated[
        int,
        f"Content extraction cap per page (1-{MAX_TOKENS_PER_PAGE}) "
        f"(default {MAX_TOKENS_PER_PAGE_DEFAULT}).",
    ] = MAX_TOKENS_PER_PAGE_DEFAULT,
) -> ToolResult:
    """Perplexity web search tool combining multi-query research and content extraction control."""
    # The docstring above is intentionally kept to a single line: per-parameter
    # documentation lives in the ``Annotated`` metadata of the signature.
    #
    # All arguments are validated before the access token is requested, so an
    # invalid call fails with a ToolError without touching the network.

    # Covers None, "" and [] in one check.
    if not query:
        raise ToolError("Argument validation error: query cannot be empty.")
    if isinstance(query, str):
        # A whitespace-only string is truthy but still carries no query text.
        if not query.strip():
            raise ToolError("Argument validation error: query cannot be empty.")
    elif isinstance(query, list):
        if len(query) > MAX_QUERIES:
            raise ToolError(
                f"Argument validation error: query list cannot be bigger than {MAX_QUERIES}."
            )
        # The isinstance guard turns a non-string entry into a validation error
        # instead of an AttributeError from calling .strip() on it.
        if not all(isinstance(sub_query, str) and sub_query.strip() for sub_query in query):
            raise ToolError("Argument validation error: query cannot contain empty str.")

    if search_domain_filter and len(search_domain_filter) > MAX_SEARCH_DOMAIN_FILTER:
        raise ToolError(
            f"Argument validation error: "
            f"maximum number of search domain filters is {MAX_SEARCH_DOMAIN_FILTER}."
        )

    if max_results <= 0:
        raise ToolError("Argument validation error: max_results must be greater than 0.")
    if max_results > MAX_RESULTS:
        raise ToolError(
            f"Argument validation error: "
            f"max_results must be smaller than or equal to {MAX_RESULTS}."
        )

    if max_tokens_per_page <= 0:
        raise ToolError("Argument validation error: max_tokens_per_page must be greater than 0.")
    if max_tokens_per_page > MAX_TOKENS_PER_PAGE:
        raise ToolError(
            f"Argument validation error: "
            f"max_tokens_per_page must be smaller than or equal to {MAX_TOKENS_PER_PAGE}."
        )

    # The token helper hands back a ToolError instance (rather than raising it)
    # when no token can be obtained; surface that to the caller as an exception.
    access_token = await get_perplexity_access_token()
    if isinstance(access_token, ToolError):
        raise access_token

    async with PerplexityClient(access_token=access_token) as perplexity_client:
        results = await perplexity_client.search(
            query=query,
            search_domain_filter=search_domain_filter,
            recency=recency,
            max_results=max_results,
            max_tokens_per_page=max_tokens_per_page,
        )

    return ToolResult(
        structured_content={
            "results": results,
            "count": len(results),
            "metadata": {
                # A single string counts as one executed query.
                "queriesExecuted": len(query) if isinstance(query, list) else 1,
                "filtersApplied": {"domains": search_domain_filter, "recency": recency},
                "extractionLimit": max_tokens_per_page,
            },
        },
    )
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import json
16
15
  import logging
17
16
  import os
18
17
  from typing import Annotated
@@ -60,7 +59,6 @@ async def upload_dataset_to_ai_catalog(
60
59
  raise ToolError("Failed to upload dataset.")
61
60
 
62
61
  return ToolResult(
63
- content=f"Successfully uploaded dataset: {catalog_item.id}",
64
62
  structured_content={
65
63
  "dataset_id": catalog_item.id,
66
64
  "dataset_version_id": catalog_item.version_id,
@@ -78,21 +76,15 @@ async def list_ai_catalog_items() -> ToolResult:
78
76
  if not datasets:
79
77
  logger.info("No AI Catalog items found")
80
78
  return ToolResult(
81
- content="No AI Catalog items found.",
82
79
  structured_content={"datasets": []},
83
80
  )
84
81
 
85
82
  datasets_dict = {ds.id: ds.name for ds in datasets}
86
- datasets_count = len(datasets)
87
83
 
88
84
  return ToolResult(
89
- content=(
90
- f"Found {datasets_count} AI Catalog items, here are the details:\n"
91
- f"{json.dumps(datasets_dict, indent=2)}"
92
- ),
93
85
  structured_content={
94
86
  "datasets": datasets_dict,
95
- "count": datasets_count,
87
+ "count": len(datasets),
96
88
  },
97
89
  )
98
90
 
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import json
16
15
  import logging
17
16
  from typing import Annotated
18
17
 
@@ -32,12 +31,10 @@ async def list_deployments() -> ToolResult:
32
31
  deployments = client.Deployment.list()
33
32
  if not deployments:
34
33
  return ToolResult(
35
- content="No deployments found.",
36
34
  structured_content={"deployments": []},
37
35
  )
38
36
  deployments_dict = {d.id: d.label for d in deployments}
39
37
  return ToolResult(
40
- content="\n".join(f"{d.id}: {d.label}" for d in deployments),
41
38
  structured_content={"deployments": deployments_dict},
42
39
  )
43
40
 
@@ -54,10 +51,6 @@ async def get_model_info_from_deployment(
54
51
  client = get_sdk_client()
55
52
  deployment = client.Deployment.get(deployment_id)
56
53
  return ToolResult(
57
- content=(
58
- f"Retrieved model info for deployment {deployment_id}, here are the details:\n"
59
- f"{json.dumps(deployment.model, indent=2)}"
60
- ),
61
54
  structured_content=deployment.model,
62
55
  )
63
56
 
@@ -87,7 +80,6 @@ async def deploy_model(
87
80
  default_prediction_server_id=prediction_servers[0].id,
88
81
  )
89
82
  return ToolResult(
90
- content=f"Created deployment {deployment.id} with label {label}",
91
83
  structured_content={
92
84
  "deployment_id": deployment.id,
93
85
  "label": label,
@@ -19,9 +19,13 @@ import json
19
19
  import logging
20
20
  from datetime import datetime
21
21
  from datetime import timedelta
22
+ from typing import Annotated
22
23
  from typing import Any
23
24
 
24
25
  import pandas as pd
26
+ from fastmcp.exceptions import ToolError
27
+ from fastmcp.tools.tool import ToolResult
28
+ from mcp.types import TextContent
25
29
 
26
30
  from datarobot_genai.drmcp.core.clients import get_sdk_client
27
31
  from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
@@ -29,40 +33,18 @@ from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
29
33
  logger = logging.getLogger(__name__)
30
34
 
31
35
 
32
- @dr_mcp_tool(tags={"deployment", "info", "metadata"})
33
- async def get_deployment_info(deployment_id: str) -> str:
36
+ @dr_mcp_tool(tags={"predictive", "deployment", "read", "info", "metadata"})
37
+ async def get_deployment_info(
38
+ *,
39
+ deployment_id: Annotated[str, "The ID of the DataRobot deployment"] | None = None,
40
+ ) -> ToolError | ToolResult:
34
41
  """
35
42
  Retrieve information about the deployment, including the list of
36
43
  features needed to make predictions on this deployment.
37
-
38
- Args:
39
- deployment_id: The ID of the DataRobot deployment
40
-
41
- Returns
42
- -------
43
- JSON string containing model and feature information including:
44
- For datarobot native models will return model information for custom models
45
- this will likely just return features and total_features values.
46
-
47
- - model_type: Type of model
48
- - target: Name of the target feature
49
- - target_type: Type of the target feature
50
- - features: List of features with their importance and type
51
- - total_features: Total number of features
52
- - time_series_config: Time series configuration if applicable
53
-
54
- for features:
55
- - feature_name: Name of the feature
56
- - ``name`` : str, feature name
57
- - ``feature_type`` : str, feature type
58
- - ``importance`` : float, numeric measure of the relationship strength between
59
- the feature and target (independent of model or other features)
60
- - ``date_format`` : str or None, the date format string for how this feature was
61
- interpreted, null if not a date feature, compatible with
62
- https://docs.python.org/2/library/time.html#time.strftime.
63
- - ``known_in_advance`` : bool, whether the feature was selected as known in advance in
64
- a time series model, false for non-time series models.
65
44
  """
45
+ if not deployment_id:
46
+ raise ToolError("Deployment ID must be provided")
47
+
66
48
  client = get_sdk_client()
67
49
  deployment = client.Deployment.get(deployment_id)
68
50
 
@@ -112,40 +94,33 @@ async def get_deployment_info(deployment_id: str) -> str:
112
94
  "series_id_columns": partition.multiseries_id_columns or [],
113
95
  }
114
96
 
115
- return json.dumps(result, indent=2)
97
+ return ToolResult(
98
+ structured_content=result,
99
+ )
116
100
 
117
101
 
118
- @dr_mcp_tool(tags={"deployment", "template", "data"})
119
- async def generate_prediction_data_template(deployment_id: str, n_rows: int = 1) -> str:
120
- """
121
- Generate a template CSV with the correct structure for making predictions.
122
-
123
- This creates a template with:
124
- - All required feature columns in the correct order
125
- - Sample values based on feature types
126
- - Comments explaining each feature
127
- - When using this tool, always consider feature importance. For features with high importance,
128
- try to infer or ask for a reasonable value, using frequent values or domain knowledge if
129
- available. For less important features, you may leave them blank.
130
- - If frequent values are available for a feature, they will be used as sample values;
131
- otherwise, blank fields will be used.
132
- Please note that using frequent values in your predictions data can influence the prediction,
133
- think of it as sending in the average value for the feature. If you don't want this effect on
134
- your predictions leave the field blank you in predictions dataset.
135
-
136
- Args:
137
- deployment_id: The ID of the DataRobot deployment
138
- n_rows: Number of template rows to generate (default 1)
139
-
140
- Returns
141
- -------
142
- CSV template string with sample data ready for predictions
143
- """
102
+ @dr_mcp_tool(tags={"predictive", "deployment", "read", "template", "data"})
103
+ async def generate_prediction_data_template(
104
+ *,
105
+ deployment_id: Annotated[str, "The ID of the DataRobot deployment"] | None = None,
106
+ n_rows: Annotated[int, "Number of template rows to generate"] = 1,
107
+ ) -> ToolError | ToolResult:
108
+ """Generate a template CSV with the correct structure for making predictions."""
109
+ if not deployment_id:
110
+ raise ToolError("Deployment ID must be provided")
111
+ if n_rows is None or n_rows <= 0:
112
+ n_rows = 1
113
+
144
114
  # Get feature information
145
- features_json = await get_deployment_features(deployment_id)
115
+ features_result = await get_deployment_features(deployment_id=deployment_id)
146
116
  # Add error handling for empty or error responses
117
+ # Extract text content from ToolResult
118
+ if features_result.content and isinstance(features_result.content[0], TextContent):
119
+ features_json = features_result.content[0].text
120
+ else:
121
+ features_json = str(features_result.content)
147
122
  if not features_json or features_json.strip().startswith("Error"):
148
- return f"Error: {features_json}"
123
+ raise ToolError(f"Error with feature information: {features_json}")
149
124
  features_info = json.loads(features_json)
150
125
 
151
126
  # Create template data
@@ -203,64 +178,54 @@ async def generate_prediction_data_template(deployment_id: str, n_rows: int = 1)
203
178
  # Create DataFrame
204
179
  df = pd.DataFrame(template_data)
205
180
 
206
- # Add metadata comments
207
- result = f"# Prediction Data Template for Deployment: {deployment_id}\n"
208
- result += f"# Model Type: {features_info['model_type']}\n"
209
- result += f"# Target: {features_info['target']} (Type: {features_info['target_type']})\n"
181
+ # Build structured content with template data and metadata
182
+ structured_content = {
183
+ "deployment_id": deployment_id,
184
+ "model_type": features_info["model_type"],
185
+ "target": features_info["target"],
186
+ "target_type": features_info["target_type"],
187
+ "total_features": features_info["total_features"],
188
+ "template_data": df.to_dict("records"), # Convert DataFrame to list of dicts
189
+ }
210
190
 
211
191
  if "time_series_config" in features_info:
212
- ts = features_info["time_series_config"]
213
- result += f"# Time Series: datetime_column={ts['datetime_column']}, "
214
- result += f"forecast_window=[{ts['forecast_window_start']}, {ts['forecast_window_end']}]\n"
215
- if ts["series_id_columns"]:
216
- result += f"# Multiseries ID Columns: {', '.join(ts['series_id_columns'])}\n"
217
-
218
- result += f"# Total Features: {features_info['total_features']}\n"
219
- result += df.to_csv(index=False)
192
+ structured_content["time_series_config"] = features_info["time_series_config"]
220
193
 
221
- return str(result)
194
+ return ToolResult(
195
+ structured_content=structured_content,
196
+ )
222
197
 
223
198
 
224
- @dr_mcp_tool(tags={"deployment", "validation", "data"})
199
+ @dr_mcp_tool(tags={"predictive", "deployment", "read", "validation", "data"})
225
200
  async def validate_prediction_data(
226
- deployment_id: str,
227
- file_path: str | None = None,
228
- csv_string: str | None = None,
229
- ) -> str:
230
- """
231
- Validate if a CSV file is suitable for making predictions with a deployment.
232
-
233
- Checks:
234
- - All required features are present
235
- - Feature types match expectations
236
- - Missing values (null, empty string, or blank fields) are allowed and will not cause errors
237
- - No critical issues that would prevent predictions
238
-
239
- Args:
240
- deployment_id: The ID of the DataRobot deployment
241
- file_path: Path to the CSV file to validate (optional if csv_string is provided)
242
- csv_string: CSV data as a string (optional, used if file_path is not provided)
243
-
244
- Returns
245
- -------
246
- Validation report including any errors, warnings, and suggestions
247
- """
201
+ *,
202
+ deployment_id: Annotated[str, "The ID of the DataRobot deployment"] | None = None,
203
+ file_path: Annotated[
204
+ str, "Path to the CSV file to validate (optional if csv_string is provided)"
205
+ ]
206
+ | None = None,
207
+ csv_string: Annotated[str, "CSV data as a string (optional, used if file_path is not provided)"]
208
+ | None = None,
209
+ ) -> ToolError | ToolResult:
210
+ """Validate if a CSV file is suitable for making predictions with a deployment."""
248
211
  # Load the data
249
212
  if csv_string is not None:
250
213
  df = pd.read_csv(io.StringIO(csv_string))
251
214
  elif file_path is not None:
252
215
  df = pd.read_csv(file_path)
253
216
  else:
254
- return json.dumps(
255
- {
256
- "status": "error",
257
- "error": "Must provide either file_path or csv_string.",
258
- },
259
- indent=2,
260
- )
217
+ raise ToolError("Must provide either file_path or csv_string.")
218
+
219
+ if not deployment_id:
220
+ raise ToolError("Deployment ID must be provided")
261
221
 
262
222
  # Get deployment features
263
- features_json = await get_deployment_features(deployment_id)
223
+ features_result = await get_deployment_features(deployment_id=deployment_id)
224
+ # Extract text content from ToolResult
225
+ if features_result.content and isinstance(features_result.content[0], TextContent):
226
+ features_json = features_result.content[0].text
227
+ else:
228
+ features_json = str(features_result.content)
264
229
  features_info = json.loads(features_json)
265
230
 
266
231
  validation_report: dict[str, Any] = {
@@ -359,22 +324,28 @@ async def validate_prediction_data(
359
324
  "model_type": features_info["model_type"],
360
325
  }
361
326
 
362
- return json.dumps(validation_report, indent=2)
327
+ return ToolResult(
328
+ structured_content=validation_report,
329
+ )
363
330
 
364
331
 
365
- @dr_mcp_tool(tags={"deployment", "features", "info"})
366
- async def get_deployment_features(deployment_id: str) -> str:
367
- """
368
- Retrieve only the features list for a deployment, as JSON string.
369
- Args:
370
- deployment_id: The ID of the DataRobot deployment
371
- Returns:
372
- JSON string containing only the features list and time series config if present.
373
- """
374
- info_json = await get_deployment_info(deployment_id)
332
+ @dr_mcp_tool(tags={"predictive", "deployment", "read", "features", "info"})
333
+ async def get_deployment_features(
334
+ *,
335
+ deployment_id: Annotated[str, "The ID of the DataRobot deployment"] | None = None,
336
+ ) -> ToolError | ToolResult:
337
+ """Retrieve only the features list for a deployment, as JSON string."""
338
+ if not deployment_id:
339
+ raise ToolError("Deployment ID must be provided")
340
+
341
+ info_result = await get_deployment_info(deployment_id=deployment_id)
342
+ # Extract text content from ToolResult
343
+ if info_result.content and isinstance(info_result.content[0], TextContent):
344
+ info_json = info_result.content[0].text
345
+ else:
346
+ info_json = str(info_result.content)
375
347
  if not info_json.strip().startswith("{"):
376
- # Return a default error JSON
377
- return json.dumps({"features": [], "total_features": 0, "error": info_json}, indent=2)
348
+ raise ToolError(f"Error with deployment info: {info_json}")
378
349
  info = json.loads(info_json)
379
350
  # Only keep features, time_series_config, and total_features
380
351
  result = {
@@ -389,4 +360,7 @@ async def get_deployment_features(deployment_id: str) -> str:
389
360
  result["target"] = info["target"]
390
361
  if "target_type" in info:
391
362
  result["target_type"] = info["target_type"]
392
- return json.dumps(result, indent=2)
363
+
364
+ return ToolResult(
365
+ structured_content=result,
366
+ )
@@ -93,33 +93,17 @@ async def get_best_model(
93
93
  best_model = leaderboard[0]
94
94
  logger.info(f"Found best model {best_model.id} for project {project_id}")
95
95
 
96
- metric_info = ""
97
96
  metric_value = None
98
97
 
99
98
  if metric and best_model.metrics and metric in best_model.metrics:
100
99
  metric_value = best_model.metrics[metric].get("validation")
101
- if metric_value is not None:
102
- metric_info = f" with {metric}: {metric_value:.2f}"
103
100
 
104
101
  # Include full metrics in the response
105
102
  best_model_dict = model_to_dict(best_model)
106
103
  best_model_dict["metric"] = metric
107
104
  best_model_dict["metric_value"] = metric_value
108
105
 
109
- # Format metrics for human-readable content
110
- metrics_text = ""
111
- if best_model.metrics:
112
- metrics_list = []
113
- for metric_name, metric_data in best_model.metrics.items():
114
- if isinstance(metric_data, dict) and "validation" in metric_data:
115
- val = metric_data["validation"]
116
- if val is not None:
117
- metrics_list.append(f"{metric_name}: {val:.4f}")
118
- if metrics_list:
119
- metrics_text = "\nPerformance metrics:\n" + "\n".join(f" - {m}" for m in metrics_list)
120
-
121
106
  return ToolResult(
122
- content=f"Best model: {best_model.model_type}{metric_info}{metrics_text}",
123
107
  structured_content={
124
108
  "project_id": project_id,
125
109
  "best_model": best_model_dict,
@@ -148,7 +132,6 @@ async def score_dataset_with_model(
148
132
  job = model.score(dataset_url)
149
133
 
150
134
  return ToolResult(
151
- content=f"Scoring job started: {job.id}",
152
135
  structured_content={
153
136
  "scoring_job_id": job.id,
154
137
  "project_id": project_id,
@@ -172,10 +155,6 @@ async def list_models(
172
155
  models = project.get_models()
173
156
 
174
157
  return ToolResult(
175
- content=(
176
- f"Found {len(models)} models in project {project_id}, here are the details:\n"
177
- f"{json.dumps(models, indent=2, cls=ModelEncoder)}"
178
- ),
179
158
  structured_content={
180
159
  "project_id": project_id,
181
160
  "models": [model_to_dict(model) for model in models],
@@ -240,6 +240,9 @@ async def predict_realtime(
240
240
  else:
241
241
  raise ValueError("Either file_path or dataset must be provided.")
242
242
 
243
+ # Normalize column names: strip leading/trailing whitespace
244
+ df.columns = df.columns.str.strip()
245
+
243
246
  if series_id_column and series_id_column not in df.columns:
244
247
  raise ValueError(f"series_id_column '{series_id_column}' not found in input data.")
245
248
 
@@ -12,7 +12,6 @@
12
12
  # See the License for the specific language governing permissions and
13
13
  # limitations under the License.
14
14
 
15
- import json
16
15
  import logging
17
16
  from typing import Annotated
18
17
 
@@ -33,11 +32,6 @@ async def list_projects() -> ToolResult:
33
32
  projects = {p.id: p.project_name for p in projects}
34
33
 
35
34
  return ToolResult(
36
- content=(
37
- json.dumps(projects, indent=2)
38
- if projects
39
- else json.dumps({"message": "No projects found."}, indent=2)
40
- ),
41
35
  structured_content=projects,
42
36
  )
43
37
 
@@ -48,7 +42,7 @@ async def get_project_dataset_by_name(
48
42
  project_id: Annotated[str, "The ID of the DataRobot project."] | None = None,
49
43
  dataset_name: Annotated[str, "The name of the dataset to find (e.g., 'training', 'holdout')."]
50
44
  | None = None,
51
- ) -> ToolError | ToolResult:
45
+ ) -> ToolResult:
52
46
  """Get a dataset ID by name for a given project.
53
47
 
54
48
  The dataset ID and the dataset type (source or prediction) as a string, or an error message.
@@ -70,21 +64,11 @@ async def get_project_dataset_by_name(
70
64
  for ds in all_datasets:
71
65
  if dataset_name.lower() in ds["dataset"].name.lower():
72
66
  return ToolResult(
73
- content=(
74
- json.dumps(
75
- {
76
- "dataset_id": ds["dataset"].id,
77
- "dataset_type": ds["type"],
78
- },
79
- indent=2,
80
- )
81
- ),
82
67
  structured_content={
83
68
  "dataset_id": ds["dataset"].id,
84
69
  "dataset_type": ds["type"],
85
70
  },
86
71
  )
87
- return ToolResult(
88
- content=f"Dataset with name containing '{dataset_name}' not found in project {project_id}.",
89
- structured_content={},
72
+ raise ToolError(
73
+ f"Dataset with name containing '{dataset_name}' not found in project {project_id}."
90
74
  )
@@ -14,7 +14,6 @@
14
14
 
15
15
  """Tools for analyzing datasets and suggesting ML use cases."""
16
16
 
17
- import json
18
17
  import logging
19
18
  from dataclasses import asdict
20
19
  from dataclasses import dataclass
@@ -134,7 +133,6 @@ async def analyze_dataset(
134
133
  insights_dict = asdict(insights)
135
134
 
136
135
  return ToolResult(
137
- content=json.dumps(insights_dict, indent=2),
138
136
  structured_content=insights_dict,
139
137
  )
140
138
 
@@ -164,7 +162,6 @@ async def suggest_use_cases(
164
162
  suggestions.sort(key=lambda x: x["confidence"], reverse=True)
165
163
 
166
164
  return ToolResult(
167
- content=json.dumps(suggestions, indent=2),
168
165
  structured_content={"use_case_suggestions": suggestions},
169
166
  )
170
167
 
@@ -255,7 +252,6 @@ async def get_exploratory_insights(
255
252
  )
256
253
 
257
254
  return ToolResult(
258
- content=json.dumps(eda_insights, indent=2),
259
255
  structured_content=eda_insights,
260
256
  )
261
257
 
@@ -540,22 +536,11 @@ async def start_autopilot(
540
536
  }
541
537
 
542
538
  return ToolResult(
543
- content=json.dumps(result, indent=2),
544
539
  structured_content=result,
545
540
  )
546
541
 
547
542
  except Exception as e:
548
- raise ToolError(
549
- content=json.dumps(
550
- {
551
- "error": f"Failed to start Autopilot: {str(e)}",
552
- "project_id": project.id if project else None,
553
- "target": target,
554
- "mode": mode,
555
- },
556
- indent=2,
557
- )
558
- )
543
+ raise ToolError(f"Failed to start Autopilot: {str(e)}")
559
544
 
560
545
 
561
546
  @dr_mcp_tool(tags={"prediction", "training", "read", "model", "evaluation"})
@@ -611,7 +596,6 @@ async def get_model_roc_curve(
611
596
  }
612
597
 
613
598
  return ToolResult(
614
- content=json.dumps({"data": roc_data}, indent=2),
615
599
  structured_content={"data": roc_data},
616
600
  )
617
601
  except Exception as e:
@@ -638,7 +622,6 @@ async def get_model_feature_impact(
638
622
  feature_impact = model.get_or_request_feature_impact()
639
623
 
640
624
  return ToolResult(
641
- content=json.dumps({"data": feature_impact}, indent=2),
642
625
  structured_content={"data": feature_impact},
643
626
  )
644
627
 
@@ -684,6 +667,5 @@ async def get_model_lift_chart(
684
667
  }
685
668
 
686
669
  return ToolResult(
687
- content=json.dumps({"data": lift_chart_data}, indent=2),
688
670
  structured_content={"data": lift_chart_data},
689
671
  )