datarobot-genai 0.2.22__py3-none-any.whl → 0.2.26__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datarobot_genai/drmcp/core/dr_mcp_server.py +0 -3
- datarobot_genai/drmcp/core/mcp_instance.py +37 -103
- datarobot_genai/drmcp/core/tool_filter.py +10 -1
- datarobot_genai/drmcp/tools/clients/confluence.py +93 -1
- datarobot_genai/drmcp/tools/clients/gdrive.py +255 -0
- datarobot_genai/drmcp/tools/confluence/tools.py +67 -0
- datarobot_genai/drmcp/tools/gdrive/tools.py +66 -0
- datarobot_genai/drmcp/tools/predictive/project.py +45 -27
- datarobot_genai/drmcp/tools/predictive/training.py +160 -151
- {datarobot_genai-0.2.22.dist-info → datarobot_genai-0.2.26.dist-info}/METADATA +1 -1
- {datarobot_genai-0.2.22.dist-info → datarobot_genai-0.2.26.dist-info}/RECORD +15 -16
- datarobot_genai/drmcp/core/mcp_server_tools.py +0 -129
- {datarobot_genai-0.2.22.dist-info → datarobot_genai-0.2.26.dist-info}/WHEEL +0 -0
- {datarobot_genai-0.2.22.dist-info → datarobot_genai-0.2.26.dist-info}/entry_points.txt +0 -0
- {datarobot_genai-0.2.22.dist-info → datarobot_genai-0.2.26.dist-info}/licenses/AUTHORS +0 -0
- {datarobot_genai-0.2.22.dist-info → datarobot_genai-0.2.26.dist-info}/licenses/LICENSE +0 -0
|
@@ -252,3 +252,70 @@ async def confluence_search(
|
|
|
252
252
|
content=f"Successfully executed CQL query and retrieved {n} result(s).",
|
|
253
253
|
structured_content={"data": data, "count": n},
|
|
254
254
|
)
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
@dr_mcp_tool(tags={"confluence", "write", "update", "page"})
async def confluence_update_page(
    *,
    page_id: Annotated[str, "The ID of the Confluence page to update."],
    new_body_content: Annotated[
        str,
        "The full updated content of the page in Confluence Storage Format (XML) or raw text.",
    ],
    version_number: Annotated[
        int,
        "The current version number of the page, required to prevent update conflicts. "
        "Get this from the confluence_get_page tool.",
    ],
) -> ToolResult:
    """Update the content of an existing Confluence page.

    Requires the current version number to ensure atomic updates.
    Use this tool to update the body content of an existing Confluence page.
    The version_number is required for optimistic locking - it prevents overwriting
    changes made by others since you last fetched the page.

    Usage:
        page_id="856391684", new_body_content="<p>New content</p>", version_number=5

    Important: Always fetch the page first using confluence_get_page to get the
    current version number before updating.
    """
    # Validate arguments up front so bad input fails before a token is requested.
    # A whitespace-only page_id is as unusable as an empty one, so reject both.
    if not page_id or not page_id.strip():
        raise ToolError("Argument validation error: 'page_id' cannot be empty.")

    if not new_body_content:
        raise ToolError("Argument validation error: 'new_body_content' cannot be empty.")

    if version_number < 1:
        raise ToolError(
            "Argument validation error: 'version_number' must be a positive integer (>= 1)."
        )

    # The token helper may hand back a ToolError instance instead of raising it;
    # surface that error to the caller unchanged.
    access_token = await get_atlassian_access_token()
    if isinstance(access_token, ToolError):
        raise access_token

    try:
        async with ConfluenceClient(access_token) as client:
            page_response = await client.update_page(
                page_id=page_id,
                new_body_content=new_body_content,
                version_number=version_number,
            )
    except ConfluenceError as e:
        # Client-level failures are reported with their own message; chain the
        # original exception so the root cause stays visible in tracebacks.
        logger.error("Confluence error updating page: %s", e)
        raise ToolError(str(e)) from e
    except Exception as e:
        # Boundary handler: convert anything unexpected into a ToolError while
        # preserving the underlying exception as the explicit cause.
        logger.error("Unexpected error updating Confluence page: %s", e)
        raise ToolError(
            f"An unexpected error occurred while updating Confluence page '{page_id}': {str(e)}"
        ) from e

    return ToolResult(
        content=f"Page ID {page_id} updated successfully to version {page_response.version}.",
        structured_content={
            "updated_page_id": page_response.page_id,
            "new_version": page_response.version,
        },
    )
|
|
@@ -109,3 +109,69 @@ async def gdrive_find_contents(
|
|
|
109
109
|
"nextPageToken": data.next_page_token,
|
|
110
110
|
},
|
|
111
111
|
)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@dr_mcp_tool(tags={"google", "gdrive", "read", "content", "file", "download"})
async def gdrive_read_content(
    *,
    file_id: Annotated[str, "The ID of the file to read."],
    target_format: Annotated[
        str | None,
        "The preferred output format for Google Workspace files "
        "(e.g., 'text/markdown' for Docs, 'text/csv' for Sheets). "
        "If not specified, uses sensible defaults. Has no effect on regular files.",
    ] = None,
) -> ToolResult | ToolError:
    """
    Retrieve the content of a specific file by its ID. Google Workspace files are
    automatically exported to LLM-readable formats (Push-Down).

    Usage:
        - Basic: gdrive_read_content(file_id="1ABC123def456")
        - Custom format: gdrive_read_content(file_id="1ABC...", target_format="text/plain")
        - First use gdrive_find_contents to discover file IDs

    Supported conversions (defaults):
        - Google Docs -> Markdown (text/markdown)
        - Google Sheets -> CSV (text/csv)
        - Google Slides -> Plain text (text/plain)
        - PDF files -> Extracted text (text/plain)
        - Other text files -> Downloaded as-is

    Note: Binary files (images, videos, etc.) are not supported and will return an error.
    Large Google Workspace files (>10MB) may fail to export due to API limits.

    Refer to Google Drive export formats documentation:
    https://developers.google.com/workspace/drive/api/guides/ref-export-formats
    """
    # NOTE(review): every failure path in this body raises ToolError; none returns
    # one, even though the annotation includes it. The annotation is left as-is
    # because the tool framework may inspect it - confirm before narrowing.
    if not file_id or not file_id.strip():
        raise ToolError("Argument validation error: 'file_id' cannot be empty.")

    # The token helper may hand back a ToolError instance instead of raising it;
    # surface that error to the caller unchanged.
    access_token = await get_gdrive_access_token()
    if isinstance(access_token, ToolError):
        raise access_token

    try:
        async with GoogleDriveClient(access_token) as client:
            file_content = await client.read_file_content(file_id, target_format)
    except GoogleDriveError as e:
        # Chain the client error so the root cause stays attached to the ToolError.
        logger.error("Google Drive error reading file content: %s", e)
        raise ToolError(str(e)) from e
    except Exception as e:
        # Boundary handler: wrap anything unexpected, keeping the explicit cause.
        logger.error("Unexpected error reading Google Drive file content: %s", e)
        raise ToolError(
            f"An unexpected error occurred while reading Google Drive file content: {str(e)}"
        ) from e

    # Provide helpful context about the conversion
    export_info = ""
    if file_content.was_exported:
        export_info = f" (exported from {file_content.original_mime_type})"

    return ToolResult(
        content=(
            f"Successfully retrieved content of '{file_content.name}' "
            f"({file_content.mime_type}){export_info}."
        ),
        structured_content=file_content.as_flat_dict(),
    )
|
|
@@ -14,6 +14,10 @@
|
|
|
14
14
|
|
|
15
15
|
import json
|
|
16
16
|
import logging
|
|
17
|
+
from typing import Annotated
|
|
18
|
+
|
|
19
|
+
from fastmcp.exceptions import ToolError
|
|
20
|
+
from fastmcp.tools.tool import ToolResult
|
|
17
21
|
|
|
18
22
|
from datarobot_genai.drmcp.core.clients import get_sdk_client
|
|
19
23
|
from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
|
|
@@ -21,35 +25,39 @@ from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
|
|
|
21
25
|
logger = logging.getLogger(__name__)
|
|
22
26
|
|
|
23
27
|
|
|
24
|
-
@dr_mcp_tool(tags={"project", "management", "list"})
|
|
25
|
-
async def list_projects() ->
|
|
26
|
-
"""
|
|
27
|
-
List all DataRobot projects for the authenticated user.
|
|
28
|
-
|
|
29
|
-
Returns
|
|
30
|
-
-------
|
|
31
|
-
A string summary of the user's DataRobot projects.
|
|
32
|
-
"""
|
|
28
|
+
@dr_mcp_tool(tags={"predictive", "project", "read", "management", "list"})
async def list_projects() -> ToolResult:
    """List all DataRobot projects for the authenticated user."""
    sdk = get_sdk_client()
    # Map each project's id to its display name.
    name_by_id = {proj.id: proj.project_name for proj in sdk.Project.list()}

    # The text payload is the JSON mapping, or a JSON message when there are no
    # projects; the structured payload is always the (possibly empty) mapping.
    if name_by_id:
        text = json.dumps(name_by_id, indent=2)
    else:
        text = json.dumps({"message": "No projects found."}, indent=2)

    return ToolResult(content=text, structured_content=name_by_id)
|
|
39
43
|
|
|
40
|
-
@dr_mcp_tool(tags={"project", "data", "info"})
|
|
41
|
-
async def get_project_dataset_by_name(project_id: str, dataset_name: str) -> str:
|
|
42
|
-
"""
|
|
43
|
-
Get a dataset ID by name for a given project.
|
|
44
44
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
45
|
+
@dr_mcp_tool(tags={"predictive", "project", "read", "data", "info"})
|
|
46
|
+
async def get_project_dataset_by_name(
|
|
47
|
+
*,
|
|
48
|
+
project_id: Annotated[str, "The ID of the DataRobot project."] | None = None,
|
|
49
|
+
dataset_name: Annotated[str, "The name of the dataset to find (e.g., 'training', 'holdout')."]
|
|
50
|
+
| None = None,
|
|
51
|
+
) -> ToolError | ToolResult:
|
|
52
|
+
"""Get a dataset ID by name for a given project.
|
|
48
53
|
|
|
49
|
-
|
|
50
|
-
-------
|
|
51
|
-
The dataset ID and the dataset type (source or prediction) as a string, or an error message.
|
|
54
|
+
The dataset ID and the dataset type (source or prediction) as a string, or an error message.
|
|
52
55
|
"""
|
|
56
|
+
if not project_id:
|
|
57
|
+
return ToolError("Project ID is required.")
|
|
58
|
+
if not dataset_name:
|
|
59
|
+
return ToolError("Dataset name is required.")
|
|
60
|
+
|
|
53
61
|
client = get_sdk_client()
|
|
54
62
|
project = client.Project.get(project_id)
|
|
55
63
|
all_datasets = []
|
|
@@ -61,12 +69,22 @@ async def get_project_dataset_by_name(project_id: str, dataset_name: str) -> str
|
|
|
61
69
|
all_datasets.extend([{"type": "prediction", "dataset": ds} for ds in prediction_datasets])
|
|
62
70
|
for ds in all_datasets:
|
|
63
71
|
if dataset_name.lower() in ds["dataset"].name.lower():
|
|
64
|
-
return
|
|
65
|
-
|
|
72
|
+
return ToolResult(
|
|
73
|
+
content=(
|
|
74
|
+
json.dumps(
|
|
75
|
+
{
|
|
76
|
+
"dataset_id": ds["dataset"].id,
|
|
77
|
+
"dataset_type": ds["type"],
|
|
78
|
+
},
|
|
79
|
+
indent=2,
|
|
80
|
+
)
|
|
81
|
+
),
|
|
82
|
+
structured_content={
|
|
66
83
|
"dataset_id": ds["dataset"].id,
|
|
67
84
|
"dataset_type": ds["type"],
|
|
68
|
-
"ui_panel": ["dataset"],
|
|
69
85
|
},
|
|
70
|
-
indent=2,
|
|
71
86
|
)
|
|
72
|
-
return
|
|
87
|
+
return ToolResult(
|
|
88
|
+
content=f"Dataset with name containing '{dataset_name}' not found in project {project_id}.",
|
|
89
|
+
structured_content={},
|
|
90
|
+
)
|
|
@@ -18,8 +18,11 @@ import json
|
|
|
18
18
|
import logging
|
|
19
19
|
from dataclasses import asdict
|
|
20
20
|
from dataclasses import dataclass
|
|
21
|
+
from typing import Annotated
|
|
21
22
|
|
|
22
23
|
import pandas as pd
|
|
24
|
+
from fastmcp.exceptions import ToolError
|
|
25
|
+
from fastmcp.tools.tool import ToolResult
|
|
23
26
|
|
|
24
27
|
from datarobot_genai.drmcp.core.clients import get_sdk_client
|
|
25
28
|
from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
|
|
@@ -53,22 +56,15 @@ class DatasetInsight:
|
|
|
53
56
|
missing_data_summary: dict[str, float]
|
|
54
57
|
|
|
55
58
|
|
|
56
|
-
@dr_mcp_tool(tags={"training", "analysis", "dataset"})
|
|
57
|
-
async def analyze_dataset(
|
|
58
|
-
|
|
59
|
-
|
|
59
|
+
@dr_mcp_tool(tags={"predictive", "training", "read", "analysis", "dataset"})
|
|
60
|
+
async def analyze_dataset(
|
|
61
|
+
*,
|
|
62
|
+
dataset_id: Annotated[str, "The ID of the DataRobot dataset to analyze"] | None = None,
|
|
63
|
+
) -> ToolError | ToolResult:
|
|
64
|
+
"""Analyze a dataset to understand its structure and potential use cases."""
|
|
65
|
+
if not dataset_id:
|
|
66
|
+
return ToolError("Dataset ID must be provided")
|
|
60
67
|
|
|
61
|
-
Args:
|
|
62
|
-
dataset_id: The ID of the DataRobot dataset to analyze
|
|
63
|
-
|
|
64
|
-
Returns
|
|
65
|
-
-------
|
|
66
|
-
JSON string containing dataset insights including:
|
|
67
|
-
- Basic statistics (rows, columns)
|
|
68
|
-
- Column types (numerical, categorical, datetime, text)
|
|
69
|
-
- Potential target columns
|
|
70
|
-
- Missing data summary
|
|
71
|
-
"""
|
|
72
68
|
client = get_sdk_client()
|
|
73
69
|
dataset = client.Dataset.get(dataset_id)
|
|
74
70
|
df = dataset.get_as_dataframe()
|
|
@@ -105,27 +101,23 @@ async def analyze_dataset(dataset_id: str) -> str:
|
|
|
105
101
|
potential_targets=potential_targets,
|
|
106
102
|
missing_data_summary=missing_data,
|
|
107
103
|
)
|
|
104
|
+
insights_dict = asdict(insights)
|
|
108
105
|
|
|
109
|
-
return
|
|
110
|
-
|
|
106
|
+
return ToolResult(
|
|
107
|
+
content=json.dumps(insights_dict, indent=2),
|
|
108
|
+
structured_content=insights_dict,
|
|
109
|
+
)
|
|
111
110
|
|
|
112
|
-
@dr_mcp_tool(tags={"training", "analysis", "usecase"})
|
|
113
|
-
async def suggest_use_cases(dataset_id: str) -> str:
|
|
114
|
-
"""
|
|
115
|
-
Analyze a dataset and suggest potential machine learning use cases.
|
|
116
111
|
|
|
117
|
-
|
|
118
|
-
|
|
112
|
+
@dr_mcp_tool(tags={"predictive", "training", "read", "analysis", "usecase"})
|
|
113
|
+
async def suggest_use_cases(
|
|
114
|
+
*,
|
|
115
|
+
dataset_id: Annotated[str, "The ID of the DataRobot dataset to analyze"] | None = None,
|
|
116
|
+
) -> ToolError | ToolResult:
|
|
117
|
+
"""Analyze a dataset and suggest potential machine learning use cases."""
|
|
118
|
+
if not dataset_id:
|
|
119
|
+
return ToolError("Dataset ID must be provided")
|
|
119
120
|
|
|
120
|
-
Returns
|
|
121
|
-
-------
|
|
122
|
-
JSON string containing suggested use cases with:
|
|
123
|
-
- Use case name and description
|
|
124
|
-
- Suggested target column
|
|
125
|
-
- Problem type
|
|
126
|
-
- Confidence score
|
|
127
|
-
- Reasoning for the suggestion
|
|
128
|
-
"""
|
|
129
121
|
client = get_sdk_client()
|
|
130
122
|
dataset = client.Dataset.get(dataset_id)
|
|
131
123
|
df = dataset.get_as_dataframe()
|
|
@@ -141,27 +133,23 @@ async def suggest_use_cases(dataset_id: str) -> str:
|
|
|
141
133
|
|
|
142
134
|
# Sort by confidence score
|
|
143
135
|
suggestions.sort(key=lambda x: x["confidence"], reverse=True)
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
- Feature correlations with target
|
|
162
|
-
- Missing data analysis
|
|
163
|
-
- Data type distribution
|
|
164
|
-
"""
|
|
136
|
+
|
|
137
|
+
return ToolResult(
|
|
138
|
+
content=json.dumps(suggestions, indent=2),
|
|
139
|
+
structured_content={"use_case_suggestions": suggestions},
|
|
140
|
+
)
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
@dr_mcp_tool(tags={"predictive", "training", "read", "analysis", "eda"})
|
|
144
|
+
async def get_exploratory_insights(
|
|
145
|
+
*,
|
|
146
|
+
dataset_id: Annotated[str, "The ID of the DataRobot dataset to analyze"] | None = None,
|
|
147
|
+
target_col: Annotated[str, "Optional target column to focus EDA insights on"] | None = None,
|
|
148
|
+
) -> ToolError | ToolResult:
|
|
149
|
+
"""Generate exploratory data insights for a dataset."""
|
|
150
|
+
if not dataset_id:
|
|
151
|
+
return ToolError("Dataset ID must be provided")
|
|
152
|
+
|
|
165
153
|
client = get_sdk_client()
|
|
166
154
|
dataset = client.Dataset.get(dataset_id)
|
|
167
155
|
df = dataset.get_as_dataframe()
|
|
@@ -238,8 +226,10 @@ async def get_exploratory_insights(dataset_id: str, target_col: str | None = Non
|
|
|
238
226
|
sorted(correlations.items(), key=lambda x: abs(x[1]), reverse=True)
|
|
239
227
|
)
|
|
240
228
|
|
|
241
|
-
|
|
242
|
-
|
|
229
|
+
return ToolResult(
|
|
230
|
+
content=json.dumps(eda_insights, indent=2),
|
|
231
|
+
structured_content=eda_insights,
|
|
232
|
+
)
|
|
243
233
|
|
|
244
234
|
|
|
245
235
|
def _identify_potential_targets(
|
|
@@ -450,47 +440,50 @@ def _analyze_target_for_use_cases(df: pd.DataFrame, target_col: str) -> list[Use
|
|
|
450
440
|
return suggestions
|
|
451
441
|
|
|
452
442
|
|
|
453
|
-
@dr_mcp_tool(tags={"training", "autopilot", "model"})
|
|
443
|
+
@dr_mcp_tool(tags={"predictive", "training", "write", "autopilot", "model"})
|
|
454
444
|
async def start_autopilot(
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
445
|
+
*,
|
|
446
|
+
target: Annotated[str, "Name of the target column for modeling"] | None = None,
|
|
447
|
+
project_id: Annotated[
|
|
448
|
+
str, "Optional, the ID of the DataRobot project or a new project if no id is provided"
|
|
449
|
+
]
|
|
450
|
+
| None = None,
|
|
451
|
+
mode: Annotated[str, "Optional, Autopilot mode ('quick', 'comprehensive', or 'manual')"]
|
|
452
|
+
| None = "quick",
|
|
453
|
+
dataset_url: Annotated[
|
|
454
|
+
str,
|
|
455
|
+
"""
|
|
456
|
+
Optional, The URL to the dataset to upload
|
|
457
|
+
(optional if dataset_id is provided) for a new project.
|
|
458
|
+
""",
|
|
459
|
+
]
|
|
460
|
+
| None = None,
|
|
461
|
+
dataset_id: Annotated[
|
|
462
|
+
str,
|
|
463
|
+
"""
|
|
464
|
+
Optional, The ID of an existing dataset in AI Catalog
|
|
465
|
+
(optional if dataset_url is provided) for a new project.
|
|
466
|
+
""",
|
|
467
|
+
]
|
|
468
|
+
| None = None,
|
|
469
|
+
project_name: Annotated[
|
|
470
|
+
str, "Optional, name for the project if no id is provided, creates a new project"
|
|
471
|
+
]
|
|
472
|
+
| None = "MCP Project",
|
|
473
|
+
use_case_id: Annotated[
|
|
474
|
+
str,
|
|
475
|
+
"Optional, ID of the use case to associate this project (required for next-gen platform)",
|
|
476
|
+
]
|
|
477
|
+
| None = None,
|
|
478
|
+
) -> ToolError | ToolResult:
|
|
479
|
+
"""Start automated model training (Autopilot) for a project."""
|
|
487
480
|
client = get_sdk_client()
|
|
488
481
|
|
|
489
482
|
if not project_id:
|
|
490
483
|
if not dataset_url and not dataset_id:
|
|
491
|
-
return "
|
|
484
|
+
return ToolError("Either dataset_url or dataset_id must be provided")
|
|
492
485
|
if dataset_url and dataset_id:
|
|
493
|
-
return "
|
|
486
|
+
return ToolError("Please provide either dataset_url or dataset_id, not both")
|
|
494
487
|
|
|
495
488
|
if dataset_url:
|
|
496
489
|
dataset = client.Dataset.create_from_url(dataset_url)
|
|
@@ -504,7 +497,7 @@ async def start_autopilot(
|
|
|
504
497
|
project = client.Project.get(project_id)
|
|
505
498
|
|
|
506
499
|
if not target:
|
|
507
|
-
return "
|
|
500
|
+
return ToolError("Target variable must be specified")
|
|
508
501
|
|
|
509
502
|
try:
|
|
510
503
|
# Start modeling
|
|
@@ -515,40 +508,48 @@ async def start_autopilot(
|
|
|
515
508
|
"target": target,
|
|
516
509
|
"mode": mode,
|
|
517
510
|
"status": project.get_status(),
|
|
518
|
-
"ui_panel": ["eda", "model-training", "leaderboard"],
|
|
519
511
|
"use_case_id": project.use_case_id,
|
|
520
512
|
}
|
|
521
513
|
|
|
522
|
-
return
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
{
|
|
526
|
-
"error": f"Failed to start Autopilot: {str(e)}",
|
|
527
|
-
"project_id": project.id,
|
|
528
|
-
"target": target,
|
|
529
|
-
"mode": mode,
|
|
530
|
-
},
|
|
531
|
-
indent=2,
|
|
514
|
+
return ToolResult(
|
|
515
|
+
content=json.dumps(result, indent=2),
|
|
516
|
+
structured_content=result,
|
|
532
517
|
)
|
|
533
518
|
|
|
519
|
+
except Exception as e:
|
|
520
|
+
return ToolError(
|
|
521
|
+
content=json.dumps(
|
|
522
|
+
{
|
|
523
|
+
"error": f"Failed to start Autopilot: {str(e)}",
|
|
524
|
+
"project_id": project.id if project else None,
|
|
525
|
+
"target": target,
|
|
526
|
+
"mode": mode,
|
|
527
|
+
},
|
|
528
|
+
indent=2,
|
|
529
|
+
)
|
|
530
|
+
)
|
|
534
531
|
|
|
535
|
-
@dr_mcp_tool(tags={"training", "model", "evaluation"})
|
|
536
|
-
async def get_model_roc_curve(project_id: str, model_id: str, source: str = "validation") -> str:
|
|
537
|
-
"""
|
|
538
|
-
Get detailed ROC curve for a specific model.
|
|
539
532
|
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
533
|
+
@dr_mcp_tool(tags={"prediction", "training", "read", "model", "evaluation"})
|
|
534
|
+
async def get_model_roc_curve(
|
|
535
|
+
*,
|
|
536
|
+
project_id: Annotated[str, "The ID of the DataRobot project"] | None = None,
|
|
537
|
+
model_id: Annotated[str, "The ID of the model to analyze"] | None = None,
|
|
538
|
+
source: Annotated[
|
|
539
|
+
str,
|
|
540
|
+
"""
|
|
541
|
+
The source of the data to use for the ROC curve
|
|
542
|
+
('validation' or 'holdout' or 'crossValidation')
|
|
543
|
+
""",
|
|
544
|
+
]
|
|
545
|
+
| str = "validation",
|
|
546
|
+
) -> ToolError | ToolResult:
|
|
547
|
+
"""Get detailed ROC curve for a specific model."""
|
|
548
|
+
if not project_id:
|
|
549
|
+
return ToolError("Project ID must be provided")
|
|
550
|
+
if not model_id:
|
|
551
|
+
return ToolError("Model ID must be provided")
|
|
545
552
|
|
|
546
|
-
Returns
|
|
547
|
-
-------
|
|
548
|
-
JSON string containing:
|
|
549
|
-
- roc_curve: ROC curve data
|
|
550
|
-
- ui_panel: List of recommended UI panels for visualization
|
|
551
|
-
"""
|
|
552
553
|
client = get_sdk_client()
|
|
553
554
|
project = client.Project.get(project_id)
|
|
554
555
|
model = client.Model.get(project=project, model_id=model_id)
|
|
@@ -581,26 +582,26 @@ async def get_model_roc_curve(project_id: str, model_id: str, source: str = "val
|
|
|
581
582
|
"source": source,
|
|
582
583
|
}
|
|
583
584
|
|
|
584
|
-
return
|
|
585
|
+
return ToolResult(
|
|
586
|
+
content=json.dumps({"data": roc_data}, indent=2),
|
|
587
|
+
structured_content={"data": roc_data},
|
|
588
|
+
)
|
|
585
589
|
except Exception as e:
|
|
586
|
-
return
|
|
590
|
+
return ToolError(f"Failed to get ROC curve: {str(e)}")
|
|
587
591
|
|
|
588
592
|
|
|
589
|
-
@dr_mcp_tool(tags={"training", "model", "evaluation"})
|
|
590
|
-
async def get_model_feature_impact(
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
593
|
+
@dr_mcp_tool(tags={"predictive", "training", "read", "model", "evaluation"})
|
|
594
|
+
async def get_model_feature_impact(
|
|
595
|
+
*,
|
|
596
|
+
project_id: Annotated[str, "The ID of the DataRobot project"] | None = None,
|
|
597
|
+
model_id: Annotated[str, "The ID of the model to analyze"] | None = None,
|
|
598
|
+
) -> ToolError | ToolResult:
|
|
599
|
+
"""Get detailed feature impact for a specific model."""
|
|
600
|
+
if not project_id:
|
|
601
|
+
return ToolError("Project ID must be provided")
|
|
602
|
+
if not model_id:
|
|
603
|
+
return ToolError("Model ID must be provided")
|
|
597
604
|
|
|
598
|
-
Returns
|
|
599
|
-
-------
|
|
600
|
-
JSON string containing:
|
|
601
|
-
- feature_impact: Feature importance scores
|
|
602
|
-
- ui_panel: List of recommended UI panels for visualization
|
|
603
|
-
"""
|
|
604
605
|
client = get_sdk_client()
|
|
605
606
|
project = client.Project.get(project_id)
|
|
606
607
|
model = client.Model.get(project=project, model_id=model_id)
|
|
@@ -608,26 +609,31 @@ async def get_model_feature_impact(project_id: str, model_id: str) -> str:
|
|
|
608
609
|
model.request_feature_impact()
|
|
609
610
|
feature_impact = model.get_or_request_feature_impact()
|
|
610
611
|
|
|
611
|
-
return
|
|
612
|
-
|
|
612
|
+
return ToolResult(
|
|
613
|
+
content=json.dumps({"data": feature_impact}, indent=2),
|
|
614
|
+
structured_content={"data": feature_impact},
|
|
615
|
+
)
|
|
613
616
|
|
|
614
|
-
@dr_mcp_tool(tags={"training", "model", "evaluation"})
|
|
615
|
-
async def get_model_lift_chart(project_id: str, model_id: str, source: str = "validation") -> str:
|
|
616
|
-
"""
|
|
617
|
-
Get detailed lift chart for a specific model.
|
|
618
617
|
|
|
619
|
-
|
|
620
|
-
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
618
|
+
@dr_mcp_tool(tags={"predictive", "training", "read", "model", "evaluation"})
|
|
619
|
+
async def get_model_lift_chart(
|
|
620
|
+
project_id: Annotated[str, "The ID of the DataRobot project"] | None = None,
|
|
621
|
+
model_id: Annotated[str, "The ID of the model to analyze"] | None = None,
|
|
622
|
+
source: Annotated[
|
|
623
|
+
str,
|
|
624
|
+
"""
|
|
625
|
+
The source of the data to use for the lift chart
|
|
626
|
+
('validation' or 'holdout' or 'crossValidation')
|
|
627
|
+
""",
|
|
628
|
+
]
|
|
629
|
+
| str = "validation",
|
|
630
|
+
) -> ToolError | ToolResult:
|
|
631
|
+
"""Get detailed lift chart for a specific model."""
|
|
632
|
+
if not project_id:
|
|
633
|
+
return ToolError("Project ID must be provided")
|
|
634
|
+
if not model_id:
|
|
635
|
+
return ToolError("Model ID must be provided")
|
|
624
636
|
|
|
625
|
-
Returns
|
|
626
|
-
-------
|
|
627
|
-
JSON string containing:
|
|
628
|
-
- lift_chart: Lift chart data
|
|
629
|
-
- ui_panel: List of recommended UI panels for visualization
|
|
630
|
-
"""
|
|
631
637
|
client = get_sdk_client()
|
|
632
638
|
project = client.Project.get(project_id)
|
|
633
639
|
model = client.Model.get(project=project, model_id=model_id)
|
|
@@ -648,4 +654,7 @@ async def get_model_lift_chart(project_id: str, model_id: str, source: str = "va
|
|
|
648
654
|
"target_class": lift_chart.target_class,
|
|
649
655
|
}
|
|
650
656
|
|
|
651
|
-
return
|
|
657
|
+
return ToolResult(
|
|
658
|
+
content=json.dumps({"data": lift_chart_data}, indent=2),
|
|
659
|
+
structured_content={"data": lift_chart_data},
|
|
660
|
+
)
|