datarobot-genai 0.2.37__py3-none-any.whl → 0.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datarobot_genai/core/agents/__init__.py +1 -1
- datarobot_genai/core/agents/base.py +5 -2
- datarobot_genai/core/chat/responses.py +6 -1
- datarobot_genai/core/utils/auth.py +188 -31
- datarobot_genai/crewai/__init__.py +1 -4
- datarobot_genai/crewai/agent.py +150 -17
- datarobot_genai/crewai/events.py +11 -4
- datarobot_genai/drmcp/__init__.py +4 -2
- datarobot_genai/drmcp/core/config.py +21 -1
- datarobot_genai/drmcp/core/mcp_instance.py +5 -49
- datarobot_genai/drmcp/core/routes.py +108 -13
- datarobot_genai/drmcp/core/tool_config.py +16 -0
- datarobot_genai/drmcp/core/utils.py +110 -0
- datarobot_genai/drmcp/test_utils/tool_base_ete.py +41 -26
- datarobot_genai/drmcp/tools/clients/gdrive.py +2 -0
- datarobot_genai/drmcp/tools/clients/microsoft_graph.py +141 -0
- datarobot_genai/drmcp/tools/clients/perplexity.py +173 -0
- datarobot_genai/drmcp/tools/clients/tavily.py +199 -0
- datarobot_genai/drmcp/tools/confluence/tools.py +43 -94
- datarobot_genai/drmcp/tools/gdrive/tools.py +44 -133
- datarobot_genai/drmcp/tools/jira/tools.py +19 -41
- datarobot_genai/drmcp/tools/microsoft_graph/tools.py +201 -32
- datarobot_genai/drmcp/tools/perplexity/__init__.py +0 -0
- datarobot_genai/drmcp/tools/perplexity/tools.py +117 -0
- datarobot_genai/drmcp/tools/predictive/data.py +1 -9
- datarobot_genai/drmcp/tools/predictive/deployment.py +0 -8
- datarobot_genai/drmcp/tools/predictive/deployment_info.py +91 -117
- datarobot_genai/drmcp/tools/predictive/model.py +0 -21
- datarobot_genai/drmcp/tools/predictive/predict_realtime.py +3 -0
- datarobot_genai/drmcp/tools/predictive/project.py +3 -19
- datarobot_genai/drmcp/tools/predictive/training.py +1 -19
- datarobot_genai/drmcp/tools/tavily/__init__.py +13 -0
- datarobot_genai/drmcp/tools/tavily/tools.py +141 -0
- datarobot_genai/langgraph/agent.py +10 -2
- datarobot_genai/llama_index/__init__.py +1 -1
- datarobot_genai/llama_index/agent.py +284 -5
- datarobot_genai/nat/agent.py +17 -6
- {datarobot_genai-0.2.37.dist-info → datarobot_genai-0.3.1.dist-info}/METADATA +3 -1
- {datarobot_genai-0.2.37.dist-info → datarobot_genai-0.3.1.dist-info}/RECORD +43 -40
- datarobot_genai/crewai/base.py +0 -159
- datarobot_genai/drmcp/core/tool_filter.py +0 -117
- datarobot_genai/llama_index/base.py +0 -299
- {datarobot_genai-0.2.37.dist-info → datarobot_genai-0.3.1.dist-info}/WHEEL +0 -0
- {datarobot_genai-0.2.37.dist-info → datarobot_genai-0.3.1.dist-info}/entry_points.txt +0 -0
- {datarobot_genai-0.2.37.dist-info → datarobot_genai-0.3.1.dist-info}/licenses/AUTHORS +0 -0
- {datarobot_genai-0.2.37.dist-info → datarobot_genai-0.3.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
# Copyright 2026 DataRobot, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Perplexity MCP tools."""
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from typing import Annotated
|
|
19
|
+
from typing import Literal
|
|
20
|
+
|
|
21
|
+
from fastmcp.exceptions import ToolError
|
|
22
|
+
from fastmcp.tools.tool import ToolResult
|
|
23
|
+
|
|
24
|
+
from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
|
|
25
|
+
from datarobot_genai.drmcp.tools.clients.perplexity import MAX_QUERIES
|
|
26
|
+
from datarobot_genai.drmcp.tools.clients.perplexity import MAX_RESULTS
|
|
27
|
+
from datarobot_genai.drmcp.tools.clients.perplexity import MAX_RESULTS_DEFAULT
|
|
28
|
+
from datarobot_genai.drmcp.tools.clients.perplexity import MAX_SEARCH_DOMAIN_FILTER
|
|
29
|
+
from datarobot_genai.drmcp.tools.clients.perplexity import MAX_TOKENS_PER_PAGE
|
|
30
|
+
from datarobot_genai.drmcp.tools.clients.perplexity import MAX_TOKENS_PER_PAGE_DEFAULT
|
|
31
|
+
from datarobot_genai.drmcp.tools.clients.perplexity import PerplexityClient
|
|
32
|
+
from datarobot_genai.drmcp.tools.clients.perplexity import get_perplexity_access_token
|
|
33
|
+
|
|
34
|
+
logger = logging.getLogger(__name__)
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dr_mcp_tool(tags={"perplexity", "web", "search", "websearch"})
|
|
38
|
+
async def perplexity_search(
|
|
39
|
+
*,
|
|
40
|
+
query: Annotated[
|
|
41
|
+
str,
|
|
42
|
+
list[str],
|
|
43
|
+
f"The search query string OR "
|
|
44
|
+
f"a list of up to {MAX_QUERIES} sub-queries for multi-query research.",
|
|
45
|
+
],
|
|
46
|
+
search_domain_filter: Annotated[
|
|
47
|
+
list[str] | None,
|
|
48
|
+
f"Up to {MAX_SEARCH_DOMAIN_FILTER} domains/URLs "
|
|
49
|
+
f"to allowlist or denylist (prefix with '-').",
|
|
50
|
+
] = None,
|
|
51
|
+
recency: Annotated[
|
|
52
|
+
Literal["day", "week", "month", "year"] | None, "Filter results by time period."
|
|
53
|
+
] = None,
|
|
54
|
+
max_results: Annotated[
|
|
55
|
+
int, f"Number of ranked results to return (1-{MAX_RESULTS})."
|
|
56
|
+
] = MAX_RESULTS_DEFAULT,
|
|
57
|
+
max_tokens_per_page: Annotated[
|
|
58
|
+
int,
|
|
59
|
+
f"Content extraction cap per page (1-{MAX_TOKENS_PER_PAGE}) "
|
|
60
|
+
f"(default {MAX_TOKENS_PER_PAGE_DEFAULT}).",
|
|
61
|
+
] = MAX_TOKENS_PER_PAGE_DEFAULT,
|
|
62
|
+
) -> ToolResult:
|
|
63
|
+
"""Perplexity web search tool combining multi-query research and content extraction control."""
|
|
64
|
+
if not query:
|
|
65
|
+
raise ToolError("Argument validation error: query cannot be empty.")
|
|
66
|
+
if query and isinstance(query, str) and not query.strip():
|
|
67
|
+
raise ToolError("Argument validation error: query cannot be empty.")
|
|
68
|
+
if query and isinstance(query, list) and len(query) > MAX_QUERIES:
|
|
69
|
+
raise ToolError(
|
|
70
|
+
f"Argument validation error: query list cannot be bigger than {MAX_QUERIES}."
|
|
71
|
+
)
|
|
72
|
+
if query and isinstance(query, list) and not all(q.strip() for q in query):
|
|
73
|
+
raise ToolError("Argument validation error: query cannot contain empty str.")
|
|
74
|
+
if search_domain_filter and len(search_domain_filter) > MAX_SEARCH_DOMAIN_FILTER:
|
|
75
|
+
raise ToolError(
|
|
76
|
+
f"Argument validation error: "
|
|
77
|
+
f"maximum number of search domain filters is {MAX_SEARCH_DOMAIN_FILTER}."
|
|
78
|
+
)
|
|
79
|
+
if max_results <= 0:
|
|
80
|
+
raise ToolError("Argument validation error: max_results must be greater than 0.")
|
|
81
|
+
if max_results > MAX_RESULTS:
|
|
82
|
+
raise ToolError(
|
|
83
|
+
f"Argument validation error: "
|
|
84
|
+
f"max_results must be smaller than or equal to {MAX_RESULTS}."
|
|
85
|
+
)
|
|
86
|
+
if max_tokens_per_page <= 0:
|
|
87
|
+
raise ToolError("Argument validation error: max_tokens_per_page must be greater than 0.")
|
|
88
|
+
if max_tokens_per_page > MAX_TOKENS_PER_PAGE:
|
|
89
|
+
raise ToolError(
|
|
90
|
+
f"Argument validation error: "
|
|
91
|
+
f"max_tokens_per_page must be smaller than or equal to {MAX_TOKENS_PER_PAGE}."
|
|
92
|
+
)
|
|
93
|
+
|
|
94
|
+
access_token = await get_perplexity_access_token()
|
|
95
|
+
if isinstance(access_token, ToolError):
|
|
96
|
+
raise access_token
|
|
97
|
+
|
|
98
|
+
async with PerplexityClient(access_token=access_token) as perplexity_client:
|
|
99
|
+
results = await perplexity_client.search(
|
|
100
|
+
query=query,
|
|
101
|
+
search_domain_filter=search_domain_filter,
|
|
102
|
+
recency=recency,
|
|
103
|
+
max_results=max_results,
|
|
104
|
+
max_tokens_per_page=max_tokens_per_page,
|
|
105
|
+
)
|
|
106
|
+
|
|
107
|
+
return ToolResult(
|
|
108
|
+
structured_content={
|
|
109
|
+
"results": results,
|
|
110
|
+
"count": len(results),
|
|
111
|
+
"metadata": {
|
|
112
|
+
"queriesExecuted": len(query) if isinstance(query, list) else 1,
|
|
113
|
+
"filtersApplied": {"domains": search_domain_filter, "recency": recency},
|
|
114
|
+
"extractionLimit": max_tokens_per_page,
|
|
115
|
+
},
|
|
116
|
+
},
|
|
117
|
+
)
|
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import json
|
|
16
15
|
import logging
|
|
17
16
|
import os
|
|
18
17
|
from typing import Annotated
|
|
@@ -60,7 +59,6 @@ async def upload_dataset_to_ai_catalog(
|
|
|
60
59
|
raise ToolError("Failed to upload dataset.")
|
|
61
60
|
|
|
62
61
|
return ToolResult(
|
|
63
|
-
content=f"Successfully uploaded dataset: {catalog_item.id}",
|
|
64
62
|
structured_content={
|
|
65
63
|
"dataset_id": catalog_item.id,
|
|
66
64
|
"dataset_version_id": catalog_item.version_id,
|
|
@@ -78,21 +76,15 @@ async def list_ai_catalog_items() -> ToolResult:
|
|
|
78
76
|
if not datasets:
|
|
79
77
|
logger.info("No AI Catalog items found")
|
|
80
78
|
return ToolResult(
|
|
81
|
-
content="No AI Catalog items found.",
|
|
82
79
|
structured_content={"datasets": []},
|
|
83
80
|
)
|
|
84
81
|
|
|
85
82
|
datasets_dict = {ds.id: ds.name for ds in datasets}
|
|
86
|
-
datasets_count = len(datasets)
|
|
87
83
|
|
|
88
84
|
return ToolResult(
|
|
89
|
-
content=(
|
|
90
|
-
f"Found {datasets_count} AI Catalog items, here are the details:\n"
|
|
91
|
-
f"{json.dumps(datasets_dict, indent=2)}"
|
|
92
|
-
),
|
|
93
85
|
structured_content={
|
|
94
86
|
"datasets": datasets_dict,
|
|
95
|
-
"count":
|
|
87
|
+
"count": len(datasets),
|
|
96
88
|
},
|
|
97
89
|
)
|
|
98
90
|
|
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import json
|
|
16
15
|
import logging
|
|
17
16
|
from typing import Annotated
|
|
18
17
|
|
|
@@ -32,12 +31,10 @@ async def list_deployments() -> ToolResult:
|
|
|
32
31
|
deployments = client.Deployment.list()
|
|
33
32
|
if not deployments:
|
|
34
33
|
return ToolResult(
|
|
35
|
-
content="No deployments found.",
|
|
36
34
|
structured_content={"deployments": []},
|
|
37
35
|
)
|
|
38
36
|
deployments_dict = {d.id: d.label for d in deployments}
|
|
39
37
|
return ToolResult(
|
|
40
|
-
content="\n".join(f"{d.id}: {d.label}" for d in deployments),
|
|
41
38
|
structured_content={"deployments": deployments_dict},
|
|
42
39
|
)
|
|
43
40
|
|
|
@@ -54,10 +51,6 @@ async def get_model_info_from_deployment(
|
|
|
54
51
|
client = get_sdk_client()
|
|
55
52
|
deployment = client.Deployment.get(deployment_id)
|
|
56
53
|
return ToolResult(
|
|
57
|
-
content=(
|
|
58
|
-
f"Retrieved model info for deployment {deployment_id}, here are the details:\n"
|
|
59
|
-
f"{json.dumps(deployment.model, indent=2)}"
|
|
60
|
-
),
|
|
61
54
|
structured_content=deployment.model,
|
|
62
55
|
)
|
|
63
56
|
|
|
@@ -87,7 +80,6 @@ async def deploy_model(
|
|
|
87
80
|
default_prediction_server_id=prediction_servers[0].id,
|
|
88
81
|
)
|
|
89
82
|
return ToolResult(
|
|
90
|
-
content=f"Created deployment {deployment.id} with label {label}",
|
|
91
83
|
structured_content={
|
|
92
84
|
"deployment_id": deployment.id,
|
|
93
85
|
"label": label,
|
|
@@ -19,9 +19,13 @@ import json
|
|
|
19
19
|
import logging
|
|
20
20
|
from datetime import datetime
|
|
21
21
|
from datetime import timedelta
|
|
22
|
+
from typing import Annotated
|
|
22
23
|
from typing import Any
|
|
23
24
|
|
|
24
25
|
import pandas as pd
|
|
26
|
+
from fastmcp.exceptions import ToolError
|
|
27
|
+
from fastmcp.tools.tool import ToolResult
|
|
28
|
+
from mcp.types import TextContent
|
|
25
29
|
|
|
26
30
|
from datarobot_genai.drmcp.core.clients import get_sdk_client
|
|
27
31
|
from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
|
|
@@ -29,40 +33,18 @@ from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
|
|
|
29
33
|
logger = logging.getLogger(__name__)
|
|
30
34
|
|
|
31
35
|
|
|
32
|
-
@dr_mcp_tool(tags={"deployment", "info", "metadata"})
|
|
33
|
-
async def get_deployment_info(
|
|
36
|
+
@dr_mcp_tool(tags={"predictive", "deployment", "read", "info", "metadata"})
|
|
37
|
+
async def get_deployment_info(
|
|
38
|
+
*,
|
|
39
|
+
deployment_id: Annotated[str, "The ID of the DataRobot deployment"] | None = None,
|
|
40
|
+
) -> ToolError | ToolResult:
|
|
34
41
|
"""
|
|
35
42
|
Retrieve information about the deployment, including the list of
|
|
36
43
|
features needed to make predictions on this deployment.
|
|
37
|
-
|
|
38
|
-
Args:
|
|
39
|
-
deployment_id: The ID of the DataRobot deployment
|
|
40
|
-
|
|
41
|
-
Returns
|
|
42
|
-
-------
|
|
43
|
-
JSON string containing model and feature information including:
|
|
44
|
-
For datarobot native models will return model information for custom models
|
|
45
|
-
this will likely just return features and total_features values.
|
|
46
|
-
|
|
47
|
-
- model_type: Type of model
|
|
48
|
-
- target: Name of the target feature
|
|
49
|
-
- target_type: Type of the target feature
|
|
50
|
-
- features: List of features with their importance and type
|
|
51
|
-
- total_features: Total number of features
|
|
52
|
-
- time_series_config: Time series configuration if applicable
|
|
53
|
-
|
|
54
|
-
for features:
|
|
55
|
-
- feature_name: Name of the feature
|
|
56
|
-
- ``name`` : str, feature name
|
|
57
|
-
- ``feature_type`` : str, feature type
|
|
58
|
-
- ``importance`` : float, numeric measure of the relationship strength between
|
|
59
|
-
the feature and target (independent of model or other features)
|
|
60
|
-
- ``date_format`` : str or None, the date format string for how this feature was
|
|
61
|
-
interpreted, null if not a date feature, compatible with
|
|
62
|
-
https://docs.python.org/2/library/time.html#time.strftime.
|
|
63
|
-
- ``known_in_advance`` : bool, whether the feature was selected as known in advance in
|
|
64
|
-
a time series model, false for non-time series models.
|
|
65
44
|
"""
|
|
45
|
+
if not deployment_id:
|
|
46
|
+
raise ToolError("Deployment ID must be provided")
|
|
47
|
+
|
|
66
48
|
client = get_sdk_client()
|
|
67
49
|
deployment = client.Deployment.get(deployment_id)
|
|
68
50
|
|
|
@@ -112,40 +94,33 @@ async def get_deployment_info(deployment_id: str) -> str:
|
|
|
112
94
|
"series_id_columns": partition.multiseries_id_columns or [],
|
|
113
95
|
}
|
|
114
96
|
|
|
115
|
-
return
|
|
97
|
+
return ToolResult(
|
|
98
|
+
structured_content=result,
|
|
99
|
+
)
|
|
116
100
|
|
|
117
101
|
|
|
118
|
-
@dr_mcp_tool(tags={"deployment", "template", "data"})
|
|
119
|
-
async def generate_prediction_data_template(
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
- If frequent values are available for a feature, they will be used as sample values;
|
|
131
|
-
otherwise, blank fields will be used.
|
|
132
|
-
Please note that using frequent values in your predictions data can influence the prediction,
|
|
133
|
-
think of it as sending in the average value for the feature. If you don't want this effect on
|
|
134
|
-
your predictions leave the field blank you in predictions dataset.
|
|
135
|
-
|
|
136
|
-
Args:
|
|
137
|
-
deployment_id: The ID of the DataRobot deployment
|
|
138
|
-
n_rows: Number of template rows to generate (default 1)
|
|
139
|
-
|
|
140
|
-
Returns
|
|
141
|
-
-------
|
|
142
|
-
CSV template string with sample data ready for predictions
|
|
143
|
-
"""
|
|
102
|
+
@dr_mcp_tool(tags={"predictive", "deployment", "read", "template", "data"})
|
|
103
|
+
async def generate_prediction_data_template(
|
|
104
|
+
*,
|
|
105
|
+
deployment_id: Annotated[str, "The ID of the DataRobot deployment"] | None = None,
|
|
106
|
+
n_rows: Annotated[int, "Number of template rows to generate"] = 1,
|
|
107
|
+
) -> ToolError | ToolResult:
|
|
108
|
+
"""Generate a template CSV with the correct structure for making predictions."""
|
|
109
|
+
if not deployment_id:
|
|
110
|
+
raise ToolError("Deployment ID must be provided")
|
|
111
|
+
if n_rows is None or n_rows <= 0:
|
|
112
|
+
n_rows = 1
|
|
113
|
+
|
|
144
114
|
# Get feature information
|
|
145
|
-
|
|
115
|
+
features_result = await get_deployment_features(deployment_id=deployment_id)
|
|
146
116
|
# Add error handling for empty or error responses
|
|
117
|
+
# Extract text content from ToolResult
|
|
118
|
+
if features_result.content and isinstance(features_result.content[0], TextContent):
|
|
119
|
+
features_json = features_result.content[0].text
|
|
120
|
+
else:
|
|
121
|
+
features_json = str(features_result.content)
|
|
147
122
|
if not features_json or features_json.strip().startswith("Error"):
|
|
148
|
-
|
|
123
|
+
raise ToolError(f"Error with feature information: {features_json}")
|
|
149
124
|
features_info = json.loads(features_json)
|
|
150
125
|
|
|
151
126
|
# Create template data
|
|
@@ -203,64 +178,54 @@ async def generate_prediction_data_template(deployment_id: str, n_rows: int = 1)
|
|
|
203
178
|
# Create DataFrame
|
|
204
179
|
df = pd.DataFrame(template_data)
|
|
205
180
|
|
|
206
|
-
#
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
181
|
+
# Build structured content with template data and metadata
|
|
182
|
+
structured_content = {
|
|
183
|
+
"deployment_id": deployment_id,
|
|
184
|
+
"model_type": features_info["model_type"],
|
|
185
|
+
"target": features_info["target"],
|
|
186
|
+
"target_type": features_info["target_type"],
|
|
187
|
+
"total_features": features_info["total_features"],
|
|
188
|
+
"template_data": df.to_dict("records"), # Convert DataFrame to list of dicts
|
|
189
|
+
}
|
|
210
190
|
|
|
211
191
|
if "time_series_config" in features_info:
|
|
212
|
-
|
|
213
|
-
result += f"# Time Series: datetime_column={ts['datetime_column']}, "
|
|
214
|
-
result += f"forecast_window=[{ts['forecast_window_start']}, {ts['forecast_window_end']}]\n"
|
|
215
|
-
if ts["series_id_columns"]:
|
|
216
|
-
result += f"# Multiseries ID Columns: {', '.join(ts['series_id_columns'])}\n"
|
|
217
|
-
|
|
218
|
-
result += f"# Total Features: {features_info['total_features']}\n"
|
|
219
|
-
result += df.to_csv(index=False)
|
|
192
|
+
structured_content["time_series_config"] = features_info["time_series_config"]
|
|
220
193
|
|
|
221
|
-
return
|
|
194
|
+
return ToolResult(
|
|
195
|
+
structured_content=structured_content,
|
|
196
|
+
)
|
|
222
197
|
|
|
223
198
|
|
|
224
|
-
@dr_mcp_tool(tags={"deployment", "validation", "data"})
|
|
199
|
+
@dr_mcp_tool(tags={"predictive", "deployment", "read", "validation", "data"})
|
|
225
200
|
async def validate_prediction_data(
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
- Missing values (null, empty string, or blank fields) are allowed and will not cause errors
|
|
237
|
-
- No critical issues that would prevent predictions
|
|
238
|
-
|
|
239
|
-
Args:
|
|
240
|
-
deployment_id: The ID of the DataRobot deployment
|
|
241
|
-
file_path: Path to the CSV file to validate (optional if csv_string is provided)
|
|
242
|
-
csv_string: CSV data as a string (optional, used if file_path is not provided)
|
|
243
|
-
|
|
244
|
-
Returns
|
|
245
|
-
-------
|
|
246
|
-
Validation report including any errors, warnings, and suggestions
|
|
247
|
-
"""
|
|
201
|
+
*,
|
|
202
|
+
deployment_id: Annotated[str, "The ID of the DataRobot deployment"] | None = None,
|
|
203
|
+
file_path: Annotated[
|
|
204
|
+
str, "Path to the CSV file to validate (optional if csv_string is provided)"
|
|
205
|
+
]
|
|
206
|
+
| None = None,
|
|
207
|
+
csv_string: Annotated[str, "CSV data as a string (optional, used if file_path is not provided)"]
|
|
208
|
+
| None = None,
|
|
209
|
+
) -> ToolError | ToolResult:
|
|
210
|
+
"""Validate if a CSV file is suitable for making predictions with a deployment."""
|
|
248
211
|
# Load the data
|
|
249
212
|
if csv_string is not None:
|
|
250
213
|
df = pd.read_csv(io.StringIO(csv_string))
|
|
251
214
|
elif file_path is not None:
|
|
252
215
|
df = pd.read_csv(file_path)
|
|
253
216
|
else:
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
},
|
|
259
|
-
indent=2,
|
|
260
|
-
)
|
|
217
|
+
raise ToolError("Must provide either file_path or csv_string.")
|
|
218
|
+
|
|
219
|
+
if not deployment_id:
|
|
220
|
+
raise ToolError("Deployment ID must be provided")
|
|
261
221
|
|
|
262
222
|
# Get deployment features
|
|
263
|
-
|
|
223
|
+
features_result = await get_deployment_features(deployment_id=deployment_id)
|
|
224
|
+
# Extract text content from ToolResult
|
|
225
|
+
if features_result.content and isinstance(features_result.content[0], TextContent):
|
|
226
|
+
features_json = features_result.content[0].text
|
|
227
|
+
else:
|
|
228
|
+
features_json = str(features_result.content)
|
|
264
229
|
features_info = json.loads(features_json)
|
|
265
230
|
|
|
266
231
|
validation_report: dict[str, Any] = {
|
|
@@ -359,22 +324,28 @@ async def validate_prediction_data(
|
|
|
359
324
|
"model_type": features_info["model_type"],
|
|
360
325
|
}
|
|
361
326
|
|
|
362
|
-
return
|
|
327
|
+
return ToolResult(
|
|
328
|
+
structured_content=validation_report,
|
|
329
|
+
)
|
|
363
330
|
|
|
364
331
|
|
|
365
|
-
@dr_mcp_tool(tags={"deployment", "features", "info"})
|
|
366
|
-
async def get_deployment_features(
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
332
|
+
@dr_mcp_tool(tags={"predictive", "deployment", "read", "features", "info"})
|
|
333
|
+
async def get_deployment_features(
|
|
334
|
+
*,
|
|
335
|
+
deployment_id: Annotated[str, "The ID of the DataRobot deployment"] | None = None,
|
|
336
|
+
) -> ToolError | ToolResult:
|
|
337
|
+
"""Retrieve only the features list for a deployment, as JSON string."""
|
|
338
|
+
if not deployment_id:
|
|
339
|
+
raise ToolError("Deployment ID must be provided")
|
|
340
|
+
|
|
341
|
+
info_result = await get_deployment_info(deployment_id=deployment_id)
|
|
342
|
+
# Extract text content from ToolResult
|
|
343
|
+
if info_result.content and isinstance(info_result.content[0], TextContent):
|
|
344
|
+
info_json = info_result.content[0].text
|
|
345
|
+
else:
|
|
346
|
+
info_json = str(info_result.content)
|
|
375
347
|
if not info_json.strip().startswith("{"):
|
|
376
|
-
|
|
377
|
-
return json.dumps({"features": [], "total_features": 0, "error": info_json}, indent=2)
|
|
348
|
+
raise ToolError(f"Error with deployment info: {info_json}")
|
|
378
349
|
info = json.loads(info_json)
|
|
379
350
|
# Only keep features, time_series_config, and total_features
|
|
380
351
|
result = {
|
|
@@ -389,4 +360,7 @@ async def get_deployment_features(deployment_id: str) -> str:
|
|
|
389
360
|
result["target"] = info["target"]
|
|
390
361
|
if "target_type" in info:
|
|
391
362
|
result["target_type"] = info["target_type"]
|
|
392
|
-
|
|
363
|
+
|
|
364
|
+
return ToolResult(
|
|
365
|
+
structured_content=result,
|
|
366
|
+
)
|
|
@@ -93,33 +93,17 @@ async def get_best_model(
|
|
|
93
93
|
best_model = leaderboard[0]
|
|
94
94
|
logger.info(f"Found best model {best_model.id} for project {project_id}")
|
|
95
95
|
|
|
96
|
-
metric_info = ""
|
|
97
96
|
metric_value = None
|
|
98
97
|
|
|
99
98
|
if metric and best_model.metrics and metric in best_model.metrics:
|
|
100
99
|
metric_value = best_model.metrics[metric].get("validation")
|
|
101
|
-
if metric_value is not None:
|
|
102
|
-
metric_info = f" with {metric}: {metric_value:.2f}"
|
|
103
100
|
|
|
104
101
|
# Include full metrics in the response
|
|
105
102
|
best_model_dict = model_to_dict(best_model)
|
|
106
103
|
best_model_dict["metric"] = metric
|
|
107
104
|
best_model_dict["metric_value"] = metric_value
|
|
108
105
|
|
|
109
|
-
# Format metrics for human-readable content
|
|
110
|
-
metrics_text = ""
|
|
111
|
-
if best_model.metrics:
|
|
112
|
-
metrics_list = []
|
|
113
|
-
for metric_name, metric_data in best_model.metrics.items():
|
|
114
|
-
if isinstance(metric_data, dict) and "validation" in metric_data:
|
|
115
|
-
val = metric_data["validation"]
|
|
116
|
-
if val is not None:
|
|
117
|
-
metrics_list.append(f"{metric_name}: {val:.4f}")
|
|
118
|
-
if metrics_list:
|
|
119
|
-
metrics_text = "\nPerformance metrics:\n" + "\n".join(f" - {m}" for m in metrics_list)
|
|
120
|
-
|
|
121
106
|
return ToolResult(
|
|
122
|
-
content=f"Best model: {best_model.model_type}{metric_info}{metrics_text}",
|
|
123
107
|
structured_content={
|
|
124
108
|
"project_id": project_id,
|
|
125
109
|
"best_model": best_model_dict,
|
|
@@ -148,7 +132,6 @@ async def score_dataset_with_model(
|
|
|
148
132
|
job = model.score(dataset_url)
|
|
149
133
|
|
|
150
134
|
return ToolResult(
|
|
151
|
-
content=f"Scoring job started: {job.id}",
|
|
152
135
|
structured_content={
|
|
153
136
|
"scoring_job_id": job.id,
|
|
154
137
|
"project_id": project_id,
|
|
@@ -172,10 +155,6 @@ async def list_models(
|
|
|
172
155
|
models = project.get_models()
|
|
173
156
|
|
|
174
157
|
return ToolResult(
|
|
175
|
-
content=(
|
|
176
|
-
f"Found {len(models)} models in project {project_id}, here are the details:\n"
|
|
177
|
-
f"{json.dumps(models, indent=2, cls=ModelEncoder)}"
|
|
178
|
-
),
|
|
179
158
|
structured_content={
|
|
180
159
|
"project_id": project_id,
|
|
181
160
|
"models": [model_to_dict(model) for model in models],
|
|
@@ -240,6 +240,9 @@ async def predict_realtime(
|
|
|
240
240
|
else:
|
|
241
241
|
raise ValueError("Either file_path or dataset must be provided.")
|
|
242
242
|
|
|
243
|
+
# Normalize column names: strip leading/trailing whitespace
|
|
244
|
+
df.columns = df.columns.str.strip()
|
|
245
|
+
|
|
243
246
|
if series_id_column and series_id_column not in df.columns:
|
|
244
247
|
raise ValueError(f"series_id_column '{series_id_column}' not found in input data.")
|
|
245
248
|
|
|
@@ -12,7 +12,6 @@
|
|
|
12
12
|
# See the License for the specific language governing permissions and
|
|
13
13
|
# limitations under the License.
|
|
14
14
|
|
|
15
|
-
import json
|
|
16
15
|
import logging
|
|
17
16
|
from typing import Annotated
|
|
18
17
|
|
|
@@ -33,11 +32,6 @@ async def list_projects() -> ToolResult:
|
|
|
33
32
|
projects = {p.id: p.project_name for p in projects}
|
|
34
33
|
|
|
35
34
|
return ToolResult(
|
|
36
|
-
content=(
|
|
37
|
-
json.dumps(projects, indent=2)
|
|
38
|
-
if projects
|
|
39
|
-
else json.dumps({"message": "No projects found."}, indent=2)
|
|
40
|
-
),
|
|
41
35
|
structured_content=projects,
|
|
42
36
|
)
|
|
43
37
|
|
|
@@ -48,7 +42,7 @@ async def get_project_dataset_by_name(
|
|
|
48
42
|
project_id: Annotated[str, "The ID of the DataRobot project."] | None = None,
|
|
49
43
|
dataset_name: Annotated[str, "The name of the dataset to find (e.g., 'training', 'holdout')."]
|
|
50
44
|
| None = None,
|
|
51
|
-
) ->
|
|
45
|
+
) -> ToolResult:
|
|
52
46
|
"""Get a dataset ID by name for a given project.
|
|
53
47
|
|
|
54
48
|
The dataset ID and the dataset type (source or prediction) as a string, or an error message.
|
|
@@ -70,21 +64,11 @@ async def get_project_dataset_by_name(
|
|
|
70
64
|
for ds in all_datasets:
|
|
71
65
|
if dataset_name.lower() in ds["dataset"].name.lower():
|
|
72
66
|
return ToolResult(
|
|
73
|
-
content=(
|
|
74
|
-
json.dumps(
|
|
75
|
-
{
|
|
76
|
-
"dataset_id": ds["dataset"].id,
|
|
77
|
-
"dataset_type": ds["type"],
|
|
78
|
-
},
|
|
79
|
-
indent=2,
|
|
80
|
-
)
|
|
81
|
-
),
|
|
82
67
|
structured_content={
|
|
83
68
|
"dataset_id": ds["dataset"].id,
|
|
84
69
|
"dataset_type": ds["type"],
|
|
85
70
|
},
|
|
86
71
|
)
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
structured_content={},
|
|
72
|
+
raise ToolError(
|
|
73
|
+
f"Dataset with name containing '{dataset_name}' not found in project {project_id}."
|
|
90
74
|
)
|
|
@@ -14,7 +14,6 @@
|
|
|
14
14
|
|
|
15
15
|
"""Tools for analyzing datasets and suggesting ML use cases."""
|
|
16
16
|
|
|
17
|
-
import json
|
|
18
17
|
import logging
|
|
19
18
|
from dataclasses import asdict
|
|
20
19
|
from dataclasses import dataclass
|
|
@@ -134,7 +133,6 @@ async def analyze_dataset(
|
|
|
134
133
|
insights_dict = asdict(insights)
|
|
135
134
|
|
|
136
135
|
return ToolResult(
|
|
137
|
-
content=json.dumps(insights_dict, indent=2),
|
|
138
136
|
structured_content=insights_dict,
|
|
139
137
|
)
|
|
140
138
|
|
|
@@ -164,7 +162,6 @@ async def suggest_use_cases(
|
|
|
164
162
|
suggestions.sort(key=lambda x: x["confidence"], reverse=True)
|
|
165
163
|
|
|
166
164
|
return ToolResult(
|
|
167
|
-
content=json.dumps(suggestions, indent=2),
|
|
168
165
|
structured_content={"use_case_suggestions": suggestions},
|
|
169
166
|
)
|
|
170
167
|
|
|
@@ -255,7 +252,6 @@ async def get_exploratory_insights(
|
|
|
255
252
|
)
|
|
256
253
|
|
|
257
254
|
return ToolResult(
|
|
258
|
-
content=json.dumps(eda_insights, indent=2),
|
|
259
255
|
structured_content=eda_insights,
|
|
260
256
|
)
|
|
261
257
|
|
|
@@ -540,22 +536,11 @@ async def start_autopilot(
|
|
|
540
536
|
}
|
|
541
537
|
|
|
542
538
|
return ToolResult(
|
|
543
|
-
content=json.dumps(result, indent=2),
|
|
544
539
|
structured_content=result,
|
|
545
540
|
)
|
|
546
541
|
|
|
547
542
|
except Exception as e:
|
|
548
|
-
raise ToolError(
|
|
549
|
-
content=json.dumps(
|
|
550
|
-
{
|
|
551
|
-
"error": f"Failed to start Autopilot: {str(e)}",
|
|
552
|
-
"project_id": project.id if project else None,
|
|
553
|
-
"target": target,
|
|
554
|
-
"mode": mode,
|
|
555
|
-
},
|
|
556
|
-
indent=2,
|
|
557
|
-
)
|
|
558
|
-
)
|
|
543
|
+
raise ToolError(f"Failed to start Autopilot: {str(e)}")
|
|
559
544
|
|
|
560
545
|
|
|
561
546
|
@dr_mcp_tool(tags={"prediction", "training", "read", "model", "evaluation"})
|
|
@@ -611,7 +596,6 @@ async def get_model_roc_curve(
|
|
|
611
596
|
}
|
|
612
597
|
|
|
613
598
|
return ToolResult(
|
|
614
|
-
content=json.dumps({"data": roc_data}, indent=2),
|
|
615
599
|
structured_content={"data": roc_data},
|
|
616
600
|
)
|
|
617
601
|
except Exception as e:
|
|
@@ -638,7 +622,6 @@ async def get_model_feature_impact(
|
|
|
638
622
|
feature_impact = model.get_or_request_feature_impact()
|
|
639
623
|
|
|
640
624
|
return ToolResult(
|
|
641
|
-
content=json.dumps({"data": feature_impact}, indent=2),
|
|
642
625
|
structured_content={"data": feature_impact},
|
|
643
626
|
)
|
|
644
627
|
|
|
@@ -684,6 +667,5 @@ async def get_model_lift_chart(
|
|
|
684
667
|
}
|
|
685
668
|
|
|
686
669
|
return ToolResult(
|
|
687
|
-
content=json.dumps({"data": lift_chart_data}, indent=2),
|
|
688
670
|
structured_content={"data": lift_chart_data},
|
|
689
671
|
)
|