datarobot-genai 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datarobot_genai/__init__.py +19 -0
- datarobot_genai/core/__init__.py +0 -0
- datarobot_genai/core/agents/__init__.py +43 -0
- datarobot_genai/core/agents/base.py +195 -0
- datarobot_genai/core/chat/__init__.py +19 -0
- datarobot_genai/core/chat/auth.py +146 -0
- datarobot_genai/core/chat/client.py +178 -0
- datarobot_genai/core/chat/responses.py +297 -0
- datarobot_genai/core/cli/__init__.py +18 -0
- datarobot_genai/core/cli/agent_environment.py +47 -0
- datarobot_genai/core/cli/agent_kernel.py +211 -0
- datarobot_genai/core/custom_model.py +141 -0
- datarobot_genai/core/mcp/__init__.py +0 -0
- datarobot_genai/core/mcp/common.py +218 -0
- datarobot_genai/core/telemetry_agent.py +126 -0
- datarobot_genai/core/utils/__init__.py +3 -0
- datarobot_genai/core/utils/auth.py +234 -0
- datarobot_genai/core/utils/urls.py +64 -0
- datarobot_genai/crewai/__init__.py +24 -0
- datarobot_genai/crewai/agent.py +42 -0
- datarobot_genai/crewai/base.py +159 -0
- datarobot_genai/crewai/events.py +117 -0
- datarobot_genai/crewai/mcp.py +59 -0
- datarobot_genai/drmcp/__init__.py +78 -0
- datarobot_genai/drmcp/core/__init__.py +13 -0
- datarobot_genai/drmcp/core/auth.py +165 -0
- datarobot_genai/drmcp/core/clients.py +180 -0
- datarobot_genai/drmcp/core/config.py +250 -0
- datarobot_genai/drmcp/core/config_utils.py +174 -0
- datarobot_genai/drmcp/core/constants.py +18 -0
- datarobot_genai/drmcp/core/credentials.py +190 -0
- datarobot_genai/drmcp/core/dr_mcp_server.py +316 -0
- datarobot_genai/drmcp/core/dr_mcp_server_logo.py +136 -0
- datarobot_genai/drmcp/core/dynamic_prompts/__init__.py +13 -0
- datarobot_genai/drmcp/core/dynamic_prompts/controllers.py +130 -0
- datarobot_genai/drmcp/core/dynamic_prompts/dr_lib.py +128 -0
- datarobot_genai/drmcp/core/dynamic_prompts/register.py +206 -0
- datarobot_genai/drmcp/core/dynamic_prompts/utils.py +33 -0
- datarobot_genai/drmcp/core/dynamic_tools/__init__.py +14 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/__init__.py +0 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/__init__.py +14 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/base.py +72 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/default.py +82 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/drum.py +238 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/config.py +228 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/controllers.py +63 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/metadata.py +162 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/register.py +87 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/schemas/drum_agentic_fallback_schema.json +36 -0
- datarobot_genai/drmcp/core/dynamic_tools/deployment/schemas/drum_prediction_fallback_schema.json +10 -0
- datarobot_genai/drmcp/core/dynamic_tools/register.py +254 -0
- datarobot_genai/drmcp/core/dynamic_tools/schema.py +532 -0
- datarobot_genai/drmcp/core/exceptions.py +25 -0
- datarobot_genai/drmcp/core/logging.py +98 -0
- datarobot_genai/drmcp/core/mcp_instance.py +542 -0
- datarobot_genai/drmcp/core/mcp_server_tools.py +129 -0
- datarobot_genai/drmcp/core/memory_management/__init__.py +13 -0
- datarobot_genai/drmcp/core/memory_management/manager.py +820 -0
- datarobot_genai/drmcp/core/memory_management/memory_tools.py +201 -0
- datarobot_genai/drmcp/core/routes.py +436 -0
- datarobot_genai/drmcp/core/routes_utils.py +30 -0
- datarobot_genai/drmcp/core/server_life_cycle.py +107 -0
- datarobot_genai/drmcp/core/telemetry.py +424 -0
- datarobot_genai/drmcp/core/tool_filter.py +108 -0
- datarobot_genai/drmcp/core/utils.py +131 -0
- datarobot_genai/drmcp/server.py +19 -0
- datarobot_genai/drmcp/test_utils/__init__.py +13 -0
- datarobot_genai/drmcp/test_utils/integration_mcp_server.py +102 -0
- datarobot_genai/drmcp/test_utils/mcp_utils_ete.py +96 -0
- datarobot_genai/drmcp/test_utils/mcp_utils_integration.py +94 -0
- datarobot_genai/drmcp/test_utils/openai_llm_mcp_client.py +234 -0
- datarobot_genai/drmcp/test_utils/tool_base_ete.py +151 -0
- datarobot_genai/drmcp/test_utils/utils.py +91 -0
- datarobot_genai/drmcp/tools/__init__.py +14 -0
- datarobot_genai/drmcp/tools/predictive/__init__.py +27 -0
- datarobot_genai/drmcp/tools/predictive/data.py +97 -0
- datarobot_genai/drmcp/tools/predictive/deployment.py +91 -0
- datarobot_genai/drmcp/tools/predictive/deployment_info.py +392 -0
- datarobot_genai/drmcp/tools/predictive/model.py +148 -0
- datarobot_genai/drmcp/tools/predictive/predict.py +254 -0
- datarobot_genai/drmcp/tools/predictive/predict_realtime.py +307 -0
- datarobot_genai/drmcp/tools/predictive/project.py +72 -0
- datarobot_genai/drmcp/tools/predictive/training.py +651 -0
- datarobot_genai/langgraph/__init__.py +0 -0
- datarobot_genai/langgraph/agent.py +341 -0
- datarobot_genai/langgraph/mcp.py +73 -0
- datarobot_genai/llama_index/__init__.py +16 -0
- datarobot_genai/llama_index/agent.py +50 -0
- datarobot_genai/llama_index/base.py +299 -0
- datarobot_genai/llama_index/mcp.py +79 -0
- datarobot_genai/nat/__init__.py +0 -0
- datarobot_genai/nat/agent.py +258 -0
- datarobot_genai/nat/datarobot_llm_clients.py +249 -0
- datarobot_genai/nat/datarobot_llm_providers.py +130 -0
- datarobot_genai/py.typed +0 -0
- datarobot_genai-0.2.0.dist-info/METADATA +139 -0
- datarobot_genai-0.2.0.dist-info/RECORD +101 -0
- datarobot_genai-0.2.0.dist-info/WHEEL +4 -0
- datarobot_genai-0.2.0.dist-info/entry_points.txt +3 -0
- datarobot_genai-0.2.0.dist-info/licenses/AUTHORS +2 -0
- datarobot_genai-0.2.0.dist-info/licenses/LICENSE +201 -0
datarobot_genai/drmcp/tools/predictive/deployment_info.py

@@ -0,0 +1,392 @@

```python
# Copyright 2025 DataRobot, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tools for retrieving deployment metadata and data requirements."""

import io
import json
import logging
from datetime import datetime
from datetime import timedelta
from typing import Any

import pandas as pd

from datarobot_genai.drmcp.core.clients import get_sdk_client
from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool

logger = logging.getLogger(__name__)


@dr_mcp_tool(tags={"deployment", "info", "metadata"})
async def get_deployment_info(deployment_id: str) -> str:
    """
    Retrieve information about a deployment, including the list of
    features needed to make predictions against it.

    Args:
        deployment_id: The ID of the DataRobot deployment

    Returns
    -------
    JSON string containing model and feature information. For DataRobot-native
    models the full model information is returned; for custom models the result
    will likely contain only the features and total_features values.

    - model_type: Type of model
    - target: Name of the target feature
    - target_type: Type of the target feature
    - features: List of features with their importance and type
    - total_features: Total number of features
    - time_series_config: Time series configuration, if applicable

    Each entry in ``features`` contains:

    - ``name`` : str, feature name
    - ``feature_type`` : str, feature type
    - ``importance`` : float, numeric measure of the relationship strength between
      the feature and target (independent of model or other features)
    - ``date_format`` : str or None, the date format string describing how this
      feature was interpreted, null if not a date feature, compatible with
      https://docs.python.org/3/library/time.html#time.strftime.
    - ``known_in_advance`` : bool, whether the feature was selected as known in
      advance in a time series model, false for non-time series models.
    """
    client = get_sdk_client()
    deployment = client.Deployment.get(deployment_id)

    # Get features from the deployment
    features_raw = deployment.get_features()

    # Parse features if the SDK returned a JSON string
    if isinstance(features_raw, str):
        try:
            features = json.loads(features_raw)
        except json.JSONDecodeError:
            features = []
    else:
        features = features_raw

    # Get the model type when this is not a custom model
    project = None
    if deployment.model.get("project_id") is None:
        model_type = "custom"
        target = ""
        target_type = ""
    else:
        project = client.Project.get(deployment.model["project_id"])
        model = client.Model.get(project=project, model_id=deployment.model["id"])
        model_type = model.model_type
        target = project.target
        target_type = project.target_type

    # Add model metadata
    result = {
        "deployment_id": deployment_id,
        "model_type": model_type,
        "target": target,
        "target_type": target_type,
        "features": sorted(features, key=lambda x: (x.get("importance") or 0), reverse=True),
        "total_features": len(features),
    }

    # Add time-series-specific information if applicable
    if project and hasattr(project, "datetime_partitioning"):
        partition = project.datetime_partitioning
        result["time_series_config"] = {
            "datetime_column": partition.datetime_partition_column,
            "forecast_window_start": partition.forecast_window_start,
            "forecast_window_end": partition.forecast_window_end,
            "series_id_columns": partition.multiseries_id_columns or [],
        }

    return json.dumps(result, indent=2)
```
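As an editorial illustration (not part of the wheel), here is a minimal sketch of consuming this tool's output. It assumes a configured DataRobot client, a real deployment ID in place of the placeholder, and that the decorated coroutine remains directly awaitable despite the `dr_mcp_tool` wrapper:

```python
import asyncio
import json


async def show_top_features() -> None:
    # Placeholder ID; substitute a real DataRobot deployment ID.
    info = json.loads(await get_deployment_info("<deployment-id>"))
    print(info["model_type"], info["target"])
    # Features arrive sorted by importance, most important first.
    for feature in info["features"][:5]:
        print(feature["name"], feature["feature_type"], feature.get("importance"))


asyncio.run(show_top_features())
```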
Continuing `deployment_info.py`, the template generator:

```python
@dr_mcp_tool(tags={"deployment", "template", "data"})
async def generate_prediction_data_template(deployment_id: str, n_rows: int = 1) -> str:
    """
    Generate a template CSV with the correct structure for making predictions.

    This creates a template with:
    - All required feature columns in the correct order
    - Sample values based on feature types
    - Comments explaining each feature

    When using this tool, always consider feature importance. For features with
    high importance, try to infer or ask for a reasonable value, using frequent
    values or domain knowledge if available. For less important features, you may
    leave them blank. If frequent values are available for a feature, they are
    used as sample values; otherwise, fields are left blank.

    Note that using frequent values in your prediction data can influence the
    prediction; think of it as sending in the average value for the feature. If
    you do not want this effect on your predictions, leave the field blank in
    your predictions dataset.

    Args:
        deployment_id: The ID of the DataRobot deployment
        n_rows: Number of template rows to generate (default 1)

    Returns
    -------
    CSV template string with sample data ready for predictions
    """
    # Get feature information
    features_json = await get_deployment_features(deployment_id)
    # Handle empty or error responses
    if not features_json or features_json.strip().startswith("Error"):
        return f"Error: {features_json}"
    features_info = json.loads(features_json)

    # Create template data
    template_data = {}

    for feature in features_info["features"]:
        feature_name = feature["name"]
        feature_type = feature["feature_type"].lower()  # Normalize to lowercase

        # Use frequent values if available
        frequent_values = feature.get("frequent_values")
        if isinstance(frequent_values, list) and frequent_values:
            template_data[feature_name] = [frequent_values[0]] * n_rows
            continue

        # Generate sample values based on type: numeric and date features
        # default to None; categorical, summarized categorical, text, and any
        # other types default to an empty string.
        if feature_type in ("numeric", "date"):
            template_data[feature_name] = [None] * n_rows
        else:
            template_data[feature_name] = [""] * n_rows

    # Handle time-series-specific columns
    if "time_series_config" in features_info:
        ts_config = features_info["time_series_config"]

        # Ensure the datetime column exists
        if ts_config["datetime_column"] not in template_data:
            base_date = datetime.now()
            dates = [base_date + timedelta(days=i) for i in range(n_rows)]
            template_data[ts_config["datetime_column"]] = dates

        # Add series ID columns if multiseries
        for series_col in ts_config["series_id_columns"]:
            if series_col not in template_data:
                template_data[series_col] = ["series_A"] * n_rows

    # Create DataFrame
    df = pd.DataFrame(template_data)

    # Add metadata comments
    result = f"# Prediction Data Template for Deployment: {deployment_id}\n"
    result += f"# Model Type: {features_info['model_type']}\n"
    result += f"# Target: {features_info['target']} (Type: {features_info['target_type']})\n"

    if "time_series_config" in features_info:
        ts = features_info["time_series_config"]
        result += f"# Time Series: datetime_column={ts['datetime_column']}, "
        result += f"forecast_window=[{ts['forecast_window_start']}, {ts['forecast_window_end']}]\n"
        if ts["series_id_columns"]:
            result += f"# Multiseries ID Columns: {', '.join(ts['series_id_columns'])}\n"

    result += f"# Total Features: {features_info['total_features']}\n"
    result += df.to_csv(index=False)

    return result
```
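Another illustrative sketch, under the same assumptions as above: round-tripping the template into a DataFrame. The `comment="#"` option strips the metadata lines the tool prepends, though it would also clip any data value that itself contains a `#`:

```python
import io

import pandas as pd


async def template_frame(deployment_id: str) -> pd.DataFrame:
    # The template is '#' metadata comments followed by a CSV body.
    template = await generate_prediction_data_template(deployment_id, n_rows=3)
    return pd.read_csv(io.StringIO(template), comment="#")
```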
Next in the same file, the validator:

```python
@dr_mcp_tool(tags={"deployment", "validation", "data"})
async def validate_prediction_data(
    deployment_id: str,
    file_path: str | None = None,
    csv_string: str | None = None,
) -> str:
    """
    Validate whether a CSV file is suitable for making predictions with a deployment.

    Checks:
    - All required features are present
    - Feature types match expectations
    - Missing values (null, empty string, or blank fields) are allowed and will not cause errors
    - No critical issues that would prevent predictions

    Args:
        deployment_id: The ID of the DataRobot deployment
        file_path: Path to the CSV file to validate (optional if csv_string is provided)
        csv_string: CSV data as a string (optional, used if file_path is not provided)

    Returns
    -------
    Validation report including any errors, warnings, and suggestions
    """
    # Load the data
    if csv_string is not None:
        df = pd.read_csv(io.StringIO(csv_string))
    elif file_path is not None:
        df = pd.read_csv(file_path)
    else:
        return json.dumps(
            {
                "status": "error",
                "error": "Must provide either file_path or csv_string.",
            },
            indent=2,
        )

    # Get deployment features
    features_json = await get_deployment_features(deployment_id)
    features_info = json.loads(features_json)

    validation_report: dict[str, Any] = {
        "status": "valid",
        "errors": [],
        "warnings": [],
        "info": [],
    }

    # Check each required feature
    required_features = list(features_info["features"])
    data_columns = set(df.columns)

    # Threshold above which a feature is considered important
    importance_threshold = 0.1

    for feature in required_features:
        feature_name = feature["name"] if "name" in feature else feature["feature_name"]

        # Check whether the feature column exists
        if feature_name not in data_columns:
            importance = feature.get("importance") or 0
            if importance > importance_threshold:
                validation_report["warnings"].append(
                    f"Missing important feature: {feature_name} "
                    f"(importance: {importance:.2f})"
                )
            else:
                validation_report["warnings"].append(
                    f"Missing feature column: {feature_name} (column will be treated as "
                    f"missing values)"
                )
            continue

        # Check for missing values (allowed)
        if df[feature_name].isnull().all() or (df[feature_name] == "").all():
            validation_report["info"].append(
                f"Feature {feature_name} is entirely missing or empty (this is allowed)"
            )
            continue

        # Check data type compatibility (only if not all values are missing)
        col_dtype = str(df[feature_name].dtype)
        if feature["feature_type"] == "numeric" and not pd.api.types.is_numeric_dtype(
            df[feature_name].dropna()
        ):
            validation_report["warnings"].append(
                f"Feature {feature_name} should be numeric but is {col_dtype}"
            )

    # Check for extra columns
    expected_features = {
        f["name"] if "name" in f else f["feature_name"] for f in features_info["features"]
    }
    extra_columns = data_columns - expected_features
    if extra_columns:
        validation_report["info"].append(
            f"Extra columns found (will be ignored): {', '.join(extra_columns)}"
        )

    # Time-series-specific validation
    if "time_series_config" in features_info:
        ts_config = features_info["time_series_config"]

        # Check the datetime column
        if ts_config["datetime_column"] not in data_columns:
            validation_report["errors"].append(
                f"Missing required datetime column: {ts_config['datetime_column']}"
            )
            validation_report["status"] = "invalid"
        elif (
            not df[ts_config["datetime_column"]].isnull().all()
            and not (df[ts_config["datetime_column"]] == "").all()
        ):
            try:
                pd.to_datetime(df[ts_config["datetime_column"]])
            except ValueError:
                validation_report["errors"].append(
                    f"Datetime column {ts_config['datetime_column']} cannot be parsed as dates"
                )
                validation_report["status"] = "invalid"

        # Check series ID columns for multiseries
        for series_col in ts_config["series_id_columns"]:
            if series_col not in data_columns:
                validation_report["errors"].append(
                    f"Missing required series ID column: {series_col}"
                )
                validation_report["status"] = "invalid"

    # Add summary
    validation_report["summary"] = {
        "file_path": file_path,
        "rows": len(df),
        "columns": len(df.columns),
        "deployment_id": deployment_id,
        "model_type": features_info["model_type"],
    }

    return json.dumps(validation_report, indent=2)
```
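For reference, the shape of the report this validator emits, with every value hypothetical:

```python
# Illustrative report shape; all values below are hypothetical.
example_report = {
    "status": "valid",
    "errors": [],
    "warnings": ["Missing important feature: annual_income (importance: 0.34)"],
    "info": ["Extra columns found (will be ignored): notes"],
    "summary": {
        "file_path": "scoring.csv",
        "rows": 100,
        "columns": 12,
        "deployment_id": "<deployment-id>",
        "model_type": "Light Gradient Boosted Trees",
    },
}
```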
And the features-only wrapper that closes out `deployment_info.py`:

```python
@dr_mcp_tool(tags={"deployment", "features", "info"})
async def get_deployment_features(deployment_id: str) -> str:
    """
    Retrieve only the features list for a deployment, as a JSON string.

    Args:
        deployment_id: The ID of the DataRobot deployment

    Returns:
        JSON string containing the features list, plus the time series config,
        model type, and target information when present.
    """
    info_json = await get_deployment_info(deployment_id)
    if not info_json.strip().startswith("{"):
        # Return a default error JSON
        return json.dumps({"features": [], "total_features": 0, "error": info_json}, indent=2)
    info = json.loads(info_json)
    # Keep only the features, total_features, and selected metadata keys
    result = {
        "features": info.get("features", []),
        "total_features": info.get("total_features", 0),
    }
    for key in ("time_series_config", "model_type", "target", "target_type"):
        if key in info:
            result[key] = info[key]
    return json.dumps(result, indent=2)
```
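When `get_deployment_info` yields something other than JSON, this wrapper degrades to an error payload of roughly this shape (values hypothetical):

```python
# Illustrative fallback when the upstream result is not JSON.
fallback = {"features": [], "total_features": 0, "error": "<non-JSON upstream output>"}
```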
datarobot_genai/drmcp/tools/predictive/model.py

@@ -0,0 +1,148 @@

```python
# Copyright 2025 DataRobot, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import json
import logging
from typing import Any

from datarobot.models.model import Model

from datarobot_genai.drmcp.core.clients import get_sdk_client
from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool

logger = logging.getLogger(__name__)


def model_to_dict(model: Any) -> dict[str, Any]:
    """Convert a DataRobot Model object to a dictionary."""
    try:
        return {
            "id": model.id,
            "model_type": model.model_type,
            "metrics": model.metrics,
        }
    except AttributeError as e:
        logger.warning(f"Failed to access some model attributes: {e}")
        # Return minimal information if some attributes are not accessible
        return {
            "id": getattr(model, "id", "unknown"),
            "model_type": getattr(model, "model_type", "unknown"),
        }


class ModelEncoder(json.JSONEncoder):
    """Custom JSON encoder for DataRobot Model objects."""

    def default(self, obj: Any) -> Any:
        if isinstance(obj, Model):
            return model_to_dict(obj)
        return super().default(obj)


@dr_mcp_tool(tags={"model", "management", "info"})
async def get_best_model(project_id: str, metric: str | None = None) -> str:
    """
    Get the best model for a DataRobot project, optionally by a specific metric.

    Args:
        project_id: The ID of the DataRobot project.
        metric: (Optional) The metric to use for best model selection (e.g., 'AUC', 'LogLoss').

    Returns
    -------
    A formatted string describing the best model.

    Raises
    ------
    Exception: If the project is not found or no models exist in the project.
    """
    client = get_sdk_client()
    project = client.Project.get(project_id)
    if not project:
        logger.error(f"Project with ID {project_id} not found")
        raise Exception(f"Project with ID {project_id} not found.")

    leaderboard = project.get_models()
    if not leaderboard:
        logger.info(f"No models found for project {project_id}")
        raise Exception("No models found for this project.")

    if metric:
        # Metrics where higher values are better sort descending; models
        # missing the metric sort last in either direction.
        reverse_sort = metric.upper() in [
            "AUC",
            "ACCURACY",
            "F1",
            "PRECISION",
            "RECALL",
        ]
        leaderboard = sorted(
            leaderboard,
            key=lambda m: m.metrics.get(metric, {}).get(
                "validation", float("-inf") if reverse_sort else float("inf")
            ),
            reverse=reverse_sort,
        )
        logger.info(f"Sorted models by metric: {metric}")

    best_model = leaderboard[0]
    logger.info(f"Found best model {best_model.id} for project {project_id}")

    # Format the response as a human-readable string
    metric_info = ""
    if metric and best_model.metrics and metric in best_model.metrics:
        metric_value = best_model.metrics[metric].get("validation")
        if metric_value is not None:
            metric_info = f" with {metric}: {metric_value:.2f}"

    return f"Best model: {best_model.model_type}{metric_info}"
```
Continuing `model.py`:

```python
@dr_mcp_tool(tags={"model", "prediction", "scoring"})
async def score_dataset_with_model(project_id: str, model_id: str, dataset_url: str) -> str:
    """
    Score a dataset using a specific DataRobot model.

    Args:
        project_id: The ID of the DataRobot project.
        model_id: The ID of the DataRobot model to use for scoring.
        dataset_url: The URL of the dataset to score (must be accessible to DataRobot).

    Returns
    -------
    A string summary of the started scoring job.
    """
    client = get_sdk_client()
    project = client.Project.get(project_id)
    model = client.Model.get(project, model_id)
    job = model.score(dataset_url)
    logger.info(f"Started scoring job {job.id} for model {model_id}")
    return f"Scoring job started: {job.id}"
```
And the final tool in `model.py`:

```python
@dr_mcp_tool(tags={"model", "management", "list"})
async def list_models(project_id: str) -> str:
    """
    List all models in a project.

    Args:
        project_id: The ID of the DataRobot project.

    Returns
    -------
    A JSON string listing the models in the project.
    """
    client = get_sdk_client()
    project = client.Project.get(project_id)
    models = project.get_models()
    return json.dumps(models, indent=2, cls=ModelEncoder)
```
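One last sketch, same assumptions: because `ModelEncoder` reduces each `Model` to a plain dict, the JSON from `list_models` parses straight back into dictionaries:

```python
async def model_types(project_id: str) -> list[str]:
    # ModelEncoder serialized each Model via model_to_dict, so plain dicts come back.
    models = json.loads(await list_models(project_id))
    return [m["model_type"] for m in models]
```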