datarobot-genai 0.2.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of datarobot-genai might be problematic. Click here for more details.

Files changed (125) hide show
  1. datarobot_genai/__init__.py +19 -0
  2. datarobot_genai/core/__init__.py +0 -0
  3. datarobot_genai/core/agents/__init__.py +43 -0
  4. datarobot_genai/core/agents/base.py +195 -0
  5. datarobot_genai/core/chat/__init__.py +19 -0
  6. datarobot_genai/core/chat/auth.py +146 -0
  7. datarobot_genai/core/chat/client.py +178 -0
  8. datarobot_genai/core/chat/responses.py +297 -0
  9. datarobot_genai/core/cli/__init__.py +18 -0
  10. datarobot_genai/core/cli/agent_environment.py +47 -0
  11. datarobot_genai/core/cli/agent_kernel.py +211 -0
  12. datarobot_genai/core/custom_model.py +141 -0
  13. datarobot_genai/core/mcp/__init__.py +0 -0
  14. datarobot_genai/core/mcp/common.py +218 -0
  15. datarobot_genai/core/telemetry_agent.py +126 -0
  16. datarobot_genai/core/utils/__init__.py +3 -0
  17. datarobot_genai/core/utils/auth.py +234 -0
  18. datarobot_genai/core/utils/urls.py +64 -0
  19. datarobot_genai/crewai/__init__.py +24 -0
  20. datarobot_genai/crewai/agent.py +42 -0
  21. datarobot_genai/crewai/base.py +159 -0
  22. datarobot_genai/crewai/events.py +117 -0
  23. datarobot_genai/crewai/mcp.py +59 -0
  24. datarobot_genai/drmcp/__init__.py +78 -0
  25. datarobot_genai/drmcp/core/__init__.py +13 -0
  26. datarobot_genai/drmcp/core/auth.py +165 -0
  27. datarobot_genai/drmcp/core/clients.py +180 -0
  28. datarobot_genai/drmcp/core/config.py +364 -0
  29. datarobot_genai/drmcp/core/config_utils.py +174 -0
  30. datarobot_genai/drmcp/core/constants.py +18 -0
  31. datarobot_genai/drmcp/core/credentials.py +190 -0
  32. datarobot_genai/drmcp/core/dr_mcp_server.py +350 -0
  33. datarobot_genai/drmcp/core/dr_mcp_server_logo.py +136 -0
  34. datarobot_genai/drmcp/core/dynamic_prompts/__init__.py +13 -0
  35. datarobot_genai/drmcp/core/dynamic_prompts/controllers.py +130 -0
  36. datarobot_genai/drmcp/core/dynamic_prompts/dr_lib.py +70 -0
  37. datarobot_genai/drmcp/core/dynamic_prompts/register.py +205 -0
  38. datarobot_genai/drmcp/core/dynamic_prompts/utils.py +33 -0
  39. datarobot_genai/drmcp/core/dynamic_tools/__init__.py +14 -0
  40. datarobot_genai/drmcp/core/dynamic_tools/deployment/__init__.py +0 -0
  41. datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/__init__.py +14 -0
  42. datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/base.py +72 -0
  43. datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/default.py +82 -0
  44. datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/drum.py +238 -0
  45. datarobot_genai/drmcp/core/dynamic_tools/deployment/config.py +228 -0
  46. datarobot_genai/drmcp/core/dynamic_tools/deployment/controllers.py +63 -0
  47. datarobot_genai/drmcp/core/dynamic_tools/deployment/metadata.py +162 -0
  48. datarobot_genai/drmcp/core/dynamic_tools/deployment/register.py +87 -0
  49. datarobot_genai/drmcp/core/dynamic_tools/deployment/schemas/drum_agentic_fallback_schema.json +36 -0
  50. datarobot_genai/drmcp/core/dynamic_tools/deployment/schemas/drum_prediction_fallback_schema.json +10 -0
  51. datarobot_genai/drmcp/core/dynamic_tools/register.py +254 -0
  52. datarobot_genai/drmcp/core/dynamic_tools/schema.py +532 -0
  53. datarobot_genai/drmcp/core/exceptions.py +25 -0
  54. datarobot_genai/drmcp/core/logging.py +98 -0
  55. datarobot_genai/drmcp/core/mcp_instance.py +515 -0
  56. datarobot_genai/drmcp/core/memory_management/__init__.py +13 -0
  57. datarobot_genai/drmcp/core/memory_management/manager.py +820 -0
  58. datarobot_genai/drmcp/core/memory_management/memory_tools.py +201 -0
  59. datarobot_genai/drmcp/core/routes.py +439 -0
  60. datarobot_genai/drmcp/core/routes_utils.py +30 -0
  61. datarobot_genai/drmcp/core/server_life_cycle.py +107 -0
  62. datarobot_genai/drmcp/core/telemetry.py +424 -0
  63. datarobot_genai/drmcp/core/tool_config.py +111 -0
  64. datarobot_genai/drmcp/core/tool_filter.py +117 -0
  65. datarobot_genai/drmcp/core/utils.py +138 -0
  66. datarobot_genai/drmcp/server.py +19 -0
  67. datarobot_genai/drmcp/test_utils/__init__.py +13 -0
  68. datarobot_genai/drmcp/test_utils/clients/__init__.py +0 -0
  69. datarobot_genai/drmcp/test_utils/clients/anthropic.py +68 -0
  70. datarobot_genai/drmcp/test_utils/clients/base.py +300 -0
  71. datarobot_genai/drmcp/test_utils/clients/dr_gateway.py +58 -0
  72. datarobot_genai/drmcp/test_utils/clients/openai.py +68 -0
  73. datarobot_genai/drmcp/test_utils/elicitation_test_tool.py +89 -0
  74. datarobot_genai/drmcp/test_utils/integration_mcp_server.py +109 -0
  75. datarobot_genai/drmcp/test_utils/mcp_utils_ete.py +133 -0
  76. datarobot_genai/drmcp/test_utils/mcp_utils_integration.py +107 -0
  77. datarobot_genai/drmcp/test_utils/test_interactive.py +205 -0
  78. datarobot_genai/drmcp/test_utils/tool_base_ete.py +220 -0
  79. datarobot_genai/drmcp/test_utils/utils.py +91 -0
  80. datarobot_genai/drmcp/tools/__init__.py +14 -0
  81. datarobot_genai/drmcp/tools/clients/__init__.py +14 -0
  82. datarobot_genai/drmcp/tools/clients/atlassian.py +188 -0
  83. datarobot_genai/drmcp/tools/clients/confluence.py +584 -0
  84. datarobot_genai/drmcp/tools/clients/gdrive.py +832 -0
  85. datarobot_genai/drmcp/tools/clients/jira.py +334 -0
  86. datarobot_genai/drmcp/tools/clients/microsoft_graph.py +479 -0
  87. datarobot_genai/drmcp/tools/clients/s3.py +28 -0
  88. datarobot_genai/drmcp/tools/confluence/__init__.py +14 -0
  89. datarobot_genai/drmcp/tools/confluence/tools.py +321 -0
  90. datarobot_genai/drmcp/tools/gdrive/__init__.py +0 -0
  91. datarobot_genai/drmcp/tools/gdrive/tools.py +347 -0
  92. datarobot_genai/drmcp/tools/jira/__init__.py +14 -0
  93. datarobot_genai/drmcp/tools/jira/tools.py +243 -0
  94. datarobot_genai/drmcp/tools/microsoft_graph/__init__.py +13 -0
  95. datarobot_genai/drmcp/tools/microsoft_graph/tools.py +198 -0
  96. datarobot_genai/drmcp/tools/predictive/__init__.py +27 -0
  97. datarobot_genai/drmcp/tools/predictive/data.py +133 -0
  98. datarobot_genai/drmcp/tools/predictive/deployment.py +91 -0
  99. datarobot_genai/drmcp/tools/predictive/deployment_info.py +392 -0
  100. datarobot_genai/drmcp/tools/predictive/model.py +148 -0
  101. datarobot_genai/drmcp/tools/predictive/predict.py +254 -0
  102. datarobot_genai/drmcp/tools/predictive/predict_realtime.py +307 -0
  103. datarobot_genai/drmcp/tools/predictive/project.py +90 -0
  104. datarobot_genai/drmcp/tools/predictive/training.py +661 -0
  105. datarobot_genai/langgraph/__init__.py +0 -0
  106. datarobot_genai/langgraph/agent.py +341 -0
  107. datarobot_genai/langgraph/mcp.py +73 -0
  108. datarobot_genai/llama_index/__init__.py +16 -0
  109. datarobot_genai/llama_index/agent.py +50 -0
  110. datarobot_genai/llama_index/base.py +299 -0
  111. datarobot_genai/llama_index/mcp.py +79 -0
  112. datarobot_genai/nat/__init__.py +0 -0
  113. datarobot_genai/nat/agent.py +275 -0
  114. datarobot_genai/nat/datarobot_auth_provider.py +110 -0
  115. datarobot_genai/nat/datarobot_llm_clients.py +318 -0
  116. datarobot_genai/nat/datarobot_llm_providers.py +130 -0
  117. datarobot_genai/nat/datarobot_mcp_client.py +266 -0
  118. datarobot_genai/nat/helpers.py +87 -0
  119. datarobot_genai/py.typed +0 -0
  120. datarobot_genai-0.2.31.dist-info/METADATA +145 -0
  121. datarobot_genai-0.2.31.dist-info/RECORD +125 -0
  122. datarobot_genai-0.2.31.dist-info/WHEEL +4 -0
  123. datarobot_genai-0.2.31.dist-info/entry_points.txt +5 -0
  124. datarobot_genai-0.2.31.dist-info/licenses/AUTHORS +2 -0
  125. datarobot_genai-0.2.31.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,91 @@
1
+ # Copyright 2025 DataRobot, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import logging
17
+
18
+ from datarobot_genai.drmcp.core.clients import get_sdk_client
19
+ from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
20
+
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
@dr_mcp_tool(tags={"deployment", "management", "list"})
async def list_deployments() -> str:
    """
    List all DataRobot deployments for the authenticated user.

    Returns
    -------
    A string summary of the user's DataRobot deployments, one
    ``<id>: <label>`` entry per line, or a message when none exist.
    """
    sdk = get_sdk_client()
    all_deployments = sdk.Deployment.list()
    if all_deployments:
        lines = [f"{d.id}: {d.label}" for d in all_deployments]
        logger.info(f"Found {len(all_deployments)} deployments")
        return "\n".join(lines)
    logger.info("No deployments found")
    return "No deployments found."
41
+
42
+
43
@dr_mcp_tool(tags={"deployment", "model", "info"})
async def get_model_info_from_deployment(deployment_id: str) -> str:
    """
    Get model info associated with a given deployment ID.

    Args:
        deployment_id: The ID of the DataRobot deployment.

    Returns
    -------
    The model info associated with the deployment as a JSON string.
    """
    sdk = get_sdk_client()
    target_deployment = sdk.Deployment.get(deployment_id)
    logger.info(f"Retrieved model info for deployment {deployment_id}")
    # ``deployment.model`` is already a plain dict, so it serializes directly.
    return json.dumps(target_deployment.model, indent=2)
59
+
60
+
61
@dr_mcp_tool(tags={"deployment", "model", "create"})
async def deploy_model(model_id: str, label: str, description: str = "") -> str:
    """
    Deploy a model by creating a new DataRobot deployment.

    Args:
        model_id: The ID of the DataRobot model to deploy.
        label: The label/name for the deployment.
        description: Optional description for the deployment.

    Returns
    -------
    JSON string with deployment ID and label, or error message.
    """
    sdk = get_sdk_client()
    try:
        servers = sdk.PredictionServer.list()
        if not servers:
            logger.error("No prediction servers available")
            return json.dumps({"error": "No prediction servers available"})
        # Use the first available prediction server as the default target.
        new_deployment = sdk.Deployment.create_from_learning_model(
            model_id=model_id,
            label=label,
            description=description,
            default_prediction_server_id=servers[0].id,
        )
        logger.info(f"Created deployment {new_deployment.id} with label {label}")
        return json.dumps({"deployment_id": new_deployment.id, "label": label})
    except Exception as exc:
        # Report any SDK failure as a JSON error payload rather than raising.
        logger.error(f"Error deploying model {model_id}: {type(exc).__name__}: {exc}")
        return json.dumps({"error": f"Error deploying model {model_id}: {type(exc).__name__}: {exc}"})
@@ -0,0 +1,392 @@
1
+ # Copyright 2025 DataRobot, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Tools for retrieving deployment metadata and data requirements."""
16
+
17
+ import io
18
+ import json
19
+ import logging
20
+ from datetime import datetime
21
+ from datetime import timedelta
22
+ from typing import Any
23
+
24
+ import pandas as pd
25
+
26
+ from datarobot_genai.drmcp.core.clients import get_sdk_client
27
+ from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ @dr_mcp_tool(tags={"deployment", "info", "metadata"})
33
+ async def get_deployment_info(deployment_id: str) -> str:
34
+ """
35
+ Retrieve information about the deployment, including the list of
36
+ features needed to make predictions on this deployment.
37
+
38
+ Args:
39
+ deployment_id: The ID of the DataRobot deployment
40
+
41
+ Returns
42
+ -------
43
+ JSON string containing model and feature information including:
44
+ For datarobot native models will return model information for custom models
45
+ this will likely just return features and total_features values.
46
+
47
+ - model_type: Type of model
48
+ - target: Name of the target feature
49
+ - target_type: Type of the target feature
50
+ - features: List of features with their importance and type
51
+ - total_features: Total number of features
52
+ - time_series_config: Time series configuration if applicable
53
+
54
+ for features:
55
+ - feature_name: Name of the feature
56
+ - ``name`` : str, feature name
57
+ - ``feature_type`` : str, feature type
58
+ - ``importance`` : float, numeric measure of the relationship strength between
59
+ the feature and target (independent of model or other features)
60
+ - ``date_format`` : str or None, the date format string for how this feature was
61
+ interpreted, null if not a date feature, compatible with
62
+ https://docs.python.org/2/library/time.html#time.strftime.
63
+ - ``known_in_advance`` : bool, whether the feature was selected as known in advance in
64
+ a time series model, false for non-time series models.
65
+ """
66
+ client = get_sdk_client()
67
+ deployment = client.Deployment.get(deployment_id)
68
+
69
+ # get features from the deployment
70
+ features_raw = deployment.get_features()
71
+ deployment.get_capabilities()
72
+
73
+ # Parse features if it's a JSON string
74
+ if isinstance(features_raw, str):
75
+ try:
76
+ features = json.loads(features_raw)
77
+ except json.JSONDecodeError:
78
+ features = []
79
+ else:
80
+ features = features_raw
81
+
82
+ # get model type if its not a custom model
83
+ project = None
84
+ if deployment.model.get("project_id") is None:
85
+ model_type = "custom"
86
+ target = ""
87
+ target_type = ""
88
+ else:
89
+ project = client.Project.get(deployment.model["project_id"])
90
+ model = client.Model.get(project=project, model_id=deployment.model["id"])
91
+ model_type = model.model_type
92
+ target = project.target
93
+ target_type = project.target_type
94
+
95
+ # Add model metadata
96
+ result = {
97
+ "deployment_id": deployment_id,
98
+ "model_type": model_type,
99
+ "target": target,
100
+ "target_type": target_type,
101
+ "features": sorted(features, key=lambda x: (x.get("importance") or 0), reverse=True),
102
+ "total_features": len(features),
103
+ }
104
+
105
+ # Add time series specific information if applicable
106
+ if project and hasattr(project, "datetime_partitioning"):
107
+ partition = project.datetime_partitioning
108
+ result["time_series_config"] = {
109
+ "datetime_column": partition.datetime_partition_column,
110
+ "forecast_window_start": partition.forecast_window_start,
111
+ "forecast_window_end": partition.forecast_window_end,
112
+ "series_id_columns": partition.multiseries_id_columns or [],
113
+ }
114
+
115
+ return json.dumps(result, indent=2)
116
+
117
+
118
@dr_mcp_tool(tags={"deployment", "template", "data"})
async def generate_prediction_data_template(deployment_id: str, n_rows: int = 1) -> str:
    """
    Generate a template CSV with the correct structure for making predictions.

    This creates a template with:
    - All required feature columns in the correct order
    - Sample values based on feature types
    - Comments explaining each feature
    - When using this tool, always consider feature importance. For features with high importance,
      try to infer or ask for a reasonable value, using frequent values or domain knowledge if
      available. For less important features, you may leave them blank.
    - If frequent values are available for a feature, they will be used as sample values;
      otherwise, blank fields will be used.
    Please note that using frequent values in your predictions data can influence the prediction;
    think of it as sending in the average value for the feature. If you don't want this effect on
    your predictions, leave the field blank in your predictions dataset.

    Args:
        deployment_id: The ID of the DataRobot deployment
        n_rows: Number of template rows to generate (default 1)

    Returns
    -------
    CSV template string with sample data ready for predictions
    """
    # Get feature information; the helper returns an "Error..." string (not
    # JSON) on failure, which we surface directly.
    features_json = await get_deployment_features(deployment_id)
    if not features_json or features_json.strip().startswith("Error"):
        return f"Error: {features_json}"
    features_info = json.loads(features_json)

    # Build one column of sample values per feature.
    template_data = {}
    for feature in features_info["features"]:
        feature_name = feature["name"]
        feature_type = feature["feature_type"].lower()  # Normalize to lowercase

        # Prefer the most frequent observed value when available.
        frequent_values = feature.get("frequent_values")
        if frequent_values and isinstance(frequent_values, list):
            template_data[feature_name] = [frequent_values[0]] * n_rows
        elif feature_type in ("numeric", "date"):
            # Numeric and date features default to missing (blank) cells.
            template_data[feature_name] = [None] * n_rows
        else:
            # Categorical, summarized categorical, text, and anything else
            # default to empty strings.
            template_data[feature_name] = [""] * n_rows

    # Handle time series specific columns.
    if "time_series_config" in features_info:
        ts_config = features_info["time_series_config"]

        # Ensure the datetime column exists, filling consecutive days.
        if ts_config["datetime_column"] not in template_data:
            base_date = datetime.now()
            template_data[ts_config["datetime_column"]] = [
                base_date + timedelta(days=i) for i in range(n_rows)
            ]

        # Add series ID columns if multiseries.
        for series_col in ts_config["series_id_columns"]:
            if series_col not in template_data:
                template_data[series_col] = ["series_A"] * n_rows

    df = pd.DataFrame(template_data)

    # Prepend metadata comments describing the deployment and its model.
    result = f"# Prediction Data Template for Deployment: {deployment_id}\n"
    result += f"# Model Type: {features_info['model_type']}\n"
    result += f"# Target: {features_info['target']} (Type: {features_info['target_type']})\n"

    if "time_series_config" in features_info:
        ts = features_info["time_series_config"]
        result += f"# Time Series: datetime_column={ts['datetime_column']}, "
        result += f"forecast_window=[{ts['forecast_window_start']}, {ts['forecast_window_end']}]\n"
        if ts["series_id_columns"]:
            result += f"# Multiseries ID Columns: {', '.join(ts['series_id_columns'])}\n"

    result += f"# Total Features: {features_info['total_features']}\n"
    result += df.to_csv(index=False)

    return result
222
+
223
+
224
@dr_mcp_tool(tags={"deployment", "validation", "data"})
async def validate_prediction_data(
    deployment_id: str,
    file_path: str | None = None,
    csv_string: str | None = None,
) -> str:
    """
    Validate if a CSV file is suitable for making predictions with a deployment.

    Checks:
    - All required features are present
    - Feature types match expectations
    - Missing values (null, empty string, or blank fields) are allowed and will not cause errors
    - No critical issues that would prevent predictions

    Args:
        deployment_id: The ID of the DataRobot deployment
        file_path: Path to the CSV file to validate (optional if csv_string is provided)
        csv_string: CSV data as a string (optional, used if file_path is not provided)

    Returns
    -------
    Validation report including any errors, warnings, and suggestions
    """
    # Load the data; report parse/read failures as a JSON error payload
    # (consistent with the no-input error path) instead of raising.
    try:
        if csv_string is not None:
            df = pd.read_csv(io.StringIO(csv_string))
        elif file_path is not None:
            df = pd.read_csv(file_path)
        else:
            return json.dumps(
                {
                    "status": "error",
                    "error": "Must provide either file_path or csv_string.",
                },
                indent=2,
            )
    except Exception as e:
        return json.dumps(
            {
                "status": "error",
                "error": f"Failed to read CSV data: {type(e).__name__}: {e}",
            },
            indent=2,
        )

    # Get deployment features
    features_json = await get_deployment_features(deployment_id)
    features_info = json.loads(features_json)

    validation_report: dict[str, Any] = {
        "status": "valid",
        "errors": [],
        "warnings": [],
        "info": [],
    }

    required_features = features_info["features"]
    data_columns = set(df.columns)

    # Threshold for considering a feature as important
    importance_threshold = 0.1

    for feature in required_features:
        feature_name = feature["name"] if "name" in feature else feature["feature_name"]
        # Coerce missing or explicit-None importance to 0 so the comparison
        # and the ":.2f" formatting below can never raise.
        importance = feature.get("importance") or 0

        # Check if feature exists
        if feature_name not in data_columns:
            if importance > importance_threshold:
                validation_report["warnings"].append(
                    f"Missing important feature: {feature_name} (importance: "
                    f"{importance:.2f})"
                )
            else:
                validation_report["warnings"].append(
                    f"Missing feature column: {feature_name} (column will be treated as missing "
                    f"values)"
                )
            continue

        # Check for missing values (allowed)
        if df[feature_name].isnull().all() or (df[feature_name] == "").all():
            validation_report["info"].append(
                f"Feature {feature_name} is entirely missing or empty (this is allowed)"
            )
            continue

        # Check data type compatibility (only if not all missing)
        col_dtype = str(df[feature_name].dtype)
        if feature["feature_type"] == "numeric" and not pd.api.types.is_numeric_dtype(
            df[feature_name].dropna()
        ):
            validation_report["warnings"].append(
                f"Feature {feature_name} should be numeric but is {col_dtype}"
            )

    # Check for extra columns
    expected_features = {
        f["name"] if "name" in f else f["feature_name"] for f in features_info["features"]
    }
    extra_columns = data_columns - expected_features
    if extra_columns:
        validation_report["info"].append(
            f"Extra columns found (will be ignored): {', '.join(extra_columns)}"
        )

    # Time series specific validation
    if "time_series_config" in features_info:
        ts_config = features_info["time_series_config"]

        # Check datetime column
        if ts_config["datetime_column"] not in data_columns:
            validation_report["errors"].append(
                f"Missing required datetime column: {ts_config['datetime_column']}"
            )
            validation_report["status"] = "invalid"
        elif (
            not df[ts_config["datetime_column"]].isnull().all()
            and not (df[ts_config["datetime_column"]] == "").all()
        ):
            try:
                pd.to_datetime(df[ts_config["datetime_column"]])
            # TypeError covers mixed/unparseable object columns; ValueError
            # covers strings that are not valid dates.
            except (ValueError, TypeError):
                validation_report["errors"].append(
                    f"Datetime column {ts_config['datetime_column']} cannot be parsed as dates"
                )
                validation_report["status"] = "invalid"

        # Check series ID columns for multiseries
        for series_col in ts_config["series_id_columns"]:
            if series_col not in data_columns:
                validation_report["errors"].append(
                    f"Missing required series ID column: {series_col}"
                )
                validation_report["status"] = "invalid"

    # Add summary
    validation_report["summary"] = {
        "file_path": file_path,
        "rows": len(df),
        "columns": len(df.columns),
        "deployment_id": deployment_id,
        "model_type": features_info["model_type"],
    }

    return json.dumps(validation_report, indent=2)
363
+
364
+
365
@dr_mcp_tool(tags={"deployment", "features", "info"})
async def get_deployment_features(deployment_id: str) -> str:
    """
    Retrieve only the features list for a deployment, as JSON string.

    Args:
        deployment_id: The ID of the DataRobot deployment

    Returns:
        JSON string containing only the features list and time series config if present.
    """
    info_json = await get_deployment_info(deployment_id)
    if not info_json.strip().startswith("{"):
        # Upstream returned an error message instead of JSON.
        return json.dumps({"features": [], "total_features": 0, "error": info_json}, indent=2)

    info = json.loads(info_json)
    # Always include the features list and its count.
    result = {
        "features": info.get("features", []),
        "total_features": info.get("total_features", 0),
    }
    # Copy through the optional keys only when the upstream info has them.
    for key in ("time_series_config", "model_type", "target", "target_type"):
        if key in info:
            result[key] = info[key]
    return json.dumps(result, indent=2)
@@ -0,0 +1,148 @@
1
+ # Copyright 2025 DataRobot, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import json
16
+ import logging
17
+ from typing import Any
18
+
19
+ from datarobot.models.model import Model
20
+
21
+ from datarobot_genai.drmcp.core.clients import get_sdk_client
22
+ from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
def model_to_dict(model: Any) -> dict[str, Any]:
    """Convert a DataRobot Model object to a dictionary."""
    try:
        summary = {
            "id": model.id,
            "model_type": model.model_type,
            "metrics": model.metrics,
        }
    except AttributeError as err:
        logger.warning(f"Failed to access some model attributes: {err}")
        # Return minimal information if some attributes are not accessible
        summary = {
            "id": getattr(model, "id", "unknown"),
            "model_type": getattr(model, "model_type", "unknown"),
        }
    return summary
42
+
43
+
44
class ModelEncoder(json.JSONEncoder):
    """Custom JSON encoder for DataRobot Model objects."""

    def default(self, obj: Any) -> Any:
        # Delegate anything that is not a Model to the base encoder,
        # which raises TypeError for unserializable objects.
        if not isinstance(obj, Model):
            return super().default(obj)
        return model_to_dict(obj)
51
+
52
+
53
@dr_mcp_tool(tags={"model", "management", "info"})
async def get_best_model(project_id: str, metric: str | None = None) -> str:
    """
    Get the best model for a DataRobot project, optionally by a specific metric.

    Args:
        project_id: The ID of the DataRobot project.
        metric: (Optional) The metric to use for best model selection (e.g., 'AUC', 'LogLoss').

    Returns
    -------
    A formatted string describing the best model.

    Raises
    ------
    Exception: If project not found or no models exist in the project.
    """
    client = get_sdk_client()
    project = client.Project.get(project_id)
    if not project:
        logger.error(f"Project with ID {project_id} not found")
        raise Exception(f"Project with ID {project_id} not found.")

    leaderboard = project.get_models()
    if not leaderboard:
        logger.info(f"No models found for project {project_id}")
        raise Exception("No models found for this project.")

    if metric:
        # Higher is better for these metrics; everything else (LogLoss,
        # RMSE, ...) is treated as lower-is-better.
        reverse_sort = metric.upper() in [
            "AUC",
            "ACCURACY",
            "F1",
            "PRECISION",
            "RECALL",
        ]
        # Models with no score for the metric — missing metrics dict, missing
        # key, or an explicit None validation value — must sort last in either
        # direction. The previous key could return None (when "validation"
        # was present but null), which made sorted() raise TypeError.
        missing_score = float("-inf") if reverse_sort else float("inf")

        def sort_key(m: Any) -> float:
            score = (m.metrics or {}).get(metric, {}).get("validation")
            return missing_score if score is None else score

        leaderboard = sorted(leaderboard, key=sort_key, reverse=reverse_sort)
        logger.info(f"Sorted models by metric: {metric}")

    best_model = leaderboard[0]
    logger.info(f"Found best model {best_model.id} for project {project_id}")

    # Format the response as a human-readable string
    metric_info = ""
    if metric and best_model.metrics and metric in best_model.metrics:
        metric_value = best_model.metrics[metric].get("validation")
        if metric_value is not None:
            metric_info = f" with {metric}: {metric_value:.2f}"

    return f"Best model: {best_model.model_type}{metric_info}"
109
+
110
+
111
@dr_mcp_tool(tags={"model", "prediction", "scoring"})
async def score_dataset_with_model(project_id: str, model_id: str, dataset_url: str) -> str:
    """
    Score a dataset using a specific DataRobot model.

    Args:
        project_id: The ID of the DataRobot project.
        model_id: The ID of the DataRobot model to use for scoring.
        dataset_url: The URL to the dataset to score (must be accessible to DataRobot).

    Returns
    -------
    A string summary of the scoring job or a meaningful error message.
    """
    sdk = get_sdk_client()
    parent_project = sdk.Project.get(project_id)
    scoring_model = sdk.Model.get(parent_project, model_id)
    # NOTE(review): relies on Model.score accepting a dataset URL — confirm
    # against the installed datarobot SDK version.
    scoring_job = scoring_model.score(dataset_url)
    logger.info(f"Started scoring job {scoring_job.id} for model {model_id}")
    return f"Scoring job started: {scoring_job.id}"
131
+
132
+
133
@dr_mcp_tool(tags={"model", "management", "list"})
async def list_models(project_id: str) -> str:
    """
    List all models in a project.

    Args:
        project_id: The ID of the DataRobot project.

    Returns
    -------
    A string summary of the models in the project.
    """
    sdk = get_sdk_client()
    target_project = sdk.Project.get(project_id)
    leaderboard = target_project.get_models()
    # ModelEncoder turns each Model object into a plain serializable dict.
    return json.dumps(leaderboard, indent=2, cls=ModelEncoder)