datarobot-genai 0.2.21__py3-none-any.whl → 0.2.24__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datarobot_genai/drmcp/tools/clients/confluence.py +93 -1
- datarobot_genai/drmcp/tools/clients/gdrive.py +370 -8
- datarobot_genai/drmcp/tools/confluence/tools.py +67 -0
- datarobot_genai/drmcp/tools/gdrive/tools.py +94 -5
- datarobot_genai/drmcp/tools/predictive/training.py +160 -151
- {datarobot_genai-0.2.21.dist-info → datarobot_genai-0.2.24.dist-info}/METADATA +1 -1
- {datarobot_genai-0.2.21.dist-info → datarobot_genai-0.2.24.dist-info}/RECORD +11 -11
- {datarobot_genai-0.2.21.dist-info → datarobot_genai-0.2.24.dist-info}/WHEEL +0 -0
- {datarobot_genai-0.2.21.dist-info → datarobot_genai-0.2.24.dist-info}/entry_points.txt +0 -0
- {datarobot_genai-0.2.21.dist-info → datarobot_genai-0.2.24.dist-info}/licenses/AUTHORS +0 -0
- {datarobot_genai-0.2.21.dist-info → datarobot_genai-0.2.24.dist-info}/licenses/LICENSE +0 -0
datarobot_genai/drmcp/tools/clients/confluence.py

@@ -50,6 +50,7 @@ class ConfluencePage(BaseModel):
     space_id: str = Field(..., description="Space ID where the page resides")
     space_key: str | None = Field(None, description="Space key (if available)")
     body: str = Field(..., description="Page content in storage format (HTML-like)")
+    version: int = Field(..., description="Current version number of the page")
 
     def as_flat_dict(self) -> dict[str, Any]:
         """Return a flat dictionary representation of the page."""
@@ -59,6 +60,7 @@ class ConfluencePage(BaseModel):
             "space_id": self.space_id,
             "space_key": self.space_key,
             "body": self.body,
+            "version": self.version,
         }
 
 
@@ -111,7 +113,7 @@ class ConfluenceClient:
     At the moment of creating this client, official Confluence SDK is not supporting async.
     """
 
-    EXPAND_FIELDS = "body.storage,space"
+    EXPAND_FIELDS = "body.storage,space,version"
 
     def __init__(self, access_token: str) -> None:
         """
@@ -164,6 +166,8 @@ class ConfluenceClient:
         space = data.get("space", {})
         space_key = space.get("key") if isinstance(space, dict) else None
         space_id = space.get("id", "") if isinstance(space, dict) else data.get("spaceId", "")
+        version_data = data.get("version", {})
+        version_number = version_data.get("number", 1) if isinstance(version_data, dict) else 1
 
         return ConfluencePage(
             page_id=str(data.get("id", "")),
@@ -171,6 +175,7 @@ class ConfluenceClient:
             space_id=str(space_id),
             space_key=space_key,
             body=body_content,
+            version=version_number,
         )
 
     async def get_page_by_id(self, page_id: str) -> ConfluencePage:
@@ -339,6 +344,93 @@ class ConfluenceClient:
 
         return self._parse_response(response.json())
 
+    async def update_page(
+        self,
+        page_id: str,
+        new_body_content: str,
+        version_number: int,
+    ) -> ConfluencePage:
+        """
+        Update the content of an existing Confluence page.
+
+        Args:
+            page_id: The ID of the page to update
+            new_body_content: The new content in Confluence Storage Format (XML) or raw text
+            version_number: The current version number of the page (for optimistic locking).
+                The update will increment this by 1.
+
+        Returns
+        -------
+        ConfluencePage with the updated page data including the new version number
+
+        Raises
+        ------
+        ConfluenceError: If page not found (404), version conflict (409),
+            permission denied (403), invalid content (400),
+            or rate limited (429)
+        httpx.HTTPStatusError: If the API request fails with unexpected status
+        """
+        cloud_id = await self._get_cloud_id()
+        url = f"{ATLASSIAN_API_BASE}/ex/confluence/{cloud_id}/wiki/rest/api/content/{page_id}"
+
+        try:
+            current_page = await self.get_page_by_id(page_id)
+            title_to_use = current_page.title
+        except ConfluenceError as e:
+            if e.status_code == 404:
+                raise ConfluenceError(
+                    f"Page with ID '{page_id}' not found: cannot fetch existing title",
+                    status_code=404,
+                )
+            raise
+
+        payload: dict[str, Any] = {
+            "type": "page",
+            "title": title_to_use,
+            "body": {
+                "storage": {
+                    "value": new_body_content,
+                    "representation": "storage",
+                }
+            },
+            "version": {
+                "number": version_number + 1,
+            },
+        }
+
+        response = await self._client.put(url, json=payload)
+
+        if response.status_code == HTTPStatus.NOT_FOUND:
+            error_msg = self._extract_error_message(response)
+            raise ConfluenceError(
+                f"Page with ID '{page_id}' not found: {error_msg}",
+                status_code=404,
+            )
+
+        if response.status_code == HTTPStatus.CONFLICT:
+            error_msg = self._extract_error_message(response)
+            raise ConfluenceError(
+                f"Version conflict: the page has been modified since version {version_number}. "
+                f"Please fetch the latest version and retry. Details: {error_msg}",
+                status_code=409,
+            )
+
+        if response.status_code == HTTPStatus.FORBIDDEN:
+            raise ConfluenceError(
+                f"Permission denied: you don't have access to update page '{page_id}'",
+                status_code=403,
+            )
+
+        if response.status_code == HTTPStatus.BAD_REQUEST:
+            error_msg = self._extract_error_message(response)
+            raise ConfluenceError(f"Invalid request: {error_msg}", status_code=400)
+
+        if response.status_code == HTTPStatus.TOO_MANY_REQUESTS:
+            raise ConfluenceError("Rate limit exceeded. Please try again later.", status_code=429)
+
+        response.raise_for_status()
+        return self._parse_response(response.json())
+
     def _parse_comment_response(self, data: dict, page_id: str) -> ConfluenceComment:
         """Parse API response into ConfluenceComment."""
         body_content = ""
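For context: a minimal sketch of driving the new update_page flow with optimistic locking, assuming an already-authenticated ConfluenceClient. The retry loop is illustrative caller code, not part of the package.

    # Sketch only: fetch the page, update with its version, retry once on 409.
    async def update_with_retry(access_token: str, page_id: str, body: str) -> None:
        async with ConfluenceClient(access_token) as client:
            page = await client.get_page_by_id(page_id)  # now carries .version
            try:
                updated = await client.update_page(
                    page_id=page_id,
                    new_body_content=body,
                    version_number=page.version,  # server stores version + 1
                )
            except ConfluenceError as e:
                if e.status_code == 409:  # page changed under us: refetch, retry once
                    page = await client.get_page_by_id(page_id)
                    updated = await client.update_page(
                        page_id=page_id,
                        new_body_content=body,
                        version_number=page.version,
                    )
                else:
                    raise
            print(f"Now at version {updated.version}")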
datarobot_genai/drmcp/tools/clients/gdrive.py

@@ -14,6 +14,7 @@
 
 """Google Drive API Client and utilities for OAuth."""
 
+import io
 import logging
 from typing import Annotated
 from typing import Any
@@ -24,16 +25,37 @@ from fastmcp.exceptions import ToolError
 from pydantic import BaseModel
 from pydantic import ConfigDict
 from pydantic import Field
+from pypdf import PdfReader
 
 from datarobot_genai.drmcp.core.auth import get_access_token
 
 logger = logging.getLogger(__name__)
 
-
+SUPPORTED_FIELDS = {"id", "name", "size", "mimeType", "webViewLink", "createdTime", "modifiedTime"}
+SUPPORTED_FIELDS_STR = ",".join(SUPPORTED_FIELDS)
+DEFAULT_FIELDS = f"nextPageToken,files({SUPPORTED_FIELDS_STR})"
+GOOGLE_DRIVE_FOLDER_MIME = "application/vnd.google-apps.folder"
 DEFAULT_ORDER = "modifiedTime desc"
 MAX_PAGE_SIZE = 100
 LIMIT = 500
 
+GOOGLE_WORKSPACE_EXPORT_MIMES: dict[str, str] = {
+    "application/vnd.google-apps.document": "text/markdown",
+    "application/vnd.google-apps.spreadsheet": "text/csv",
+    "application/vnd.google-apps.presentation": "text/plain",
+}
+
+BINARY_MIME_PREFIXES = (
+    "image/",
+    "audio/",
+    "video/",
+    "application/zip",
+    "application/octet-stream",
+    "application/vnd.google-apps.drawing",
+)
+
+PDF_MIME_TYPE = "application/pdf"
+
 
 async def get_gdrive_access_token() -> str | ToolError:
     """
@@ -113,6 +135,35 @@ class PaginatedResult(BaseModel):
     next_page_token: str | None = None
 
 
+class GoogleDriveFileContent(BaseModel):
+    """Content retrieved from a Google Drive file."""
+
+    id: str
+    name: str
+    mime_type: str
+    content: str
+    original_mime_type: str
+    was_exported: bool = False
+    size: int | None = None
+    web_view_link: str | None = None
+
+    def as_flat_dict(self) -> dict[str, Any]:
+        """Return a flat dictionary representation of the file content."""
+        result: dict[str, Any] = {
+            "id": self.id,
+            "name": self.name,
+            "mimeType": self.mime_type,
+            "content": self.content,
+            "originalMimeType": self.original_mime_type,
+            "wasExported": self.was_exported,
+        }
+        if self.size is not None:
+            result["size"] = self.size
+        if self.web_view_link is not None:
+            result["webViewLink"] = self.web_view_link
+        return result
+
+
 class GoogleDriveClient:
     """Client for interacting with Google Drive API."""
 
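For context: an illustrative as_flat_dict payload (all values invented); size and webViewLink appear only when present.

    # Example shape only; keys are camelCase aliases built by as_flat_dict.
    {
        "id": "1ABC123def456",
        "name": "Q3 report",
        "mimeType": "text/markdown",
        "content": "# Q3 report\n...",
        "originalMimeType": "application/vnd.google-apps.document",
        "wasExported": True,
        "size": 20480,
        "webViewLink": "https://docs.google.com/...",
    }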
@@ -129,6 +180,8 @@ class GoogleDriveClient:
         limit: int,
         page_token: str | None = None,
         query: str | None = None,
+        folder_id: str | None = None,
+        recursive: bool = False,
     ) -> PaginatedResult:
         """
         List files from Google Drive.
@@ -143,6 +196,10 @@ class GoogleDriveClient:
                 If not provided it'll list all authorized user files.
                 If the query doesn't contain operators (contains, =, etc.), it will be treated as
                 a name search: "name contains '{query}'".
+            folder_id: The ID of a specific folder to list or search within.
+                If omitted, searches the entire Drive.
+            recursive: If True, searches all subfolders.
+                If False and folder_id is provided, only lists immediate children.
 
         Returns
         -------
@@ -159,26 +216,85 @@ class GoogleDriveClient:
 
         page_size = min(page_size, MAX_PAGE_SIZE)
         limit = min(limit, LIMIT)
-
+        formatted_query = self._build_query(query, folder_id)
+
+        if not recursive or not folder_id:
+            files, next_token = await self._fetch_paginated(
+                page_size=page_size,
+                limit=limit,
+                page_token=page_token,
+                query=formatted_query,
+            )
+            return PaginatedResult(files=files, next_page_token=next_token)
+
+        files = await self._fetch_recursive(
+            root_folder_id=folder_id,
+            base_query=query,
+            page_size=page_size,
+            limit=limit,
+        )
 
-
+        return PaginatedResult(files=files, next_page_token=page_token)
 
+    async def _fetch_paginated(
+        self,
+        page_size: int,
+        limit: int,
+        page_token: str | None,
+        query: str | None,
+    ) -> tuple[list[GoogleDriveFile], str | None]:
+        fetched = 0
         files: list[GoogleDriveFile] = []
+        next_page_token = page_token
 
         while fetched < limit:
             data = await self._list_files(
                 page_size=page_size,
-                page_token=
-                query=
+                page_token=next_page_token,
+                query=query,
             )
+
             files.extend(data.files)
             fetched += len(data.files)
-
+            next_page_token = data.next_page_token
 
-            if not
+            if not next_page_token:
                 break
 
-        return
+        return files, next_page_token
+
+    async def _fetch_recursive(
+        self,
+        root_folder_id: str,
+        base_query: str | None,
+        page_size: int,
+        limit: int,
+    ) -> list[GoogleDriveFile]:
+        collected: list[GoogleDriveFile] = []
+        folders_to_visit: list[str] = [root_folder_id]
+
+        while folders_to_visit and len(collected) < limit:
+            current_folder = folders_to_visit.pop(0)
+
+            query = self._build_query(base_query, current_folder)
+
+            files, _ = await self._fetch_paginated(
+                page_size=page_size,
+                limit=limit - len(collected),
+                page_token=None,
+                query=query,
+            )
+
+            for file in files:
+                collected.append(file)
+
+                if file.mime_type == GOOGLE_DRIVE_FOLDER_MIME:
+                    folders_to_visit.append(file.id)
+
+                if len(collected) >= limit:
+                    break
+
+        return collected
 
     async def _list_files(
         self,
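For context: the recursive path walks folders breadth-first via a FIFO queue, and folders themselves count toward the limit. A usage sketch with a made-up folder ID:

    # Hypothetical folder ID; recursive=True walks subfolders breadth-first.
    result = await client.list_files(
        page_size=100,
        limit=500,
        folder_id="0B_example_folder_id",
        recursive=True,
    )
    for f in result.files:
        print(f.id, f.mime_type)
    # In recursive mode the returned next_page_token is just the token passed
    # in; results are bounded by `limit` rather than by paging.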
@@ -207,6 +323,45 @@ class GoogleDriveClient:
         next_page_token = data.get("nextPageToken")
         return PaginatedResult(files=files, next_page_token=next_page_token)
 
+    def _build_query(self, query: str | None, folder_id: str | None) -> str | None:
+        """Build Google Drive API query.
+
+        Args:
+            query: Optional search query string (e.g., "name contains 'report'").
+                If the query doesn't contain operators (contains, =, etc.), it will be treated as
+                a name search: "name contains '{query}'".
+            folder_id: Optional folder id.
+                If provided it'll narrow query to search/list only in given folder.
+
+        Returns
+        -------
+        Correctly builded query (if provided)
+        """
+        base_query = self._get_formatted_query(query)
+
+        if base_query:
+            # Case #1 -- Some query provided and contains in parents (gdrive "folder id")
+            if "in parents" in base_query and folder_id:
+                logger.debug(
+                    "In-parents (parent folder) already used in query. "
+                    "Omiting folder_id argument. "
+                    f"Query: {base_query} | FolderId: {folder_id}"
+                )
+                return base_query
+            # Case #2 -- Some query provided without "in parents" and folder id provided.
+            elif folder_id:
+                return f"{base_query} and '{folder_id}' in parents"
+            # Case #3 -- Query provided without "in parents" and no folder id.
+            else:
+                return base_query
+
+        # Case #4 -- Base query is null but folder id provided
+        if folder_id:
+            return f"'{folder_id}' in parents"
+
+        # Case #5 -- Neither query not folder provided
+        return None
+
     @staticmethod
     def _get_formatted_query(query: str | None) -> str | None:
         """Get formatted Google Drive API query.
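For context: the five _build_query cases reduce to the following illustrative inputs and outputs (folder IDs invented):

    client._build_query("report", None)
    # -> "name contains 'report'"
    client._build_query("report", "FOLDER1")
    # -> "name contains 'report' and 'FOLDER1' in parents"
    client._build_query("'X' in parents", "FOLDER1")
    # -> "'X' in parents"  (folder_id ignored, logged at debug level)
    client._build_query(None, "FOLDER1")
    # -> "'FOLDER1' in parents"
    client._build_query(None, None)
    # -> None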
@@ -237,6 +392,213 @@ class GoogleDriveClient:
         logger.debug(f"Auto-formatted query '{query}' to '{formatted_query}'")
         return formatted_query
 
+    @staticmethod
+    def _is_binary_mime_type(mime_type: str) -> bool:
+        """Check if MIME type indicates binary content that's not useful for LLM consumption.
+
+        Args:
+            mime_type: The MIME type to check.
+
+        Returns
+        -------
+        True if the MIME type is considered binary, False otherwise.
+        """
+        return any(mime_type.startswith(prefix) for prefix in BINARY_MIME_PREFIXES)
+
+    async def get_file_metadata(self, file_id: str) -> GoogleDriveFile:
+        """Get file metadata from Google Drive.
+
+        Args:
+            file_id: The ID of the file to get metadata for.
+
+        Returns
+        -------
+        GoogleDriveFile with file metadata.
+
+        Raises
+        ------
+        GoogleDriveError: If the file is not found or access is denied.
+        """
+        params = {"fields": SUPPORTED_FIELDS_STR}
+        response = await self._client.get(f"/{file_id}", params=params)
+
+        if response.status_code == 404:
+            raise GoogleDriveError(f"File with ID '{file_id}' not found.")
+        if response.status_code == 403:
+            raise GoogleDriveError(f"Permission denied: you don't have access to file '{file_id}'.")
+        if response.status_code == 429:
+            raise GoogleDriveError("Rate limit exceeded. Please try again later.")
+
+        response.raise_for_status()
+        return GoogleDriveFile.from_api_response(response.json())
+
+    async def _export_workspace_file(self, file_id: str, export_mime_type: str) -> str:
+        """Export a Google Workspace file to the specified format.
+
+        Args:
+            file_id: The ID of the Google Workspace file.
+            export_mime_type: The MIME type to export to (e.g., 'text/markdown').
+
+        Returns
+        -------
+        The exported content as a string.
+
+        Raises
+        ------
+        GoogleDriveError: If export fails.
+        """
+        response = await self._client.get(
+            f"/{file_id}/export",
+            params={"mimeType": export_mime_type},
+        )
+
+        if response.status_code == 404:
+            raise GoogleDriveError(f"File with ID '{file_id}' not found.")
+        if response.status_code == 403:
+            raise GoogleDriveError(
+                f"Permission denied: you don't have access to export file '{file_id}'."
+            )
+        if response.status_code == 400:
+            raise GoogleDriveError(
+                f"Cannot export file '{file_id}' to format '{export_mime_type}'. "
+                "The file may not support this export format."
+            )
+        if response.status_code == 429:
+            raise GoogleDriveError("Rate limit exceeded. Please try again later.")
+
+        response.raise_for_status()
+        return response.text
+
+    async def _download_file(self, file_id: str) -> str:
+        """Download a regular file's content from Google Drive as text."""
+        content = await self._download_file_bytes(file_id)
+        return content.decode("utf-8")
+
+    async def _download_file_bytes(self, file_id: str) -> bytes:
+        """Download a file's content as bytes from Google Drive.
+
+        Args:
+            file_id: The ID of the file to download.
+
+        Returns
+        -------
+        The file content as bytes.
+
+        Raises
+        ------
+        GoogleDriveError: If download fails.
+        """
+        response = await self._client.get(
+            f"/{file_id}",
+            params={"alt": "media"},
+        )
+
+        if response.status_code == 404:
+            raise GoogleDriveError(f"File with ID '{file_id}' not found.")
+        if response.status_code == 403:
+            raise GoogleDriveError(
+                f"Permission denied: you don't have access to download file '{file_id}'."
+            )
+        if response.status_code == 429:
+            raise GoogleDriveError("Rate limit exceeded. Please try again later.")
+
+        response.raise_for_status()
+        return response.content
+
+    def _extract_text_from_pdf(self, pdf_bytes: bytes) -> str:
+        """Extract text from PDF bytes using pypdf.
+
+        Args:
+            pdf_bytes: The PDF file content as bytes.
+
+        Returns
+        -------
+        Extracted text from the PDF.
+
+        Raises
+        ------
+        GoogleDriveError: If PDF text extraction fails.
+        """
+        try:
+            reader = PdfReader(io.BytesIO(pdf_bytes))
+            text_parts = []
+            for page in reader.pages:
+                page_text = page.extract_text()
+                if page_text:
+                    text_parts.append(page_text)
+            return "\n\n".join(text_parts)
+        except Exception as e:
+            raise GoogleDriveError(f"Failed to extract text from PDF: {e}")
+
+    async def read_file_content(
+        self, file_id: str, target_format: str | None = None
+    ) -> GoogleDriveFileContent:
+        """Read the content of a file from Google Drive.
+
+        Google Workspace files (Docs, Sheets, Slides) are automatically exported to
+        LLM-readable formats:
+        - Google Docs -> Markdown (text/markdown)
+        - Google Sheets -> CSV (text/csv)
+        - Google Slides -> Plain text (text/plain)
+        - PDF files -> Extracted text (text/plain)
+
+        Regular text files are downloaded directly.
+        Binary files (images, videos, etc.) will raise an error.
+
+        Args:
+            file_id: The ID of the file to read.
+            target_format: Optional MIME type to export Google Workspace files to.
+                If not specified, uses sensible defaults. Has no effect on non-Workspace files.
+
+        Returns
+        -------
+        GoogleDriveFileContent with the file content and metadata.
+
+        Raises
+        ------
+        GoogleDriveError: If the file cannot be read (not found, permission denied,
+            binary file, etc.).
+        """
+        file_metadata = await self.get_file_metadata(file_id)
+        original_mime_type = file_metadata.mime_type
+
+        if self._is_binary_mime_type(original_mime_type):
+            raise GoogleDriveError(
+                f"Binary files are not supported for reading. "
+                f"File '{file_metadata.name}' has MIME type '{original_mime_type}'."
+            )
+
+        if original_mime_type == GOOGLE_DRIVE_FOLDER_MIME:
+            raise GoogleDriveError(
+                f"Cannot read content of a folder. '{file_metadata.name}' is a folder, not a file."
+            )
+
+        was_exported = False
+        if original_mime_type in GOOGLE_WORKSPACE_EXPORT_MIMES:
+            export_mime = target_format or GOOGLE_WORKSPACE_EXPORT_MIMES[original_mime_type]
+            content = await self._export_workspace_file(file_id, export_mime)
+            result_mime_type = export_mime
+            was_exported = True
+        elif original_mime_type == PDF_MIME_TYPE:
+            pdf_bytes = await self._download_file_bytes(file_id)
+            content = self._extract_text_from_pdf(pdf_bytes)
+            result_mime_type = "text/plain"
+            was_exported = True
+        else:
+            content = await self._download_file(file_id)
+            result_mime_type = original_mime_type
+
+        return GoogleDriveFileContent(
+            id=file_metadata.id,
+            name=file_metadata.name,
+            mime_type=result_mime_type,
+            content=content,
+            original_mime_type=original_mime_type,
+            was_exported=was_exported,
+            size=file_metadata.size,
+            web_view_link=file_metadata.web_view_link,
+        )
+
     async def __aenter__(self) -> "GoogleDriveClient":
         """Async context manager entry."""
         return self
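For context: a sketch of the new read path end to end; the file ID is hypothetical. Workspace files are exported, PDFs are text-extracted, and other non-binary files are downloaded as UTF-8 text.

    async with GoogleDriveClient(access_token) as client:
        doc = await client.read_file_content("1ABC123def456")  # hypothetical ID
        if doc.was_exported:
            print(f"{doc.original_mime_type} -> {doc.mime_type}")
        print(doc.content[:200])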
datarobot_genai/drmcp/tools/confluence/tools.py

@@ -252,3 +252,70 @@ async def confluence_search(
         content=f"Successfully executed CQL query and retrieved {n} result(s).",
         structured_content={"data": data, "count": n},
     )
+
+
+@dr_mcp_tool(tags={"confluence", "write", "update", "page"})
+async def confluence_update_page(
+    *,
+    page_id: Annotated[str, "The ID of the Confluence page to update."],
+    new_body_content: Annotated[
+        str,
+        "The full updated content of the page in Confluence Storage Format (XML) or raw text.",
+    ],
+    version_number: Annotated[
+        int,
+        "The current version number of the page, required to prevent update conflicts. "
+        "Get this from the confluence_get_page tool.",
+    ],
+) -> ToolResult:
+    """Update the content of an existing Confluence page.
+
+    Requires the current version number to ensure atomic updates.
+    Use this tool to update the body content of an existing Confluence page.
+    The version_number is required for optimistic locking - it prevents overwriting
+    changes made by others since you last fetched the page.
+
+    Usage:
+        page_id="856391684", new_body_content="<p>New content</p>", version_number=5
+
+    Important: Always fetch the page first using confluence_get_page to get the
+    current version number before updating.
+    """
+    if not page_id:
+        raise ToolError("Argument validation error: 'page_id' cannot be empty.")
+
+    if not new_body_content:
+        raise ToolError("Argument validation error: 'new_body_content' cannot be empty.")
+
+    if version_number < 1:
+        raise ToolError(
+            "Argument validation error: 'version_number' must be a positive integer (>= 1)."
+        )
+
+    access_token = await get_atlassian_access_token()
+    if isinstance(access_token, ToolError):
+        raise access_token
+
+    try:
+        async with ConfluenceClient(access_token) as client:
+            page_response = await client.update_page(
+                page_id=page_id,
+                new_body_content=new_body_content,
+                version_number=version_number,
+            )
+    except ConfluenceError as e:
+        logger.error(f"Confluence error updating page: {e}")
+        raise ToolError(str(e))
+    except Exception as e:
+        logger.error(f"Unexpected error updating Confluence page: {e}")
+        raise ToolError(
+            f"An unexpected error occurred while updating Confluence page '{page_id}': {str(e)}"
+        )
+
+    return ToolResult(
+        content=f"Page ID {page_id} updated successfully to version {page_response.version}.",
+        structured_content={
+            "updated_page_id": page_response.page_id,
+            "new_version": page_response.version,
+        },
+    )
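For context: the intended call sequence for the new tool, sketched with made-up values. confluence_get_page is referenced by the docstring above; its exact output shape may differ from this sketch.

    # 1) Fetch the page to learn its current version (shape assumed).
    page = await confluence_get_page(page_id="856391684")

    # 2) Update with that version; the server stores version + 1.
    result = await confluence_update_page(
        page_id="856391684",
        new_body_content="<p>New content</p>",
        version_number=page["version"],
    )
    # result.structured_content -> {"updated_page_id": ..., "new_version": ...}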
datarobot_genai/drmcp/tools/gdrive/tools.py

@@ -23,6 +23,8 @@ from fastmcp.tools.tool import ToolResult
 from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
 from datarobot_genai.drmcp.tools.clients.gdrive import LIMIT
 from datarobot_genai.drmcp.tools.clients.gdrive import MAX_PAGE_SIZE
+from datarobot_genai.drmcp.tools.clients.gdrive import SUPPORTED_FIELDS
+from datarobot_genai.drmcp.tools.clients.gdrive import SUPPORTED_FIELDS_STR
 from datarobot_genai.drmcp.tools.clients.gdrive import GoogleDriveClient
 from datarobot_genai.drmcp.tools.clients.gdrive import GoogleDriveError
 from datarobot_genai.drmcp.tools.clients.gdrive import get_gdrive_access_token
@@ -30,8 +32,8 @@ from datarobot_genai.drmcp.tools.clients.gdrive import get_gdrive_access_token
 logger = logging.getLogger(__name__)
 
 
-@dr_mcp_tool(tags={"google", "gdrive", "list", "files"})
-async def
+@dr_mcp_tool(tags={"google", "gdrive", "list", "search", "files", "find", "contents"})
+async def gdrive_find_contents(
     *,
     page_size: Annotated[
         int, f"Maximum number of files to return per page (max {MAX_PAGE_SIZE})."
@@ -43,9 +45,24 @@ async def google_drive_list_files(
     query: Annotated[
         str | None, "Optional filter to narrow results (e.g., 'trashed = false')."
     ] = None,
+    folder_id: Annotated[
+        str | None,
+        "The ID of a specific folder to list or search within. "
+        "If omitted, searches the entire Drive.",
+    ] = None,
+    recursive: Annotated[
+        bool,
+        "If True, searches all subfolders. "
+        "If False and folder_id is provided, only lists immediate children.",
+    ] = False,
+    fields: Annotated[
+        list[str] | None,
+        "Optional list of metadata fields to include. Ex. id, name, mimeType. "
+        f"Default = {SUPPORTED_FIELDS_STR}",
+    ] = None,
 ) -> ToolResult | ToolError:
     """
-
+    Search or list files in the user's Google Drive with pagination and filtering support.
     Use this tool to discover file names and IDs for use with other tools.
 
     Limit must be bigger than or equal to page size and it must be multiplication of page size.
@@ -61,7 +78,12 @@ async def google_drive_list_files(
     try:
         async with GoogleDriveClient(access_token) as client:
             data = await client.list_files(
-                page_size=page_size,
+                page_size=page_size,
+                page_token=page_token,
+                query=query,
+                limit=limit,
+                folder_id=folder_id,
+                recursive=recursive,
             )
     except GoogleDriveError as e:
         logger.error(f"Google Drive error listing files: {e}")
@@ -70,6 +92,7 @@ async def google_drive_list_files(
         logger.error(f"Unexpected error listing Google Drive files: {e}")
         raise ToolError(f"An unexpected error occurred while listing Google Drive files: {str(e)}")
 
+    filtered_fields = set(fields).intersection(SUPPORTED_FIELDS) if fields else SUPPORTED_FIELDS
     number_of_files = len(data.files)
     next_page_info = (
         f"Next page token needed to fetch more data: {data.next_page_token}"
@@ -80,9 +103,75 @@ async def google_drive_list_files(
         content=f"Successfully listed {number_of_files} files. {next_page_info}",
         structured_content={
             "files": [
-                file.model_dump(by_alias=True, include=
+                file.model_dump(by_alias=True, include=filtered_fields) for file in data.files
             ],
             "count": number_of_files,
             "nextPageToken": data.next_page_token,
         },
     )
+
+
+@dr_mcp_tool(tags={"google", "gdrive", "read", "content", "file", "download"})
+async def gdrive_read_content(
+    *,
+    file_id: Annotated[str, "The ID of the file to read."],
+    target_format: Annotated[
+        str | None,
+        "The preferred output format for Google Workspace files "
+        "(e.g., 'text/markdown' for Docs, 'text/csv' for Sheets). "
+        "If not specified, uses sensible defaults. Has no effect on regular files.",
+    ] = None,
+) -> ToolResult | ToolError:
+    """
+    Retrieve the content of a specific file by its ID. Google Workspace files are
+    automatically exported to LLM-readable formats (Push-Down).
+
+    Usage:
+    - Basic: gdrive_read_content(file_id="1ABC123def456")
+    - Custom format: gdrive_read_content(file_id="1ABC...", target_format="text/plain")
+    - First use gdrive_find_contents to discover file IDs
+
+    Supported conversions (defaults):
+    - Google Docs -> Markdown (text/markdown)
+    - Google Sheets -> CSV (text/csv)
+    - Google Slides -> Plain text (text/plain)
+    - PDF files -> Extracted text (text/plain)
+    - Other text files -> Downloaded as-is
+
+    Note: Binary files (images, videos, etc.) are not supported and will return an error.
+    Large Google Workspace files (>10MB) may fail to export due to API limits.
+
+    Refer to Google Drive export formats documentation:
+    https://developers.google.com/workspace/drive/api/guides/ref-export-formats
+    """
+    if not file_id or not file_id.strip():
+        raise ToolError("Argument validation error: 'file_id' cannot be empty.")
+
+    access_token = await get_gdrive_access_token()
+    if isinstance(access_token, ToolError):
+        raise access_token
+
+    try:
+        async with GoogleDriveClient(access_token) as client:
+            file_content = await client.read_file_content(file_id, target_format)
+    except GoogleDriveError as e:
+        logger.error(f"Google Drive error reading file content: {e}")
+        raise ToolError(str(e))
+    except Exception as e:
+        logger.error(f"Unexpected error reading Google Drive file content: {e}")
+        raise ToolError(
+            f"An unexpected error occurred while reading Google Drive file content: {str(e)}"
+        )
+
+    # Provide helpful context about the conversion
+    export_info = ""
+    if file_content.was_exported:
+        export_info = f" (exported from {file_content.original_mime_type})"
+
+    return ToolResult(
+        content=(
+            f"Successfully retrieved content of '{file_content.name}' "
+            f"({file_content.mime_type}){export_info}."
+        ),
+        structured_content=file_content.as_flat_dict(),
+    )
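For context: a sketch of driving the renamed gdrive_find_contents tool with paging and field filtering; IDs and tokens are illustrative.

    first = await gdrive_find_contents(
        page_size=50,
        limit=100,
        query="report",                 # auto-formatted to "name contains 'report'"
        folder_id="0B_example_folder",  # hypothetical
        recursive=False,
        fields=["id", "name", "mimeType"],
    )
    token = first.structured_content["nextPageToken"]
    if token:  # fetch the next page with the returned token
        more = await gdrive_find_contents(page_size=50, limit=100, page_token=token)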
datarobot_genai/drmcp/tools/predictive/training.py

@@ -18,8 +18,11 @@ import json
 import logging
 from dataclasses import asdict
 from dataclasses import dataclass
+from typing import Annotated
 
 import pandas as pd
+from fastmcp.exceptions import ToolError
+from fastmcp.tools.tool import ToolResult
 
 from datarobot_genai.drmcp.core.clients import get_sdk_client
 from datarobot_genai.drmcp.core.mcp_instance import dr_mcp_tool
@@ -53,22 +56,15 @@ class DatasetInsight:
     missing_data_summary: dict[str, float]
 
 
-@dr_mcp_tool(tags={"training", "analysis", "dataset"})
-async def analyze_dataset(
-
-
+@dr_mcp_tool(tags={"predictive", "training", "read", "analysis", "dataset"})
+async def analyze_dataset(
+    *,
+    dataset_id: Annotated[str, "The ID of the DataRobot dataset to analyze"] | None = None,
+) -> ToolError | ToolResult:
+    """Analyze a dataset to understand its structure and potential use cases."""
+    if not dataset_id:
+        return ToolError("Dataset ID must be provided")
 
-    Args:
-        dataset_id: The ID of the DataRobot dataset to analyze
-
-    Returns
-    -------
-    JSON string containing dataset insights including:
-    - Basic statistics (rows, columns)
-    - Column types (numerical, categorical, datetime, text)
-    - Potential target columns
-    - Missing data summary
-    """
     client = get_sdk_client()
     dataset = client.Dataset.get(dataset_id)
     df = dataset.get_as_dataframe()
@@ -105,27 +101,23 @@ async def analyze_dataset(dataset_id: str) -> str:
         potential_targets=potential_targets,
         missing_data_summary=missing_data,
     )
+    insights_dict = asdict(insights)
 
-    return
-
+    return ToolResult(
+        content=json.dumps(insights_dict, indent=2),
+        structured_content=insights_dict,
+    )
 
-@dr_mcp_tool(tags={"training", "analysis", "usecase"})
-async def suggest_use_cases(dataset_id: str) -> str:
-    """
-    Analyze a dataset and suggest potential machine learning use cases.
 
-
-
+@dr_mcp_tool(tags={"predictive", "training", "read", "analysis", "usecase"})
+async def suggest_use_cases(
+    *,
+    dataset_id: Annotated[str, "The ID of the DataRobot dataset to analyze"] | None = None,
+) -> ToolError | ToolResult:
+    """Analyze a dataset and suggest potential machine learning use cases."""
+    if not dataset_id:
+        return ToolError("Dataset ID must be provided")
 
-    Returns
-    -------
-    JSON string containing suggested use cases with:
-    - Use case name and description
-    - Suggested target column
-    - Problem type
-    - Confidence score
-    - Reasoning for the suggestion
-    """
     client = get_sdk_client()
     dataset = client.Dataset.get(dataset_id)
     df = dataset.get_as_dataframe()
@@ -141,27 +133,23 @@ async def suggest_use_cases(dataset_id: str) -> str:
 
     # Sort by confidence score
     suggestions.sort(key=lambda x: x["confidence"], reverse=True)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    - Feature correlations with target
-    - Missing data analysis
-    - Data type distribution
-    """
+
+    return ToolResult(
+        content=json.dumps(suggestions, indent=2),
+        structured_content={"use_case_suggestions": suggestions},
+    )
+
+
+@dr_mcp_tool(tags={"predictive", "training", "read", "analysis", "eda"})
+async def get_exploratory_insights(
+    *,
+    dataset_id: Annotated[str, "The ID of the DataRobot dataset to analyze"] | None = None,
+    target_col: Annotated[str, "Optional target column to focus EDA insights on"] | None = None,
+) -> ToolError | ToolResult:
+    """Generate exploratory data insights for a dataset."""
+    if not dataset_id:
+        return ToolError("Dataset ID must be provided")
+
     client = get_sdk_client()
     dataset = client.Dataset.get(dataset_id)
     df = dataset.get_as_dataframe()
@@ -238,8 +226,10 @@ async def get_exploratory_insights(dataset_id: str, target_col: str | None = Non
         sorted(correlations.items(), key=lambda x: abs(x[1]), reverse=True)
     )
 
-
-
+    return ToolResult(
+        content=json.dumps(eda_insights, indent=2),
+        structured_content=eda_insights,
+    )
 
 
 def _identify_potential_targets(
@@ -450,47 +440,50 @@ def _analyze_target_for_use_cases(df: pd.DataFrame, target_col: str) -> list[Use
     return suggestions
 
 
-@dr_mcp_tool(tags={"training", "autopilot", "model"})
+@dr_mcp_tool(tags={"predictive", "training", "write", "autopilot", "model"})
 async def start_autopilot(
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    *,
+    target: Annotated[str, "Name of the target column for modeling"] | None = None,
+    project_id: Annotated[
+        str, "Optional, the ID of the DataRobot project or a new project if no id is provided"
+    ]
+    | None = None,
+    mode: Annotated[str, "Optional, Autopilot mode ('quick', 'comprehensive', or 'manual')"]
+    | None = "quick",
+    dataset_url: Annotated[
+        str,
+        """
+        Optional, The URL to the dataset to upload
+        (optional if dataset_id is provided) for a new project.
+        """,
+    ]
+    | None = None,
+    dataset_id: Annotated[
+        str,
+        """
+        Optional, The ID of an existing dataset in AI Catalog
+        (optional if dataset_url is provided) for a new project.
+        """,
+    ]
+    | None = None,
+    project_name: Annotated[
+        str, "Optional, name for the project if no id is provided, creates a new project"
+    ]
+    | None = "MCP Project",
+    use_case_id: Annotated[
+        str,
+        "Optional, ID of the use case to associate this project (required for next-gen platform)",
+    ]
+    | None = None,
+) -> ToolError | ToolResult:
+    """Start automated model training (Autopilot) for a project."""
     client = get_sdk_client()
 
     if not project_id:
         if not dataset_url and not dataset_id:
-            return "
+            return ToolError("Either dataset_url or dataset_id must be provided")
         if dataset_url and dataset_id:
-            return "
+            return ToolError("Please provide either dataset_url or dataset_id, not both")
 
     if dataset_url:
         dataset = client.Dataset.create_from_url(dataset_url)
@@ -504,7 +497,7 @@ async def start_autopilot(
     project = client.Project.get(project_id)
 
     if not target:
-        return "
+        return ToolError("Target variable must be specified")
 
     try:
         # Start modeling
@@ -515,40 +508,48 @@ async def start_autopilot(
             "target": target,
             "mode": mode,
             "status": project.get_status(),
-            "ui_panel": ["eda", "model-training", "leaderboard"],
             "use_case_id": project.use_case_id,
         }
 
-        return
-
-
-            {
-                "error": f"Failed to start Autopilot: {str(e)}",
-                "project_id": project.id,
-                "target": target,
-                "mode": mode,
-            },
-            indent=2,
+        return ToolResult(
+            content=json.dumps(result, indent=2),
+            structured_content=result,
         )
 
+    except Exception as e:
+        return ToolError(
+            content=json.dumps(
+                {
+                    "error": f"Failed to start Autopilot: {str(e)}",
+                    "project_id": project.id if project else None,
+                    "target": target,
+                    "mode": mode,
+                },
+                indent=2,
+            )
+        )
 
-@dr_mcp_tool(tags={"training", "model", "evaluation"})
-async def get_model_roc_curve(project_id: str, model_id: str, source: str = "validation") -> str:
-    """
-    Get detailed ROC curve for a specific model.
 
-
-
-
-
-
+@dr_mcp_tool(tags={"prediction", "training", "read", "model", "evaluation"})
+async def get_model_roc_curve(
+    *,
+    project_id: Annotated[str, "The ID of the DataRobot project"] | None = None,
+    model_id: Annotated[str, "The ID of the model to analyze"] | None = None,
+    source: Annotated[
+        str,
+        """
+        The source of the data to use for the ROC curve
+        ('validation' or 'holdout' or 'crossValidation')
+        """,
+    ]
+    | str = "validation",
+) -> ToolError | ToolResult:
+    """Get detailed ROC curve for a specific model."""
+    if not project_id:
+        return ToolError("Project ID must be provided")
+    if not model_id:
+        return ToolError("Model ID must be provided")
 
-    Returns
-    -------
-    JSON string containing:
-    - roc_curve: ROC curve data
-    - ui_panel: List of recommended UI panels for visualization
-    """
     client = get_sdk_client()
     project = client.Project.get(project_id)
     model = client.Model.get(project=project, model_id=model_id)
@@ -581,26 +582,26 @@ async def get_model_roc_curve(project_id: str, model_id: str, source: str = "val
             "source": source,
         }
 
-        return
+        return ToolResult(
+            content=json.dumps({"data": roc_data}, indent=2),
+            structured_content={"data": roc_data},
+        )
     except Exception as e:
-        return
+        return ToolError(f"Failed to get ROC curve: {str(e)}")
 
 
-@dr_mcp_tool(tags={"training", "model", "evaluation"})
-async def get_model_feature_impact(
-
-
-
-
-
-
+@dr_mcp_tool(tags={"predictive", "training", "read", "model", "evaluation"})
+async def get_model_feature_impact(
+    *,
+    project_id: Annotated[str, "The ID of the DataRobot project"] | None = None,
+    model_id: Annotated[str, "The ID of the model to analyze"] | None = None,
+) -> ToolError | ToolResult:
+    """Get detailed feature impact for a specific model."""
+    if not project_id:
+        return ToolError("Project ID must be provided")
+    if not model_id:
+        return ToolError("Model ID must be provided")
 
-    Returns
-    -------
-    JSON string containing:
-    - feature_impact: Feature importance scores
-    - ui_panel: List of recommended UI panels for visualization
-    """
     client = get_sdk_client()
     project = client.Project.get(project_id)
     model = client.Model.get(project=project, model_id=model_id)
@@ -608,26 +609,31 @@ async def get_model_feature_impact(project_id: str, model_id: str) -> str:
     model.request_feature_impact()
     feature_impact = model.get_or_request_feature_impact()
 
-    return
-
+    return ToolResult(
+        content=json.dumps({"data": feature_impact}, indent=2),
+        structured_content={"data": feature_impact},
+    )
 
-@dr_mcp_tool(tags={"training", "model", "evaluation"})
-async def get_model_lift_chart(project_id: str, model_id: str, source: str = "validation") -> str:
-    """
-    Get detailed lift chart for a specific model.
 
-
-
-
-
-
+@dr_mcp_tool(tags={"predictive", "training", "read", "model", "evaluation"})
+async def get_model_lift_chart(
+    project_id: Annotated[str, "The ID of the DataRobot project"] | None = None,
+    model_id: Annotated[str, "The ID of the model to analyze"] | None = None,
+    source: Annotated[
+        str,
+        """
+        The source of the data to use for the lift chart
+        ('validation' or 'holdout' or 'crossValidation')
+        """,
+    ]
+    | str = "validation",
+) -> ToolError | ToolResult:
+    """Get detailed lift chart for a specific model."""
+    if not project_id:
+        return ToolError("Project ID must be provided")
+    if not model_id:
+        return ToolError("Model ID must be provided")
 
-    Returns
-    -------
-    JSON string containing:
-    - lift_chart: Lift chart data
-    - ui_panel: List of recommended UI panels for visualization
-    """
     client = get_sdk_client()
     project = client.Project.get(project_id)
     model = client.Model.get(project=project, model_id=model_id)
@@ -648,4 +654,7 @@ async def get_model_lift_chart(project_id: str, model_id: str, source: str = "va
         "target_class": lift_chart.target_class,
     }
 
-    return
+    return ToolResult(
+        content=json.dumps({"data": lift_chart_data}, indent=2),
+        structured_content={"data": lift_chart_data},
+    )
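For context: the training tools now return ToolResult, or a ToolError value rather than a raised exception, as the hunks above show. A consumption sketch with made-up IDs:

    res = await get_model_roc_curve(
        project_id="proj_123",  # hypothetical
        model_id="model_456",   # hypothetical
        source="validation",
    )
    if isinstance(res, ToolError):  # these tools return, not raise, ToolError
        print(f"failed: {res}")
    else:
        roc = res.structured_content["data"]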
{datarobot_genai-0.2.21.dist-info → datarobot_genai-0.2.24.dist-info}/RECORD

@@ -78,14 +78,14 @@ datarobot_genai/drmcp/test_utils/utils.py,sha256=esGKFv8aO31-Qg3owayeWp32BYe1CdY
 datarobot_genai/drmcp/tools/__init__.py,sha256=0kq9vMkF7EBsS6lkEdiLibmUrghTQqosHbZ5k-V9a5g,578
 datarobot_genai/drmcp/tools/clients/__init__.py,sha256=0kq9vMkF7EBsS6lkEdiLibmUrghTQqosHbZ5k-V9a5g,578
 datarobot_genai/drmcp/tools/clients/atlassian.py,sha256=__M_uz7FrcbKCYRzeMn24DCEYD6OmFx_LuywHCxgXsA,6472
-datarobot_genai/drmcp/tools/clients/confluence.py,sha256=
-datarobot_genai/drmcp/tools/clients/gdrive.py,sha256=
+datarobot_genai/drmcp/tools/clients/confluence.py,sha256=h_G0By_kDnJeWDT_d-IREsaZ5-0xB5GoLXOqblYP5MA,20706
+datarobot_genai/drmcp/tools/clients/gdrive.py,sha256=e28XwX0C8E3nql85-_NbUEMB-4s0lsQ2f5spj9BgsgM,21455
 datarobot_genai/drmcp/tools/clients/jira.py,sha256=Rm91JAyrNIqxu66-9rU1YqoRXVnWbEy-Ahvy6f6HlVg,9823
 datarobot_genai/drmcp/tools/clients/s3.py,sha256=GmwzvurFdNfvxOooA8g5S4osRysHYU0S9ypg_177Glg,953
 datarobot_genai/drmcp/tools/confluence/__init__.py,sha256=0kq9vMkF7EBsS6lkEdiLibmUrghTQqosHbZ5k-V9a5g,578
-datarobot_genai/drmcp/tools/confluence/tools.py,sha256=
+datarobot_genai/drmcp/tools/confluence/tools.py,sha256=_-ws65WLK8KZP_mKkf4yJ7ZunR8qdyoiMwHQX47MSMw,12362
 datarobot_genai/drmcp/tools/gdrive/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-datarobot_genai/drmcp/tools/gdrive/tools.py,sha256=
+datarobot_genai/drmcp/tools/gdrive/tools.py,sha256=EvoEr3AEI-xRldwCTZHiQfBRHQfLtgHuojTx8mXhlU4,7074
 datarobot_genai/drmcp/tools/jira/__init__.py,sha256=0kq9vMkF7EBsS6lkEdiLibmUrghTQqosHbZ5k-V9a5g,578
 datarobot_genai/drmcp/tools/jira/tools.py,sha256=dfkqTU2HH-7n44hX80ODFacKq0p0LOchFcZtIIKFNMM,9687
 datarobot_genai/drmcp/tools/predictive/__init__.py,sha256=WuOHlNNEpEmcF7gVnhckruJRKU2qtmJLE3E7zoCGLDo,1030
@@ -96,7 +96,7 @@ datarobot_genai/drmcp/tools/predictive/model.py,sha256=Yih5-KedJ-1yupPLXCJsCXOdy
 datarobot_genai/drmcp/tools/predictive/predict.py,sha256=Qoob2_t2crfWtyPzkXMRz2ITZumnczU6Dq4C7q9RBMI,9370
 datarobot_genai/drmcp/tools/predictive/predict_realtime.py,sha256=urq6rPyZFsAP-bPyclSNzrkvb6FTamdlFau8q0IWWJ0,13472
 datarobot_genai/drmcp/tools/predictive/project.py,sha256=KaMDAvJY4s12j_4ybA7-KcCS1yMOj-KPIKNBgCSE2iM,2536
-datarobot_genai/drmcp/tools/predictive/training.py,sha256=
+datarobot_genai/drmcp/tools/predictive/training.py,sha256=S9V7AlO6mAgIAJNww0g5agFOw4YqRiCsIGaRDJcOe4A,23991
 datarobot_genai/langgraph/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 datarobot_genai/langgraph/agent.py,sha256=DRnywmS9KDywyChtuIZZwNKbJs8BpC259EG_kxYbiQ8,15828
 datarobot_genai/langgraph/mcp.py,sha256=iA2_j46mZAaNaL7ntXT-LW6C-NMJkzr3VfKDDfe7mh8,2851
@@ -111,9 +111,9 @@ datarobot_genai/nat/datarobot_llm_clients.py,sha256=Yu208Ed_p_4P3HdpuM7fYnKcXtim
 datarobot_genai/nat/datarobot_llm_providers.py,sha256=aDoQcTeGI-odqydPXEX9OGGNFbzAtpqzTvHHEkmJuEQ,4963
 datarobot_genai/nat/datarobot_mcp_client.py,sha256=35FzilxNp4VqwBYI0NsOc91-xZm1C-AzWqrOdDy962A,9612
 datarobot_genai/nat/helpers.py,sha256=Q7E3ADZdtFfS8E6OQPyw2wgA6laQ58N3bhLj5CBWwJs,3265
-datarobot_genai-0.2.
-datarobot_genai-0.2.
-datarobot_genai-0.2.
-datarobot_genai-0.2.
-datarobot_genai-0.2.
-datarobot_genai-0.2.
+datarobot_genai-0.2.24.dist-info/METADATA,sha256=-QGxEqh8oSYn-gL93kIxqGhOTkrlHQRMf4HqMYTyfDs,6301
+datarobot_genai-0.2.24.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+datarobot_genai-0.2.24.dist-info/entry_points.txt,sha256=jEW3WxDZ8XIK9-ISmTyt5DbmBb047rFlzQuhY09rGrM,284
+datarobot_genai-0.2.24.dist-info/licenses/AUTHORS,sha256=isJGUXdjq1U7XZ_B_9AH8Qf0u4eX0XyQifJZ_Sxm4sA,80
+datarobot_genai-0.2.24.dist-info/licenses/LICENSE,sha256=U2_VkLIktQoa60Nf6Tbt7E4RMlfhFSjWjcJJfVC-YCE,11341
+datarobot_genai-0.2.24.dist-info/RECORD,,
Files without changes: WHEEL, entry_points.txt, licenses/AUTHORS, licenses/LICENSE.