datarobot-genai 0.2.24__py3-none-any.whl → 0.2.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datarobot_genai/drmcp/core/config.py +121 -83
- datarobot_genai/drmcp/core/dr_mcp_server.py +0 -3
- datarobot_genai/drmcp/core/mcp_instance.py +37 -103
- datarobot_genai/drmcp/core/tool_config.py +17 -9
- datarobot_genai/drmcp/core/tool_filter.py +10 -1
- datarobot_genai/drmcp/test_utils/tool_base_ete.py +68 -1
- datarobot_genai/drmcp/tools/clients/gdrive.py +127 -0
- datarobot_genai/drmcp/tools/clients/microsoft_graph.py +479 -0
- datarobot_genai/drmcp/tools/gdrive/tools.py +95 -2
- datarobot_genai/drmcp/tools/microsoft_graph/__init__.py +13 -0
- datarobot_genai/drmcp/tools/microsoft_graph/tools.py +198 -0
- datarobot_genai/drmcp/tools/predictive/data.py +11 -3
- datarobot_genai/drmcp/tools/predictive/project.py +45 -27
- datarobot_genai/drmcp/tools/predictive/training.py +1 -0
- datarobot_genai/nat/datarobot_llm_clients.py +90 -54
- datarobot_genai/nat/datarobot_mcp_client.py +47 -15
- {datarobot_genai-0.2.24.dist-info → datarobot_genai-0.2.29.dist-info}/METADATA +1 -1
- {datarobot_genai-0.2.24.dist-info → datarobot_genai-0.2.29.dist-info}/RECORD +22 -20
- datarobot_genai/drmcp/core/mcp_server_tools.py +0 -129
- {datarobot_genai-0.2.24.dist-info → datarobot_genai-0.2.29.dist-info}/WHEEL +0 -0
- {datarobot_genai-0.2.24.dist-info → datarobot_genai-0.2.29.dist-info}/entry_points.txt +0 -0
- {datarobot_genai-0.2.24.dist-info → datarobot_genai-0.2.29.dist-info}/licenses/AUTHORS +0 -0
- {datarobot_genai-0.2.24.dist-info → datarobot_genai-0.2.29.dist-info}/licenses/LICENSE +0 -0
|
@@ -15,7 +15,9 @@
|
|
|
15
15
|
"""Google Drive API Client and utilities for OAuth."""
|
|
16
16
|
|
|
17
17
|
import io
|
|
18
|
+
import json
|
|
18
19
|
import logging
|
|
20
|
+
import uuid
|
|
19
21
|
from typing import Annotated
|
|
20
22
|
from typing import Any
|
|
21
23
|
|
|
@@ -45,6 +47,12 @@ GOOGLE_WORKSPACE_EXPORT_MIMES: dict[str, str] = {
|
|
|
45
47
|
"application/vnd.google-apps.presentation": "text/plain",
|
|
46
48
|
}
|
|
47
49
|
|
|
50
|
+
# MIME type mappings for content conversion during upload to Google Workspace formats.
# Keys are the target Google Workspace MIME types; values are the content type that is
# declared for the uploaded media part. _create_file_with_content looks targets up here
# and falls back to "text/plain" for any target that is not listed.
UPLOAD_CONTENT_TYPES: dict[str, str] = {
    "application/vnd.google-apps.document": "text/plain",
    "application/vnd.google-apps.spreadsheet": "text/csv",
}
|
|
55
|
+
|
|
48
56
|
BINARY_MIME_PREFIXES = (
|
|
49
57
|
"image/",
|
|
50
58
|
"audio/",
|
|
@@ -599,6 +607,125 @@ class GoogleDriveClient:
|
|
|
599
607
|
web_view_link=file_metadata.web_view_link,
|
|
600
608
|
)
|
|
601
609
|
|
|
610
|
+
async def create_file(
    self,
    name: str,
    mime_type: str,
    parent_id: str | None = None,
    initial_content: str | None = None,
) -> GoogleDriveFile:
    """Create a file or folder in Google Drive and return its metadata.

    A metadata-only request is sent for folders and for files without
    initial content. When non-empty content is supplied for a non-folder
    target, the file is created through a multipart upload instead.

    Args:
        name: The name for the new file or folder.
        mime_type: The MIME type of the file (e.g., 'text/plain',
            'application/vnd.google-apps.document',
            'application/vnd.google-apps.folder').
        parent_id: Optional ID of the parent folder. When omitted, no
            "parents" entry is sent with the request.
        initial_content: Optional text content to populate the file.
            Ignored for folders and when empty.

    Returns
    -------
        GoogleDriveFile built from the API response for the created file.

    Raises
    ------
        GoogleDriveError: For HTTP 400, 403, 404 and 429 responses, with a
            message specific to each status.
    """
    file_metadata: dict[str, Any] = {"name": name, "mimeType": mime_type}
    if parent_id:
        file_metadata["parents"] = [parent_id]

    # Only non-folder targets with non-empty content need the upload endpoint.
    needs_upload = mime_type != GOOGLE_DRIVE_FOLDER_MIME and bool(initial_content)
    if needs_upload:
        response = await self._create_file_with_content(
            metadata=file_metadata,
            content=initial_content,
            target_mime_type=mime_type,
        )
    else:
        response = await self._client.post(
            "/",
            json=file_metadata,
            params={"fields": SUPPORTED_FIELDS_STR, "supportsAllDrives": "true"},
        )

    # Statuses that get a dedicated, user-facing message; anything else is
    # left to raise_for_status() below.
    known_failures = {
        404: (
            f"Parent folder with ID '{parent_id}' not found."
            if parent_id
            else "Resource not found."
        ),
        403: "Permission denied: you don't have permission to create files in this location.",
        400: (
            f"Bad request: invalid parameters for file creation. "
            f"Check that the MIME type '{mime_type}' is valid."
        ),
        429: "Rate limit exceeded. Please try again later.",
    }
    failure_message = known_failures.get(response.status_code)
    if failure_message is not None:
        raise GoogleDriveError(failure_message)

    response.raise_for_status()
    return GoogleDriveFile.from_api_response(response.json())
|
|
684
|
+
|
|
685
|
+
async def _create_file_with_content(
    self,
    metadata: dict[str, Any],
    content: str,
    target_mime_type: str,
) -> httpx.Response:
    """Create a file with content using a multipart/related upload.

    The request body has two parts separated by a random boundary: the JSON
    file metadata, followed by the text content. The content part is labelled
    with the type registered in UPLOAD_CONTENT_TYPES for the target MIME type,
    or "text/plain" when the target is not registered.

    Args:
        metadata: File metadata dictionary, serialized as the JSON part.
        content: Text content for the file; sent UTF-8 encoded.
        target_mime_type: The target MIME type for the file.

    Returns
    -------
        The HTTP response from the upload. Status handling is left to the
        caller.
    """
    content_type = UPLOAD_CONTENT_TYPES.get(target_mime_type, "text/plain")
    boundary = f"===gdrive_boundary_{uuid.uuid4().hex}==="
    body_parts = [
        f"--{boundary}",
        "Content-Type: application/json; charset=UTF-8",
        "",
        json.dumps(metadata),
        f"--{boundary}",
        f"Content-Type: {content_type}",
        "",
        content,
        f"--{boundary}--",
    ]
    body = "\r\n".join(body_parts)

    upload_url = "https://www.googleapis.com/upload/drive/v3/files"
    return await self._client.post(
        upload_url,
        content=body.encode("utf-8"),
        params={
            "uploadType": "multipart",
            "fields": SUPPORTED_FIELDS_STR,
            "supportsAllDrives": "true",
        },
        # The boundary contains "=" characters, which are MIME tspecials, so the
        # parameter value must be a quoted-string to be a well-formed header.
        # The delimiter lines in the body keep using the bare boundary.
        headers={"Content-Type": f'multipart/related; boundary="{boundary}"'},
    )
|
|
728
|
+
|
|
602
729
|
async def __aenter__(self) -> "GoogleDriveClient":
    """Async context manager entry.

    Returns the client itself, so it is the value bound by
    ``async with ... as client``.
    """
    return self
|
|
@@ -0,0 +1,479 @@
|
|
|
1
|
+
# Copyright 2026 DataRobot, Inc.
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Microsoft Graph API Client for searching SharePoint and OneDrive content."""
|
|
16
|
+
|
|
17
|
+
import logging
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
import httpx
|
|
21
|
+
from datarobot.auth.datarobot.exceptions import OAuthServiceClientErr
|
|
22
|
+
from fastmcp.exceptions import ToolError
|
|
23
|
+
from pydantic import BaseModel
|
|
24
|
+
from pydantic import Field
|
|
25
|
+
|
|
26
|
+
from datarobot_genai.drmcp.core.auth import get_access_token
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
GRAPH_API_BASE = "https://graph.microsoft.com/v1.0"
|
|
31
|
+
MAX_SEARCH_RESULTS = 250
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
async def get_microsoft_graph_access_token() -> str | ToolError:
    """
    Fetch the OAuth access token for the "microsoft" provider.

    Failures are reported by *returning* a ToolError rather than raising it,
    so callers must check the type of the result before using it.

    Returns
    -------
    The access token string on success, or a ToolError describing why no
    token could be obtained.

    Example:
        ```python
        token = await get_microsoft_graph_access_token()
        if isinstance(token, ToolError):
            # Handle error
            return token
        # Use token
        ```
    """
    try:
        token = await get_access_token("microsoft")
        if token:
            return token
        # A falsy token is treated as "OAuth flow not completed".
        logger.warning("Empty access token received")
        return ToolError("Received empty access token. Please complete the OAuth flow.")
    except OAuthServiceClientErr as exc:
        logger.error(f"OAuth client error: {exc}", exc_info=True)
        return ToolError(
            "Could not obtain access token for Microsoft. Make sure the OAuth "
            "permission was granted for the application to act on your behalf."
        )
    except Exception as exc:
        # Catch-all so that every failure is handed back as a ToolError value.
        logger.error(f"Unexpected error obtaining access token: {exc}", exc_info=True)
        return ToolError("An unexpected error occurred while obtaining access token for Microsoft.")
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class MicrosoftGraphError(Exception):
    """Error raised for failed or invalid Microsoft Graph API operations."""

    def __init__(self, message: str) -> None:
        """Store ``message`` as the exception's single argument."""
        super(MicrosoftGraphError, self).__init__(message)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class MicrosoftGraphItem(BaseModel):
    """A single item (file or folder) returned by Microsoft Graph (SharePoint/OneDrive)."""

    # Allow construction by Python field name as well as by the camelCase alias.
    model_config = {"populate_by_name": True}

    id: str
    name: str
    web_url: str | None = Field(default=None, alias="webUrl")
    size: int | None = None
    created_datetime: str | None = Field(default=None, alias="createdDateTime")
    last_modified_datetime: str | None = Field(default=None, alias="lastModifiedDateTime")
    is_folder: bool = False
    mime_type: str | None = Field(default=None, alias="mimeType")
    drive_id: str | None = Field(default=None, alias="driveId")
    parent_folder_id: str | None = Field(default=None, alias="parentFolderId")

    @classmethod
    def from_api_response(cls, data: dict[str, Any]) -> "MicrosoftGraphItem":
        """Build an item from a Microsoft Graph resource dictionary.

        Missing "id" and "name" fall back to "" and "Unknown". The item is
        flagged as a folder when the "folder" key is present, and the MIME
        type is read from the "file" entry when that key is present.
        """
        parent = data.get("parentReference", {})

        if "file" in data:
            mime = data["file"].get("mimeType")
        else:
            mime = None

        values: dict[str, Any] = {
            "id": data.get("id", ""),
            "name": data.get("name", "Unknown"),
            "web_url": data.get("webUrl"),
            "size": data.get("size"),
            "created_datetime": data.get("createdDateTime"),
            "last_modified_datetime": data.get("lastModifiedDateTime"),
            "is_folder": "folder" in data,
            "mime_type": mime,
            "drive_id": parent.get("driveId"),
            "parent_folder_id": parent.get("id"),
        }
        return cls(**values)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class MicrosoftGraphClient:
    """Client for interacting with Microsoft Graph API to search SharePoint and OneDrive content.

    The client owns an ``httpx.AsyncClient``. Use it as an async context
    manager so that the HTTP client is closed on exit.
    """

    def __init__(self, access_token: str, site_url: str | None = None) -> None:
        """
        Initialize Microsoft Graph client with access token.

        Args:
            access_token: OAuth access token for Microsoft Graph API
            site_url: Optional SharePoint site URL (e.g., https://tenant.sharepoint.com/sites/sitename)
                     If not provided, searches across all accessible sites and OneDrive
        """
        self.access_token = access_token
        self.site_url = site_url
        self._client = httpx.AsyncClient(
            headers={
                "Authorization": f"Bearer {access_token}",
                "Content-Type": "application/json",
            },
            timeout=30.0,
        )
        # Cached result of _get_site_id(); resolved lazily on first use.
        self._site_id: str | None = None

    async def _get_site_id(self) -> str:
        """Get the SharePoint site ID from the site URL or return root site ID.

        The resolved ID is cached on the instance, so the lookup request is
        made at most once per client while the cached value is non-empty.

        Raises
        ------
        MicrosoftGraphError: If the site URL has fewer than three
            slash-separated parts after the scheme, or the lookup request
            returns an HTTP error status.
        """
        if self._site_id:
            return self._site_id

        # If no site_url provided, use root site
        if not self.site_url:
            return await self._fetch_site_id(
                f"{GRAPH_API_BASE}/sites/root", "Failed to get root site ID"
            )

        # Extract site path from URL
        # Format: https://{tenant}.sharepoint.com/sites/{site-name}
        # or: https://{tenant}.sharepoint.com/sites/{site-name}/...
        # Surrounding whitespace and trailing slashes are dropped first so they
        # neither count as a path segment nor leak into the Graph request path.
        url_parts = self.site_url.strip().removeprefix("https://").rstrip("/").split("/")
        if len(url_parts) < 3:
            raise MicrosoftGraphError(f"Invalid SharePoint site URL: {self.site_url}")

        hostname = url_parts[0]  # tenant.sharepoint.com
        site_path = "/".join(url_parts[1:])  # sites/site-name/...

        # Use Microsoft Graph API to get site ID
        return await self._fetch_site_id(
            f"{GRAPH_API_BASE}/sites/{hostname}:/{site_path}",
            f"Failed to get site ID from URL: {self.site_url}",
        )

    async def _fetch_site_id(self, graph_url: str, failure_message: str) -> str:
        """GET a site resource, cache its ``id`` field on the instance and return it."""
        try:
            response = await self._client.get(graph_url)
            response.raise_for_status()
            data = response.json()
        except httpx.HTTPStatusError as e:
            raise self._handle_http_error(e, failure_message) from e

        site_id: str = data.get("id", "")
        self._site_id = site_id
        return site_id

    def _handle_http_error(
        self, error: httpx.HTTPStatusError, base_message: str
    ) -> MicrosoftGraphError:
        """Handle HTTP errors and return appropriate MicrosoftGraphError with user-friendly messages.

        The error is returned, not raised, so that call sites can write
        ``raise self._handle_http_error(...) from e``.
        """  # noqa: E501
        error_msg = base_message

        if error.response.status_code == 403:
            error_msg += (
                ": Insufficient permissions. Requires Sites.Read.All or Sites.Search.All "
                "permission."
            )
        elif error.response.status_code == 400:
            try:
                error_data = error.response.json()
                api_message = error_data.get("error", {}).get("message", "Invalid request")
                error_msg += f": {api_message}"
            except Exception:
                # Best effort only: a malformed or unexpected error body must not
                # mask the original HTTP failure with a parsing error.
                error_msg += ": Invalid request parameters."
        else:
            error_msg += f": HTTP {error.response.status_code}"

        return MicrosoftGraphError(error_msg)

    async def search_content(
        self,
        search_query: str,
        site_id: str | None = None,
        from_offset: int = 0,
        size: int = 250,
        entity_types: list[str] | None = None,
        filters: list[str] | None = None,
        include_hidden_content: bool = False,
        region: str | None = None,
    ) -> list[MicrosoftGraphItem]:
        """
        Search for content using Microsoft Graph API search.

        This tool utilizes Microsoft Graph's search engine to locate items across
        SharePoint sites, OneDrive, and other Microsoft 365 services. When a site
        is specified, it searches within that site. Otherwise, it searches across
        all accessible SharePoint sites and OneDrive.

        Args:
            search_query: The search string to find files, folders, or list items
            site_id: Optional site ID to scope the search. If not provided and site_url
                    is set, will use that site. If neither is provided, searches across
                    all accessible sites.
            from_offset: The zero-based index of the first result to return (default: 0).
                        Use this for pagination - increment by the size value to get the next page.
                        Negative values are clamped to 0.
            size: Maximum number of results to return in this request (default: 250, max: 250).
                 Values outside 1..250 are clamped into that range.
                 The LLM should control pagination by making multiple calls with different
                 'from' values (e.g., from=0 size=250, then from=250 size=250, etc.).
            entity_types: Optional list of entity types to search. Valid values:
                         "driveItem", "listItem", "site", "list", "drive".
                         Default: ["driveItem", "listItem"]. Unknown values are dropped;
                         if none remain, the default is used.
            filters: Optional list of filter expressions (KQL syntax) to refine search results
            include_hidden_content: Whether to include hidden content in search results.
                                   Only works with delegated permissions, not application
                                   permissions.
            region: Optional region code for application permissions (e.g., "NAM", "EUR", "APC")

        Returns
        -------
        List of MicrosoftGraphItem objects matching the search query

        Raises
        ------
        MicrosoftGraphError: If the search query is empty, or a Graph request
            returns an HTTP error status. HTTP status errors are wrapped and
            never surface as httpx.HTTPStatusError. Other exceptions raised by
            the search request itself are not caught here.
        """
        if not search_query:
            raise MicrosoftGraphError("Search query cannot be empty")

        # Validate and limit size parameter
        size = min(max(1, size), MAX_SEARCH_RESULTS)  # Between 1 and 250
        from_offset = max(0, from_offset)  # Must be non-negative

        # Determine which site to search
        # If site_id is provided, use it directly; otherwise resolve from site_url if set
        target_site_id: str | None
        if site_id:
            target_site_id = site_id
        elif self.site_url:
            target_site_id = await self._get_site_id()
        else:
            target_site_id = None

        # Use unified Microsoft Search API for both site-specific and organization-wide search
        # Reference: https://learn.microsoft.com/en-us/graph/api/search-query
        graph_url = f"{GRAPH_API_BASE}/search/query"

        # Default entity types: driveItem and listItem
        if entity_types is None:
            entity_types = ["driveItem", "listItem"]

        # Validate entity types
        valid_entity_types = ("driveItem", "listItem", "site", "list", "drive")
        entity_types = [et for et in entity_types if et in valid_entity_types]
        if not entity_types:
            entity_types = ["driveItem", "listItem"]  # Fallback to default

        # Build search request payload
        # Reference: https://learn.microsoft.com/en-us/graph/search-concept-files
        query_parts: list[str] = []

        # If searching within a specific site, add scoping using KQL syntax first
        if target_site_id:
            # Get site details to construct proper scoping query
            try:
                site_info_url = f"{GRAPH_API_BASE}/sites/{target_site_id}"
                site_response = await self._client.get(site_info_url)
                site_response.raise_for_status()
                site_data = site_response.json()
                site_web_url = site_data.get("webUrl", "")

                # Use KQL to scope search to the specific site
                # Format: path:"{site-url}"
                if site_web_url:
                    query_parts.append(f'path:"{site_web_url}"')
            except httpx.HTTPStatusError as e:
                raise self._handle_http_error(e, "Failed to get site details for scoping") from e
            except Exception as e:
                logger.warning(
                    f"Could not get site details for scoping, using un-scoped search: {e}"
                )
                # Fall back to un-scoped search if site details can't be retrieved

        # Add the main search query
        query_parts.append(search_query)

        # Add filters if provided (using AND operator for proper KQL syntax)
        if filters:
            # Join filters with AND operator for proper KQL syntax
            filter_string = " AND ".join(filters)
            query_parts.append(filter_string)

        # Combine all query parts with spaces
        query_string = " ".join(query_parts)

        # Build request payload with from and size parameters
        request_payload: dict[str, Any] = {
            "entityTypes": entity_types,
            "query": {
                "queryString": query_string,
            },
            "from": from_offset,
            "size": size,
        }

        # Add includeHiddenContent (only works with delegated permissions)
        if include_hidden_content:
            request_payload["includeHiddenContent"] = True

        # Add region for application permissions
        if region:
            request_payload["region"] = region

        payload = {"requests": [request_payload]}

        try:
            response = await self._client.post(graph_url, json=payload)
            response.raise_for_status()
            data = response.json()
        except httpx.HTTPStatusError as e:
            raise self._handle_http_error(e, "Failed to search SharePoint content") from e

        # Parse the Microsoft Search API response format
        # Reference: https://learn.microsoft.com/en-us/graph/search-concept-files
        results: list[MicrosoftGraphItem] = []
        for request_result in data.get("value", []):
            for container in request_result.get("hitsContainers", []):
                for hit in container.get("hits", []):
                    resource = hit.get("resource", {})
                    if not resource:
                        continue

                    odata_type = resource.get("@odata.type", "")
                    transformed_resource = self._transform_search_resource(resource, odata_type)
                    results.append(MicrosoftGraphItem.from_api_response(transformed_resource))

        return results

    def _transform_search_resource(
        self, resource: dict[str, Any], odata_type: str
    ) -> dict[str, Any]:
        """Transform a search API resource to MicrosoftGraphItem-compatible format.

        listItem, site, list and drive resources are copied into a new dict
        with a normalized "name" and "parentReference". Any other type is
        treated as a driveItem and returned unchanged (the same object).
        """
        # Preserve original values from resource if they exist, otherwise use defaults
        # This ensures we don't lose data that might be present in the original response
        base_resource: dict[str, Any] = {
            "id": resource.get("id", ""),
            "webUrl": resource.get("webUrl"),
            "createdDateTime": resource.get("createdDateTime"),
            "lastModifiedDateTime": resource.get("lastModifiedDateTime"),
            "size": resource.get("size"),
        }

        # Copy the "folder"/"file" facets only when the resource really has them.
        # MicrosoftGraphItem.from_api_response treats the mere presence of the
        # "folder" key as "this is a folder", so inserting an empty default here
        # would flag every list item, site, list and drive as a folder.
        for facet in ("folder", "file"):
            if facet in resource:
                base_resource[facet] = resource[facet]

        parent_ref = resource.get("parentReference", {})

        if odata_type == "#microsoft.graph.listItem":
            fields = resource.get("fields", {})
            base_resource.update(
                {
                    "name": fields.get("Title") or resource.get("name", "Unknown"),
                    "parentReference": {
                        "driveId": parent_ref.get("driveId"),
                        "id": parent_ref.get("id"),
                    },
                }
            )
        elif odata_type == "#microsoft.graph.site":
            base_resource.update(
                {
                    "name": resource.get("displayName") or resource.get("name", "Unknown"),
                    "parentReference": {},
                }
            )
        elif odata_type == "#microsoft.graph.list":
            base_resource.update(
                {
                    "name": resource.get("displayName") or resource.get("name", "Unknown"),
                    "parentReference": {
                        "siteId": parent_ref.get("siteId"),
                    },
                }
            )
        elif odata_type == "#microsoft.graph.drive":
            base_resource.update(
                {
                    "name": resource.get("name", "Unknown"),
                    "parentReference": {
                        "siteId": parent_ref.get("siteId"),
                    },
                }
            )
        else:
            # Standard driveItem - use resource as-is
            return resource

        return base_resource

    async def __aenter__(self) -> "MicrosoftGraphClient":
        """Async context manager entry."""
        return self

    async def __aexit__(
        self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: Any
    ) -> None:
        """Async context manager exit; closes the underlying HTTP client."""
        await self._client.aclose()
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
def validate_site_url(site_url: str) -> str | None:
|
|
430
|
+
"""Validate SharePoint site URL and return user-friendly error message if invalid.
|
|
431
|
+
|
|
432
|
+
Args:
|
|
433
|
+
site_url: The SharePoint site URL to validate
|
|
434
|
+
|
|
435
|
+
Returns
|
|
436
|
+
-------
|
|
437
|
+
None if valid, or a user-friendly error message if invalid
|
|
438
|
+
"""
|
|
439
|
+
if not site_url:
|
|
440
|
+
return (
|
|
441
|
+
"SharePoint site URL is required. "
|
|
442
|
+
"Please provide a valid SharePoint site URL (e.g., https://yourtenant.sharepoint.com/sites/yoursite)."
|
|
443
|
+
)
|
|
444
|
+
|
|
445
|
+
site_url = site_url.strip()
|
|
446
|
+
|
|
447
|
+
if not site_url.startswith("https://"):
|
|
448
|
+
return (
|
|
449
|
+
f"Invalid SharePoint site URL: '{site_url}'. "
|
|
450
|
+
"The URL must start with 'https://'. "
|
|
451
|
+
"Example: https://yourtenant.sharepoint.com/sites/yoursite"
|
|
452
|
+
)
|
|
453
|
+
|
|
454
|
+
if "sharepoint.com" not in site_url.lower():
|
|
455
|
+
return (
|
|
456
|
+
f"Invalid SharePoint site URL: '{site_url}'. "
|
|
457
|
+
"The URL must be a SharePoint site URL containing 'sharepoint.com'. "
|
|
458
|
+
"Example: https://yourtenant.sharepoint.com/sites/yoursite"
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
# Check basic URL structure
|
|
462
|
+
url_parts = site_url.replace("https://", "").split("/")
|
|
463
|
+
if len(url_parts) < 1 or not url_parts[0]:
|
|
464
|
+
return (
|
|
465
|
+
f"Invalid SharePoint site URL format: '{site_url}'. "
|
|
466
|
+
"The URL must include a domain name. "
|
|
467
|
+
"Example: https://yourtenant.sharepoint.com/sites/yoursite"
|
|
468
|
+
)
|
|
469
|
+
|
|
470
|
+
# Check if it looks like a valid SharePoint site URL
|
|
471
|
+
domain = url_parts[0]
|
|
472
|
+
if not domain.endswith("sharepoint.com"):
|
|
473
|
+
return (
|
|
474
|
+
f"Invalid SharePoint site URL: '{site_url}'. "
|
|
475
|
+
"The domain must end with 'sharepoint.com'. "
|
|
476
|
+
"Example: https://yourtenant.sharepoint.com/sites/yoursite"
|
|
477
|
+
)
|
|
478
|
+
|
|
479
|
+
return None
|