datarobot-genai 0.2.31__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125) hide show
  1. datarobot_genai/__init__.py +19 -0
  2. datarobot_genai/core/__init__.py +0 -0
  3. datarobot_genai/core/agents/__init__.py +43 -0
  4. datarobot_genai/core/agents/base.py +195 -0
  5. datarobot_genai/core/chat/__init__.py +19 -0
  6. datarobot_genai/core/chat/auth.py +146 -0
  7. datarobot_genai/core/chat/client.py +178 -0
  8. datarobot_genai/core/chat/responses.py +297 -0
  9. datarobot_genai/core/cli/__init__.py +18 -0
  10. datarobot_genai/core/cli/agent_environment.py +47 -0
  11. datarobot_genai/core/cli/agent_kernel.py +211 -0
  12. datarobot_genai/core/custom_model.py +141 -0
  13. datarobot_genai/core/mcp/__init__.py +0 -0
  14. datarobot_genai/core/mcp/common.py +218 -0
  15. datarobot_genai/core/telemetry_agent.py +126 -0
  16. datarobot_genai/core/utils/__init__.py +3 -0
  17. datarobot_genai/core/utils/auth.py +234 -0
  18. datarobot_genai/core/utils/urls.py +64 -0
  19. datarobot_genai/crewai/__init__.py +24 -0
  20. datarobot_genai/crewai/agent.py +42 -0
  21. datarobot_genai/crewai/base.py +159 -0
  22. datarobot_genai/crewai/events.py +117 -0
  23. datarobot_genai/crewai/mcp.py +59 -0
  24. datarobot_genai/drmcp/__init__.py +78 -0
  25. datarobot_genai/drmcp/core/__init__.py +13 -0
  26. datarobot_genai/drmcp/core/auth.py +165 -0
  27. datarobot_genai/drmcp/core/clients.py +180 -0
  28. datarobot_genai/drmcp/core/config.py +364 -0
  29. datarobot_genai/drmcp/core/config_utils.py +174 -0
  30. datarobot_genai/drmcp/core/constants.py +18 -0
  31. datarobot_genai/drmcp/core/credentials.py +190 -0
  32. datarobot_genai/drmcp/core/dr_mcp_server.py +350 -0
  33. datarobot_genai/drmcp/core/dr_mcp_server_logo.py +136 -0
  34. datarobot_genai/drmcp/core/dynamic_prompts/__init__.py +13 -0
  35. datarobot_genai/drmcp/core/dynamic_prompts/controllers.py +130 -0
  36. datarobot_genai/drmcp/core/dynamic_prompts/dr_lib.py +70 -0
  37. datarobot_genai/drmcp/core/dynamic_prompts/register.py +205 -0
  38. datarobot_genai/drmcp/core/dynamic_prompts/utils.py +33 -0
  39. datarobot_genai/drmcp/core/dynamic_tools/__init__.py +14 -0
  40. datarobot_genai/drmcp/core/dynamic_tools/deployment/__init__.py +0 -0
  41. datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/__init__.py +14 -0
  42. datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/base.py +72 -0
  43. datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/default.py +82 -0
  44. datarobot_genai/drmcp/core/dynamic_tools/deployment/adapters/drum.py +238 -0
  45. datarobot_genai/drmcp/core/dynamic_tools/deployment/config.py +228 -0
  46. datarobot_genai/drmcp/core/dynamic_tools/deployment/controllers.py +63 -0
  47. datarobot_genai/drmcp/core/dynamic_tools/deployment/metadata.py +162 -0
  48. datarobot_genai/drmcp/core/dynamic_tools/deployment/register.py +87 -0
  49. datarobot_genai/drmcp/core/dynamic_tools/deployment/schemas/drum_agentic_fallback_schema.json +36 -0
  50. datarobot_genai/drmcp/core/dynamic_tools/deployment/schemas/drum_prediction_fallback_schema.json +10 -0
  51. datarobot_genai/drmcp/core/dynamic_tools/register.py +254 -0
  52. datarobot_genai/drmcp/core/dynamic_tools/schema.py +532 -0
  53. datarobot_genai/drmcp/core/exceptions.py +25 -0
  54. datarobot_genai/drmcp/core/logging.py +98 -0
  55. datarobot_genai/drmcp/core/mcp_instance.py +515 -0
  56. datarobot_genai/drmcp/core/memory_management/__init__.py +13 -0
  57. datarobot_genai/drmcp/core/memory_management/manager.py +820 -0
  58. datarobot_genai/drmcp/core/memory_management/memory_tools.py +201 -0
  59. datarobot_genai/drmcp/core/routes.py +439 -0
  60. datarobot_genai/drmcp/core/routes_utils.py +30 -0
  61. datarobot_genai/drmcp/core/server_life_cycle.py +107 -0
  62. datarobot_genai/drmcp/core/telemetry.py +424 -0
  63. datarobot_genai/drmcp/core/tool_config.py +111 -0
  64. datarobot_genai/drmcp/core/tool_filter.py +117 -0
  65. datarobot_genai/drmcp/core/utils.py +138 -0
  66. datarobot_genai/drmcp/server.py +19 -0
  67. datarobot_genai/drmcp/test_utils/__init__.py +13 -0
  68. datarobot_genai/drmcp/test_utils/clients/__init__.py +0 -0
  69. datarobot_genai/drmcp/test_utils/clients/anthropic.py +68 -0
  70. datarobot_genai/drmcp/test_utils/clients/base.py +300 -0
  71. datarobot_genai/drmcp/test_utils/clients/dr_gateway.py +58 -0
  72. datarobot_genai/drmcp/test_utils/clients/openai.py +68 -0
  73. datarobot_genai/drmcp/test_utils/elicitation_test_tool.py +89 -0
  74. datarobot_genai/drmcp/test_utils/integration_mcp_server.py +109 -0
  75. datarobot_genai/drmcp/test_utils/mcp_utils_ete.py +133 -0
  76. datarobot_genai/drmcp/test_utils/mcp_utils_integration.py +107 -0
  77. datarobot_genai/drmcp/test_utils/test_interactive.py +205 -0
  78. datarobot_genai/drmcp/test_utils/tool_base_ete.py +220 -0
  79. datarobot_genai/drmcp/test_utils/utils.py +91 -0
  80. datarobot_genai/drmcp/tools/__init__.py +14 -0
  81. datarobot_genai/drmcp/tools/clients/__init__.py +14 -0
  82. datarobot_genai/drmcp/tools/clients/atlassian.py +188 -0
  83. datarobot_genai/drmcp/tools/clients/confluence.py +584 -0
  84. datarobot_genai/drmcp/tools/clients/gdrive.py +832 -0
  85. datarobot_genai/drmcp/tools/clients/jira.py +334 -0
  86. datarobot_genai/drmcp/tools/clients/microsoft_graph.py +479 -0
  87. datarobot_genai/drmcp/tools/clients/s3.py +28 -0
  88. datarobot_genai/drmcp/tools/confluence/__init__.py +14 -0
  89. datarobot_genai/drmcp/tools/confluence/tools.py +321 -0
  90. datarobot_genai/drmcp/tools/gdrive/__init__.py +0 -0
  91. datarobot_genai/drmcp/tools/gdrive/tools.py +347 -0
  92. datarobot_genai/drmcp/tools/jira/__init__.py +14 -0
  93. datarobot_genai/drmcp/tools/jira/tools.py +243 -0
  94. datarobot_genai/drmcp/tools/microsoft_graph/__init__.py +13 -0
  95. datarobot_genai/drmcp/tools/microsoft_graph/tools.py +198 -0
  96. datarobot_genai/drmcp/tools/predictive/__init__.py +27 -0
  97. datarobot_genai/drmcp/tools/predictive/data.py +133 -0
  98. datarobot_genai/drmcp/tools/predictive/deployment.py +91 -0
  99. datarobot_genai/drmcp/tools/predictive/deployment_info.py +392 -0
  100. datarobot_genai/drmcp/tools/predictive/model.py +148 -0
  101. datarobot_genai/drmcp/tools/predictive/predict.py +254 -0
  102. datarobot_genai/drmcp/tools/predictive/predict_realtime.py +307 -0
  103. datarobot_genai/drmcp/tools/predictive/project.py +90 -0
  104. datarobot_genai/drmcp/tools/predictive/training.py +661 -0
  105. datarobot_genai/langgraph/__init__.py +0 -0
  106. datarobot_genai/langgraph/agent.py +341 -0
  107. datarobot_genai/langgraph/mcp.py +73 -0
  108. datarobot_genai/llama_index/__init__.py +16 -0
  109. datarobot_genai/llama_index/agent.py +50 -0
  110. datarobot_genai/llama_index/base.py +299 -0
  111. datarobot_genai/llama_index/mcp.py +79 -0
  112. datarobot_genai/nat/__init__.py +0 -0
  113. datarobot_genai/nat/agent.py +275 -0
  114. datarobot_genai/nat/datarobot_auth_provider.py +110 -0
  115. datarobot_genai/nat/datarobot_llm_clients.py +318 -0
  116. datarobot_genai/nat/datarobot_llm_providers.py +130 -0
  117. datarobot_genai/nat/datarobot_mcp_client.py +266 -0
  118. datarobot_genai/nat/helpers.py +87 -0
  119. datarobot_genai/py.typed +0 -0
  120. datarobot_genai-0.2.31.dist-info/METADATA +145 -0
  121. datarobot_genai-0.2.31.dist-info/RECORD +125 -0
  122. datarobot_genai-0.2.31.dist-info/WHEEL +4 -0
  123. datarobot_genai-0.2.31.dist-info/entry_points.txt +5 -0
  124. datarobot_genai-0.2.31.dist-info/licenses/AUTHORS +2 -0
  125. datarobot_genai-0.2.31.dist-info/licenses/LICENSE +201 -0
@@ -0,0 +1,832 @@
1
+ # Copyright 2025 DataRobot, Inc.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Google Drive API Client and utilities for OAuth."""
16
+
17
+ import io
18
+ import json
19
+ import logging
20
+ import uuid
21
+ from typing import Annotated
22
+ from typing import Any
23
+
24
+ import httpx
25
+ from datarobot.auth.datarobot.exceptions import OAuthServiceClientErr
26
+ from fastmcp.exceptions import ToolError
27
+ from pydantic import BaseModel
28
+ from pydantic import ConfigDict
29
+ from pydantic import Field
30
+ from pypdf import PdfReader
31
+
32
+ from datarobot_genai.drmcp.core.auth import get_access_token
33
+
34
+ logger = logging.getLogger(__name__)
35
+
36
# Metadata fields requested from the Drive API for every file resource.
SUPPORTED_FIELDS = {
    "id",
    "name",
    "size",
    "mimeType",
    "webViewLink",
    "createdTime",
    "modifiedTime",
    "starred",
    "trashed",
}
# Join in sorted order: iterating a set of strings depends on hash randomization,
# so an unsorted join would produce a different ``fields`` parameter per process.
SUPPORTED_FIELDS_STR = ",".join(sorted(SUPPORTED_FIELDS))
# ``fields`` value for list requests: the per-file fields plus the paging token.
DEFAULT_FIELDS = f"nextPageToken,files({SUPPORTED_FIELDS_STR})"
GOOGLE_DRIVE_FOLDER_MIME = "application/vnd.google-apps.folder"
DEFAULT_ORDER = "modifiedTime desc"
# Upper bounds applied by ``GoogleDriveClient.list_files`` to the caller's
# ``page_size`` and ``limit`` arguments.
MAX_PAGE_SIZE = 100
LIMIT = 500

# Export format used when reading each Google Workspace document type.
GOOGLE_WORKSPACE_EXPORT_MIMES: dict[str, str] = {
    "application/vnd.google-apps.document": "text/markdown",
    "application/vnd.google-apps.spreadsheet": "text/csv",
    "application/vnd.google-apps.presentation": "text/plain",
}

# MIME type mappings for content conversion during upload to Google Workspace formats
UPLOAD_CONTENT_TYPES: dict[str, str] = {
    "application/vnd.google-apps.document": "text/plain",
    "application/vnd.google-apps.spreadsheet": "text/csv",
}

# MIME type prefixes treated as binary (not readable as text) by the client.
BINARY_MIME_PREFIXES = (
    "image/",
    "audio/",
    "video/",
    "application/zip",
    "application/octet-stream",
    "application/vnd.google-apps.drawing",
)

PDF_MIME_TYPE = "application/pdf"
76
+
77
+
78
async def get_gdrive_access_token() -> str | ToolError:
    """
    Fetch the OAuth access token for the "google" provider.

    Failures are reported by *returning* a ``ToolError`` instance instead of
    raising, so callers must check the type of the result.

    Returns
    -------
    The access token string, or a ToolError describing why no token is available.

    Example:
        ```python
        token = await get_gdrive_access_token()
        if isinstance(token, ToolError):
            return token  # propagate the failure
        # token is a str here
        ```
    """
    try:
        token = await get_access_token("google")
        if token:
            return token
        # A falsy token means no usable token was returned.
        logger.warning("Empty access token received")
        return ToolError("Received empty access token. Please complete the OAuth flow.")
    except OAuthServiceClientErr as exc:
        logger.error(f"OAuth client error: {exc}", exc_info=True)
        return ToolError(
            "Could not obtain access token for Google. Make sure the OAuth "
            "permission was granted for the application to act on your behalf."
        )
    except Exception as exc:
        # Catch-all boundary: any other failure is logged and converted to a ToolError.
        logger.error(f"Unexpected error obtaining access token: {exc}", exc_info=True)
        return ToolError("An unexpected error occurred while obtaining access token for Google.")
110
+
111
+
112
class GoogleDriveError(Exception):
    """Error raised by the Google Drive client for failed or invalid operations."""

    def __init__(self, message: str) -> None:
        # Hand the message to Exception so ``str(err)`` and ``err.args`` expose it.
        Exception.__init__(self, message)
117
+
118
+
119
# Scalar value types used for the query-parameter dict built in ``_list_files``.
PrimitiveData = str | int | float | bool | None
120
+
121
+
122
class GoogleDriveFile(BaseModel):
    """Metadata describing one Google Drive file or folder.

    Aliases are the camelCase keys used in Drive API payloads;
    ``populate_by_name`` allows construction with the snake_case names too.
    """

    id: str
    name: str
    mime_type: Annotated[str, Field(alias="mimeType")]
    size: int | None = None
    web_view_link: Annotated[str | None, Field(alias="webViewLink")] = None
    created_time: Annotated[str | None, Field(alias="createdTime")] = None
    modified_time: Annotated[str | None, Field(alias="modifiedTime")] = None
    starred: bool | None = None
    trashed: bool | None = None

    model_config = ConfigDict(populate_by_name=True)

    @classmethod
    def from_api_response(cls, data: dict[str, Any]) -> "GoogleDriveFile":
        """Build a GoogleDriveFile from a Drive API file resource dict.

        Missing ``id``/``name``/``mimeType`` fall back to ``"Unknown"``; a
        missing or falsy ``size`` becomes ``None``, otherwise it is cast to int.
        """
        raw_size = data.get("size")
        values: dict[str, Any] = {
            "id": data.get("id", "Unknown"),
            "name": data.get("name", "Unknown"),
            "mime_type": data.get("mimeType", "Unknown"),
            "size": int(raw_size) if raw_size else None,
            "web_view_link": data.get("webViewLink"),
            "created_time": data.get("createdTime"),
            "modified_time": data.get("modifiedTime"),
            "starred": data.get("starred"),
            "trashed": data.get("trashed"),
        }
        return cls(**values)

    def as_flat_dict(self) -> dict[str, Any]:
        """Return the file as a flat dict keyed by Drive API field names.

        ``id``, ``name`` and ``mimeType`` are always present; the remaining
        keys are included only when their value is not ``None``.
        """
        flat: dict[str, Any] = {
            "id": self.id,
            "name": self.name,
            "mimeType": self.mime_type,
        }
        optional_items = (
            ("size", self.size),
            ("webViewLink", self.web_view_link),
            ("createdTime", self.created_time),
            ("modifiedTime", self.modified_time),
            ("starred", self.starred),
            ("trashed", self.trashed),
        )
        flat.update({key: value for key, value in optional_items if value is not None})
        return flat
172
+
173
+
174
class PaginatedResult(BaseModel):
    """Result of a paginated API call."""

    # Files returned by the call.
    files: list[GoogleDriveFile]
    # Token for requesting the following page; None when there is none.
    next_page_token: str | None = None
179
+
180
+
181
class GoogleDriveFileContent(BaseModel):
    """Text content of a Google Drive file plus identifying metadata."""

    id: str
    name: str
    # MIME type of ``content`` as returned to the caller.
    mime_type: str
    content: str
    # MIME type the file has in Drive, before any export/conversion.
    original_mime_type: str
    was_exported: bool = False
    size: int | None = None
    web_view_link: str | None = None

    def as_flat_dict(self) -> dict[str, Any]:
        """Return the content record as a flat dict with camelCase keys.

        ``size`` and ``webViewLink`` are included only when not ``None``.
        """
        flat: dict[str, Any] = {
            "id": self.id,
            "name": self.name,
            "mimeType": self.mime_type,
            "content": self.content,
            "originalMimeType": self.original_mime_type,
            "wasExported": self.was_exported,
        }
        for key, value in (("size", self.size), ("webViewLink", self.web_view_link)):
            if value is not None:
                flat[key] = value
        return flat
208
+
209
+
210
+ class GoogleDriveClient:
211
+ """Client for interacting with Google Drive API."""
212
+
213
+ def __init__(self, access_token: str) -> None:
214
+ self._client = httpx.AsyncClient(
215
+ base_url="https://www.googleapis.com/drive/v3/files",
216
+ headers={"Authorization": f"Bearer {access_token}"},
217
+ timeout=30.0,
218
+ )
219
+
220
+ async def list_files(
221
+ self,
222
+ page_size: int,
223
+ limit: int,
224
+ page_token: str | None = None,
225
+ query: str | None = None,
226
+ folder_id: str | None = None,
227
+ recursive: bool = False,
228
+ ) -> PaginatedResult:
229
+ """
230
+ List files from Google Drive.
231
+
232
+ It's public API for GoogleDriveClient.
233
+
234
+ Args:
235
+ page_size: Number of files to return per 1 gdrive api request.
236
+ limit: Maximum number of files to return.
237
+ page_token: Optional token (specific for gdrive api) allowing to query next page.
238
+ query: Optional query to filter results.
239
+ If not provided it'll list all authorized user files.
240
+ If the query doesn't contain operators (contains, =, etc.), it will be treated as
241
+ a name search: "name contains '{query}'".
242
+ folder_id: The ID of a specific folder to list or search within.
243
+ If omitted, searches the entire Drive.
244
+ recursive: If True, searches all subfolders.
245
+ If False and folder_id is provided, only lists immediate children.
246
+
247
+ Returns
248
+ -------
249
+ List of Google Drive files.
250
+ """
251
+ if page_size <= 0:
252
+ raise GoogleDriveError("Error: page size must be positive.")
253
+ if limit <= 0:
254
+ raise GoogleDriveError("Error: limit must be positive.")
255
+ if limit < page_size:
256
+ raise GoogleDriveError("Error: limit must be bigger than or equal to page size.")
257
+ if limit % page_size != 0:
258
+ raise GoogleDriveError("Error: limit must be multiplication of page size.")
259
+
260
+ page_size = min(page_size, MAX_PAGE_SIZE)
261
+ limit = min(limit, LIMIT)
262
+ formatted_query = self._build_query(query, folder_id)
263
+
264
+ if not recursive or not folder_id:
265
+ files, next_token = await self._fetch_paginated(
266
+ page_size=page_size,
267
+ limit=limit,
268
+ page_token=page_token,
269
+ query=formatted_query,
270
+ )
271
+ return PaginatedResult(files=files, next_page_token=next_token)
272
+
273
+ files = await self._fetch_recursive(
274
+ root_folder_id=folder_id,
275
+ base_query=query,
276
+ page_size=page_size,
277
+ limit=limit,
278
+ )
279
+
280
+ return PaginatedResult(files=files, next_page_token=page_token)
281
+
282
+ async def _fetch_paginated(
283
+ self,
284
+ page_size: int,
285
+ limit: int,
286
+ page_token: str | None,
287
+ query: str | None,
288
+ ) -> tuple[list[GoogleDriveFile], str | None]:
289
+ fetched = 0
290
+ files: list[GoogleDriveFile] = []
291
+ next_page_token = page_token
292
+
293
+ while fetched < limit:
294
+ data = await self._list_files(
295
+ page_size=page_size,
296
+ page_token=next_page_token,
297
+ query=query,
298
+ )
299
+
300
+ files.extend(data.files)
301
+ fetched += len(data.files)
302
+ next_page_token = data.next_page_token
303
+
304
+ if not next_page_token:
305
+ break
306
+
307
+ return files, next_page_token
308
+
309
+ async def _fetch_recursive(
310
+ self,
311
+ root_folder_id: str,
312
+ base_query: str | None,
313
+ page_size: int,
314
+ limit: int,
315
+ ) -> list[GoogleDriveFile]:
316
+ collected: list[GoogleDriveFile] = []
317
+ folders_to_visit: list[str] = [root_folder_id]
318
+
319
+ while folders_to_visit and len(collected) < limit:
320
+ current_folder = folders_to_visit.pop(0)
321
+
322
+ query = self._build_query(base_query, current_folder)
323
+
324
+ files, _ = await self._fetch_paginated(
325
+ page_size=page_size,
326
+ limit=limit - len(collected),
327
+ page_token=None,
328
+ query=query,
329
+ )
330
+
331
+ for file in files:
332
+ collected.append(file)
333
+
334
+ if file.mime_type == GOOGLE_DRIVE_FOLDER_MIME:
335
+ folders_to_visit.append(file.id)
336
+
337
+ if len(collected) >= limit:
338
+ break
339
+
340
+ return collected
341
+
342
+ async def _list_files(
343
+ self,
344
+ page_size: int,
345
+ page_token: str | None = None,
346
+ query: str | None = None,
347
+ ) -> PaginatedResult:
348
+ """Fetch a page of files from Google Drive."""
349
+ params: dict[str, PrimitiveData] = {
350
+ "pageSize": page_size,
351
+ "fields": DEFAULT_FIELDS,
352
+ "orderBy": DEFAULT_ORDER,
353
+ }
354
+ if page_token:
355
+ params["pageToken"] = page_token
356
+ if query:
357
+ params["q"] = query
358
+
359
+ response = await self._client.get(url="/", params=params)
360
+ response.raise_for_status()
361
+ data = response.json()
362
+
363
+ files = [
364
+ GoogleDriveFile.from_api_response(file_data) for file_data in data.get("files", [])
365
+ ]
366
+ next_page_token = data.get("nextPageToken")
367
+ return PaginatedResult(files=files, next_page_token=next_page_token)
368
+
369
+ def _build_query(self, query: str | None, folder_id: str | None) -> str | None:
370
+ """Build Google Drive API query.
371
+
372
+ Args:
373
+ query: Optional search query string (e.g., "name contains 'report'"").
374
+ If the query doesn't contain operators (contains, =, etc.), it will be treated as
375
+ a name search: "name contains '{query}'".
376
+ folder_id: Optional folder id.
377
+ If provided it'll narrow query to search/list only in given folder.
378
+
379
+ Returns
380
+ -------
381
+ Correctly builded query (if provided)
382
+ """
383
+ base_query = self._get_formatted_query(query)
384
+
385
+ if base_query:
386
+ # Case #1 -- Some query provided and contains in parents (gdrive "folder id")
387
+ if "in parents" in base_query and folder_id:
388
+ logger.debug(
389
+ "In-parents (parent folder) already used in query. "
390
+ "Omiting folder_id argument. "
391
+ f"Query: {base_query} | FolderId: {folder_id}"
392
+ )
393
+ return base_query
394
+ # Case #2 -- Some query provided without "in parents" and folder id provided.
395
+ elif folder_id:
396
+ return f"{base_query} and '{folder_id}' in parents"
397
+ # Case #3 -- Query provided without "in parents" and no folder id.
398
+ else:
399
+ return base_query
400
+
401
+ # Case #4 -- Base query is null but folder id provided
402
+ if folder_id:
403
+ return f"'{folder_id}' in parents"
404
+
405
+ # Case #5 -- Neither query not folder provided
406
+ return None
407
+
408
+ @staticmethod
409
+ def _get_formatted_query(query: str | None) -> str | None:
410
+ """Get formatted Google Drive API query.
411
+
412
+ Args:
413
+ query: Optional search query string (e.g., "name contains 'report'"").
414
+ If the query doesn't contain operators (contains, =, etc.), it will be treated as
415
+ a name search: "name contains '{query}'".
416
+
417
+ Returns
418
+ -------
419
+ Correctly formatted query (if provided)
420
+ """
421
+ if not query:
422
+ return None
423
+
424
+ # If query doesn't look like a formatted query (no operators), format it as a name search
425
+ # Check if query already has Google Drive API operators
426
+ has_operator = any(
427
+ op in query for op in [" contains ", "=", "!=", " in ", " and ", " or ", " not "]
428
+ )
429
+ formatted_query = query
430
+ if not has_operator and query.strip():
431
+ # Simple text search - format as name contains query
432
+ # Escape backslashes first, then single quotes for Google Drive API
433
+ escaped_query = query.replace("\\", "\\\\").replace("'", "\\'")
434
+ formatted_query = f"name contains '{escaped_query}'"
435
+ logger.debug(f"Auto-formatted query '{query}' to '{formatted_query}'")
436
+ return formatted_query
437
+
438
+ @staticmethod
439
+ def _is_binary_mime_type(mime_type: str) -> bool:
440
+ """Check if MIME type indicates binary content that's not useful for LLM consumption.
441
+
442
+ Args:
443
+ mime_type: The MIME type to check.
444
+
445
+ Returns
446
+ -------
447
+ True if the MIME type is considered binary, False otherwise.
448
+ """
449
+ return any(mime_type.startswith(prefix) for prefix in BINARY_MIME_PREFIXES)
450
+
451
+ async def get_file_metadata(self, file_id: str) -> GoogleDriveFile:
452
+ """Get file metadata from Google Drive.
453
+
454
+ Args:
455
+ file_id: The ID of the file to get metadata for.
456
+
457
+ Returns
458
+ -------
459
+ GoogleDriveFile with file metadata.
460
+
461
+ Raises
462
+ ------
463
+ GoogleDriveError: If the file is not found or access is denied.
464
+ """
465
+ params = {"fields": SUPPORTED_FIELDS_STR}
466
+ response = await self._client.get(f"/{file_id}", params=params)
467
+
468
+ if response.status_code == 404:
469
+ raise GoogleDriveError(f"File with ID '{file_id}' not found.")
470
+ if response.status_code == 403:
471
+ raise GoogleDriveError(f"Permission denied: you don't have access to file '{file_id}'.")
472
+ if response.status_code == 429:
473
+ raise GoogleDriveError("Rate limit exceeded. Please try again later.")
474
+
475
+ response.raise_for_status()
476
+ return GoogleDriveFile.from_api_response(response.json())
477
+
478
+ async def update_file_metadata(
479
+ self,
480
+ file_id: str,
481
+ new_name: str | None = None,
482
+ starred: bool | None = None,
483
+ trashed: bool | None = None,
484
+ ) -> GoogleDriveFile:
485
+ """Update file metadata in Google Drive.
486
+
487
+ Args:
488
+ file_id: The ID of the file to update.
489
+ new_name: A new name to rename the file. Must not be empty or whitespace.
490
+ starred: Set to True to star the file or False to unstar it.
491
+ trashed: Set to True to trash the file or False to restore it.
492
+
493
+ Returns
494
+ -------
495
+ GoogleDriveFile with updated metadata.
496
+
497
+ Raises
498
+ ------
499
+ GoogleDriveError: If no update fields are provided, file is not found,
500
+ access is denied, or the request is invalid.
501
+ """
502
+ if new_name is None and starred is None and trashed is None:
503
+ raise GoogleDriveError(
504
+ "At least one of new_name, starred, or trashed must be provided."
505
+ )
506
+
507
+ if new_name is not None and not new_name.strip():
508
+ raise GoogleDriveError("new_name cannot be empty or whitespace.")
509
+
510
+ body: dict[str, Any] = {}
511
+ if new_name is not None:
512
+ body["name"] = new_name
513
+ if starred is not None:
514
+ body["starred"] = starred
515
+ if trashed is not None:
516
+ body["trashed"] = trashed
517
+
518
+ response = await self._client.patch(
519
+ f"/{file_id}",
520
+ json=body,
521
+ params={"fields": SUPPORTED_FIELDS_STR, "supportsAllDrives": "true"},
522
+ )
523
+
524
+ if response.status_code == 404:
525
+ raise GoogleDriveError(f"File with ID '{file_id}' not found.")
526
+ if response.status_code == 403:
527
+ raise GoogleDriveError(
528
+ f"Permission denied: you don't have permission to update file '{file_id}'."
529
+ )
530
+ if response.status_code == 400:
531
+ raise GoogleDriveError("Bad request: invalid parameters for file update.")
532
+ if response.status_code == 429:
533
+ raise GoogleDriveError("Rate limit exceeded. Please try again later.")
534
+
535
+ response.raise_for_status()
536
+ return GoogleDriveFile.from_api_response(response.json())
537
+
538
+ async def _export_workspace_file(self, file_id: str, export_mime_type: str) -> str:
539
+ """Export a Google Workspace file to the specified format.
540
+
541
+ Args:
542
+ file_id: The ID of the Google Workspace file.
543
+ export_mime_type: The MIME type to export to (e.g., 'text/markdown').
544
+
545
+ Returns
546
+ -------
547
+ The exported content as a string.
548
+
549
+ Raises
550
+ ------
551
+ GoogleDriveError: If export fails.
552
+ """
553
+ response = await self._client.get(
554
+ f"/{file_id}/export",
555
+ params={"mimeType": export_mime_type},
556
+ )
557
+
558
+ if response.status_code == 404:
559
+ raise GoogleDriveError(f"File with ID '{file_id}' not found.")
560
+ if response.status_code == 403:
561
+ raise GoogleDriveError(
562
+ f"Permission denied: you don't have access to export file '{file_id}'."
563
+ )
564
+ if response.status_code == 400:
565
+ raise GoogleDriveError(
566
+ f"Cannot export file '{file_id}' to format '{export_mime_type}'. "
567
+ "The file may not support this export format."
568
+ )
569
+ if response.status_code == 429:
570
+ raise GoogleDriveError("Rate limit exceeded. Please try again later.")
571
+
572
+ response.raise_for_status()
573
+ return response.text
574
+
575
+ async def _download_file(self, file_id: str) -> str:
576
+ """Download a regular file's content from Google Drive as text."""
577
+ content = await self._download_file_bytes(file_id)
578
+ return content.decode("utf-8")
579
+
580
+ async def _download_file_bytes(self, file_id: str) -> bytes:
581
+ """Download a file's content as bytes from Google Drive.
582
+
583
+ Args:
584
+ file_id: The ID of the file to download.
585
+
586
+ Returns
587
+ -------
588
+ The file content as bytes.
589
+
590
+ Raises
591
+ ------
592
+ GoogleDriveError: If download fails.
593
+ """
594
+ response = await self._client.get(
595
+ f"/{file_id}",
596
+ params={"alt": "media"},
597
+ )
598
+
599
+ if response.status_code == 404:
600
+ raise GoogleDriveError(f"File with ID '{file_id}' not found.")
601
+ if response.status_code == 403:
602
+ raise GoogleDriveError(
603
+ f"Permission denied: you don't have access to download file '{file_id}'."
604
+ )
605
+ if response.status_code == 429:
606
+ raise GoogleDriveError("Rate limit exceeded. Please try again later.")
607
+
608
+ response.raise_for_status()
609
+ return response.content
610
+
611
+ def _extract_text_from_pdf(self, pdf_bytes: bytes) -> str:
612
+ """Extract text from PDF bytes using pypdf.
613
+
614
+ Args:
615
+ pdf_bytes: The PDF file content as bytes.
616
+
617
+ Returns
618
+ -------
619
+ Extracted text from the PDF.
620
+
621
+ Raises
622
+ ------
623
+ GoogleDriveError: If PDF text extraction fails.
624
+ """
625
+ try:
626
+ reader = PdfReader(io.BytesIO(pdf_bytes))
627
+ text_parts = []
628
+ for page in reader.pages:
629
+ page_text = page.extract_text()
630
+ if page_text:
631
+ text_parts.append(page_text)
632
+ return "\n\n".join(text_parts)
633
+ except Exception as e:
634
+ raise GoogleDriveError(f"Failed to extract text from PDF: {e}")
635
+
636
+ async def read_file_content(
637
+ self, file_id: str, target_format: str | None = None
638
+ ) -> GoogleDriveFileContent:
639
+ """Read the content of a file from Google Drive.
640
+
641
+ Google Workspace files (Docs, Sheets, Slides) are automatically exported to
642
+ LLM-readable formats:
643
+ - Google Docs -> Markdown (text/markdown)
644
+ - Google Sheets -> CSV (text/csv)
645
+ - Google Slides -> Plain text (text/plain)
646
+ - PDF files -> Extracted text (text/plain)
647
+
648
+ Regular text files are downloaded directly.
649
+ Binary files (images, videos, etc.) will raise an error.
650
+
651
+ Args:
652
+ file_id: The ID of the file to read.
653
+ target_format: Optional MIME type to export Google Workspace files to.
654
+ If not specified, uses sensible defaults. Has no effect on non-Workspace files.
655
+
656
+ Returns
657
+ -------
658
+ GoogleDriveFileContent with the file content and metadata.
659
+
660
+ Raises
661
+ ------
662
+ GoogleDriveError: If the file cannot be read (not found, permission denied,
663
+ binary file, etc.).
664
+ """
665
+ file_metadata = await self.get_file_metadata(file_id)
666
+ original_mime_type = file_metadata.mime_type
667
+
668
+ if self._is_binary_mime_type(original_mime_type):
669
+ raise GoogleDriveError(
670
+ f"Binary files are not supported for reading. "
671
+ f"File '{file_metadata.name}' has MIME type '{original_mime_type}'."
672
+ )
673
+
674
+ if original_mime_type == GOOGLE_DRIVE_FOLDER_MIME:
675
+ raise GoogleDriveError(
676
+ f"Cannot read content of a folder. '{file_metadata.name}' is a folder, not a file."
677
+ )
678
+
679
+ was_exported = False
680
+ if original_mime_type in GOOGLE_WORKSPACE_EXPORT_MIMES:
681
+ export_mime = target_format or GOOGLE_WORKSPACE_EXPORT_MIMES[original_mime_type]
682
+ content = await self._export_workspace_file(file_id, export_mime)
683
+ result_mime_type = export_mime
684
+ was_exported = True
685
+ elif original_mime_type == PDF_MIME_TYPE:
686
+ pdf_bytes = await self._download_file_bytes(file_id)
687
+ content = self._extract_text_from_pdf(pdf_bytes)
688
+ result_mime_type = "text/plain"
689
+ was_exported = True
690
+ else:
691
+ content = await self._download_file(file_id)
692
+ result_mime_type = original_mime_type
693
+
694
+ return GoogleDriveFileContent(
695
+ id=file_metadata.id,
696
+ name=file_metadata.name,
697
+ mime_type=result_mime_type,
698
+ content=content,
699
+ original_mime_type=original_mime_type,
700
+ was_exported=was_exported,
701
+ size=file_metadata.size,
702
+ web_view_link=file_metadata.web_view_link,
703
+ )
704
+
705
+ async def create_file(
706
+ self,
707
+ name: str,
708
+ mime_type: str,
709
+ parent_id: str | None = None,
710
+ initial_content: str | None = None,
711
+ ) -> GoogleDriveFile:
712
+ """Create a new file or folder in Google Drive.
713
+
714
+ Creates a new file with the specified name and MIME type. Optionally places
715
+ it in a specific folder and populates it with initial content.
716
+
717
+ For Google Workspace files (Docs, Sheets), the Drive API automatically
718
+ converts plain text content to the appropriate format.
719
+
720
+ Args:
721
+ name: The name for the new file or folder.
722
+ mime_type: The MIME type of the file (e.g., 'text/plain',
723
+ 'application/vnd.google-apps.document',
724
+ 'application/vnd.google-apps.folder').
725
+ parent_id: Optional ID of the parent folder. If not specified,
726
+ the file is created in the root of the user's Drive.
727
+ initial_content: Optional text content to populate the file.
728
+ Ignored for folders.
729
+
730
+ Returns
731
+ -------
732
+ GoogleDriveFile with the created file's metadata.
733
+
734
+ Raises
735
+ ------
736
+ GoogleDriveError: If file creation fails (permission denied,
737
+ parent not found, rate limited, etc.).
738
+ """
739
+ metadata: dict[str, Any] = {
740
+ "name": name,
741
+ "mimeType": mime_type,
742
+ }
743
+ if parent_id:
744
+ metadata["parents"] = [parent_id]
745
+
746
+ if mime_type == GOOGLE_DRIVE_FOLDER_MIME or not initial_content:
747
+ response = await self._client.post(
748
+ "/",
749
+ json=metadata,
750
+ params={"fields": SUPPORTED_FIELDS_STR, "supportsAllDrives": "true"},
751
+ )
752
+ else:
753
+ response = await self._create_file_with_content(
754
+ metadata=metadata,
755
+ content=initial_content,
756
+ target_mime_type=mime_type,
757
+ )
758
+
759
+ if response.status_code == 404:
760
+ raise GoogleDriveError(
761
+ f"Parent folder with ID '{parent_id}' not found."
762
+ if parent_id
763
+ else "Resource not found."
764
+ )
765
+ if response.status_code == 403:
766
+ raise GoogleDriveError(
767
+ "Permission denied: you don't have permission to create files in this location."
768
+ )
769
+ if response.status_code == 400:
770
+ raise GoogleDriveError(
771
+ f"Bad request: invalid parameters for file creation. "
772
+ f"Check that the MIME type '{mime_type}' is valid."
773
+ )
774
+ if response.status_code == 429:
775
+ raise GoogleDriveError("Rate limit exceeded. Please try again later.")
776
+
777
+ response.raise_for_status()
778
+ return GoogleDriveFile.from_api_response(response.json())
779
+
780
+ async def _create_file_with_content(
781
+ self,
782
+ metadata: dict[str, Any],
783
+ content: str,
784
+ target_mime_type: str,
785
+ ) -> httpx.Response:
786
+ """Create a file with content using multipart upload.
787
+
788
+ Args:
789
+ metadata: File metadata dictionary.
790
+ content: Text content for the file.
791
+ target_mime_type: The target MIME type for the file.
792
+
793
+ Returns
794
+ -------
795
+ The HTTP response from the upload.
796
+ """
797
+ content_type = UPLOAD_CONTENT_TYPES.get(target_mime_type, "text/plain")
798
+ boundary = f"===gdrive_boundary_{uuid.uuid4().hex}==="
799
+ body_parts = [
800
+ f"--{boundary}",
801
+ "Content-Type: application/json; charset=UTF-8",
802
+ "",
803
+ json.dumps(metadata),
804
+ f"--{boundary}",
805
+ f"Content-Type: {content_type}",
806
+ "",
807
+ content,
808
+ f"--{boundary}--",
809
+ ]
810
+ body = "\r\n".join(body_parts)
811
+
812
+ upload_url = "https://www.googleapis.com/upload/drive/v3/files"
813
+ return await self._client.post(
814
+ upload_url,
815
+ content=body.encode("utf-8"),
816
+ params={
817
+ "uploadType": "multipart",
818
+ "fields": SUPPORTED_FIELDS_STR,
819
+ "supportsAllDrives": "true",
820
+ },
821
+ headers={"Content-Type": f"multipart/related; boundary={boundary}"},
822
+ )
823
+
824
+ async def __aenter__(self) -> "GoogleDriveClient":
825
+ """Async context manager entry."""
826
+ return self
827
+
828
+ async def __aexit__(
829
+ self, exc_type: type[BaseException] | None, exc_val: BaseException | None, exc_tb: Any
830
+ ) -> None:
831
+ """Async context manager exit."""
832
+ await self._client.aclose()