arcade-google 0.1.5__py3-none-any.whl → 1.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,70 +1,194 @@
1
- from typing import Annotated, Optional
1
+ from typing import Annotated, Any
2
2
 
3
- from arcade.sdk import ToolContext, tool
4
- from arcade.sdk.auth import Google
5
- from arcade_google.tools.utils import build_drive_service, remove_none_values
3
+ from arcade_tdk import ToolContext, tool
4
+ from arcade_tdk.auth import Google
5
+ from googleapiclient.errors import HttpError
6
6
 
7
- from .models import Corpora, OrderBy
7
+ from arcade_google.doc_to_html import convert_document_to_html
8
+ from arcade_google.doc_to_markdown import convert_document_to_markdown
9
+ from arcade_google.models import DocumentFormat, OrderBy
10
+ from arcade_google.tools import get_document_by_id
11
+ from arcade_google.utils import (
12
+ build_drive_service,
13
+ build_file_tree,
14
+ build_file_tree_request_params,
15
+ build_files_list_params,
16
+ )
17
+
18
+
19
@tool(
    requires_auth=Google(
        scopes=["https://www.googleapis.com/auth/drive.file"],
    )
)
async def get_file_tree_structure(
    context: ToolContext,
    include_shared_drives: Annotated[
        bool, "Whether to include shared drives in the file tree structure. Defaults to False."
    ] = False,
    restrict_to_shared_drive_id: Annotated[
        str | None,
        "If provided, only include files from this shared drive in the file tree structure. "
        "Defaults to None, which will include files and folders from all drives.",
    ] = None,
    include_organization_domain_documents: Annotated[
        bool,
        "Whether to include documents from the organization's domain. This is applicable to admin "
        "users who have permissions to view organization-wide documents in a Google Workspace "
        "account. Defaults to False.",
    ] = False,
    order_by: Annotated[
        list[OrderBy] | None,
        "Sort order. Defaults to listing the most recently modified documents first",
    ] = None,
    limit: Annotated[
        int | None,
        "The number of files and folders to list. Defaults to None, "
        "which will list all files and folders.",
    ] = None,
) -> Annotated[
    dict,
    "A dictionary containing the file/folder tree structure in the user's Google Drive",
]:
    """
    Get the file/folder tree structure of the user's Google Drive.
    """
    # NOTE(review): the docstring and the Annotated descriptions above are presumably surfaced
    # by the `tool` decorator as the tool/parameter descriptions, so they are left verbatim.

    # Fall back to an empty token when the context carries no authorization.
    auth = context.authorization
    token = auth.token if auth and auth.token else ""
    service = build_drive_service(token)

    # The first request carries no page token.
    params = build_file_tree_request_params(
        order_by,
        None,
        limit,
        include_shared_drives,
        restrict_to_shared_drive_id,
        include_organization_domain_documents,
    )

    # Walk every result page, indexing the returned entries by file id.
    # NOTE(review): `limit` is only forwarded to the request-params helper; this loop itself
    # keeps going until the API stops returning `nextPageToken` — confirm that is intended.
    files_by_id: dict = {}
    while True:
        response = service.files().list(**params).execute()
        files_by_id.update({entry["id"]: entry for entry in response.get("files", [])})

        next_token = response.get("nextPageToken")
        params["pageToken"] = next_token
        if next_token is None:
            break

    if not files_by_id:
        return {"drives": []}

    # `build_file_tree` returns a mapping of drive key -> children; the key "My Drive" is
    # treated specially below, every other key is used as a shared-drive id.
    tree = build_file_tree(files_by_id)

    drives = []
    for drive_id, children in tree.items():
        if drive_id == "My Drive":
            drives.append({"name": "My Drive", "children": children})
            continue

        # Best effort: a failed name lookup must not drop the drive from the result, so the
        # HTTP error is folded into a placeholder name instead of being raised.
        try:
            details = service.drives().get(driveId=drive_id).execute()
        except HttpError as e:
            drive_name = (
                f"Shared Drive (name unavailable: 'HttpError {e.status_code}: {e.reason}')"
            )
        else:
            drive_name = details.get("name", "Shared Drive (name unavailable)")

        drives.append({"name": drive_name, "id": drive_id, "children": children})

    return {"drives": drives}
8
110
 
9
111
 
10
112
  # Implements: https://googleapis.github.io/google-api-python-client/docs/dyn/drive_v3.files.html#list
11
113
  # Example `arcade chat` query: `list my 5 most recently modified documents`
12
- # TODO: Support query with natural language. Currently, the tool expects a fully formed query string as input with the syntax defined here: https://developers.google.com/drive/api/guides/search-files
114
+ # TODO: Support query with natural language. Currently, the tool expects a fully formed query
115
+ # string as input with the syntax defined here: https://developers.google.com/drive/api/guides/search-files
13
116
  @tool(
14
117
  requires_auth=Google(
15
- scopes=["https://www.googleapis.com/auth/drive.readonly"],
118
+ scopes=["https://www.googleapis.com/auth/drive.file"],
16
119
  )
17
120
  )
18
- async def list_documents(
121
+ async def search_documents(
19
122
  context: ToolContext,
20
- corpora: Annotated[Optional[Corpora], "The source of files to list"] = Corpora.USER,
21
- title_keywords: Annotated[
22
- Optional[list[str]], "Keywords or phrases that must be in the document title"
123
+ document_contains: Annotated[
124
+ list[str] | None,
125
+ "Keywords or phrases that must be in the document title or body. Provide a list of "
126
+ "keywords or phrases if needed.",
127
+ ] = None,
128
+ document_not_contains: Annotated[
129
+ list[str] | None,
130
+ "Keywords or phrases that must NOT be in the document title or body. Provide a list of "
131
+ "keywords or phrases if needed.",
23
132
  ] = None,
133
+ search_only_in_shared_drive_id: Annotated[
134
+ str | None,
135
+ "The ID of the shared drive to restrict the search to. If provided, the search will only "
136
+ "return documents from this drive. Defaults to None, which searches across all drives.",
137
+ ] = None,
138
+ include_shared_drives: Annotated[
139
+ bool,
140
+ "Whether to include documents from shared drives. Defaults to False (searches only in "
141
+ "the user's 'My Drive').",
142
+ ] = False,
143
+ include_organization_domain_documents: Annotated[
144
+ bool,
145
+ "Whether to include documents from the organization's domain. This is applicable to admin "
146
+ "users who have permissions to view organization-wide documents in a Google Workspace "
147
+ "account. Defaults to False.",
148
+ ] = False,
24
149
  order_by: Annotated[
25
- Optional[OrderBy],
150
+ list[OrderBy] | None,
26
151
  "Sort order. Defaults to listing the most recently modified documents first",
27
- ] = OrderBy.MODIFIED_TIME_DESC,
28
- supports_all_drives: Annotated[
29
- Optional[bool],
30
- "Whether the requesting application supports both My Drives and shared drives",
31
- ] = False,
32
- limit: Annotated[Optional[int], "The number of documents to list"] = 50,
152
+ ] = None,
153
+ limit: Annotated[int, "The number of documents to list"] = 50,
154
+ pagination_token: Annotated[
155
+ str | None, "The pagination token to continue a previous request"
156
+ ] = None,
33
157
  ) -> Annotated[
34
158
  dict,
35
- "A dictionary containing 'documents_count' (number of documents returned) and 'documents' (a list of document details including 'kind', 'mimeType', 'id', and 'name' for each document)",
159
+ "A dictionary containing 'documents_count' (number of documents returned) and 'documents' "
160
+ "(a list of document details including 'kind', 'mimeType', 'id', and 'name' for each document)",
36
161
  ]:
37
162
  """
38
- List documents in the user's Google Drive. Excludes documents that are in the trash.
163
+ Searches for documents in the user's Google Drive. Excludes documents that are in the trash.
39
164
  """
165
+ if order_by is None:
166
+ order_by = [OrderBy.MODIFIED_TIME_DESC]
167
+ elif isinstance(order_by, OrderBy):
168
+ order_by = [order_by]
169
+
40
170
  page_size = min(10, limit)
41
- page_token = None # The page token is used for continuing a previous request on the next page
42
- files = []
43
-
44
- service = build_drive_service(context.authorization.token)
45
-
46
- query = "mimeType = 'application/vnd.google-apps.document' and trashed = false"
47
- if title_keywords:
48
- # Escape single quotes in title_keywords
49
- title_keywords = [keyword.replace("'", "\\'") for keyword in title_keywords]
50
- # Only support logically ANDed keywords in query for now
51
- keyword_queries = [f"name contains '{keyword}'" for keyword in title_keywords]
52
- query += " and " + " and ".join(keyword_queries)
53
-
54
- # Prepare the request parameters
55
- params = {
56
- "q": query,
57
- "pageSize": page_size,
58
- "orderBy": order_by.value,
59
- "corpora": corpora.value,
60
- "supportsAllDrives": supports_all_drives,
61
- }
62
- params = remove_none_values(params)
63
-
64
- # Paginate through the results until the limit is reached
171
+ files: list[dict[str, Any]] = []
172
+
173
+ service = build_drive_service(
174
+ context.authorization.token if context.authorization and context.authorization.token else ""
175
+ )
176
+
177
+ params = build_files_list_params(
178
+ mime_type="application/vnd.google-apps.document",
179
+ document_contains=document_contains,
180
+ document_not_contains=document_not_contains,
181
+ page_size=page_size,
182
+ order_by=order_by,
183
+ pagination_token=pagination_token,
184
+ include_shared_drives=include_shared_drives,
185
+ search_only_in_shared_drive_id=search_only_in_shared_drive_id,
186
+ include_organization_domain_documents=include_organization_domain_documents,
187
+ )
188
+
65
189
  while len(files) < limit:
66
- if page_token:
67
- params["pageToken"] = page_token
190
+ if pagination_token:
191
+ params["pageToken"] = pagination_token
68
192
  else:
69
193
  params.pop("pageToken", None)
70
194
 
@@ -72,8 +196,92 @@ async def list_documents(
72
196
  batch = results.get("files", [])
73
197
  files.extend(batch[: limit - len(files)])
74
198
 
75
- page_token = results.get("nextPageToken")
76
- if not page_token or len(batch) < page_size:
199
+ pagination_token = results.get("nextPageToken")
200
+ if not pagination_token or len(batch) < page_size:
77
201
  break
78
202
 
79
203
  return {"documents_count": len(files), "documents": files}
204
+
205
+
206
@tool(
    requires_auth=Google(
        scopes=["https://www.googleapis.com/auth/drive.file"],
    )
)
async def search_and_retrieve_documents(
    context: ToolContext,
    return_format: Annotated[
        DocumentFormat,
        "The format of the document to return. Defaults to Markdown.",
    ] = DocumentFormat.MARKDOWN,
    document_contains: Annotated[
        list[str] | None,
        "Keywords or phrases that must be in the document title or body. Provide a list of "
        "keywords or phrases if needed.",
    ] = None,
    document_not_contains: Annotated[
        list[str] | None,
        "Keywords or phrases that must NOT be in the document title or body. Provide a list of "
        "keywords or phrases if needed.",
    ] = None,
    search_only_in_shared_drive_id: Annotated[
        str | None,
        "The ID of the shared drive to restrict the search to. If provided, the search will only "
        "return documents from this drive. Defaults to None, which searches across all drives.",
    ] = None,
    include_shared_drives: Annotated[
        bool,
        "Whether to include documents from shared drives. Defaults to False (searches only in "
        "the user's 'My Drive').",
    ] = False,
    include_organization_domain_documents: Annotated[
        bool,
        "Whether to include documents from the organization's domain. This is applicable to admin "
        "users who have permissions to view organization-wide documents in a Google Workspace "
        "account. Defaults to False.",
    ] = False,
    order_by: Annotated[
        list[OrderBy] | None,
        "Sort order. Defaults to listing the most recently modified documents first",
    ] = None,
    limit: Annotated[int, "The number of documents to list"] = 50,
    pagination_token: Annotated[
        str | None, "The pagination token to continue a previous request"
    ] = None,
) -> Annotated[
    dict,
    "A dictionary containing 'documents_count' (number of documents returned) and 'documents' "
    "(a list of documents with their content).",
]:
    """
    Searches for documents in the user's Google Drive and returns a list of documents (with text
    content) matching the search criteria. Excludes documents that are in the trash.

    Note: use this tool only when the user prompt requires the documents' content. If the user only
    needs a list of documents, use the `search_documents` tool instead.
    """
    # NOTE(review): the docstring and the Annotated descriptions above are presumably surfaced
    # by the `tool` decorator as the tool/parameter descriptions, so they are left verbatim.

    def _render(raw_document):
        # Convert to the requested format; any other format value passes the document
        # through exactly as `get_document_by_id` returned it.
        if return_format == DocumentFormat.MARKDOWN:
            return convert_document_to_markdown(raw_document)
        if return_format == DocumentFormat.HTML:
            return convert_document_to_html(raw_document)
        return raw_document

    # Step 1: delegate the search itself to `search_documents`, forwarding every filter as-is.
    search_result = await search_documents(
        context=context,
        document_contains=document_contains,
        document_not_contains=document_not_contains,
        search_only_in_shared_drive_id=search_only_in_shared_drive_id,
        include_shared_drives=include_shared_drives,
        include_organization_domain_documents=include_organization_domain_documents,
        order_by=order_by,
        limit=limit,
        pagination_token=pagination_token,
    )

    # Step 2: fetch each hit one at a time (sequentially, in search order) and render it.
    documents = [
        _render(await get_document_by_id(context, document_id=hit["id"]))
        for hit in search_result["documents"]
    ]

    return {"documents_count": len(documents), "documents": documents}
@@ -0,0 +1,54 @@
1
+ import base64
2
+ import json
3
+ from typing import Annotated
4
+
5
+ from arcade_tdk import ToolContext, ToolMetadataKey, tool
6
+ from arcade_tdk.auth import Google
7
+ from arcade_tdk.errors import ToolExecutionError
8
+
9
+
10
@tool(
    requires_auth=Google(),
    requires_metadata=[ToolMetadataKey.CLIENT_ID, ToolMetadataKey.COORDINATOR_URL],
)
def generate_google_file_picker_url(
    context: ToolContext,
) -> Annotated[dict, "Google File Picker URL for user file selection and permission granting"]:
    """Generate a Google File Picker URL for user-driven file selection and authorization.

    This tool generates a URL that directs the end-user to a Google File Picker interface
    where they can select or upload Google Drive files. Users can grant permission to access their
    Drive files, providing a secure and authorized way to interact with their files.

    This is particularly useful when prior tools (e.g., those accessing or modifying
    Google Docs, Google Sheets, etc.) encountered failures due to file non-existence
    (Requested entity was not found) or permission errors. Once the user completes the file
    picker flow, the prior tool can be retried.
    """
    client_id = context.get_metadata(ToolMetadataKey.CLIENT_ID)

    # The app id is the segment of the client id before the first "-".
    # `str.split` never returns an empty list, so validity has to be checked on the
    # client id itself and on the extracted segment rather than on the list of parts.
    app_id = client_id.split("-")[0] if client_id else ""
    if not app_id:
        raise ToolExecutionError(
            message="Invalid Google Client ID",
            developer_message=f"Google Client ID '{client_id}' is not valid",
        )

    # Drop leading/trailing slashes so the path can be appended with a single "/".
    cloud_coordinator_url = context.get_metadata(ToolMetadataKey.COORDINATOR_URL).strip("/")

    # The picker page receives its configuration as URL-safe base64-encoded JSON.
    config = {
        "auth": {
            "client_id": client_id,
            "app_id": app_id,
        },
    }
    config_json = json.dumps(config)
    config_base64 = base64.urlsafe_b64encode(config_json.encode("utf-8")).decode("utf-8")
    url = f"{cloud_coordinator_url}/google/drive_picker?config={config_base64}"

    return {
        "url": url,
        "llm_instructions": (
            "Instruct the user to click the following link to open the Google Drive File Picker. "
            # f-string so the actual link, not the literal text "{url}", reaches the model.
            f"This will allow them to select files and grant access permissions: {url}"
        ),
    }