arcade-google-docs 2.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,17 @@
1
+ from arcade_google_docs.tools import (
2
+ create_blank_document,
3
+ create_document_from_text,
4
+ get_document_by_id,
5
+ insert_text_at_end_of_document,
6
+ search_and_retrieve_documents,
7
+ search_documents,
8
+ )
9
+
10
+ __all__ = [
11
+ "create_blank_document",
12
+ "create_document_from_text",
13
+ "get_document_by_id",
14
+ "insert_text_at_end_of_document",
15
+ "search_and_retrieve_documents",
16
+ "search_documents",
17
+ ]
@@ -0,0 +1,24 @@
1
+ import functools
2
+ from collections.abc import Callable
3
+ from typing import Any
4
+
5
+ from arcade_tdk import ToolContext
6
+ from googleapiclient.errors import HttpError
7
+
8
+ from arcade_google_docs.file_picker import generate_google_file_picker_url
9
+
10
+
11
def with_filepicker_fallback(func: Callable[..., Any]) -> Callable[..., Any]:
    """Wrap an async tool so 403/404 API errors yield a file picker response.

    The wrapped coroutine is awaited with the given arguments. If it raises
    ``HttpError`` whose ``status_code`` is 403 or 404, the error is swallowed and
    the result of ``generate_google_file_picker_url(context)`` is returned in its
    place. Any other ``HttpError`` propagates unchanged.

    Args:
        func: The coroutine function to wrap; its first argument is the tool context.

    Returns:
        The wrapping coroutine function (metadata copied via ``functools.wraps``).
    """
    fallback_statuses = (403, 404)

    @functools.wraps(func)
    async def async_wrapper(context: ToolContext, *args: Any, **kwargs: Any) -> Any:
        try:
            result = await func(context, *args, **kwargs)
        except HttpError as error:
            if error.status_code not in fallback_statuses:
                raise
            return generate_google_file_picker_url(context)
        return result

    return async_wrapper
@@ -0,0 +1,99 @@
1
def convert_document_to_html(document: dict) -> str:
    """Render a Google Docs API document dict as one HTML string.

    The title goes into ``<title>`` and the document ID into a ``<meta>`` tag; both
    are HTML-escaped so characters such as ``<``, ``&`` or ``"`` cannot break the
    surrounding markup. Each entry of ``document["body"]["content"]`` is rendered
    with ``convert_structural_element`` and appended to the body.

    Args:
        document: Dict with ``title``, ``documentId`` and ``body.content`` keys.

    Returns:
        The complete ``<html>...</html>`` string.

    Raises:
        KeyError: If one of the required keys is missing.
        ValueError: Propagated from ``convert_structural_element`` for unknown elements.
    """
    # Function-scope import: the stdlib helper is only needed here.
    from html import escape

    title = escape(str(document["title"]))
    document_id = escape(str(document["documentId"]), quote=True)

    parts = [
        "<html><head>",
        f"<title>{title}</title>",
        f'<meta name="documentId" content="{document_id}">',
        "</head><body>",
    ]
    parts.extend(convert_structural_element(element) for element in document["body"]["content"])
    parts.append("</body></html>")
    return "".join(parts)
12
+
13
+
14
def convert_structural_element(element: dict, wrap_paragraphs: bool = True) -> str:
    """Convert one structural element of a document body to HTML.

    Section breaks and tables of contents render as an empty string. Paragraphs
    are wrapped in the tags chosen by ``get_paragraph_style_tags``; only
    ``textRun`` items contribute text, and a paragraph with no text renders as
    an empty string. Tables are rendered cell by cell (paragraphs inside cells
    are not wrapped) and passed to ``table_list_to_html``.

    Raises:
        ValueError: If the element is none of the recognised kinds.
    """
    if "sectionBreak" in element or "tableOfContents" in element:
        return ""

    if "paragraph" in element:
        paragraph = element["paragraph"]
        opening, closing = get_paragraph_style_tags(
            style=paragraph["paragraphStyle"],
            wrap_paragraphs=wrap_paragraphs,
        )
        text = "".join(
            extract_paragraph_content(run["textRun"])
            for run in paragraph["elements"]
            if "textRun" in run
        )
        if not text:
            return ""
        return f"{opening}{text.strip()}{closing}"

    if "table" in element:
        rendered_rows = []
        for row in element["table"]["tableRows"]:
            rendered_cells = []
            for cell in row["tableCells"]:
                fragments = [
                    convert_structural_element(element=cell_element, wrap_paragraphs=False)
                    for cell_element in cell["content"]
                ]
                rendered_cells.append("".join(fragments))
            rendered_rows.append(rendered_cells)
        return table_list_to_html(rendered_rows)

    raise ValueError(f"Unknown document body element type: {element}")
51
+
52
+
53
def extract_paragraph_content(text_run: dict) -> str:
    """Return the text run's ``content`` with its ``textStyle`` applied as HTML."""
    return apply_text_style(text_run["content"], text_run["textStyle"])
57
+
58
+
59
def apply_text_style(content: str, style: dict) -> str:
    """Apply italic/bold HTML tags to a piece of run text.

    Trailing newlines are dropped and remaining newlines become ``<br>``. When
    both flags are set the bold tag wraps the italic tag.
    """
    text = content.rstrip("\n").replace("\n", "<br>")
    if style.get("italic", False):
        text = f"<i>{text}</i>"
    if style.get("bold", False):
        text = f"<b>{text}</b>"
    return text
69
+
70
+
71
def get_paragraph_style_tags(style: dict, wrap_paragraphs: bool = True) -> tuple[str, str]:
    """Return the (opening, closing) HTML tags for a paragraph style.

    ``TITLE`` maps to ``h1``, ``SUBTITLE`` to ``h2`` and ``HEADING_<n>`` to
    ``h<n>``. Everything else, including a heading whose level is not an
    integer, uses ``<p>`` tags, or no tags at all when ``wrap_paragraphs`` is
    false.
    """
    paragraph_tags = ("<p>", "</p>") if wrap_paragraphs else ("", "")
    named_style = style["namedStyleType"]

    if named_style == "NORMAL_TEXT":
        return paragraph_tags
    if named_style == "TITLE":
        return "<h1>", "</h1>"
    if named_style == "SUBTITLE":
        return "<h2>", "</h2>"
    if not named_style.startswith("HEADING_"):
        return paragraph_tags

    try:
        level = int(named_style.split("_")[1])
    except ValueError:
        return paragraph_tags
    return f"<h{level}>", f"</h{level}>"
87
+
88
+
89
def table_list_to_html(table: list[list[str]]) -> str:
    """Render a list of rows (each a list of cell HTML strings) as an HTML table.

    A single trailing ``<br>`` is removed from each cell before it is wrapped
    in ``<td>``.
    """

    def render_cell(cell: str) -> str:
        trimmed = cell[:-4] if cell.endswith("<br>") else cell
        return f"<td>{trimmed}</td>"

    rendered_rows = (
        "<tr>" + "".join(render_cell(cell) for cell in row) + "</tr>" for row in table
    )
    return "<table>" + "".join(rendered_rows) + "</table>"
@@ -0,0 +1,64 @@
1
+ import arcade_google_docs.doc_to_html as doc_to_html
2
+
3
+
4
def convert_document_to_markdown(document: dict) -> str:
    """Render a Google Docs API document dict as Markdown.

    The output starts with a front-matter block holding the title and document
    ID, followed by every element of ``document["body"]["content"]`` converted
    with ``convert_structural_element``.
    """
    front_matter = (
        f"---\ntitle: {document['title']}\ndocumentId: {document['documentId']}\n---\n"
    )
    body = "".join(convert_structural_element(element) for element in document["body"]["content"])
    return front_matter + body
9
+
10
+
11
def convert_structural_element(element: dict) -> str:
    """Convert one structural element of a document body to Markdown.

    Section breaks and tables of contents render as an empty string. For a
    paragraph, the text of all ``textRun`` items is concatenated and the heading
    prefix from ``get_paragraph_style_prepend_str`` is emitted once in front of
    it. Tables are delegated to the HTML converter, so they appear as inline
    HTML.

    Raises:
        ValueError: If the element is none of the recognised kinds.
    """
    if "sectionBreak" in element or "tableOfContents" in element:
        return ""

    if "paragraph" in element:
        paragraph = element["paragraph"]
        prefix = get_paragraph_style_prepend_str(paragraph["paragraphStyle"])
        text = "".join(
            extract_paragraph_content(item["textRun"])
            for item in paragraph["elements"]
            if "textRun" in item
        )
        # The prefix belongs to the paragraph, not to each run: a heading made of
        # several differently styled runs must start with a single "#" marker.
        return f"{prefix}{text}" if text else ""

    if "table" in element:
        return doc_to_html.convert_structural_element(element)

    raise ValueError(f"Unknown document body element type: {element}")
30
+
31
+
32
def extract_paragraph_content(text_run: dict) -> str:
    """Return the text run's ``content`` with its ``textStyle`` applied as Markdown."""
    return apply_text_style(text_run["content"], text_run["textStyle"])
36
+
37
+
38
def apply_text_style(content: str, style: dict) -> str:
    """Apply italic/bold Markdown markers to a piece of run text.

    Trailing newlines are stripped before the markers are added, and a single
    newline is re-appended afterwards if the input ended with one, so the
    markers always hug the text. When both flags are set the bold markers wrap
    the italic markers.

    Runs that are empty or whitespace-only are returned without markers:
    wrapping nothing would emit stray ``**``/``_`` sequences (for example
    ``****`` for a blank bold line).
    """
    trailing_newline = "\n" if content.endswith("\n") else ""
    text = content.rstrip("\n")

    if text.strip():
        if style.get("italic", False):
            text = f"_{text}_"
        if style.get("bold", False):
            text = f"**{text}**"

    return f"{text}{trailing_newline}"
48
+
49
+
50
def get_paragraph_style_prepend_str(style: dict) -> str:
    """Return the Markdown heading prefix for a paragraph style.

    ``TITLE`` maps to ``"# "``, ``SUBTITLE`` to ``"## "`` and ``HEADING_<n>`` to
    ``n`` hash characters followed by a space. Every other style, including a
    heading whose level is not an integer, yields an empty prefix.
    """
    named_style = style["namedStyleType"]

    if named_style == "NORMAL_TEXT":
        return ""
    if named_style == "TITLE":
        return "# "
    if named_style == "SUBTITLE":
        return "## "
    if not named_style.startswith("HEADING_"):
        return ""

    try:
        level = int(named_style.split("_")[1])
    except ValueError:
        return ""
    return f"{'#' * level} "
@@ -0,0 +1,116 @@
1
+ from enum import Enum
2
+
3
+
4
class Corpora(str, Enum):
    """
    Bodies of items (files/documents) to which the query applies.
    Prefer 'user' or 'drive' to 'allDrives' for efficiency.
    By default, corpora is set to 'user'.
    """

    # Each value is the literal string sent as the `corpora` request parameter.
    USER = "user"
    DOMAIN = "domain"  # used when organization-domain documents are requested
    DRIVE = "drive"  # used together with a specific shared drive ID
    ALL_DRIVES = "allDrives"
15
+
16
+
17
class DocumentFormat(str, Enum):
    # Output formats a retrieved document can be returned in.
    MARKDOWN = "markdown"  # converted with the Markdown converter
    HTML = "html"  # converted with the HTML converter
    GOOGLE_API_JSON = "google_api_json"  # the API response dict, left unconverted
21
+
22
+
23
class OrderBy(str, Enum):
    """
    Sort keys for ordering files in Google Drive.
    Each key has both ascending and descending options.
    """

    # When the file was created
    CREATED_TIME = "createdTime"
    CREATED_TIME_DESC = "createdTime desc"

    # The folder ID, sorted using alphabetical ordering
    FOLDER = "folder"
    FOLDER_DESC = "folder desc"

    # The last time the file was modified by the user
    MODIFIED_BY_ME_TIME = "modifiedByMeTime"
    MODIFIED_BY_ME_TIME_DESC = "modifiedByMeTime desc"

    # The last time the file was modified by anyone
    MODIFIED_TIME = "modifiedTime"
    MODIFIED_TIME_DESC = "modifiedTime desc"

    # The name of the file, sorted using alphabetical ordering (e.g., 1, 12, 2, 22)
    NAME = "name"
    NAME_DESC = "name desc"

    # The name of the file, sorted using natural sort ordering (e.g., 1, 2, 12, 22)
    NAME_NATURAL = "name_natural"
    NAME_NATURAL_DESC = "name_natural desc"

    # The number of storage quota bytes used by the file
    QUOTA_BYTES_USED = "quotaBytesUsed"
    QUOTA_BYTES_USED_DESC = "quotaBytesUsed desc"

    # The most recent timestamp from the file's date-time fields
    RECENCY = "recency"
    RECENCY_DESC = "recency desc"

    # When the file was shared with the user, if applicable
    SHARED_WITH_ME_TIME = "sharedWithMeTime"
    SHARED_WITH_ME_TIME_DESC = "sharedWithMeTime desc"

    # Whether the user has starred the file
    STARRED = "starred"
    STARRED_DESC = "starred desc"

    # The last time the file was viewed by the user
    VIEWED_BY_ME_TIME = "viewedByMeTime"
    VIEWED_BY_ME_TIME_DESC = "viewedByMeTime desc"
@@ -0,0 +1,49 @@
1
+ import base64
2
+ import json
3
+
4
+ from arcade_tdk import ToolContext, ToolMetadataKey
5
+ from arcade_tdk.errors import ToolExecutionError
6
+
7
+
8
def generate_google_file_picker_url(context: ToolContext) -> dict:
    """Generate a Google File Picker URL for user-driven file selection and authorization.

    Generates a URL that directs the end-user to a Google File Picker interface
    where they can select or upload Google Drive files. Users can grant permission to access
    their Drive files, providing a secure and authorized way to interact with their files.

    This is particularly useful when prior tools (e.g., those accessing or modifying
    Google Docs, Google Sheets, etc.) encountered failures due to file non-existence
    (Requested entity was not found) or permission errors. Once the user completes the file
    picker flow, the prior tool can be retried.

    The URL is the coordinator URL (surrounding slashes stripped) plus
    ``/google/drive_picker?config=<payload>``, where the payload is the URL-safe base64
    encoding of a JSON object holding the client ID and the app ID. The app ID is the part
    of the client ID before its first ``-``.

    Returns:
        A dictionary containing the URL and instructions for the llm to instruct the user.

    Raises:
        ToolExecutionError: If the client ID is missing, is not a string, or has an empty
            first segment.
    """
    client_id = context.get_metadata(ToolMetadataKey.CLIENT_ID)
    # str.split() always returns at least one element, so checking the resulting list
    # for emptiness can never fail; validate the ID and its first segment instead.
    app_id = client_id.split("-")[0] if isinstance(client_id, str) else ""
    if not app_id:
        raise ToolExecutionError(
            message="Invalid Google Client ID",
            developer_message=f"Google Client ID '{client_id}' is not valid",
        )
    cloud_coordinator_url = context.get_metadata(ToolMetadataKey.COORDINATOR_URL).strip("/")

    config = {
        "auth": {
            "client_id": client_id,
            "app_id": app_id,
        },
    }
    config_json = json.dumps(config)
    config_base64 = base64.urlsafe_b64encode(config_json.encode("utf-8")).decode("utf-8")
    url = f"{cloud_coordinator_url}/google/drive_picker?config={config_base64}"

    return {
        "url": url,
        "llm_instructions": (
            "Instruct the user to click the following link to open the Google Drive File Picker. "
            f"This will allow them to select files and grant access permissions: {url}"
        ),
    }
@@ -0,0 +1,5 @@
1
# Instruction text attached to search results; `{url}` is filled in with str.format().
optional_file_picker_instructions_template = " ".join([
    "Ensure the user knows that they have the option to select and grant access permissions to"
    " additional documents via the Google Drive File Picker.",
    "The user can pick additional documents via the following link: {url}",
])
@@ -0,0 +1,19 @@
1
+ from arcade_google_docs.tools.create import (
2
+ create_blank_document,
3
+ create_document_from_text,
4
+ )
5
+ from arcade_google_docs.tools.get import get_document_by_id
6
+ from arcade_google_docs.tools.search import (
7
+ search_and_retrieve_documents,
8
+ search_documents,
9
+ )
10
+ from arcade_google_docs.tools.update import insert_text_at_end_of_document
11
+
12
+ __all__ = [
13
+ "create_blank_document",
14
+ "create_document_from_text",
15
+ "get_document_by_id",
16
+ "insert_text_at_end_of_document",
17
+ "search_and_retrieve_documents",
18
+ "search_documents",
19
+ ]
@@ -0,0 +1,82 @@
1
+ from typing import Annotated
2
+
3
+ from arcade_tdk import ToolContext, tool
4
+ from arcade_tdk.auth import Google
5
+
6
+ from arcade_google_docs.utils import build_docs_service
7
+
8
+
9
+ # Uses https://developers.google.com/docs/api/reference/rest/v1/documents/create
10
+ # Example `arcade chat` query: `create blank document with title "My New Document"`
11
@tool(
    requires_auth=Google(
        scopes=[
            "https://www.googleapis.com/auth/drive.file",
        ],
    )
)
async def create_blank_document(
    context: ToolContext, title: Annotated[str, "The title of the blank document to create"]
) -> Annotated[dict, "The created document's title, documentId, and documentUrl in a dictionary"]:
    """
    Create a blank Google Docs document with the specified title.
    """
    docs = build_docs_service(context.get_auth_token_or_empty())

    # documents().create() returns a Document object:
    # https://developers.google.com/docs/api/reference/rest/v1/documents#Document
    created = docs.documents().create(body={"title": title}).execute()

    created_title = created["title"]
    document_id = created["documentId"]
    return {
        "title": created_title,
        "documentId": document_id,
        "documentUrl": f"https://docs.google.com/document/d/{document_id}/edit",
    }
37
+
38
+
39
+ # Uses https://developers.google.com/docs/api/reference/rest/v1/documents/batchUpdate
40
+ # Example `arcade chat` query:
41
+ # `create document with title "My New Document" and text content "Hello, World!"`
42
@tool(
    requires_auth=Google(
        scopes=[
            "https://www.googleapis.com/auth/drive.file",
        ],
    )
)
async def create_document_from_text(
    context: ToolContext,
    title: Annotated[str, "The title of the document to create"],
    text_content: Annotated[str, "The text content to insert into the document"],
) -> Annotated[dict, "The created document's title, documentId, and documentUrl in a dictionary"]:
    """
    Create a Google Docs document with the specified title and text content.
    """
    # First, create a blank document
    document = await create_blank_document(context, title)

    # Only issue the insert when there is text to insert. The document already
    # exists at this point, so a rejected empty insertText request would surface
    # as a failure while leaving an orphaned blank document behind.
    if text_content:
        service = build_docs_service(context.get_auth_token_or_empty())

        requests = [
            {
                "insertText": {
                    # Index 1 is the insertion point used for a freshly created document.
                    "location": {
                        "index": 1,
                    },
                    "text": text_content,
                }
            }
        ]

        # Execute the batchUpdate method to insert text
        service.documents().batchUpdate(
            documentId=document["documentId"], body={"requests": requests}
        ).execute()

    return {
        "title": document["title"],
        "documentId": document["documentId"],
        "documentUrl": f"https://docs.google.com/document/d/{document['documentId']}/edit",
    }
@@ -0,0 +1,35 @@
1
+ from typing import Annotated
2
+
3
+ from arcade_tdk import ToolContext, ToolMetadataKey, tool
4
+ from arcade_tdk.auth import Google
5
+
6
+ from arcade_google_docs.decorators import with_filepicker_fallback
7
+ from arcade_google_docs.utils import build_docs_service
8
+
9
+
10
+ # Uses https://developers.google.com/docs/api/reference/rest/v1/documents/get
11
+ # Example `arcade chat` query: `get document with ID 1234567890`
12
+ # Note: Document IDs are returned in the response of the `search_documents` tool
13
@tool(
    requires_auth=Google(
        scopes=[
            "https://www.googleapis.com/auth/drive.file",
        ],
    ),
    requires_metadata=[ToolMetadataKey.CLIENT_ID, ToolMetadataKey.COORDINATOR_URL],
)
@with_filepicker_fallback
async def get_document_by_id(
    context: ToolContext,
    document_id: Annotated[str, "The ID of the document to retrieve."],
) -> Annotated[dict, "The document contents as a dictionary"]:
    """
    Get the latest version of the specified Google Docs document.
    """
    docs = build_docs_service(context.get_auth_token_or_empty())

    # documents().get() returns a Document object:
    # https://developers.google.com/docs/api/reference/rest/v1/documents#Document
    fetched = docs.documents().get(documentId=document_id).execute()
    return dict(fetched)
@@ -0,0 +1,219 @@
1
+ from typing import Annotated, Any
2
+
3
+ from arcade_tdk import ToolContext, ToolMetadataKey, tool
4
+ from arcade_tdk.auth import Google
5
+
6
+ from arcade_google_docs.doc_to_html import convert_document_to_html
7
+ from arcade_google_docs.doc_to_markdown import convert_document_to_markdown
8
+ from arcade_google_docs.enum import DocumentFormat, OrderBy
9
+ from arcade_google_docs.file_picker import generate_google_file_picker_url
10
+ from arcade_google_docs.templates import optional_file_picker_instructions_template
11
+ from arcade_google_docs.tools import get_document_by_id
12
+ from arcade_google_docs.utils import (
13
+ build_drive_service,
14
+ build_files_list_params,
15
+ )
16
+
17
+
18
+ # Implements: https://googleapis.github.io/google-api-python-client/docs/dyn/drive_v3.files.html#list
19
+ # Example `arcade chat` query: `list my 5 most recently modified documents`
20
+ # TODO: Support query with natural language. Currently, the tool expects a fully formed query
21
+ # string as input with the syntax defined here: https://developers.google.com/drive/api/guides/search-files
22
@tool(
    requires_auth=Google(
        scopes=["https://www.googleapis.com/auth/drive.file"],
    ),
    requires_metadata=[ToolMetadataKey.CLIENT_ID, ToolMetadataKey.COORDINATOR_URL],
)
async def search_documents(
    context: ToolContext,
    document_contains: Annotated[
        list[str] | None,
        "Keywords or phrases that must be in the document title or body. Provide a list of "
        "keywords or phrases if needed.",
    ] = None,
    document_not_contains: Annotated[
        list[str] | None,
        "Keywords or phrases that must NOT be in the document title or body. Provide a list of "
        "keywords or phrases if needed.",
    ] = None,
    search_only_in_shared_drive_id: Annotated[
        str | None,
        "The ID of the shared drive to restrict the search to. If provided, the search will only "
        "return documents from this drive. Defaults to None, which searches across all drives.",
    ] = None,
    include_shared_drives: Annotated[
        bool,
        "Whether to include documents from shared drives. Defaults to False (searches only in "
        "the user's 'My Drive').",
    ] = False,
    include_organization_domain_documents: Annotated[
        bool,
        "Whether to include documents from the organization's domain. This is applicable to admin "
        "users who have permissions to view organization-wide documents in a Google Workspace "
        "account. Defaults to False.",
    ] = False,
    order_by: Annotated[
        list[OrderBy] | None,
        "Sort order. Defaults to listing the most recently modified documents first",
    ] = None,
    limit: Annotated[int, "The number of documents to list"] = 50,
    pagination_token: Annotated[
        str | None, "The pagination token to continue a previous request"
    ] = None,
) -> Annotated[
    dict,
    "A dictionary containing 'documents_count' (number of documents returned) and 'documents' "
    "(a list of document details including 'kind', 'mimeType', 'id', and 'name' for each document)",
]:
    """
    Searches for documents in the user's Google Drive. Excludes documents that are in the trash.
    """
    # Accept a single OrderBy as well as a list; default to most recently modified first.
    if order_by is None:
        order_by = [OrderBy.MODIFIED_TIME_DESC]
    elif isinstance(order_by, OrderBy):
        order_by = [order_by]

    # Results are fetched in pages of at most 10 files until `limit` is reached.
    page_size = min(10, limit)
    files: list[dict[str, Any]] = []

    service = build_drive_service(context.get_auth_token_or_empty())

    params = build_files_list_params(
        mime_type="application/vnd.google-apps.document",
        document_contains=document_contains,
        document_not_contains=document_not_contains,
        page_size=page_size,
        order_by=order_by,
        pagination_token=pagination_token,
        include_shared_drives=include_shared_drives,
        search_only_in_shared_drive_id=search_only_in_shared_drive_id,
        include_organization_domain_documents=include_organization_domain_documents,
    )

    while len(files) < limit:
        if pagination_token:
            params["pageToken"] = pagination_token
        else:
            params.pop("pageToken", None)

        results = service.files().list(**params).execute()
        batch = results.get("files", [])
        files.extend(batch[: limit - len(files)])

        # A page shorter than `page_size` does not mean the listing is finished:
        # Drive may return partial or empty pages. Only a missing next-page token
        # marks the end of the results.
        pagination_token = results.get("nextPageToken")
        if not pagination_token:
            break

    file_picker_response = generate_google_file_picker_url(
        context,
    )

    return {
        "documents_count": len(files),
        "documents": files,
        "file_picker": {
            "url": file_picker_response["url"],
            "llm_instructions": optional_file_picker_instructions_template.format(
                url=file_picker_response["url"]
            ),
        },
    }
122
+
123
+
124
@tool(
    requires_auth=Google(
        scopes=["https://www.googleapis.com/auth/drive.file"],
    ),
    requires_metadata=[ToolMetadataKey.CLIENT_ID, ToolMetadataKey.COORDINATOR_URL],
)
async def search_and_retrieve_documents(
    context: ToolContext,
    return_format: Annotated[
        DocumentFormat,
        "The format of the document to return. Defaults to Markdown.",
    ] = DocumentFormat.MARKDOWN,
    document_contains: Annotated[
        list[str] | None,
        "Keywords or phrases that must be in the document title or body. Provide a list of "
        "keywords or phrases if needed.",
    ] = None,
    document_not_contains: Annotated[
        list[str] | None,
        "Keywords or phrases that must NOT be in the document title or body. Provide a list of "
        "keywords or phrases if needed.",
    ] = None,
    search_only_in_shared_drive_id: Annotated[
        str | None,
        "The ID of the shared drive to restrict the search to. If provided, the search will only "
        "return documents from this drive. Defaults to None, which searches across all drives.",
    ] = None,
    include_shared_drives: Annotated[
        bool,
        "Whether to include documents from shared drives. Defaults to False (searches only in "
        "the user's 'My Drive').",
    ] = False,
    include_organization_domain_documents: Annotated[
        bool,
        "Whether to include documents from the organization's domain. This is applicable to admin "
        "users who have permissions to view organization-wide documents in a Google Workspace "
        "account. Defaults to False.",
    ] = False,
    order_by: Annotated[
        list[OrderBy] | None,
        "Sort order. Defaults to listing the most recently modified documents first",
    ] = None,
    limit: Annotated[int, "The number of documents to list"] = 50,
    pagination_token: Annotated[
        str | None, "The pagination token to continue a previous request"
    ] = None,
) -> Annotated[
    dict,
    "A dictionary containing 'documents_count' (number of documents returned) and 'documents' "
    "(a list of documents with their content).",
]:
    """
    Searches for documents in the user's Google Drive and returns a list of documents (with text
    content) matching the search criteria. Excludes documents that are in the trash.

    Note: use this tool only when the user prompt requires the documents' content. If the user only
    needs a list of documents, use the `search_documents` tool instead.
    """
    response = await search_documents(
        context=context,
        document_contains=document_contains,
        document_not_contains=document_not_contains,
        search_only_in_shared_drive_id=search_only_in_shared_drive_id,
        include_shared_drives=include_shared_drives,
        include_organization_domain_documents=include_organization_domain_documents,
        order_by=order_by,
        limit=limit,
        pagination_token=pagination_token,
    )

    documents = []

    for item in response["documents"]:
        document = await get_document_by_id(context, document_id=item["id"])

        # get_document_by_id returns a file picker response instead of a document
        # when the API answers 403/404. Such a response has no "body" (nor "title"),
        # so feeding it to the converters would raise KeyError; skip it. The file
        # picker link is still included once in the result below.
        if "body" not in document:
            continue

        # GOOGLE_API_JSON needs no conversion: the API dict is returned as is.
        if return_format == DocumentFormat.MARKDOWN:
            document = convert_document_to_markdown(document)
        elif return_format == DocumentFormat.HTML:
            document = convert_document_to_html(document)

        documents.append(document)

    file_picker_response = generate_google_file_picker_url(
        context,
    )

    return {
        "documents_count": len(documents),
        "documents": documents,
        "file_picker": {
            "url": file_picker_response["url"],
            "llm_instructions": optional_file_picker_instructions_template.format(
                url=file_picker_response["url"]
            ),
        },
    }
@@ -0,0 +1,60 @@
1
+ from typing import Annotated
2
+
3
+ from arcade_tdk import ToolContext, ToolMetadataKey, tool
4
+ from arcade_tdk.auth import Google
5
+
6
+ from arcade_google_docs.decorators import with_filepicker_fallback
7
+ from arcade_google_docs.tools.get import get_document_by_id
8
+ from arcade_google_docs.utils import build_docs_service
9
+
10
+
11
+ # Uses https://developers.google.com/docs/api/reference/rest/v1/documents/batchUpdate
12
+ # Example `arcade chat` query: `insert "The END" at the end of document with ID 1234567890`
13
@tool(
    requires_auth=Google(
        scopes=[
            "https://www.googleapis.com/auth/drive.file",
        ],
    ),
    requires_metadata=[ToolMetadataKey.CLIENT_ID, ToolMetadataKey.COORDINATOR_URL],
)
@with_filepicker_fallback
async def insert_text_at_end_of_document(
    context: ToolContext,
    document_id: Annotated[str, "The ID of the document to update."],
    text_content: Annotated[str, "The text content to insert into the document"],
) -> Annotated[dict, "The response from the batchUpdate API as a dict."]:
    """
    Updates an existing Google Docs document using the batchUpdate API endpoint.
    """
    fetched = await get_document_by_id(context, document_id)

    # Without a "body" this is the file picker response (document not found or
    # not accessible), which is handed back to the caller unchanged.
    if "body" not in fetched:
        return fetched  # type: ignore[no-any-return]

    # The text goes one position before the end index of the last body element.
    last_end_index = fetched["body"]["content"][-1]["endIndex"]
    insert_request = {
        "insertText": {
            "location": {"index": int(last_end_index) - 1},
            "text": text_content,
        }
    }

    docs = build_docs_service(context.get_auth_token_or_empty())
    update_call = docs.documents().batchUpdate(
        documentId=document_id, body={"requests": [insert_request]}
    )
    return dict(update_call.execute())
@@ -0,0 +1,119 @@
1
+ import logging
2
+ from typing import Any
3
+
4
+ from google.oauth2.credentials import Credentials
5
+ from googleapiclient.discovery import Resource, build
6
+
7
+ from arcade_google_docs.enum import Corpora, OrderBy
8
+
9
# Configure console logging at DEBUG level with a timestamped format.
# NOTE(review): this runs at import time and targets the root logger, so it can
# affect the logging setup of whatever process imports this module — confirm
# that this is intended for a library module.
logging.basicConfig(
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    level=logging.DEBUG,
)

logger = logging.getLogger(__name__)
16
+
17
+
18
def build_docs_service(auth_token: str | None) -> Resource:  # type: ignore[no-any-unimported]
    """
    Build a Docs service object.

    A missing token is replaced by an empty string before the credentials are created.
    """
    credentials = Credentials(auth_token or "")
    return build("docs", "v1", credentials=credentials)
24
+
25
+
26
def build_drive_service(auth_token: str | None) -> Resource:  # type: ignore[no-any-unimported]
    """
    Build a Drive service object.

    A missing token is replaced by an empty string before the credentials are created.
    """
    credentials = Credentials(auth_token or "")
    return build("drive", "v3", credentials=credentials)
32
+
33
+
34
def build_files_list_params(
    mime_type: str,
    page_size: int,
    order_by: list[OrderBy],
    pagination_token: str | None,
    include_shared_drives: bool,
    search_only_in_shared_drive_id: str | None,
    include_organization_domain_documents: bool,
    document_contains: list[str] | None = None,
    document_not_contains: list[str] | None = None,
) -> dict[str, Any]:
    """Build the keyword arguments for a Drive ``files().list()`` call.

    Args:
        mime_type: MIME type the listed files must have.
        page_size: Value for the ``pageSize`` parameter.
        order_by: Sort keys, joined with commas into ``orderBy``.
        pagination_token: Value for ``pageToken``; dropped from the result when None.
        include_shared_drives: Whether items from shared drives are included.
        search_only_in_shared_drive_id: Shared drive to restrict the search to.
        include_organization_domain_documents: Whether to search the domain corpus.
        document_contains: Keywords that must appear in the name or full text.
        document_not_contains: Keywords that must not appear in the name or full text.

    Returns:
        The parameter dict with all None values removed.
    """
    query = build_files_list_query(
        mime_type=mime_type,
        document_contains=document_contains,
        document_not_contains=document_not_contains,
    )

    params: dict[str, Any] = {
        "q": query,
        "pageSize": page_size,
        "orderBy": ",".join(item.value for item in order_by),
        "pageToken": pagination_token,
    }

    if (
        include_shared_drives
        or search_only_in_shared_drive_id
        or include_organization_domain_documents
    ):
        params["includeItemsFromAllDrives"] = "true"
        params["supportsAllDrives"] = "true"

    if search_only_in_shared_drive_id:
        # A specific drive was requested: the search must stay restricted to it,
        # so the drive corpus takes precedence over the domain corpus. Setting
        # corpora to "domain" while still sending driveId would contradict the
        # requested restriction.
        params["driveId"] = search_only_in_shared_drive_id
        params["corpora"] = Corpora.DRIVE.value
    elif include_organization_domain_documents:
        params["corpora"] = Corpora.DOMAIN.value

    return remove_none_values(params)
76
+
77
+
78
+ def build_files_list_query(
79
+ mime_type: str,
80
+ document_contains: list[str] | None = None,
81
+ document_not_contains: list[str] | None = None,
82
+ ) -> str:
83
+ query = [f"(mimeType = '{mime_type}' and trashed = false)"]
84
+
85
+ if isinstance(document_contains, str):
86
+ document_contains = [document_contains]
87
+
88
+ if isinstance(document_not_contains, str):
89
+ document_not_contains = [document_not_contains]
90
+
91
+ if document_contains:
92
+ for keyword in document_contains:
93
+ name_contains = keyword.replace("'", "\\'")
94
+ full_text_contains = keyword.replace("'", "\\'")
95
+ keyword_query = (
96
+ f"(name contains '{name_contains}' or fullText contains '{full_text_contains}')"
97
+ )
98
+ query.append(keyword_query)
99
+
100
+ if document_not_contains:
101
+ for keyword in document_not_contains:
102
+ name_not_contains = keyword.replace("'", "\\'")
103
+ full_text_not_contains = keyword.replace("'", "\\'")
104
+ keyword_query = (
105
+ f"(name not contains '{name_not_contains}' and "
106
+ f"fullText not contains '{full_text_not_contains}')"
107
+ )
108
+ query.append(keyword_query)
109
+
110
+ return " and ".join(query)
111
+
112
+
113
def remove_none_values(params: dict) -> dict:
    """
    Return a copy of a dictionary without the entries whose value is None.
    :param params: The dictionary to clean (left unmodified)
    :return: A new dictionary holding only the non-None entries, in the same order
    """
    cleaned = {}
    for key, value in params.items():
        if value is None:
            continue
        cleaned[key] = value
    return cleaned
@@ -0,0 +1,23 @@
1
+ Metadata-Version: 2.4
2
+ Name: arcade_google_docs
3
+ Version: 2.0.0
4
+ Summary: Arcade.dev LLM tools for Google Docs
5
+ Author-email: Arcade <dev@arcade.dev>
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: arcade-tdk<3.0.0,>=2.0.0
8
+ Requires-Dist: google-api-core<3.0.0,>=2.19.1
9
+ Requires-Dist: google-api-python-client<3.0.0,>=2.137.0
10
+ Requires-Dist: google-auth-httplib2<1.0.0,>=0.2.0
11
+ Requires-Dist: google-auth<3.0.0,>=2.32.0
12
+ Requires-Dist: googleapis-common-protos<2.0.0,>=1.63.2
13
+ Provides-Extra: dev
14
+ Requires-Dist: arcade-ai[evals]<3.0.0,>=2.0.4; extra == 'dev'
15
+ Requires-Dist: arcade-serve<3.0.0,>=2.0.0; extra == 'dev'
16
+ Requires-Dist: mypy<1.6.0,>=1.5.1; extra == 'dev'
17
+ Requires-Dist: pre-commit<3.5.0,>=3.4.0; extra == 'dev'
18
+ Requires-Dist: pytest-asyncio<0.25.0,>=0.24.0; extra == 'dev'
19
+ Requires-Dist: pytest-cov<4.1.0,>=4.0.0; extra == 'dev'
20
+ Requires-Dist: pytest-mock<3.12.0,>=3.11.1; extra == 'dev'
21
+ Requires-Dist: pytest<8.4.0,>=8.3.0; extra == 'dev'
22
+ Requires-Dist: ruff<0.8.0,>=0.7.4; extra == 'dev'
23
+ Requires-Dist: tox<4.12.0,>=4.11.1; extra == 'dev'
@@ -0,0 +1,16 @@
1
+ arcade_google_docs/__init__.py,sha256=WxIJBwkBmVeHfvgQ9E8eZy28SVqSkBlbZPhgFWhVd7o,418
2
+ arcade_google_docs/decorators.py,sha256=5ONZ3vS2lZBmog5c1TcuWjyPDeftBPAJ7vXyLjSPFRk,751
3
+ arcade_google_docs/doc_to_html.py,sha256=6RTpzRSrazNa6AndLZhA20wgVDzZuHUqpu3WAkAsbjQ,3146
4
+ arcade_google_docs/doc_to_markdown.py,sha256=eT-sc6ruxN8nEtUm9mBHFOWXajEBTTXkxsn6XsLHIxo,2020
5
+ arcade_google_docs/enum.py,sha256=vFJWPe1JPG6I9xqdVVvuaEeen4LvvtJxax1sDYeh4UU,3421
6
+ arcade_google_docs/file_picker.py,sha256=kGfUVfH5QVlIW1sL-_gAwPokt7TwVEcPk3Vnk53GKUE,2005
7
+ arcade_google_docs/templates.py,sha256=pxbdMj57eV3-ImW3CixDWscpVKS94Z8nTNyTxDhUfGY,283
8
+ arcade_google_docs/utils.py,sha256=Eku4b1olLcXfQ20liE9m3iPWvy60VA62tQ8TFwRKn94,3722
9
+ arcade_google_docs/tools/__init__.py,sha256=f0d7ZRXCqgODDBkKtNhvHzsqs_GuZ97fe0dpjBcXKq8,548
10
+ arcade_google_docs/tools/create.py,sha256=AuYy8yMGscrxAdLJQX0WiisGHCTufSlaRu_QGMMKQmM,2764
11
+ arcade_google_docs/tools/get.py,sha256=2wi9ZF9s_57mMbIGgsqr53Fr0AJyrYVOQ11x7nAyk8Y,1339
12
+ arcade_google_docs/tools/search.py,sha256=_CaEs1A_qGToNPeuGQ2yN3phGtPD7fFQbW3UqZr_qpg,8617
13
+ arcade_google_docs/tools/update.py,sha256=9SvffQIHnmYiEgyE1VrhXG2aHb0hIfWYPzPLc4nTdKI,2030
14
+ arcade_google_docs-2.0.0.dist-info/METADATA,sha256=9UwyGTqc8_dVJiMRHNCk_n-mNn_PM99WnlpaO2LZSYM,1035
15
+ arcade_google_docs-2.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
16
+ arcade_google_docs-2.0.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.27.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any