arcade-google-docs 4.3.1__py3-none-any.whl → 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arcade_google_docs/__init__.py +6 -0
- arcade_google_docs/doc_to_html.py +195 -4
- arcade_google_docs/doc_to_markdown.py +181 -3
- arcade_google_docs/docmd.py +115 -36
- arcade_google_docs/models/responses.py +143 -0
- arcade_google_docs/tools/__init__.py +7 -1
- arcade_google_docs/tools/edit_agent/utils.py +3 -1
- arcade_google_docs/tools/get.py +64 -7
- arcade_google_docs/tools/search.py +33 -33
- arcade_google_docs/utils.py +262 -1
- {arcade_google_docs-4.3.1.dist-info → arcade_google_docs-5.0.0.dist-info}/METADATA +4 -4
- {arcade_google_docs-4.3.1.dist-info → arcade_google_docs-5.0.0.dist-info}/RECORD +13 -12
- {arcade_google_docs-4.3.1.dist-info → arcade_google_docs-5.0.0.dist-info}/WHEEL +0 -0
arcade_google_docs/docmd.py
CHANGED
|
@@ -54,12 +54,15 @@ Example DocMD as a string:
|
|
|
54
54
|
from collections.abc import Callable
|
|
55
55
|
from enum import Enum
|
|
56
56
|
|
|
57
|
+
from arcade_tdk.errors import RetryableToolError
|
|
57
58
|
from pydantic import BaseModel
|
|
58
59
|
|
|
59
60
|
from arcade_google_docs.models.document import (
|
|
60
61
|
Document,
|
|
61
62
|
NamedStyleType,
|
|
62
63
|
Paragraph,
|
|
64
|
+
StructuralElement,
|
|
65
|
+
Tab,
|
|
63
66
|
Table,
|
|
64
67
|
TextStyle,
|
|
65
68
|
)
|
|
@@ -188,10 +191,9 @@ class DocMD(BaseModel):
|
|
|
188
191
|
)
|
|
189
192
|
|
|
190
193
|
|
|
191
|
-
def build_docmd(document: Document) -> DocMD:
|
|
194
|
+
def build_docmd(document: Document, tab_id: str | None = None) -> DocMD:
|
|
192
195
|
doc_id = document.documentId or ""
|
|
193
196
|
rev = document.revisionId
|
|
194
|
-
tab = ""
|
|
195
197
|
|
|
196
198
|
counters: dict[str, int] = {
|
|
197
199
|
"H": 0,
|
|
@@ -210,7 +212,73 @@ def build_docmd(document: Document) -> DocMD: # noqa: C901
|
|
|
210
212
|
|
|
211
213
|
blocks: list[DocMDBlock] = []
|
|
212
214
|
|
|
213
|
-
|
|
215
|
+
if document.tabs and len(document.tabs) > 0:
|
|
216
|
+
flattened_tabs = _flatten_tabs_depth_first(document.tabs)
|
|
217
|
+
|
|
218
|
+
if tab_id:
|
|
219
|
+
matching_tabs = [
|
|
220
|
+
t for t in flattened_tabs if t.tabProperties and t.tabProperties.tabId == tab_id
|
|
221
|
+
]
|
|
222
|
+
if not matching_tabs:
|
|
223
|
+
available_ids = [t.tabProperties.tabId for t in flattened_tabs if t.tabProperties]
|
|
224
|
+
raise RetryableToolError(
|
|
225
|
+
message=f"Tab with ID '{tab_id}' not found in document",
|
|
226
|
+
additional_prompt_content=f"Available tab IDs: {available_ids}",
|
|
227
|
+
retry_after_ms=100,
|
|
228
|
+
)
|
|
229
|
+
flattened_tabs = matching_tabs
|
|
230
|
+
|
|
231
|
+
for tab_obj in flattened_tabs:
|
|
232
|
+
if not tab_obj.documentTab or not tab_obj.tabProperties:
|
|
233
|
+
continue
|
|
234
|
+
|
|
235
|
+
tab_metadata = {
|
|
236
|
+
"tabId": tab_obj.tabProperties.tabId or "",
|
|
237
|
+
"title": tab_obj.tabProperties.title or "",
|
|
238
|
+
"nestingLevel": str(tab_obj.tabProperties.nestingLevel or 0),
|
|
239
|
+
"index": str(tab_obj.tabProperties.index or 0),
|
|
240
|
+
}
|
|
241
|
+
if tab_obj.tabProperties.parentTabId:
|
|
242
|
+
tab_metadata["parentTabId"] = tab_obj.tabProperties.parentTabId
|
|
243
|
+
|
|
244
|
+
body_content = []
|
|
245
|
+
if tab_obj.documentTab.body and tab_obj.documentTab.body.content:
|
|
246
|
+
body_content = tab_obj.documentTab.body.content
|
|
247
|
+
_process_body_content(
|
|
248
|
+
body_content,
|
|
249
|
+
next_id,
|
|
250
|
+
tab_metadata,
|
|
251
|
+
blocks,
|
|
252
|
+
)
|
|
253
|
+
else:
|
|
254
|
+
body_content = []
|
|
255
|
+
if document.body and document.body.content:
|
|
256
|
+
body_content = document.body.content
|
|
257
|
+
_process_body_content(
|
|
258
|
+
body_content,
|
|
259
|
+
next_id,
|
|
260
|
+
{},
|
|
261
|
+
blocks,
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
return DocMD(documentId=doc_id, revisionId=rev, tab="", blocks=blocks)
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
def _process_body_content(
|
|
268
|
+
content: list[StructuralElement],
|
|
269
|
+
next_id_func: Callable[[str], str],
|
|
270
|
+
tab_metadata: dict[str, str],
|
|
271
|
+
blocks: list[DocMDBlock],
|
|
272
|
+
) -> None:
|
|
273
|
+
"""Process structural elements from a body (main document or tab).
|
|
274
|
+
|
|
275
|
+
Args:
|
|
276
|
+
content: List of structural elements to process
|
|
277
|
+
next_id_func: Function to generate unique block IDs
|
|
278
|
+
tab_metadata: Dict with tab information (tabId, title, nestingLevel, etc.)
|
|
279
|
+
blocks: List to append processed blocks to
|
|
280
|
+
"""
|
|
281
|
+
for se in content:
|
|
214
282
|
if se.paragraph is not None:
|
|
215
283
|
p: Paragraph = se.paragraph
|
|
216
284
|
named = p.paragraphStyle.namedStyleType if p.paragraphStyle else None
|
|
@@ -225,34 +293,30 @@ def build_docmd(document: Document) -> DocMD: # noqa: C901
|
|
|
225
293
|
|
|
226
294
|
block_type: str
|
|
227
295
|
block_id: str
|
|
228
|
-
attrs: dict[str, str] = {}
|
|
229
|
-
# Only add tab attribute if it's not empty (not the default tab)
|
|
230
|
-
if tab:
|
|
231
|
-
attrs["tab"] = tab
|
|
296
|
+
attrs: dict[str, str] = tab_metadata.copy() if tab_metadata else {}
|
|
232
297
|
|
|
233
298
|
if is_heading:
|
|
234
299
|
level = int(str(named).split("_")[-1])
|
|
235
300
|
block_type = f"HEADING_{level}"
|
|
236
|
-
block_id =
|
|
301
|
+
block_id = next_id_func("H")
|
|
237
302
|
if p.paragraphStyle and p.paragraphStyle.headingId:
|
|
238
303
|
attrs["headingId"] = p.paragraphStyle.headingId
|
|
239
304
|
else:
|
|
240
305
|
if p.bullet and p.bullet.listId:
|
|
241
306
|
block_type = DocMDBlockType.UL_ITEM.value
|
|
242
|
-
block_id =
|
|
307
|
+
block_id = next_id_func("UL")
|
|
243
308
|
attrs["listId"] = p.bullet.listId
|
|
244
309
|
if p.bullet.nestingLevel is not None:
|
|
245
310
|
attrs["level"] = str(p.bullet.nestingLevel)
|
|
246
311
|
else:
|
|
247
312
|
block_type = DocMDBlockType.PARAGRAPH.value
|
|
248
|
-
block_id =
|
|
313
|
+
block_id = next_id_func("P")
|
|
249
314
|
|
|
250
315
|
vis_start, vis_end, text, style_runs = _visible_span_and_text(p)
|
|
251
316
|
start = vis_start if vis_start is not None else se.startIndex or 0
|
|
252
317
|
end = vis_end if vis_end is not None else se.endIndex or start
|
|
253
318
|
text_line = (text or "").rstrip("\n")
|
|
254
319
|
|
|
255
|
-
# Add style ranges to attrs if any styles are present
|
|
256
320
|
if style_runs:
|
|
257
321
|
style_ranges = _format_style_ranges(style_runs, start)
|
|
258
322
|
if style_ranges:
|
|
@@ -270,25 +334,19 @@ def build_docmd(document: Document) -> DocMD: # noqa: C901
|
|
|
270
334
|
)
|
|
271
335
|
|
|
272
336
|
elif se.table is not None:
|
|
273
|
-
_process_table(se.table, se,
|
|
274
|
-
|
|
275
|
-
return DocMD(documentId=doc_id, revisionId=rev, tab=tab, blocks=blocks)
|
|
337
|
+
_process_table(se.table, se, next_id_func, tab_metadata, blocks)
|
|
276
338
|
|
|
277
339
|
|
|
278
340
|
def _process_table( # type: ignore[no-untyped-def]
|
|
279
341
|
table: Table,
|
|
280
342
|
se,
|
|
281
343
|
next_id_func: Callable[[str], str],
|
|
282
|
-
|
|
344
|
+
tab_metadata: dict[str, str],
|
|
283
345
|
blocks: list[DocMDBlock],
|
|
284
346
|
) -> None:
|
|
285
347
|
"""Process a table structural element and add table/row/cell blocks."""
|
|
286
348
|
table_id = next_id_func("TABLE")
|
|
287
|
-
table_attrs: dict[str, str] = {}
|
|
288
|
-
|
|
289
|
-
# Only add tab attribute if it's not empty (not the default tab)
|
|
290
|
-
if tab:
|
|
291
|
-
table_attrs["tab"] = tab
|
|
349
|
+
table_attrs: dict[str, str] = tab_metadata.copy() if tab_metadata else {}
|
|
292
350
|
|
|
293
351
|
if table.rows is not None:
|
|
294
352
|
table_attrs["rows"] = str(table.rows)
|
|
@@ -310,7 +368,7 @@ def _process_table( # type: ignore[no-untyped-def]
|
|
|
310
368
|
)
|
|
311
369
|
|
|
312
370
|
for row_idx, table_row in enumerate(table.tableRows or []):
|
|
313
|
-
_process_table_row(table_row, row_idx, table_start, next_id_func,
|
|
371
|
+
_process_table_row(table_row, row_idx, table_start, next_id_func, tab_metadata, blocks)
|
|
314
372
|
|
|
315
373
|
|
|
316
374
|
def _process_table_row( # type: ignore[no-untyped-def]
|
|
@@ -318,15 +376,13 @@ def _process_table_row( # type: ignore[no-untyped-def]
|
|
|
318
376
|
row_idx: int,
|
|
319
377
|
table_start: int,
|
|
320
378
|
next_id_func: Callable[[str], str],
|
|
321
|
-
|
|
379
|
+
tab_metadata: dict[str, str],
|
|
322
380
|
blocks: list[DocMDBlock],
|
|
323
381
|
) -> None:
|
|
324
382
|
"""Process a table row and add row/cell blocks."""
|
|
325
383
|
row_id = next_id_func("TR")
|
|
326
|
-
row_attrs: dict[str, str] =
|
|
327
|
-
|
|
328
|
-
if tab:
|
|
329
|
-
row_attrs["tab"] = tab
|
|
384
|
+
row_attrs: dict[str, str] = tab_metadata.copy() if tab_metadata else {}
|
|
385
|
+
row_attrs["row"] = str(row_idx)
|
|
330
386
|
|
|
331
387
|
row_start = table_row.startIndex or table_start
|
|
332
388
|
row_end = table_row.endIndex or row_start
|
|
@@ -343,27 +399,25 @@ def _process_table_row( # type: ignore[no-untyped-def]
|
|
|
343
399
|
)
|
|
344
400
|
|
|
345
401
|
for cell_idx, table_cell in enumerate(table_row.tableCells or []):
|
|
346
|
-
_process_table_cell(
|
|
402
|
+
_process_table_cell(
|
|
403
|
+
table_cell, row_idx, cell_idx, row_start, next_id_func, tab_metadata, blocks
|
|
404
|
+
)
|
|
347
405
|
|
|
348
406
|
|
|
349
|
-
def _process_table_cell( # type: ignore[no-untyped-def]
|
|
407
|
+
def _process_table_cell( # type: ignore[no-untyped-def]
|
|
350
408
|
table_cell,
|
|
351
409
|
row_idx: int,
|
|
352
410
|
cell_idx: int,
|
|
353
411
|
row_start: int,
|
|
354
412
|
next_id_func: Callable[[str], str],
|
|
355
|
-
|
|
413
|
+
tab_metadata: dict[str, str],
|
|
356
414
|
blocks: list[DocMDBlock],
|
|
357
415
|
) -> None:
|
|
358
416
|
"""Process a table cell and add cell block."""
|
|
359
417
|
cell_id = next_id_func("TC")
|
|
360
|
-
cell_attrs: dict[str, str] = {
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
}
|
|
364
|
-
|
|
365
|
-
if tab:
|
|
366
|
-
cell_attrs["tab"] = tab
|
|
418
|
+
cell_attrs: dict[str, str] = tab_metadata.copy() if tab_metadata else {}
|
|
419
|
+
cell_attrs["row"] = str(row_idx)
|
|
420
|
+
cell_attrs["col"] = str(cell_idx)
|
|
367
421
|
|
|
368
422
|
# Add cell styling attributes if present
|
|
369
423
|
if (
|
|
@@ -532,3 +586,28 @@ def _format_style_ranges(style_runs: list[dict], block_start: int) -> str:
|
|
|
532
586
|
consolidated.append(f"{style_str}:{abs_start}-{abs_end}")
|
|
533
587
|
|
|
534
588
|
return ",".join(consolidated) if consolidated else ""
|
|
589
|
+
|
|
590
|
+
|
|
591
|
+
def _flatten_tabs_depth_first(
|
|
592
|
+
tabs: list[Tab] | None, max_depth: int = 4, current_depth: int = 0
|
|
593
|
+
) -> list[Tab]:
|
|
594
|
+
"""Flatten tab hierarchy using depth-first traversal.
|
|
595
|
+
|
|
596
|
+
Args:
|
|
597
|
+
tabs: List of Tab objects, potentially with nested childTabs
|
|
598
|
+
max_depth: Maximum recursion depth (Google Docs enforces 3 levels, using 4 for safety)
|
|
599
|
+
current_depth: Current recursion depth (internal use)
|
|
600
|
+
|
|
601
|
+
Returns:
|
|
602
|
+
Flattened list of tabs in depth-first order (parent → children → grandchildren)
|
|
603
|
+
"""
|
|
604
|
+
if not tabs or current_depth >= max_depth:
|
|
605
|
+
return []
|
|
606
|
+
|
|
607
|
+
result: list[Tab] = []
|
|
608
|
+
for tab in tabs:
|
|
609
|
+
result.append(tab)
|
|
610
|
+
if tab.childTabs:
|
|
611
|
+
result.extend(_flatten_tabs_depth_first(tab.childTabs, max_depth, current_depth + 1))
|
|
612
|
+
|
|
613
|
+
return result
|
|
@@ -0,0 +1,143 @@
|
|
|
1
|
+
"""
|
|
2
|
+
TypedDict response models for Google Docs tools.
|
|
3
|
+
|
|
4
|
+
These models define the structure of responses returned by Google Docs tools,
|
|
5
|
+
with field descriptions as string literals for tool compatibility.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import TypedDict
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TabMetadata(TypedDict, total=False):
|
|
12
|
+
"""Metadata for a single tab in a Google Docs document."""
|
|
13
|
+
|
|
14
|
+
tabId: str
|
|
15
|
+
"""The unique identifier of the tab."""
|
|
16
|
+
|
|
17
|
+
title: str
|
|
18
|
+
"""The title/name of the tab."""
|
|
19
|
+
|
|
20
|
+
index: int
|
|
21
|
+
"""The position of the tab among its siblings (0-indexed)."""
|
|
22
|
+
|
|
23
|
+
nestingLevel: int
|
|
24
|
+
"""The nesting depth (0 for top-level, 1 for child, 2 for grandchild)."""
|
|
25
|
+
|
|
26
|
+
approximateCharacterCount: int
|
|
27
|
+
"""Approximate number of characters in this tab's content (excluding child tabs)."""
|
|
28
|
+
|
|
29
|
+
approximateWordCount: int
|
|
30
|
+
"""Approximate number of words in this tab's content (excluding child tabs)."""
|
|
31
|
+
|
|
32
|
+
parentTabId: str
|
|
33
|
+
"""The ID of the parent tab (if this is a nested tab)."""
|
|
34
|
+
|
|
35
|
+
childTabs: list[dict]
|
|
36
|
+
"""List of nested child tabs within this tab (each follows TabMetadata structure)."""
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class DocumentMetadata(TypedDict):
|
|
40
|
+
"""Complete metadata for a Google Docs document including tab hierarchy."""
|
|
41
|
+
|
|
42
|
+
documentId: str
|
|
43
|
+
"""The unique identifier of the document."""
|
|
44
|
+
|
|
45
|
+
title: str
|
|
46
|
+
"""The title of the document."""
|
|
47
|
+
|
|
48
|
+
documentUrl: str
|
|
49
|
+
"""The URL to open and edit the document in Google Docs."""
|
|
50
|
+
|
|
51
|
+
approximateTotalCharacterCount: int
|
|
52
|
+
"""Approximate total number of characters across all tabs (or main body if no tabs)."""
|
|
53
|
+
|
|
54
|
+
approximateTotalWordCount: int
|
|
55
|
+
"""Approximate total number of words across all tabs (or main body if no tabs)."""
|
|
56
|
+
|
|
57
|
+
tabsCount: int
|
|
58
|
+
"""The total number of tabs in the document."""
|
|
59
|
+
|
|
60
|
+
tabs: list[dict]
|
|
61
|
+
"""List of tabs with hierarchical structure (each follows TabMetadata structure)."""
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class DocumentContentResult(TypedDict):
|
|
65
|
+
"""A document with its content in a specific format and metadata."""
|
|
66
|
+
|
|
67
|
+
documentId: str
|
|
68
|
+
"""The unique identifier of the document."""
|
|
69
|
+
|
|
70
|
+
title: str
|
|
71
|
+
"""The title of the document."""
|
|
72
|
+
|
|
73
|
+
documentUrl: str
|
|
74
|
+
"""The URL to open and edit the document in Google Docs."""
|
|
75
|
+
|
|
76
|
+
content: str
|
|
77
|
+
"""The document content in the requested format (markdown, HTML, or DocMD)."""
|
|
78
|
+
|
|
79
|
+
format: str
|
|
80
|
+
"""The format of the content: 'markdown', 'html', 'docmd', or 'google_api_json'."""
|
|
81
|
+
|
|
82
|
+
tabs_count: int
|
|
83
|
+
"""The number of tabs in the document (0 if no tabs)."""
|
|
84
|
+
|
|
85
|
+
total_character_count: int
|
|
86
|
+
"""Approximate total character count across all tabs or main body if no tabs."""
|
|
87
|
+
|
|
88
|
+
total_word_count: int
|
|
89
|
+
"""Approximate total word count across all tabs or main body if no tabs."""
|
|
90
|
+
|
|
91
|
+
main_body_character_count: int
|
|
92
|
+
"""Approximate character count of the main body content only (0 if document has tabs)."""
|
|
93
|
+
|
|
94
|
+
main_body_word_count: int
|
|
95
|
+
"""Approximate word count of the main body content only (0 if document has tabs)."""
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
class DocumentListItem(TypedDict):
|
|
99
|
+
"""Metadata for a document from search results."""
|
|
100
|
+
|
|
101
|
+
id: str
|
|
102
|
+
"""The unique identifier of the document."""
|
|
103
|
+
|
|
104
|
+
name: str
|
|
105
|
+
"""The name/title of the document."""
|
|
106
|
+
|
|
107
|
+
kind: str
|
|
108
|
+
"""The kind of the resource (typically 'drive#file')."""
|
|
109
|
+
|
|
110
|
+
mimeType: str
|
|
111
|
+
"""The MIME type (typically 'application/vnd.google-apps.document')."""
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
class SearchDocumentsResponse(TypedDict, total=False):
|
|
115
|
+
"""Response from search_documents with document metadata and pagination."""
|
|
116
|
+
|
|
117
|
+
documents_count: int
|
|
118
|
+
"""The number of documents returned in this response."""
|
|
119
|
+
|
|
120
|
+
documents: list[dict]
|
|
121
|
+
"""List of document metadata matching search criteria."""
|
|
122
|
+
|
|
123
|
+
pagination_token: str
|
|
124
|
+
"""Token to retrieve the next page of results (if available)."""
|
|
125
|
+
|
|
126
|
+
has_more: bool
|
|
127
|
+
"""Whether there are more documents available to retrieve."""
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
class SearchAndRetrieveResponse(TypedDict, total=False):
|
|
131
|
+
"""Response from search_and_retrieve_documents with full content and metadata."""
|
|
132
|
+
|
|
133
|
+
documents_count: int
|
|
134
|
+
"""The number of documents returned in this response."""
|
|
135
|
+
|
|
136
|
+
documents: list[dict]
|
|
137
|
+
"""List of documents with their content and metadata."""
|
|
138
|
+
|
|
139
|
+
pagination_token: str
|
|
140
|
+
"""Token to retrieve the next page of results (if available)."""
|
|
141
|
+
|
|
142
|
+
has_more: bool
|
|
143
|
+
"""Whether there are more documents available to retrieve."""
|
|
@@ -7,7 +7,11 @@ from arcade_google_docs.tools.create import (
|
|
|
7
7
|
create_document_from_text,
|
|
8
8
|
)
|
|
9
9
|
from arcade_google_docs.tools.file_picker import generate_google_file_picker_url
|
|
10
|
-
from arcade_google_docs.tools.get import
|
|
10
|
+
from arcade_google_docs.tools.get import (
|
|
11
|
+
get_document_as_docmd,
|
|
12
|
+
get_document_by_id,
|
|
13
|
+
get_document_metadata,
|
|
14
|
+
)
|
|
11
15
|
from arcade_google_docs.tools.search import (
|
|
12
16
|
search_and_retrieve_documents,
|
|
13
17
|
search_documents,
|
|
@@ -18,7 +22,9 @@ from arcade_google_docs.tools.update import insert_text_at_end_of_document
|
|
|
18
22
|
__all__ = [
|
|
19
23
|
"create_blank_document",
|
|
20
24
|
"create_document_from_text",
|
|
25
|
+
"get_document_as_docmd",
|
|
21
26
|
"get_document_by_id",
|
|
27
|
+
"get_document_metadata",
|
|
22
28
|
"comment_on_document",
|
|
23
29
|
"list_document_comments",
|
|
24
30
|
"insert_text_at_end_of_document",
|
|
@@ -15,7 +15,9 @@ def get_docmd(google_service: Any, document_id: str) -> DocMD:
|
|
|
15
15
|
Returns:
|
|
16
16
|
DocMD object
|
|
17
17
|
"""
|
|
18
|
-
google_get_response =
|
|
18
|
+
google_get_response = (
|
|
19
|
+
google_service.documents().get(documentId=document_id, includeTabsContent=True).execute()
|
|
20
|
+
)
|
|
19
21
|
document = Document(**google_get_response)
|
|
20
22
|
docmd = build_docmd(document)
|
|
21
23
|
return docmd
|
arcade_google_docs/tools/get.py
CHANGED
|
@@ -5,7 +5,15 @@ from arcade_tdk.auth import Google
|
|
|
5
5
|
|
|
6
6
|
from arcade_google_docs.docmd import build_docmd
|
|
7
7
|
from arcade_google_docs.models.document import Document
|
|
8
|
-
from arcade_google_docs.
|
|
8
|
+
from arcade_google_docs.models.responses import DocumentMetadata
|
|
9
|
+
from arcade_google_docs.utils import (
|
|
10
|
+
_calculate_character_count,
|
|
11
|
+
_calculate_word_count,
|
|
12
|
+
build_docs_service,
|
|
13
|
+
build_tab_metadata_recursive,
|
|
14
|
+
count_tab_chars_recursive,
|
|
15
|
+
count_tab_words_recursive,
|
|
16
|
+
)
|
|
9
17
|
|
|
10
18
|
|
|
11
19
|
# Uses https://developers.google.com/docs/api/reference/rest/v1/documents/get
|
|
@@ -28,9 +36,7 @@ async def get_document_by_id(
|
|
|
28
36
|
"""
|
|
29
37
|
service = build_docs_service(context.get_auth_token_or_empty())
|
|
30
38
|
|
|
31
|
-
|
|
32
|
-
# https://developers.google.com/docs/api/reference/rest/v1/documents#Document
|
|
33
|
-
request = service.documents().get(documentId=document_id)
|
|
39
|
+
request = service.documents().get(documentId=document_id, includeTabsContent=True)
|
|
34
40
|
response = request.execute()
|
|
35
41
|
return dict(response)
|
|
36
42
|
|
|
@@ -45,14 +51,65 @@ async def get_document_by_id(
|
|
|
45
51
|
async def get_document_as_docmd(
|
|
46
52
|
context: ToolContext,
|
|
47
53
|
document_id: Annotated[str, "The ID of the document to retrieve."],
|
|
54
|
+
tab_id: Annotated[
|
|
55
|
+
str | None,
|
|
56
|
+
"The ID of a specific tab to retrieve. If provided, returns only content from that tab. "
|
|
57
|
+
"If omitted, returns all tabs in sequential depth-first order.",
|
|
58
|
+
] = None,
|
|
48
59
|
) -> Annotated[str, "The document contents as DocMD"]:
|
|
49
60
|
"""
|
|
50
61
|
Get the latest version of the specified Google Docs document as DocMD.
|
|
51
62
|
The DocMD output will include tags that can be used to annotate the document with location
|
|
52
|
-
information, the type of block, block IDs, and other metadata.
|
|
63
|
+
information, the type of block, block IDs, and other metadata. If the document has tabs,
|
|
64
|
+
all tabs are included in sequential order unless a specific tab_id is provided.
|
|
53
65
|
"""
|
|
54
66
|
service = build_docs_service(context.get_auth_token_or_empty())
|
|
55
67
|
|
|
56
|
-
request = service.documents().get(documentId=document_id)
|
|
68
|
+
request = service.documents().get(documentId=document_id, includeTabsContent=True)
|
|
57
69
|
response = request.execute()
|
|
58
|
-
return build_docmd(Document(**response)).to_string()
|
|
70
|
+
return build_docmd(Document(**response), tab_id=tab_id).to_string()
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@tool(
|
|
74
|
+
requires_auth=Google(
|
|
75
|
+
scopes=[
|
|
76
|
+
"https://www.googleapis.com/auth/drive.file",
|
|
77
|
+
],
|
|
78
|
+
),
|
|
79
|
+
)
|
|
80
|
+
async def get_document_metadata(
|
|
81
|
+
context: ToolContext,
|
|
82
|
+
document_id: Annotated[str, "The ID of the document to get metadata for"],
|
|
83
|
+
) -> Annotated[DocumentMetadata, "Document metadata including hierarchical tab structure"]:
|
|
84
|
+
"""
|
|
85
|
+
Get metadata for a Google Docs document including hierarchical tab structure.
|
|
86
|
+
Returns document title, ID, URL, total character count, and nested tab information
|
|
87
|
+
with character counts for each tab.
|
|
88
|
+
"""
|
|
89
|
+
service = build_docs_service(context.get_auth_token_or_empty())
|
|
90
|
+
|
|
91
|
+
request = service.documents().get(documentId=document_id, includeTabsContent=True)
|
|
92
|
+
response = request.execute()
|
|
93
|
+
document = Document(**response)
|
|
94
|
+
|
|
95
|
+
total_char_count = 0
|
|
96
|
+
total_word_count = 0
|
|
97
|
+
tabs_metadata: list = []
|
|
98
|
+
|
|
99
|
+
if document.tabs and len(document.tabs) > 0:
|
|
100
|
+
tabs_metadata = build_tab_metadata_recursive(document.tabs)
|
|
101
|
+
total_char_count = sum(count_tab_chars_recursive(tab) for tab in tabs_metadata)
|
|
102
|
+
total_word_count = sum(count_tab_words_recursive(tab) for tab in tabs_metadata)
|
|
103
|
+
elif document.body:
|
|
104
|
+
total_char_count = _calculate_character_count(document.body.content)
|
|
105
|
+
total_word_count = _calculate_word_count(document.body.content)
|
|
106
|
+
|
|
107
|
+
return {
|
|
108
|
+
"documentId": document.documentId or "",
|
|
109
|
+
"title": document.title or "",
|
|
110
|
+
"documentUrl": f"https://docs.google.com/document/d/{document.documentId}/edit",
|
|
111
|
+
"approximateTotalCharacterCount": total_char_count,
|
|
112
|
+
"approximateTotalWordCount": total_word_count,
|
|
113
|
+
"tabsCount": len(tabs_metadata),
|
|
114
|
+
"tabs": tabs_metadata,
|
|
115
|
+
}
|
|
@@ -3,15 +3,18 @@ from typing import Annotated, Any
|
|
|
3
3
|
from arcade_tdk import ToolContext, tool
|
|
4
4
|
from arcade_tdk.auth import Google
|
|
5
5
|
|
|
6
|
-
from arcade_google_docs.doc_to_html import convert_document_to_html
|
|
7
|
-
from arcade_google_docs.doc_to_markdown import convert_document_to_markdown
|
|
8
|
-
from arcade_google_docs.docmd import build_docmd
|
|
9
6
|
from arcade_google_docs.enum import DocumentFormat, OrderBy
|
|
10
7
|
from arcade_google_docs.models.document import Document
|
|
8
|
+
from arcade_google_docs.models.responses import (
|
|
9
|
+
SearchAndRetrieveResponse,
|
|
10
|
+
SearchDocumentsResponse,
|
|
11
|
+
)
|
|
11
12
|
from arcade_google_docs.tools import get_document_by_id
|
|
12
13
|
from arcade_google_docs.utils import (
|
|
14
|
+
build_document_content_result,
|
|
13
15
|
build_drive_service,
|
|
14
16
|
build_files_list_params,
|
|
17
|
+
build_search_retrieve_response,
|
|
15
18
|
)
|
|
16
19
|
|
|
17
20
|
|
|
@@ -63,12 +66,12 @@ async def search_documents(
|
|
|
63
66
|
str | None, "The pagination token to continue a previous request"
|
|
64
67
|
] = None,
|
|
65
68
|
) -> Annotated[
|
|
66
|
-
|
|
67
|
-
"
|
|
68
|
-
"(a list of document details including 'kind', 'mimeType', 'id', and 'name' for each document)",
|
|
69
|
+
SearchDocumentsResponse,
|
|
70
|
+
"Document count, list of documents, pagination token, and has_more flag",
|
|
69
71
|
]:
|
|
70
72
|
"""
|
|
71
|
-
Searches for documents in the user's Google Drive. Excludes documents
|
|
73
|
+
Searches for documents in the user's Google Drive. Excludes documents in trash.
|
|
74
|
+
Returns metadata only. Use get_document_metadata or get_document_as_docmd for content.
|
|
72
75
|
"""
|
|
73
76
|
if document_contains or document_not_contains:
|
|
74
77
|
# Google drive API does not support other order_by values for
|
|
@@ -111,11 +114,17 @@ async def search_documents(
|
|
|
111
114
|
if not pagination_token or len(batch) < page_size:
|
|
112
115
|
break
|
|
113
116
|
|
|
114
|
-
|
|
117
|
+
response_dict: dict = {
|
|
115
118
|
"documents_count": len(files),
|
|
116
119
|
"documents": files,
|
|
120
|
+
"has_more": pagination_token is not None,
|
|
117
121
|
}
|
|
118
122
|
|
|
123
|
+
if pagination_token:
|
|
124
|
+
response_dict["pagination_token"] = pagination_token
|
|
125
|
+
|
|
126
|
+
return response_dict # type: ignore[return-value]
|
|
127
|
+
|
|
119
128
|
|
|
120
129
|
@tool(
|
|
121
130
|
requires_auth=Google(
|
|
@@ -163,18 +172,18 @@ async def search_and_retrieve_documents(
|
|
|
163
172
|
str | None, "The pagination token to continue a previous request"
|
|
164
173
|
] = None,
|
|
165
174
|
) -> Annotated[
|
|
166
|
-
|
|
167
|
-
"A dictionary containing
|
|
168
|
-
"
|
|
175
|
+
SearchAndRetrieveResponse,
|
|
176
|
+
"A dictionary containing document count, list of documents with content and metadata, "
|
|
177
|
+
"pagination token, and has_more flag",
|
|
169
178
|
]:
|
|
170
179
|
"""
|
|
171
|
-
Searches for documents in the user's Google Drive and returns
|
|
172
|
-
content
|
|
180
|
+
Searches for documents in the user's Google Drive and returns documents with their main body
|
|
181
|
+
content and tab metadata. Excludes documents that are in the trash.
|
|
173
182
|
|
|
174
|
-
|
|
175
|
-
|
|
183
|
+
Returns main body content only with metadata about tabs. Use get_document_as_docmd() to retrieve
|
|
184
|
+
full tab content for specific documents. Use search_documents() for metadata-only searches.
|
|
176
185
|
"""
|
|
177
|
-
|
|
186
|
+
search_response = await search_documents(
|
|
178
187
|
context=context,
|
|
179
188
|
document_contains=document_contains,
|
|
180
189
|
document_not_contains=document_not_contains,
|
|
@@ -186,21 +195,12 @@ async def search_and_retrieve_documents(
|
|
|
186
195
|
pagination_token=pagination_token,
|
|
187
196
|
)
|
|
188
197
|
|
|
189
|
-
documents = []
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
document =
|
|
198
|
+
documents: list = []
|
|
199
|
+
for item in search_response["documents"]:
|
|
200
|
+
doc_dict = await get_document_by_id(context, document_id=item["id"])
|
|
201
|
+
document = Document(**doc_dict)
|
|
202
|
+
doc_result = build_document_content_result(document, doc_dict, return_format)
|
|
203
|
+
documents.append(doc_result)
|
|
193
204
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
elif return_format == DocumentFormat.MARKDOWN:
|
|
197
|
-
document = convert_document_to_markdown(document)
|
|
198
|
-
elif return_format == DocumentFormat.HTML:
|
|
199
|
-
document = convert_document_to_html(document)
|
|
200
|
-
|
|
201
|
-
documents.append(document)
|
|
202
|
-
|
|
203
|
-
return {
|
|
204
|
-
"documents_count": len(documents),
|
|
205
|
-
"documents": documents,
|
|
206
|
-
}
|
|
205
|
+
result = build_search_retrieve_response(documents, search_response)
|
|
206
|
+
return result # type: ignore[return-value]
|