arcade-google-docs 4.3.1__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -54,12 +54,15 @@ Example DocMD as a string:
54
54
  from collections.abc import Callable
55
55
  from enum import Enum
56
56
 
57
+ from arcade_tdk.errors import RetryableToolError
57
58
  from pydantic import BaseModel
58
59
 
59
60
  from arcade_google_docs.models.document import (
60
61
  Document,
61
62
  NamedStyleType,
62
63
  Paragraph,
64
+ StructuralElement,
65
+ Tab,
63
66
  Table,
64
67
  TextStyle,
65
68
  )
@@ -188,10 +191,9 @@ class DocMD(BaseModel):
188
191
  )
189
192
 
190
193
 
191
- def build_docmd(document: Document) -> DocMD: # noqa: C901
194
+ def build_docmd(document: Document, tab_id: str | None = None) -> DocMD:
192
195
  doc_id = document.documentId or ""
193
196
  rev = document.revisionId
194
- tab = ""
195
197
 
196
198
  counters: dict[str, int] = {
197
199
  "H": 0,
@@ -210,7 +212,73 @@ def build_docmd(document: Document) -> DocMD: # noqa: C901
210
212
 
211
213
  blocks: list[DocMDBlock] = []
212
214
 
213
- for se in document.body.content or [] if document.body else []:
215
+ if document.tabs and len(document.tabs) > 0:
216
+ flattened_tabs = _flatten_tabs_depth_first(document.tabs)
217
+
218
+ if tab_id:
219
+ matching_tabs = [
220
+ t for t in flattened_tabs if t.tabProperties and t.tabProperties.tabId == tab_id
221
+ ]
222
+ if not matching_tabs:
223
+ available_ids = [t.tabProperties.tabId for t in flattened_tabs if t.tabProperties]
224
+ raise RetryableToolError(
225
+ message=f"Tab with ID '{tab_id}' not found in document",
226
+ additional_prompt_content=f"Available tab IDs: {available_ids}",
227
+ retry_after_ms=100,
228
+ )
229
+ flattened_tabs = matching_tabs
230
+
231
+ for tab_obj in flattened_tabs:
232
+ if not tab_obj.documentTab or not tab_obj.tabProperties:
233
+ continue
234
+
235
+ tab_metadata = {
236
+ "tabId": tab_obj.tabProperties.tabId or "",
237
+ "title": tab_obj.tabProperties.title or "",
238
+ "nestingLevel": str(tab_obj.tabProperties.nestingLevel or 0),
239
+ "index": str(tab_obj.tabProperties.index or 0),
240
+ }
241
+ if tab_obj.tabProperties.parentTabId:
242
+ tab_metadata["parentTabId"] = tab_obj.tabProperties.parentTabId
243
+
244
+ body_content = []
245
+ if tab_obj.documentTab.body and tab_obj.documentTab.body.content:
246
+ body_content = tab_obj.documentTab.body.content
247
+ _process_body_content(
248
+ body_content,
249
+ next_id,
250
+ tab_metadata,
251
+ blocks,
252
+ )
253
+ else:
254
+ body_content = []
255
+ if document.body and document.body.content:
256
+ body_content = document.body.content
257
+ _process_body_content(
258
+ body_content,
259
+ next_id,
260
+ {},
261
+ blocks,
262
+ )
263
+
264
+ return DocMD(documentId=doc_id, revisionId=rev, tab="", blocks=blocks)
265
+
266
+
267
+ def _process_body_content(
268
+ content: list[StructuralElement],
269
+ next_id_func: Callable[[str], str],
270
+ tab_metadata: dict[str, str],
271
+ blocks: list[DocMDBlock],
272
+ ) -> None:
273
+ """Process structural elements from a body (main document or tab).
274
+
275
+ Args:
276
+ content: List of structural elements to process
277
+ next_id_func: Function to generate unique block IDs
278
+ tab_metadata: Dict with tab information (tabId, title, nestingLevel, etc.)
279
+ blocks: List to append processed blocks to
280
+ """
281
+ for se in content:
214
282
  if se.paragraph is not None:
215
283
  p: Paragraph = se.paragraph
216
284
  named = p.paragraphStyle.namedStyleType if p.paragraphStyle else None
@@ -225,34 +293,30 @@ def build_docmd(document: Document) -> DocMD: # noqa: C901
225
293
 
226
294
  block_type: str
227
295
  block_id: str
228
- attrs: dict[str, str] = {}
229
- # Only add tab attribute if it's not empty (not the default tab)
230
- if tab:
231
- attrs["tab"] = tab
296
+ attrs: dict[str, str] = tab_metadata.copy() if tab_metadata else {}
232
297
 
233
298
  if is_heading:
234
299
  level = int(str(named).split("_")[-1])
235
300
  block_type = f"HEADING_{level}"
236
- block_id = next_id("H")
301
+ block_id = next_id_func("H")
237
302
  if p.paragraphStyle and p.paragraphStyle.headingId:
238
303
  attrs["headingId"] = p.paragraphStyle.headingId
239
304
  else:
240
305
  if p.bullet and p.bullet.listId:
241
306
  block_type = DocMDBlockType.UL_ITEM.value
242
- block_id = next_id("UL")
307
+ block_id = next_id_func("UL")
243
308
  attrs["listId"] = p.bullet.listId
244
309
  if p.bullet.nestingLevel is not None:
245
310
  attrs["level"] = str(p.bullet.nestingLevel)
246
311
  else:
247
312
  block_type = DocMDBlockType.PARAGRAPH.value
248
- block_id = next_id("P")
313
+ block_id = next_id_func("P")
249
314
 
250
315
  vis_start, vis_end, text, style_runs = _visible_span_and_text(p)
251
316
  start = vis_start if vis_start is not None else se.startIndex or 0
252
317
  end = vis_end if vis_end is not None else se.endIndex or start
253
318
  text_line = (text or "").rstrip("\n")
254
319
 
255
- # Add style ranges to attrs if any styles are present
256
320
  if style_runs:
257
321
  style_ranges = _format_style_ranges(style_runs, start)
258
322
  if style_ranges:
@@ -270,25 +334,19 @@ def build_docmd(document: Document) -> DocMD: # noqa: C901
270
334
  )
271
335
 
272
336
  elif se.table is not None:
273
- _process_table(se.table, se, next_id, tab, blocks)
274
-
275
- return DocMD(documentId=doc_id, revisionId=rev, tab=tab, blocks=blocks)
337
+ _process_table(se.table, se, next_id_func, tab_metadata, blocks)
276
338
 
277
339
 
278
340
  def _process_table( # type: ignore[no-untyped-def]
279
341
  table: Table,
280
342
  se,
281
343
  next_id_func: Callable[[str], str],
282
- tab: str,
344
+ tab_metadata: dict[str, str],
283
345
  blocks: list[DocMDBlock],
284
346
  ) -> None:
285
347
  """Process a table structural element and add table/row/cell blocks."""
286
348
  table_id = next_id_func("TABLE")
287
- table_attrs: dict[str, str] = {}
288
-
289
- # Only add tab attribute if it's not empty (not the default tab)
290
- if tab:
291
- table_attrs["tab"] = tab
349
+ table_attrs: dict[str, str] = tab_metadata.copy() if tab_metadata else {}
292
350
 
293
351
  if table.rows is not None:
294
352
  table_attrs["rows"] = str(table.rows)
@@ -310,7 +368,7 @@ def _process_table( # type: ignore[no-untyped-def]
310
368
  )
311
369
 
312
370
  for row_idx, table_row in enumerate(table.tableRows or []):
313
- _process_table_row(table_row, row_idx, table_start, next_id_func, tab, blocks)
371
+ _process_table_row(table_row, row_idx, table_start, next_id_func, tab_metadata, blocks)
314
372
 
315
373
 
316
374
  def _process_table_row( # type: ignore[no-untyped-def]
@@ -318,15 +376,13 @@ def _process_table_row( # type: ignore[no-untyped-def]
318
376
  row_idx: int,
319
377
  table_start: int,
320
378
  next_id_func: Callable[[str], str],
321
- tab: str,
379
+ tab_metadata: dict[str, str],
322
380
  blocks: list[DocMDBlock],
323
381
  ) -> None:
324
382
  """Process a table row and add row/cell blocks."""
325
383
  row_id = next_id_func("TR")
326
- row_attrs: dict[str, str] = {"row": str(row_idx)}
327
-
328
- if tab:
329
- row_attrs["tab"] = tab
384
+ row_attrs: dict[str, str] = tab_metadata.copy() if tab_metadata else {}
385
+ row_attrs["row"] = str(row_idx)
330
386
 
331
387
  row_start = table_row.startIndex or table_start
332
388
  row_end = table_row.endIndex or row_start
@@ -343,27 +399,25 @@ def _process_table_row( # type: ignore[no-untyped-def]
343
399
  )
344
400
 
345
401
  for cell_idx, table_cell in enumerate(table_row.tableCells or []):
346
- _process_table_cell(table_cell, row_idx, cell_idx, row_start, next_id_func, tab, blocks)
402
+ _process_table_cell(
403
+ table_cell, row_idx, cell_idx, row_start, next_id_func, tab_metadata, blocks
404
+ )
347
405
 
348
406
 
349
- def _process_table_cell( # type: ignore[no-untyped-def] # noqa: C901
407
+ def _process_table_cell( # type: ignore[no-untyped-def]
350
408
  table_cell,
351
409
  row_idx: int,
352
410
  cell_idx: int,
353
411
  row_start: int,
354
412
  next_id_func: Callable[[str], str],
355
- tab: str,
413
+ tab_metadata: dict[str, str],
356
414
  blocks: list[DocMDBlock],
357
415
  ) -> None:
358
416
  """Process a table cell and add cell block."""
359
417
  cell_id = next_id_func("TC")
360
- cell_attrs: dict[str, str] = {
361
- "row": str(row_idx),
362
- "col": str(cell_idx),
363
- }
364
-
365
- if tab:
366
- cell_attrs["tab"] = tab
418
+ cell_attrs: dict[str, str] = tab_metadata.copy() if tab_metadata else {}
419
+ cell_attrs["row"] = str(row_idx)
420
+ cell_attrs["col"] = str(cell_idx)
367
421
 
368
422
  # Add cell styling attributes if present
369
423
  if (
@@ -532,3 +586,28 @@ def _format_style_ranges(style_runs: list[dict], block_start: int) -> str:
532
586
  consolidated.append(f"{style_str}:{abs_start}-{abs_end}")
533
587
 
534
588
  return ",".join(consolidated) if consolidated else ""
589
+
590
+
591
def _flatten_tabs_depth_first(
    tabs: list[Tab] | None, max_depth: int = 4, current_depth: int = 0
) -> list[Tab]:
    """Return every tab in the hierarchy as a flat, pre-ordered list.

    Each tab is emitted before its ``childTabs``, and all of a tab's descendants
    are emitted before its next sibling.

    Args:
        tabs: Tabs at the starting level; each may carry nested ``childTabs``.
        max_depth: Levels at or beyond this depth are not visited.
        current_depth: Depth assigned to the tabs passed in ``tabs``.

    Returns:
        The visited tabs in depth-first order; empty when ``tabs`` is empty/None
        or the starting depth is already at ``max_depth``.
    """
    if current_depth >= max_depth or not tabs:
        return []

    flattened: list[Tab] = []
    # Explicit LIFO stack of (tab, depth). Siblings are pushed in reverse so
    # they are popped, and therefore emitted, in their original order.
    pending: list[tuple[Tab, int]] = [(tab, current_depth) for tab in reversed(tabs)]
    while pending:
        node, depth = pending.pop()
        flattened.append(node)

        child_depth = depth + 1
        # Children that would sit at or past max_depth are dropped.
        if node.childTabs and child_depth < max_depth:
            pending.extend((child, child_depth) for child in reversed(node.childTabs))

    return flattened
@@ -0,0 +1,143 @@
1
+ """
2
+ TypedDict response models for Google Docs tools.
3
+
4
+ These models define the structure of responses returned by Google Docs tools,
5
+ with field descriptions as string literals for tool compatibility.
6
+ """
7
+
8
+ from typing import TypedDict
9
+
10
+
11
# `total=False` makes every key optional for type checkers, so a producer may
# omit keys that do not apply (for example `parentTabId` on a top-level tab).
# The bare string after each field is its description; the module docstring
# states these literals are kept for tool compatibility, so leave them in place.
class TabMetadata(TypedDict, total=False):
    """Metadata for a single tab in a Google Docs document."""

    tabId: str
    """The unique identifier of the tab."""

    title: str
    """The title/name of the tab."""

    index: int
    """The position of the tab among its siblings (0-indexed)."""

    nestingLevel: int
    """The nesting depth (0 for top-level, 1 for child, 2 for grandchild)."""

    approximateCharacterCount: int
    """Approximate number of characters in this tab's content (excluding child tabs)."""

    approximateWordCount: int
    """Approximate number of words in this tab's content (excluding child tabs)."""

    parentTabId: str
    """The ID of the parent tab (if this is a nested tab)."""

    # Typed as plain dicts rather than a recursive reference to TabMetadata.
    childTabs: list[dict]
    """List of nested child tabs within this tab (each follows TabMetadata structure)."""
37
+
38
+
39
# Declared without `total=False`, so type checkers treat every key as required.
# The bare string after each field is its description (kept as string literals
# for tool compatibility, per the module docstring).
class DocumentMetadata(TypedDict):
    """Complete metadata for a Google Docs document including tab hierarchy."""

    documentId: str
    """The unique identifier of the document."""

    title: str
    """The title of the document."""

    documentUrl: str
    """The URL to open and edit the document in Google Docs."""

    approximateTotalCharacterCount: int
    """Approximate total number of characters across all tabs (or main body if no tabs)."""

    approximateTotalWordCount: int
    """Approximate total number of words across all tabs (or main body if no tabs)."""

    tabsCount: int
    """The total number of tabs in the document."""

    # Typed as plain dicts; the description says each entry follows TabMetadata.
    tabs: list[dict]
    """List of tabs with hierarchical structure (each follows TabMetadata structure)."""
62
+
63
+
64
# Declared without `total=False`, so type checkers treat every key as required.
# Key casing is mixed: the identity keys are camelCase (documentId, documentUrl)
# while the count keys are snake_case (tabs_count, total_word_count, ...).
# NOTE(review): the `content` description lists markdown/HTML/DocMD, but the
# `format` description also lists 'google_api_json' -- confirm what `content`
# holds for that format.
class DocumentContentResult(TypedDict):
    """A document with its content in a specific format and metadata."""

    documentId: str
    """The unique identifier of the document."""

    title: str
    """The title of the document."""

    documentUrl: str
    """The URL to open and edit the document in Google Docs."""

    content: str
    """The document content in the requested format (markdown, HTML, or DocMD)."""

    format: str
    """The format of the content: 'markdown', 'html', 'docmd', or 'google_api_json'."""

    tabs_count: int
    """The number of tabs in the document (0 if no tabs)."""

    total_character_count: int
    """Approximate total character count across all tabs or main body if no tabs."""

    total_word_count: int
    """Approximate total word count across all tabs or main body if no tabs."""

    main_body_character_count: int
    """Approximate character count of the main body content only (0 if document has tabs)."""

    main_body_word_count: int
    """Approximate word count of the main body content only (0 if document has tabs)."""
96
+
97
+
98
# One entry of a search result list. Declared without `total=False`, so type
# checkers treat all four keys as required.
class DocumentListItem(TypedDict):
    """Metadata for a document from search results."""

    id: str
    """The unique identifier of the document."""

    name: str
    """The name/title of the document."""

    kind: str
    """The kind of the resource (typically 'drive#file')."""

    mimeType: str
    """The MIME type (typically 'application/vnd.google-apps.document')."""
112
+
113
+
114
# `total=False` makes every key optional for type checkers. That matches
# `search_documents`, which always sets documents_count/documents/has_more but
# adds `pagination_token` only when a token is available.
class SearchDocumentsResponse(TypedDict, total=False):
    """Response from search_documents with document metadata and pagination."""

    documents_count: int
    """The number of documents returned in this response."""

    # Typed as plain dicts rather than list[DocumentListItem].
    documents: list[dict]
    """List of document metadata matching search criteria."""

    pagination_token: str
    """Token to retrieve the next page of results (if available)."""

    has_more: bool
    """Whether there are more documents available to retrieve."""
128
+
129
+
130
# Declares the same four keys as SearchDocumentsResponse; `total=False` makes
# each of them optional for type checkers.
# NOTE(review): presumably `pagination_token` is omitted when there is no next
# page, as in search_documents -- confirm against build_search_retrieve_response.
class SearchAndRetrieveResponse(TypedDict, total=False):
    """Response from search_and_retrieve_documents with full content and metadata."""

    documents_count: int
    """The number of documents returned in this response."""

    # Typed as plain dicts rather than list[DocumentContentResult].
    documents: list[dict]
    """List of documents with their content and metadata."""

    pagination_token: str
    """Token to retrieve the next page of results (if available)."""

    has_more: bool
    """Whether there are more documents available to retrieve."""
@@ -7,7 +7,11 @@ from arcade_google_docs.tools.create import (
7
7
  create_document_from_text,
8
8
  )
9
9
  from arcade_google_docs.tools.file_picker import generate_google_file_picker_url
10
- from arcade_google_docs.tools.get import get_document_by_id
10
+ from arcade_google_docs.tools.get import (
11
+ get_document_as_docmd,
12
+ get_document_by_id,
13
+ get_document_metadata,
14
+ )
11
15
  from arcade_google_docs.tools.search import (
12
16
  search_and_retrieve_documents,
13
17
  search_documents,
@@ -18,7 +22,9 @@ from arcade_google_docs.tools.update import insert_text_at_end_of_document
18
22
  __all__ = [
19
23
  "create_blank_document",
20
24
  "create_document_from_text",
25
+ "get_document_as_docmd",
21
26
  "get_document_by_id",
27
+ "get_document_metadata",
22
28
  "comment_on_document",
23
29
  "list_document_comments",
24
30
  "insert_text_at_end_of_document",
@@ -15,7 +15,9 @@ def get_docmd(google_service: Any, document_id: str) -> DocMD:
15
15
  Returns:
16
16
  DocMD object
17
17
  """
18
- google_get_response = google_service.documents().get(documentId=document_id).execute()
18
+ google_get_response = (
19
+ google_service.documents().get(documentId=document_id, includeTabsContent=True).execute()
20
+ )
19
21
  document = Document(**google_get_response)
20
22
  docmd = build_docmd(document)
21
23
  return docmd
@@ -5,7 +5,15 @@ from arcade_tdk.auth import Google
5
5
 
6
6
  from arcade_google_docs.docmd import build_docmd
7
7
  from arcade_google_docs.models.document import Document
8
- from arcade_google_docs.utils import build_docs_service
8
+ from arcade_google_docs.models.responses import DocumentMetadata
9
+ from arcade_google_docs.utils import (
10
+ _calculate_character_count,
11
+ _calculate_word_count,
12
+ build_docs_service,
13
+ build_tab_metadata_recursive,
14
+ count_tab_chars_recursive,
15
+ count_tab_words_recursive,
16
+ )
9
17
 
10
18
 
11
19
  # Uses https://developers.google.com/docs/api/reference/rest/v1/documents/get
@@ -28,9 +36,7 @@ async def get_document_by_id(
28
36
  """
29
37
  service = build_docs_service(context.get_auth_token_or_empty())
30
38
 
31
- # Execute the documents().get() method. Returns a Document object
32
- # https://developers.google.com/docs/api/reference/rest/v1/documents#Document
33
- request = service.documents().get(documentId=document_id)
39
+ request = service.documents().get(documentId=document_id, includeTabsContent=True)
34
40
  response = request.execute()
35
41
  return dict(response)
36
42
 
@@ -45,14 +51,65 @@ async def get_document_by_id(
45
51
  async def get_document_as_docmd(
46
52
  context: ToolContext,
47
53
  document_id: Annotated[str, "The ID of the document to retrieve."],
54
+ tab_id: Annotated[
55
+ str | None,
56
+ "The ID of a specific tab to retrieve. If provided, returns only content from that tab. "
57
+ "If omitted, returns all tabs in sequential depth-first order.",
58
+ ] = None,
48
59
  ) -> Annotated[str, "The document contents as DocMD"]:
49
60
  """
50
61
  Get the latest version of the specified Google Docs document as DocMD.
51
62
  The DocMD output will include tags that can be used to annotate the document with location
52
- information, the type of block, block IDs, and other metadata.
63
+ information, the type of block, block IDs, and other metadata. If the document has tabs,
64
+ all tabs are included in sequential order unless a specific tab_id is provided.
53
65
  """
54
66
  service = build_docs_service(context.get_auth_token_or_empty())
55
67
 
56
- request = service.documents().get(documentId=document_id)
68
+ request = service.documents().get(documentId=document_id, includeTabsContent=True)
57
69
  response = request.execute()
58
- return build_docmd(Document(**response)).to_string()
70
+ return build_docmd(Document(**response), tab_id=tab_id).to_string()
71
+
72
+
73
@tool(
    requires_auth=Google(
        scopes=[
            "https://www.googleapis.com/auth/drive.file",
        ],
    ),
)
async def get_document_metadata(
    context: ToolContext,
    document_id: Annotated[str, "The ID of the document to get metadata for"],
) -> Annotated[DocumentMetadata, "Document metadata including hierarchical tab structure"]:
    """
    Get metadata for a Google Docs document including hierarchical tab structure.
    Returns document title, ID, URL, total character count, and nested tab information
    with character counts for each tab.
    """
    # Contract notes (kept out of the docstring, which is left unchanged):
    #   context: supplies the auth token used to build the Docs service.
    #   document_id: ID passed to documents().get(); also the fallback for the
    #       returned ID/URL when the response carries no documentId.
    #   returns: a dict with the seven DocumentMetadata keys; counts come from
    #       the tab tree when tabs exist, otherwise from the main body.
    service = build_docs_service(context.get_auth_token_or_empty())

    # Request tab content so the counts below can be computed from `tabs`.
    request = service.documents().get(documentId=document_id, includeTabsContent=True)
    response = request.execute()
    document = Document(**response)

    # Fix: the URL used to interpolate document.documentId directly, producing
    # ".../d/None/edit" when the field was absent. The fetch succeeded for
    # `document_id`, so use it as the fallback for both the ID and the URL.
    resolved_id = document.documentId or document_id

    total_char_count = 0
    total_word_count = 0
    tabs_metadata: list = []

    if document.tabs:
        tabs_metadata = build_tab_metadata_recursive(document.tabs)
        total_char_count = sum(count_tab_chars_recursive(tab) for tab in tabs_metadata)
        total_word_count = sum(count_tab_words_recursive(tab) for tab in tabs_metadata)
    elif document.body:
        # No tabs: fall back to the main body content.
        total_char_count = _calculate_character_count(document.body.content)
        total_word_count = _calculate_word_count(document.body.content)

    return {
        "documentId": resolved_id,
        "title": document.title or "",
        "documentUrl": f"https://docs.google.com/document/d/{resolved_id}/edit",
        "approximateTotalCharacterCount": total_char_count,
        "approximateTotalWordCount": total_word_count,
        # NOTE(review): this is the length of the list returned by
        # build_tab_metadata_recursive; if that list is nested, child tabs are
        # not counted here -- confirm against the helper.
        "tabsCount": len(tabs_metadata),
        "tabs": tabs_metadata,
    }
@@ -3,15 +3,18 @@ from typing import Annotated, Any
3
3
  from arcade_tdk import ToolContext, tool
4
4
  from arcade_tdk.auth import Google
5
5
 
6
- from arcade_google_docs.doc_to_html import convert_document_to_html
7
- from arcade_google_docs.doc_to_markdown import convert_document_to_markdown
8
- from arcade_google_docs.docmd import build_docmd
9
6
  from arcade_google_docs.enum import DocumentFormat, OrderBy
10
7
  from arcade_google_docs.models.document import Document
8
+ from arcade_google_docs.models.responses import (
9
+ SearchAndRetrieveResponse,
10
+ SearchDocumentsResponse,
11
+ )
11
12
  from arcade_google_docs.tools import get_document_by_id
12
13
  from arcade_google_docs.utils import (
14
+ build_document_content_result,
13
15
  build_drive_service,
14
16
  build_files_list_params,
17
+ build_search_retrieve_response,
15
18
  )
16
19
 
17
20
 
@@ -63,12 +66,12 @@ async def search_documents(
63
66
  str | None, "The pagination token to continue a previous request"
64
67
  ] = None,
65
68
  ) -> Annotated[
66
- dict,
67
- "A dictionary containing 'documents_count' (number of documents returned) and 'documents' "
68
- "(a list of document details including 'kind', 'mimeType', 'id', and 'name' for each document)",
69
+ SearchDocumentsResponse,
70
+ "Document count, list of documents, pagination token, and has_more flag",
69
71
  ]:
70
72
  """
71
- Searches for documents in the user's Google Drive. Excludes documents that are in the trash.
73
+ Searches for documents in the user's Google Drive. Excludes documents in trash.
74
+ Returns metadata only. Use get_document_metadata or get_document_as_docmd for content.
72
75
  """
73
76
  if document_contains or document_not_contains:
74
77
  # Google drive API does not support other order_by values for
@@ -111,11 +114,17 @@ async def search_documents(
111
114
  if not pagination_token or len(batch) < page_size:
112
115
  break
113
116
 
114
- return {
117
+ response_dict: dict = {
115
118
  "documents_count": len(files),
116
119
  "documents": files,
120
+ "has_more": pagination_token is not None,
117
121
  }
118
122
 
123
+ if pagination_token:
124
+ response_dict["pagination_token"] = pagination_token
125
+
126
+ return response_dict # type: ignore[return-value]
127
+
119
128
 
120
129
  @tool(
121
130
  requires_auth=Google(
@@ -163,18 +172,18 @@ async def search_and_retrieve_documents(
163
172
  str | None, "The pagination token to continue a previous request"
164
173
  ] = None,
165
174
  ) -> Annotated[
166
- dict,
167
- "A dictionary containing 'documents_count' (number of documents returned) and 'documents' "
168
- "(a list of documents with their content).",
175
+ SearchAndRetrieveResponse,
176
+ "A dictionary containing document count, list of documents with content and metadata, "
177
+ "pagination token, and has_more flag",
169
178
  ]:
170
179
  """
171
- Searches for documents in the user's Google Drive and returns a list of documents (with text
172
- content) matching the search criteria. Excludes documents that are in the trash.
180
+ Searches for documents in the user's Google Drive and returns documents with their main body
181
+ content and tab metadata. Excludes documents that are in the trash.
173
182
 
174
- Note: use this tool only when the user prompt requires the documents' content. If the user only
175
- needs a list of documents, use the `search_documents` tool instead.
183
+ Returns main body content only with metadata about tabs. Use get_document_as_docmd() to retrieve
184
+ full tab content for specific documents. Use search_documents() for metadata-only searches.
176
185
  """
177
- response = await search_documents(
186
+ search_response = await search_documents(
178
187
  context=context,
179
188
  document_contains=document_contains,
180
189
  document_not_contains=document_not_contains,
@@ -186,21 +195,12 @@ async def search_and_retrieve_documents(
186
195
  pagination_token=pagination_token,
187
196
  )
188
197
 
189
- documents = []
190
-
191
- for item in response["documents"]:
192
- document = await get_document_by_id(context, document_id=item["id"])
198
+ documents: list = []
199
+ for item in search_response["documents"]:
200
+ doc_dict = await get_document_by_id(context, document_id=item["id"])
201
+ document = Document(**doc_dict)
202
+ doc_result = build_document_content_result(document, doc_dict, return_format)
203
+ documents.append(doc_result)
193
204
 
194
- if return_format == DocumentFormat.DOCMD:
195
- document = build_docmd(Document(**document)).to_string()
196
- elif return_format == DocumentFormat.MARKDOWN:
197
- document = convert_document_to_markdown(document)
198
- elif return_format == DocumentFormat.HTML:
199
- document = convert_document_to_html(document)
200
-
201
- documents.append(document)
202
-
203
- return {
204
- "documents_count": len(documents),
205
- "documents": documents,
206
- }
205
+ result = build_search_retrieve_response(documents, search_response)
206
+ return result # type: ignore[return-value]