arcade-google-docs 4.3.2__py3-none-any.whl → 5.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arcade_google_docs/__init__.py +6 -0
- arcade_google_docs/doc_to_html.py +195 -4
- arcade_google_docs/doc_to_markdown.py +181 -3
- arcade_google_docs/docmd.py +115 -36
- arcade_google_docs/models/responses.py +143 -0
- arcade_google_docs/tools/__init__.py +7 -1
- arcade_google_docs/tools/edit_agent/utils.py +3 -1
- arcade_google_docs/tools/get.py +64 -7
- arcade_google_docs/tools/search.py +33 -33
- arcade_google_docs/utils.py +264 -3
- arcade_google_docs/who_am_i_util.py +1 -1
- {arcade_google_docs-4.3.2.dist-info → arcade_google_docs-5.0.1.dist-info}/METADATA +1 -1
- {arcade_google_docs-4.3.2.dist-info → arcade_google_docs-5.0.1.dist-info}/RECORD +14 -13
- {arcade_google_docs-4.3.2.dist-info → arcade_google_docs-5.0.1.dist-info}/WHEEL +1 -1
arcade_google_docs/utils.py
CHANGED
|
@@ -4,7 +4,11 @@ from typing import Any
|
|
|
4
4
|
from google.oauth2.credentials import Credentials
|
|
5
5
|
from googleapiclient.discovery import Resource, build
|
|
6
6
|
|
|
7
|
-
from arcade_google_docs.
|
|
7
|
+
from arcade_google_docs.doc_to_html import convert_document_to_html
|
|
8
|
+
from arcade_google_docs.doc_to_markdown import convert_document_to_markdown
|
|
9
|
+
from arcade_google_docs.docmd import build_docmd
|
|
10
|
+
from arcade_google_docs.enum import Corpora, DocumentFormat, OrderBy
|
|
11
|
+
from arcade_google_docs.models.document import StructuralElement
|
|
8
12
|
|
|
9
13
|
## Set up basic configuration for logging to the console with DEBUG level and a specific format.
|
|
10
14
|
logging.basicConfig(
|
|
@@ -20,7 +24,7 @@ def build_docs_service(auth_token: str | None) -> Resource: # type: ignore[no-a
|
|
|
20
24
|
Build a Docs service object.
|
|
21
25
|
"""
|
|
22
26
|
auth_token = auth_token or ""
|
|
23
|
-
return build("docs", "v1", credentials=Credentials(auth_token))
|
|
27
|
+
return build("docs", "v1", credentials=Credentials(auth_token), cache_discovery=False)
|
|
24
28
|
|
|
25
29
|
|
|
26
30
|
def build_drive_service(auth_token: str | None) -> Resource: # type: ignore[no-any-unimported]
|
|
@@ -28,7 +32,7 @@ def build_drive_service(auth_token: str | None) -> Resource: # type: ignore[no-
|
|
|
28
32
|
Build a Drive service object.
|
|
29
33
|
"""
|
|
30
34
|
auth_token = auth_token or ""
|
|
31
|
-
return build("drive", "v3", credentials=Credentials(auth_token))
|
|
35
|
+
return build("drive", "v3", credentials=Credentials(auth_token), cache_discovery=False)
|
|
32
36
|
|
|
33
37
|
|
|
34
38
|
def build_files_list_params(
|
|
@@ -117,3 +121,260 @@ def remove_none_values(params: dict) -> dict:
|
|
|
117
121
|
:return: A new dictionary with None values removed
|
|
118
122
|
"""
|
|
119
123
|
return {k: v for k, v in params.items() if v is not None}
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def build_tab_metadata_recursive(
    tabs: list, max_depth: int = 4, current_depth: int = 0
) -> list[Any]:
    """Convert a list of tabs into nested metadata dictionaries.

    Tabs without ``tabProperties`` are skipped. Each remaining tab yields one
    dict carrying its id, title, index, nesting level and approximate
    character/word counts; ``parentTabId`` and ``childTabs`` are only present
    when the tab has them.

    Args:
        tabs: List of Tab objects with potential childTabs
        max_depth: Maximum recursion depth (Google Docs enforces 3 levels, using 4 for safety)
        current_depth: Current recursion depth

    Returns:
        List of TabMetadata dicts with nested childTabs
    """
    if current_depth >= max_depth:
        return []

    # Children are only expanded while the next level is still within the limit.
    may_descend = current_depth < max_depth - 1
    collected: list[Any] = []

    for node in tabs:
        props = node.tabProperties
        if not props:
            continue

        doc_tab = node.documentTab
        if doc_tab and doc_tab.body:
            chars = _calculate_character_count(doc_tab.body.content)
            words = _calculate_word_count(doc_tab.body.content)
        else:
            chars = 0
            words = 0

        # Anything that is not a non-negative int falls back to the top level.
        level = props.nestingLevel or 0
        if not (isinstance(level, int) and level >= 0):
            level = 0

        entry: dict = {
            "tabId": props.tabId or "",
            "title": props.title or "",
            "index": props.index or 0,
            "nestingLevel": level,
            "approximateCharacterCount": chars,
            "approximateWordCount": words,
        }

        parent_id = props.parentTabId
        if parent_id:
            entry["parentTabId"] = parent_id

        children = node.childTabs
        if children and may_descend:
            entry["childTabs"] = build_tab_metadata_recursive(
                children, max_depth, current_depth + 1
            )

        collected.append(entry)

    return collected
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def count_tab_chars_recursive(tab_meta: dict) -> int:
    """Sum the approximate character count of a tab and all of its descendants.

    Args:
        tab_meta: TabMetadata dict potentially with childTabs

    Returns:
        The tab's own "approximateCharacterCount" (0 when the key is absent)
        plus the totals of every entry under "childTabs".
    """
    own_chars: int = tab_meta.get("approximateCharacterCount", 0)
    if "childTabs" not in tab_meta:
        return own_chars
    return own_chars + sum(map(count_tab_chars_recursive, tab_meta["childTabs"]))
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
def count_tab_words_recursive(tab_meta: dict) -> int:
    """Sum the approximate word count of a tab and all of its descendants.

    Args:
        tab_meta: TabMetadata dict potentially with childTabs

    Returns:
        The tab's own "approximateWordCount" (0 when the key is absent) plus
        the totals of every entry under "childTabs".
    """
    own_words: int = tab_meta.get("approximateWordCount", 0)
    if "childTabs" not in tab_meta:
        return own_words
    return own_words + sum(map(count_tab_words_recursive, tab_meta["childTabs"]))
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _calculate_character_count(content: list[StructuralElement] | None) -> int:
|
|
211
|
+
"""Calculate total character count from body content.
|
|
212
|
+
|
|
213
|
+
Args:
|
|
214
|
+
content: List of structural elements from a body
|
|
215
|
+
|
|
216
|
+
Returns:
|
|
217
|
+
Total number of characters in the content
|
|
218
|
+
"""
|
|
219
|
+
if not content:
|
|
220
|
+
return 0
|
|
221
|
+
|
|
222
|
+
char_count = 0
|
|
223
|
+
for element in content:
|
|
224
|
+
if element.paragraph:
|
|
225
|
+
for el in element.paragraph.elements or []:
|
|
226
|
+
if el.textRun and el.textRun.content:
|
|
227
|
+
char_count += len(el.textRun.content)
|
|
228
|
+
elif element.table:
|
|
229
|
+
for row in element.table.tableRows or []:
|
|
230
|
+
for cell in row.tableCells or []:
|
|
231
|
+
char_count += _calculate_character_count(cell.content)
|
|
232
|
+
|
|
233
|
+
return char_count
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def _calculate_word_count(content: list[StructuralElement] | None) -> int:
|
|
237
|
+
"""Calculate total word count from body content.
|
|
238
|
+
|
|
239
|
+
Args:
|
|
240
|
+
content: List of structural elements from a body
|
|
241
|
+
|
|
242
|
+
Returns:
|
|
243
|
+
Total number of words in the content
|
|
244
|
+
"""
|
|
245
|
+
if not content:
|
|
246
|
+
return 0
|
|
247
|
+
|
|
248
|
+
word_count = 0
|
|
249
|
+
for element in content:
|
|
250
|
+
if element.paragraph:
|
|
251
|
+
for el in element.paragraph.elements or []:
|
|
252
|
+
if el.textRun and el.textRun.content:
|
|
253
|
+
text = el.textRun.content.strip()
|
|
254
|
+
if text:
|
|
255
|
+
word_count += len(text.split())
|
|
256
|
+
elif element.table:
|
|
257
|
+
for row in element.table.tableRows or []:
|
|
258
|
+
for cell in row.tableCells or []:
|
|
259
|
+
word_count += _calculate_word_count(cell.content)
|
|
260
|
+
|
|
261
|
+
return word_count
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
def calculate_total_tabs_characters(tabs: list, max_depth: int = 4, current_depth: int = 0) -> int:
    """Add up the character counts of every tab body, descending into child tabs.

    Args:
        tabs: List of Tab objects
        max_depth: Maximum recursion depth (Google Docs enforces 3 levels, using 4 for safety)
        current_depth: Current recursion depth

    Returns:
        Total character count across all tabs; 0 once ``current_depth`` has
        reached ``max_depth``.
    """
    if current_depth >= max_depth:
        return 0

    child_depth = current_depth + 1
    running = 0
    for node in tabs:
        doc_tab = node.documentTab
        if doc_tab and doc_tab.body:
            running += _calculate_character_count(doc_tab.body.content)

        children = node.childTabs
        if children:
            running += calculate_total_tabs_characters(children, max_depth, child_depth)

    return running
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def calculate_total_tabs_words(tabs: list, max_depth: int = 4, current_depth: int = 0) -> int:
    """Add up the word counts of every tab body, descending into child tabs.

    Args:
        tabs: List of Tab objects
        max_depth: Maximum recursion depth (Google Docs enforces 3 levels, using 4 for safety)
        current_depth: Current recursion depth

    Returns:
        Total word count across all tabs; 0 once ``current_depth`` has reached
        ``max_depth``.
    """
    if current_depth >= max_depth:
        return 0

    child_depth = current_depth + 1
    running = 0
    for node in tabs:
        doc_tab = node.documentTab
        if doc_tab and doc_tab.body:
            running += _calculate_word_count(doc_tab.body.content)

        children = node.childTabs
        if children:
            running += calculate_total_tabs_words(children, max_depth, child_depth)

    return running
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def build_document_content_result(document: Any, doc_dict: dict, return_format: Any) -> dict:
    """Assemble the content payload and size statistics for one document.

    The totals come from the document's tabs when it has any; otherwise they
    equal the main body counts. The content is rendered as DocMD or Markdown
    when requested, and as HTML for any other format value.

    Args:
        document: Parsed Document object
        doc_dict: Raw document dict for conversion functions
        return_format: Desired output format (DocumentFormat enum)

    Returns:
        DocumentContentResult with content and metadata
    """
    tab_list = document.tabs
    tabs_count = len(tab_list) if tab_list else 0

    total_chars = 0
    total_words = 0
    if tabs_count > 0:
        total_chars = calculate_total_tabs_characters(tab_list)
        total_words = calculate_total_tabs_words(tab_list)

    body_chars = 0
    body_words = 0
    main_body = document.body
    if main_body:
        body_chars = _calculate_character_count(main_body.content)
        body_words = _calculate_word_count(main_body.content)
        # Without tabs the main body is the whole document.
        if tabs_count == 0:
            total_chars, total_words = body_chars, body_words

    rendered: str
    if return_format == DocumentFormat.DOCMD:
        rendered = build_docmd(document).to_string()
    elif return_format == DocumentFormat.MARKDOWN:
        rendered = convert_document_to_markdown(doc_dict, include_all_tabs=True)
    else:
        rendered = convert_document_to_html(doc_dict, include_all_tabs=True)

    payload = {
        "documentId": document.documentId or "",
        "title": document.title or "",
        "documentUrl": f"https://docs.google.com/document/d/{document.documentId}/edit",
        "content": rendered,
        "format": return_format.value,
        "tabs_count": tabs_count,
        "total_character_count": total_chars,
        "total_word_count": total_words,
        "main_body_character_count": body_chars,
        "main_body_word_count": body_words,
    }
    return payload
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def build_search_retrieve_response(documents: list[dict], search_response: dict) -> dict:
    """Combine retrieved documents with the paging state of the search.

    Args:
        documents: List of processed documents with content
        search_response: Response from search_documents; must contain
            "has_more" and may contain "pagination_token"

    Returns:
        Dict with "documents_count", "documents" and "has_more", plus
        "pagination_token" when the search response carries one.
    """
    doc_total = len(documents)
    more_available = search_response["has_more"]

    payload: dict = {
        "documents_count": doc_total,
        "documents": documents,
        "has_more": more_available,
    }

    # The token is passed through only when the search supplied one.
    if "pagination_token" in search_response:
        payload.update(pagination_token=search_response["pagination_token"])

    return payload
|
|
@@ -74,7 +74,7 @@ def _extract_google_docs_info(docs_service: Any) -> dict[str, Any]:
|
|
|
74
74
|
|
|
75
75
|
def _build_people_service(credentials: Credentials) -> Any:
|
|
76
76
|
"""Build and return the People API service client."""
|
|
77
|
-
return build("people", "v1", credentials=credentials)
|
|
77
|
+
return build("people", "v1", credentials=credentials, cache_discovery=False)
|
|
78
78
|
|
|
79
79
|
|
|
80
80
|
def _get_people_api_data(people_service: Any) -> dict[str, Any]:
|
|
@@ -1,20 +1,21 @@
|
|
|
1
|
-
arcade_google_docs/__init__.py,sha256=
|
|
2
|
-
arcade_google_docs/doc_to_html.py,sha256=
|
|
3
|
-
arcade_google_docs/doc_to_markdown.py,sha256=
|
|
4
|
-
arcade_google_docs/docmd.py,sha256=
|
|
1
|
+
arcade_google_docs/__init__.py,sha256=IjNb08C6cdnCIQDsBGj1_t_FlNizeBqkyhNiOrfZiMY,746
|
|
2
|
+
arcade_google_docs/doc_to_html.py,sha256=qD2QR17J4EOPsQYaNVdfHJl4rPISytOw3QaocOxs4g4,7836
|
|
3
|
+
arcade_google_docs/doc_to_markdown.py,sha256=Hq651d2nk48rO7piGncSPZx4KFEGVu-mBQ6kFJUGi8w,6295
|
|
4
|
+
arcade_google_docs/docmd.py,sha256=Dw8ZAgR_9Df9nKoJSvmslU4fbUtCuteW5bfmyXytwnU,23324
|
|
5
5
|
arcade_google_docs/enum.py,sha256=kuXlsHcMYbN28Qg-Dwp4viz-CZ8z85_WVjQVZj2EsEY,3441
|
|
6
6
|
arcade_google_docs/templates.py,sha256=pxbdMj57eV3-ImW3CixDWscpVKS94Z8nTNyTxDhUfGY,283
|
|
7
|
-
arcade_google_docs/utils.py,sha256=
|
|
8
|
-
arcade_google_docs/who_am_i_util.py,sha256=
|
|
7
|
+
arcade_google_docs/utils.py,sha256=lANqXh6rvKjGBcqFbs-_0cQsA80oHywey4ZKUBXQYYQ,12527
|
|
8
|
+
arcade_google_docs/who_am_i_util.py,sha256=Atg64UcY3wrwKi71k9e33Stm70_xeCriwisTISb-GDk,2948
|
|
9
9
|
arcade_google_docs/models/document.py,sha256=0RvZ2_dfpz6ZoF1aUucYWOkRYWy_K_hiChSzoQtwhTc,30419
|
|
10
10
|
arcade_google_docs/models/document_writables.py,sha256=DMBT5A05y7o7_PYlBB6O3KThma6-pm6hd5nxKGydT5Q,27575
|
|
11
11
|
arcade_google_docs/models/requests.py,sha256=8Cga7QECmQWNFhM2QiGudvnQcgA_THi7ThNsUb7uavg,52176
|
|
12
|
-
arcade_google_docs/
|
|
12
|
+
arcade_google_docs/models/responses.py,sha256=82douA0aEvc90FzeYqnxzCB0MkhxqvrzBl_IbfIrPr0,4308
|
|
13
|
+
arcade_google_docs/tools/__init__.py,sha256=JqdaeKJqlFI4cXWEOg960uQCeiIksdD0EDEEOcVu3_0,1025
|
|
13
14
|
arcade_google_docs/tools/comment.py,sha256=Qm5NHdNHONs3j4gqbZ7Fw9NrTVBb_mZ-th1X-z2IoLM,2836
|
|
14
15
|
arcade_google_docs/tools/create.py,sha256=AuYy8yMGscrxAdLJQX0WiisGHCTufSlaRu_QGMMKQmM,2764
|
|
15
16
|
arcade_google_docs/tools/file_picker.py,sha256=Dqn-hfMoTsWyHM8QCakVgHr5TKrzL_1Lj-vYHVGtOW4,2342
|
|
16
|
-
arcade_google_docs/tools/get.py,sha256=
|
|
17
|
-
arcade_google_docs/tools/search.py,sha256=
|
|
17
|
+
arcade_google_docs/tools/get.py,sha256=VQpFxnposGMGS1ulw_3EurIsX3LLi8P3Qxz0X4a0vA0,4297
|
|
18
|
+
arcade_google_docs/tools/search.py,sha256=H_u_zJhUIAwWAkn_iGqbjcnh8z5lWyGK480Y5RpsM1U,8286
|
|
18
19
|
arcade_google_docs/tools/system_context.py,sha256=19HPSpNkLsb-MDWc-9CFgK_ha-rRzwaJr7hV6Us_1LI,1130
|
|
19
20
|
arcade_google_docs/tools/update.py,sha256=_dReYit0s7ykn2bYQEUwohl3D_63U5leF87egO4eEiQ,1836
|
|
20
21
|
arcade_google_docs/tools/edit_agent/edit_agent.py,sha256=1LIgKrQ70pDWzWNoaoy1st659perqa-ZW_ALmbVRbW0,2439
|
|
@@ -23,8 +24,8 @@ arcade_google_docs/tools/edit_agent/planner.py,sha256=38aslAnlPDEY3JEoVXtHqL3Oq_
|
|
|
23
24
|
arcade_google_docs/tools/edit_agent/progress_tracker.py,sha256=eb69tk-yL3uhEEo4ggPoBFtZtHTA6OwgcPxELtvbeEs,1280
|
|
24
25
|
arcade_google_docs/tools/edit_agent/prompts.py,sha256=M_f-HsPJppd3FQPhRAw7pKpaArkVfz213mKVx8qHp8A,15149
|
|
25
26
|
arcade_google_docs/tools/edit_agent/request_generator.py,sha256=eVDmzJDsOmJ-S8yANmJATt_G51rgCcCga6ArX4DTShM,5399
|
|
26
|
-
arcade_google_docs/tools/edit_agent/utils.py,sha256=
|
|
27
|
+
arcade_google_docs/tools/edit_agent/utils.py,sha256=uAxhI43KALjDiD6yxlb8sLoPAV-oHKrF5jk2lgITPv0,680
|
|
27
28
|
arcade_google_docs/tools/edit_agent/models/planning.py,sha256=RWQFB_KHl3Pq-snv1rHzoRxVvTnHVLZEGRpdohSX7wc,2962
|
|
28
|
-
arcade_google_docs-
|
|
29
|
-
arcade_google_docs-
|
|
30
|
-
arcade_google_docs-
|
|
29
|
+
arcade_google_docs-5.0.1.dist-info/METADATA,sha256=WoZXPsv3jEyVQW0aMD7WvG5Tu31RhZ74nPLqnL3eZ9Q,1128
|
|
30
|
+
arcade_google_docs-5.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
31
|
+
arcade_google_docs-5.0.1.dist-info/RECORD,,
|