arcade-google-docs 4.3.1__py3-none-any.whl → 5.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- arcade_google_docs/__init__.py +6 -0
- arcade_google_docs/doc_to_html.py +195 -4
- arcade_google_docs/doc_to_markdown.py +181 -3
- arcade_google_docs/docmd.py +115 -36
- arcade_google_docs/models/responses.py +143 -0
- arcade_google_docs/tools/__init__.py +7 -1
- arcade_google_docs/tools/edit_agent/utils.py +3 -1
- arcade_google_docs/tools/get.py +64 -7
- arcade_google_docs/tools/search.py +33 -33
- arcade_google_docs/utils.py +262 -1
- {arcade_google_docs-4.3.1.dist-info → arcade_google_docs-5.0.0.dist-info}/METADATA +4 -4
- {arcade_google_docs-4.3.1.dist-info → arcade_google_docs-5.0.0.dist-info}/RECORD +13 -12
- {arcade_google_docs-4.3.1.dist-info → arcade_google_docs-5.0.0.dist-info}/WHEEL +0 -0
arcade_google_docs/__init__.py
CHANGED
|
@@ -3,11 +3,14 @@ from arcade_google_docs.tools import (
|
|
|
3
3
|
create_blank_document,
|
|
4
4
|
create_document_from_text,
|
|
5
5
|
generate_google_file_picker_url,
|
|
6
|
+
get_document_as_docmd,
|
|
6
7
|
get_document_by_id,
|
|
8
|
+
get_document_metadata,
|
|
7
9
|
insert_text_at_end_of_document,
|
|
8
10
|
list_document_comments,
|
|
9
11
|
search_and_retrieve_documents,
|
|
10
12
|
search_documents,
|
|
13
|
+
who_am_i,
|
|
11
14
|
)
|
|
12
15
|
|
|
13
16
|
__all__ = [
|
|
@@ -15,9 +18,12 @@ __all__ = [
|
|
|
15
18
|
"create_document_from_text",
|
|
16
19
|
"comment_on_document",
|
|
17
20
|
"list_document_comments",
|
|
21
|
+
"get_document_as_docmd",
|
|
18
22
|
"get_document_by_id",
|
|
23
|
+
"get_document_metadata",
|
|
19
24
|
"insert_text_at_end_of_document",
|
|
20
25
|
"search_and_retrieve_documents",
|
|
21
26
|
"search_documents",
|
|
22
27
|
"generate_google_file_picker_url",
|
|
28
|
+
"who_am_i",
|
|
23
29
|
]
|
|
@@ -1,17 +1,47 @@
|
|
|
1
|
-
|
|
1
|
+
"""
|
|
2
|
+
Google Docs to HTML converter.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import html as html_module
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def convert_document_to_html(document: dict, include_all_tabs: bool = True) -> str:
    """Convert a Google Docs document to HTML format.

    Args:
        document: Document dict from Google Docs API. "documentId" is required;
            "title", "tabs" and "body" are read only when present.
        include_all_tabs: Whether to include all tabs (True) or just main body (False)

    Returns:
        HTML string representation of the document

    Raises:
        KeyError: If ``document`` has no "documentId" key.
    """
    escaped_title = html_module.escape(document.get("title", ""))
    # The ID is placed inside a double-quoted attribute, so it gets the same
    # escaping as the title; a stray quote or "<" must not break the markup.
    escaped_document_id = html_module.escape(str(document["documentId"]), quote=True)
    html = (
        "<html><head>"
        f"<title>{escaped_title}</title>"
        f'<meta name="documentId" content="{escaped_document_id}">'
        "</head><body>"
    )

    # Fall back to the top-level body when tabs are absent, empty, or not requested.
    if include_all_tabs and "tabs" in document and document["tabs"]:
        html += _convert_tabs_to_html(document["tabs"])
    else:
        html += _convert_body_to_html(document.get("body", {}))

    html += "</body></html>"
    return html
|
|
12
33
|
|
|
13
34
|
|
|
14
35
|
def convert_structural_element(element: dict, wrap_paragraphs: bool = True) -> str:
|
|
36
|
+
"""Convert a structural element to HTML.
|
|
37
|
+
|
|
38
|
+
Args:
|
|
39
|
+
element: Structural element dict
|
|
40
|
+
wrap_paragraphs: Whether to wrap paragraphs in <p> tags
|
|
41
|
+
|
|
42
|
+
Returns:
|
|
43
|
+
HTML string
|
|
44
|
+
"""
|
|
15
45
|
if "sectionBreak" in element or "tableOfContents" in element:
|
|
16
46
|
return ""
|
|
17
47
|
|
|
@@ -51,12 +81,30 @@ def convert_structural_element(element: dict, wrap_paragraphs: bool = True) -> s
|
|
|
51
81
|
|
|
52
82
|
|
|
53
83
|
def extract_paragraph_content(text_run: dict) -> str:
    """Render one text run as styled HTML.

    Args:
        text_run: Text run dict; its "content" and "textStyle" entries are read.

    Returns:
        The run's content passed through ``apply_text_style`` with its style.

    Raises:
        KeyError: If "content" or "textStyle" is missing from ``text_run``.
    """
    return apply_text_style(text_run["content"], text_run["textStyle"])
|
|
57
95
|
|
|
58
96
|
|
|
59
97
|
def apply_text_style(content: str, style: dict) -> str:
|
|
98
|
+
"""Apply text styling to content.
|
|
99
|
+
|
|
100
|
+
Args:
|
|
101
|
+
content: Text content
|
|
102
|
+
style: Style dict
|
|
103
|
+
|
|
104
|
+
Returns:
|
|
105
|
+
Styled content with HTML tags
|
|
106
|
+
"""
|
|
107
|
+
content = content.replace("\u000b", "\n") # Replace vertical tab with newline
|
|
60
108
|
content = content.rstrip("\n")
|
|
61
109
|
content = content.replace("\n", "<br>")
|
|
62
110
|
italic = style.get("italic", False)
|
|
@@ -69,6 +117,15 @@ def apply_text_style(content: str, style: dict) -> str:
|
|
|
69
117
|
|
|
70
118
|
|
|
71
119
|
def get_paragraph_style_tags(style: dict, wrap_paragraphs: bool = True) -> tuple[str, str]:
|
|
120
|
+
"""Get HTML opening and closing tags for paragraph style.
|
|
121
|
+
|
|
122
|
+
Args:
|
|
123
|
+
style: Paragraph style dict
|
|
124
|
+
wrap_paragraphs: Whether to wrap in paragraph tags
|
|
125
|
+
|
|
126
|
+
Returns:
|
|
127
|
+
Tuple of (opening_tag, closing_tag)
|
|
128
|
+
"""
|
|
72
129
|
named_style = style["namedStyleType"]
|
|
73
130
|
if named_style == "NORMAL_TEXT":
|
|
74
131
|
return ("<p>", "</p>") if wrap_paragraphs else ("", "")
|
|
@@ -87,6 +144,14 @@ def get_paragraph_style_tags(style: dict, wrap_paragraphs: bool = True) -> tuple
|
|
|
87
144
|
|
|
88
145
|
|
|
89
146
|
def table_list_to_html(table: list[list[str]]) -> str:
|
|
147
|
+
"""Convert a table list to HTML.
|
|
148
|
+
|
|
149
|
+
Args:
|
|
150
|
+
table: List of rows, where each row is a list of cell contents
|
|
151
|
+
|
|
152
|
+
Returns:
|
|
153
|
+
HTML table string
|
|
154
|
+
"""
|
|
90
155
|
html = "<table>"
|
|
91
156
|
for row in table:
|
|
92
157
|
html += "<tr>"
|
|
@@ -97,3 +162,129 @@ def table_list_to_html(table: list[list[str]]) -> str:
|
|
|
97
162
|
html += "</tr>"
|
|
98
163
|
html += "</table>"
|
|
99
164
|
return html
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
def _flatten_tabs_for_html(
|
|
168
|
+
tabs: list[dict], max_depth: int = 4, current_depth: int = 0
|
|
169
|
+
) -> list[dict]:
|
|
170
|
+
"""Flatten tab hierarchy using depth-first traversal for HTML conversion.
|
|
171
|
+
|
|
172
|
+
Args:
|
|
173
|
+
tabs: List of tab dicts with potential childTabs
|
|
174
|
+
max_depth: Maximum recursion depth (Google Docs enforces 3 levels, using 4 for safety)
|
|
175
|
+
current_depth: Current recursion depth
|
|
176
|
+
|
|
177
|
+
Returns:
|
|
178
|
+
Flattened list in depth-first order
|
|
179
|
+
"""
|
|
180
|
+
if current_depth >= max_depth:
|
|
181
|
+
return []
|
|
182
|
+
|
|
183
|
+
result: list[dict] = []
|
|
184
|
+
for tab in tabs:
|
|
185
|
+
result.append(tab)
|
|
186
|
+
if tab.get("childTabs"):
|
|
187
|
+
result.extend(_flatten_tabs_for_html(tab["childTabs"], max_depth, current_depth + 1))
|
|
188
|
+
return result
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def _convert_tabs_to_html(tabs: list[dict]) -> str:
    """Render every tab, including nested ones, as a single HTML string.

    Args:
        tabs: List of tab dicts

    Returns:
        Concatenated HTML of each tab in flattened (depth-first) order
    """
    ordered_tabs = _flatten_tabs_for_html(tabs)
    return "".join(_convert_single_tab_to_html(entry) for entry in ordered_tabs)
|
|
207
|
+
|
|
208
|
+
|
|
209
|
+
def _convert_single_tab_to_html(tab: dict) -> str:
|
|
210
|
+
"""Convert a single tab to HTML.
|
|
211
|
+
|
|
212
|
+
Args:
|
|
213
|
+
tab: Single tab dict
|
|
214
|
+
|
|
215
|
+
Returns:
|
|
216
|
+
HTML string for the tab
|
|
217
|
+
"""
|
|
218
|
+
if "documentTab" not in tab or "tabProperties" not in tab:
|
|
219
|
+
return ""
|
|
220
|
+
|
|
221
|
+
tab_props = tab.get("tabProperties")
|
|
222
|
+
if not tab_props:
|
|
223
|
+
return ""
|
|
224
|
+
|
|
225
|
+
nesting_level = _validate_nesting_level_for_html(tab_props.get("nestingLevel", 0))
|
|
226
|
+
tab_title = tab_props.get("title", "Untitled")
|
|
227
|
+
tab_id = tab_props.get("tabId", "")
|
|
228
|
+
|
|
229
|
+
escaped_tab_id = html_module.escape(tab_id, quote=True)
|
|
230
|
+
escaped_tab_title = html_module.escape(tab_title, quote=True)
|
|
231
|
+
|
|
232
|
+
header_level = min(nesting_level + 1, 6)
|
|
233
|
+
html = (
|
|
234
|
+
f'<section id="tab-{escaped_tab_id}" data-title="{escaped_tab_title}" '
|
|
235
|
+
f'data-level="{nesting_level}">'
|
|
236
|
+
f"<h{header_level}>{html_module.escape(tab_title)}</h{header_level}>"
|
|
237
|
+
)
|
|
238
|
+
|
|
239
|
+
html += _convert_tab_body_to_html(tab.get("documentTab", {}))
|
|
240
|
+
html += "</section>"
|
|
241
|
+
|
|
242
|
+
return html
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _convert_body_to_html(body: dict) -> str:
|
|
246
|
+
"""Convert document body to HTML.
|
|
247
|
+
|
|
248
|
+
Args:
|
|
249
|
+
body: Body dict with content
|
|
250
|
+
|
|
251
|
+
Returns:
|
|
252
|
+
HTML string
|
|
253
|
+
"""
|
|
254
|
+
html = ""
|
|
255
|
+
for element in body.get("content", []):
|
|
256
|
+
html += convert_structural_element(element)
|
|
257
|
+
return html
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def _convert_tab_body_to_html(doc_tab: dict) -> str:
|
|
261
|
+
"""Convert tab body content to HTML.
|
|
262
|
+
|
|
263
|
+
Args:
|
|
264
|
+
doc_tab: DocumentTab dict
|
|
265
|
+
|
|
266
|
+
Returns:
|
|
267
|
+
HTML string
|
|
268
|
+
"""
|
|
269
|
+
body = doc_tab.get("body")
|
|
270
|
+
if not body:
|
|
271
|
+
return ""
|
|
272
|
+
|
|
273
|
+
html = ""
|
|
274
|
+
for element in body.get("content", []):
|
|
275
|
+
html += convert_structural_element(element)
|
|
276
|
+
return html
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _validate_nesting_level_for_html(nesting_level: int) -> int:
|
|
280
|
+
"""Validate and clamp nesting level to safe range.
|
|
281
|
+
|
|
282
|
+
Args:
|
|
283
|
+
nesting_level: The nesting level to validate
|
|
284
|
+
|
|
285
|
+
Returns:
|
|
286
|
+
Validated nesting level (0-5)
|
|
287
|
+
"""
|
|
288
|
+
if not isinstance(nesting_level, int) or nesting_level < 0:
|
|
289
|
+
return 0
|
|
290
|
+
return nesting_level
|
|
@@ -1,14 +1,43 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Google Docs to Markdown converter.
|
|
3
|
+
|
|
4
|
+
File organization:
|
|
5
|
+
1. Public functions (convert_document_to_markdown, convert_structural_element, etc.)
|
|
6
|
+
2. Private helper functions (prefixed with _) at the end
|
|
7
|
+
"""
|
|
8
|
+
|
|
1
9
|
import arcade_google_docs.doc_to_html as doc_to_html
|
|
2
10
|
|
|
3
11
|
|
|
4
|
-
def convert_document_to_markdown(document: dict) -> str:
|
|
12
|
+
def convert_document_to_markdown(document: dict, include_all_tabs: bool = True) -> str:
    """Convert a Google Docs document to Markdown format.

    The output starts with a front-matter block carrying the title and
    document ID, followed by the converted tabs or body.

    Args:
        document: Document dict from Google Docs API. "documentId" is required;
            "title", "tabs" and "body" are read only when present.
        include_all_tabs: Whether to include all tabs (True) or just main body (False)

    Returns:
        Markdown string representation of the document

    Raises:
        KeyError: If ``document`` has no "documentId" key.
    """
    # A document without a title yields an empty title field instead of a KeyError.
    title = document.get("title", "")
    md = f"---\ntitle: {title}\ndocumentId: {document['documentId']}\n---\n"

    # Fall back to the top-level body when tabs are absent, empty, or not requested.
    if include_all_tabs and "tabs" in document and document["tabs"]:
        md += _convert_tabs_to_markdown(document["tabs"])
    else:
        md += _convert_body_to_markdown(document.get("body", {}))

    return md
|
|
9
30
|
|
|
10
31
|
|
|
11
32
|
def convert_structural_element(element: dict) -> str:
|
|
33
|
+
"""Convert a structural element to markdown.
|
|
34
|
+
|
|
35
|
+
Args:
|
|
36
|
+
element: Structural element dict
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
Markdown string
|
|
40
|
+
"""
|
|
12
41
|
if "sectionBreak" in element or "tableOfContents" in element:
|
|
13
42
|
return ""
|
|
14
43
|
|
|
@@ -30,12 +59,29 @@ def convert_structural_element(element: dict) -> str:
|
|
|
30
59
|
|
|
31
60
|
|
|
32
61
|
def extract_paragraph_content(text_run: dict) -> str:
    """Render one text run as styled Markdown.

    Args:
        text_run: Text run dict; its "content" and "textStyle" entries are read.

    Returns:
        The run's content passed through ``apply_text_style`` with its style.

    Raises:
        KeyError: If "content" or "textStyle" is missing from ``text_run``.
    """
    return apply_text_style(text_run["content"], text_run["textStyle"])
|
|
36
73
|
|
|
37
74
|
|
|
38
75
|
def apply_text_style(content: str, style: dict) -> str:
|
|
76
|
+
"""Apply text styling to content.
|
|
77
|
+
|
|
78
|
+
Args:
|
|
79
|
+
content: Text content
|
|
80
|
+
style: Style dict
|
|
81
|
+
|
|
82
|
+
Returns:
|
|
83
|
+
Styled content with markdown formatting
|
|
84
|
+
"""
|
|
39
85
|
append = "\n" if content.endswith("\n") else ""
|
|
40
86
|
content = content.rstrip("\n")
|
|
41
87
|
italic = style.get("italic", False)
|
|
@@ -48,6 +94,14 @@ def apply_text_style(content: str, style: dict) -> str:
|
|
|
48
94
|
|
|
49
95
|
|
|
50
96
|
def get_paragraph_style_prepend_str(style: dict) -> str:
|
|
97
|
+
"""Get markdown prefix for paragraph style.
|
|
98
|
+
|
|
99
|
+
Args:
|
|
100
|
+
style: Paragraph style dict
|
|
101
|
+
|
|
102
|
+
Returns:
|
|
103
|
+
Markdown prefix string (e.g., "# ", "## ", etc.)
|
|
104
|
+
"""
|
|
51
105
|
named_style = style["namedStyleType"]
|
|
52
106
|
if named_style == "NORMAL_TEXT":
|
|
53
107
|
return ""
|
|
@@ -62,3 +116,127 @@ def get_paragraph_style_prepend_str(style: dict) -> str:
|
|
|
62
116
|
except ValueError:
|
|
63
117
|
return ""
|
|
64
118
|
return ""
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _flatten_tabs_for_conversion(
|
|
122
|
+
tabs: list[dict], max_depth: int = 4, current_depth: int = 0
|
|
123
|
+
) -> list[dict]:
|
|
124
|
+
"""Flatten tab hierarchy using depth-first traversal for conversion.
|
|
125
|
+
|
|
126
|
+
Args:
|
|
127
|
+
tabs: List of tab dicts with potential childTabs
|
|
128
|
+
max_depth: Maximum recursion depth (Google Docs enforces 3 levels, using 4 for safety)
|
|
129
|
+
current_depth: Current recursion depth
|
|
130
|
+
|
|
131
|
+
Returns:
|
|
132
|
+
Flattened list in depth-first order
|
|
133
|
+
"""
|
|
134
|
+
if current_depth >= max_depth:
|
|
135
|
+
return []
|
|
136
|
+
|
|
137
|
+
result: list[dict] = []
|
|
138
|
+
for tab in tabs:
|
|
139
|
+
result.append(tab)
|
|
140
|
+
if tab.get("childTabs"):
|
|
141
|
+
result.extend(
|
|
142
|
+
_flatten_tabs_for_conversion(tab["childTabs"], max_depth, current_depth + 1)
|
|
143
|
+
)
|
|
144
|
+
return result
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _convert_tabs_to_markdown(tabs: list[dict]) -> str:
    """Render every tab, including nested ones, as a single Markdown string.

    Args:
        tabs: List of tab dicts

    Returns:
        Concatenated Markdown of each tab in flattened (depth-first) order
    """
    ordered_tabs = _flatten_tabs_for_conversion(tabs)
    return "".join(_convert_single_tab_to_markdown(entry) for entry in ordered_tabs)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def _convert_single_tab_to_markdown(tab: dict) -> str:
|
|
166
|
+
"""Convert a single tab to markdown.
|
|
167
|
+
|
|
168
|
+
Args:
|
|
169
|
+
tab: Single tab dict
|
|
170
|
+
|
|
171
|
+
Returns:
|
|
172
|
+
Markdown string for the tab
|
|
173
|
+
"""
|
|
174
|
+
if "documentTab" not in tab or "tabProperties" not in tab:
|
|
175
|
+
return ""
|
|
176
|
+
|
|
177
|
+
tab_props = tab.get("tabProperties")
|
|
178
|
+
if not tab_props:
|
|
179
|
+
return ""
|
|
180
|
+
|
|
181
|
+
nesting_level = _validate_nesting_level(tab_props.get("nestingLevel", 0))
|
|
182
|
+
tab_title = tab_props.get("title", "Untitled")
|
|
183
|
+
tab_id = tab_props.get("tabId", "")
|
|
184
|
+
|
|
185
|
+
header_prefix = "#" * (nesting_level + 1)
|
|
186
|
+
md = f"\n{header_prefix} {tab_title}\n\n"
|
|
187
|
+
if tab_id:
|
|
188
|
+
md += f"<!-- Tab ID: {tab_id} -->\n\n"
|
|
189
|
+
else:
|
|
190
|
+
md += "<!-- Tab ID: -->\n\n"
|
|
191
|
+
|
|
192
|
+
md += _convert_tab_body_to_markdown(tab.get("documentTab", {}))
|
|
193
|
+
|
|
194
|
+
return md
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _convert_body_to_markdown(body: dict) -> str:
|
|
198
|
+
"""Convert document body to markdown.
|
|
199
|
+
|
|
200
|
+
Args:
|
|
201
|
+
body: Body dict with content
|
|
202
|
+
|
|
203
|
+
Returns:
|
|
204
|
+
Markdown string
|
|
205
|
+
"""
|
|
206
|
+
md = ""
|
|
207
|
+
for element in body.get("content", []):
|
|
208
|
+
md += convert_structural_element(element)
|
|
209
|
+
return md
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _convert_tab_body_to_markdown(doc_tab: dict) -> str:
|
|
213
|
+
"""Convert tab body content to markdown.
|
|
214
|
+
|
|
215
|
+
Args:
|
|
216
|
+
doc_tab: DocumentTab dict
|
|
217
|
+
|
|
218
|
+
Returns:
|
|
219
|
+
Markdown string
|
|
220
|
+
"""
|
|
221
|
+
body = doc_tab.get("body")
|
|
222
|
+
if not body:
|
|
223
|
+
return ""
|
|
224
|
+
|
|
225
|
+
md = ""
|
|
226
|
+
for element in body.get("content", []):
|
|
227
|
+
md += convert_structural_element(element)
|
|
228
|
+
return md
|
|
229
|
+
|
|
230
|
+
|
|
231
|
+
def _validate_nesting_level(nesting_level: int) -> int:
|
|
232
|
+
"""Validate and clamp nesting level to safe range.
|
|
233
|
+
|
|
234
|
+
Args:
|
|
235
|
+
nesting_level: The nesting level to validate
|
|
236
|
+
|
|
237
|
+
Returns:
|
|
238
|
+
Validated nesting level (0-5)
|
|
239
|
+
"""
|
|
240
|
+
if not isinstance(nesting_level, int) or nesting_level < 0:
|
|
241
|
+
return 0
|
|
242
|
+
return min(nesting_level, 5)
|