arcade-google-docs 4.3.1__py3-none-any.whl → 5.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,11 +3,14 @@ from arcade_google_docs.tools import (
3
3
  create_blank_document,
4
4
  create_document_from_text,
5
5
  generate_google_file_picker_url,
6
+ get_document_as_docmd,
6
7
  get_document_by_id,
8
+ get_document_metadata,
7
9
  insert_text_at_end_of_document,
8
10
  list_document_comments,
9
11
  search_and_retrieve_documents,
10
12
  search_documents,
13
+ who_am_i,
11
14
  )
12
15
 
13
16
  __all__ = [
@@ -15,9 +18,12 @@ __all__ = [
15
18
  "create_document_from_text",
16
19
  "comment_on_document",
17
20
  "list_document_comments",
21
+ "get_document_as_docmd",
18
22
  "get_document_by_id",
23
+ "get_document_metadata",
19
24
  "insert_text_at_end_of_document",
20
25
  "search_and_retrieve_documents",
21
26
  "search_documents",
22
27
  "generate_google_file_picker_url",
28
+ "who_am_i",
23
29
  ]
@@ -1,17 +1,47 @@
1
- def convert_document_to_html(document: dict) -> str:
1
+ """
2
+ Google Docs to HTML converter.
3
+ """
4
+
5
+ import html as html_module
6
+
7
+
8
def convert_document_to_html(document: dict, include_all_tabs: bool = True) -> str:
    """Convert a Google Docs document to HTML format.

    The title and the document ID are both HTML-escaped before being placed
    in the ``<head>`` so that neither can break out of its element/attribute.

    Args:
        document: Document dict from Google Docs API. ``documentId`` is
            required; ``title``, ``tabs`` and ``body`` are read if present.
        include_all_tabs: Whether to include all tabs (True) or just main body (False)

    Returns:
        HTML string representation of the document

    Raises:
        KeyError: If ``document`` has no ``documentId`` key.
    """
    escaped_title = html_module.escape(document.get("title", ""))
    # The ID is emitted inside a double-quoted attribute, so quotes must be
    # escaped as well; str() keeps the f-string's tolerance for non-str IDs.
    escaped_document_id = html_module.escape(str(document["documentId"]), quote=True)
    html = (
        "<html><head>"
        f"<title>{escaped_title}</title>"
        f'<meta name="documentId" content="{escaped_document_id}">'
        "</head><body>"
    )

    # Fall back to the main body when tabs are not requested, absent or empty.
    tabs = document.get("tabs")
    if include_all_tabs and tabs:
        html += _convert_tabs_to_html(tabs)
    else:
        html += _convert_body_to_html(document.get("body", {}))

    html += "</body></html>"
    return html
12
33
 
13
34
 
14
35
  def convert_structural_element(element: dict, wrap_paragraphs: bool = True) -> str:
36
+ """Convert a structural element to HTML.
37
+
38
+ Args:
39
+ element: Structural element dict
40
+ wrap_paragraphs: Whether to wrap paragraphs in <p> tags
41
+
42
+ Returns:
43
+ HTML string
44
+ """
15
45
  if "sectionBreak" in element or "tableOfContents" in element:
16
46
  return ""
17
47
 
@@ -51,12 +81,30 @@ def convert_structural_element(element: dict, wrap_paragraphs: bool = True) -> s
51
81
 
52
82
 
53
83
def extract_paragraph_content(text_run: dict) -> str:
    """Render a text run as styled HTML.

    Args:
        text_run: Text run dict providing "content" and "textStyle"

    Returns:
        The run's content with its text style applied as HTML

    Raises:
        KeyError: If "content" or "textStyle" is missing from the run.
    """
    return apply_text_style(text_run["content"], text_run["textStyle"])
57
95
 
58
96
 
59
97
  def apply_text_style(content: str, style: dict) -> str:
98
+ """Apply text styling to content.
99
+
100
+ Args:
101
+ content: Text content
102
+ style: Style dict
103
+
104
+ Returns:
105
+ Styled content with HTML tags
106
+ """
107
+ content = content.replace("\u000b", "\n") # Replace vertical tab with newline
60
108
  content = content.rstrip("\n")
61
109
  content = content.replace("\n", "<br>")
62
110
  italic = style.get("italic", False)
@@ -69,6 +117,15 @@ def apply_text_style(content: str, style: dict) -> str:
69
117
 
70
118
 
71
119
  def get_paragraph_style_tags(style: dict, wrap_paragraphs: bool = True) -> tuple[str, str]:
120
+ """Get HTML opening and closing tags for paragraph style.
121
+
122
+ Args:
123
+ style: Paragraph style dict
124
+ wrap_paragraphs: Whether to wrap in paragraph tags
125
+
126
+ Returns:
127
+ Tuple of (opening_tag, closing_tag)
128
+ """
72
129
  named_style = style["namedStyleType"]
73
130
  if named_style == "NORMAL_TEXT":
74
131
  return ("<p>", "</p>") if wrap_paragraphs else ("", "")
@@ -87,6 +144,14 @@ def get_paragraph_style_tags(style: dict, wrap_paragraphs: bool = True) -> tuple
87
144
 
88
145
 
89
146
  def table_list_to_html(table: list[list[str]]) -> str:
147
+ """Convert a table list to HTML.
148
+
149
+ Args:
150
+ table: List of rows, where each row is a list of cell contents
151
+
152
+ Returns:
153
+ HTML table string
154
+ """
90
155
  html = "<table>"
91
156
  for row in table:
92
157
  html += "<tr>"
@@ -97,3 +162,129 @@ def table_list_to_html(table: list[list[str]]) -> str:
97
162
  html += "</tr>"
98
163
  html += "</table>"
99
164
  return html
165
+
166
+
167
+ def _flatten_tabs_for_html(
168
+ tabs: list[dict], max_depth: int = 4, current_depth: int = 0
169
+ ) -> list[dict]:
170
+ """Flatten tab hierarchy using depth-first traversal for HTML conversion.
171
+
172
+ Args:
173
+ tabs: List of tab dicts with potential childTabs
174
+ max_depth: Maximum recursion depth (Google Docs enforces 3 levels, using 4 for safety)
175
+ current_depth: Current recursion depth
176
+
177
+ Returns:
178
+ Flattened list in depth-first order
179
+ """
180
+ if current_depth >= max_depth:
181
+ return []
182
+
183
+ result: list[dict] = []
184
+ for tab in tabs:
185
+ result.append(tab)
186
+ if tab.get("childTabs"):
187
+ result.extend(_flatten_tabs_for_html(tab["childTabs"], max_depth, current_depth + 1))
188
+ return result
189
+
190
+
191
def _convert_tabs_to_html(tabs: list[dict]) -> str:
    """Render every tab, nested ones included, as concatenated HTML.

    Args:
        tabs: Top-level tab dicts

    Returns:
        HTML for all tabs in depth-first order
    """
    ordered_tabs = _flatten_tabs_for_html(tabs)
    return "".join(_convert_single_tab_to_html(entry) for entry in ordered_tabs)
207
+
208
+
209
+ def _convert_single_tab_to_html(tab: dict) -> str:
210
+ """Convert a single tab to HTML.
211
+
212
+ Args:
213
+ tab: Single tab dict
214
+
215
+ Returns:
216
+ HTML string for the tab
217
+ """
218
+ if "documentTab" not in tab or "tabProperties" not in tab:
219
+ return ""
220
+
221
+ tab_props = tab.get("tabProperties")
222
+ if not tab_props:
223
+ return ""
224
+
225
+ nesting_level = _validate_nesting_level_for_html(tab_props.get("nestingLevel", 0))
226
+ tab_title = tab_props.get("title", "Untitled")
227
+ tab_id = tab_props.get("tabId", "")
228
+
229
+ escaped_tab_id = html_module.escape(tab_id, quote=True)
230
+ escaped_tab_title = html_module.escape(tab_title, quote=True)
231
+
232
+ header_level = min(nesting_level + 1, 6)
233
+ html = (
234
+ f'<section id="tab-{escaped_tab_id}" data-title="{escaped_tab_title}" '
235
+ f'data-level="{nesting_level}">'
236
+ f"<h{header_level}>{html_module.escape(tab_title)}</h{header_level}>"
237
+ )
238
+
239
+ html += _convert_tab_body_to_html(tab.get("documentTab", {}))
240
+ html += "</section>"
241
+
242
+ return html
243
+
244
+
245
+ def _convert_body_to_html(body: dict) -> str:
246
+ """Convert document body to HTML.
247
+
248
+ Args:
249
+ body: Body dict with content
250
+
251
+ Returns:
252
+ HTML string
253
+ """
254
+ html = ""
255
+ for element in body.get("content", []):
256
+ html += convert_structural_element(element)
257
+ return html
258
+
259
+
260
+ def _convert_tab_body_to_html(doc_tab: dict) -> str:
261
+ """Convert tab body content to HTML.
262
+
263
+ Args:
264
+ doc_tab: DocumentTab dict
265
+
266
+ Returns:
267
+ HTML string
268
+ """
269
+ body = doc_tab.get("body")
270
+ if not body:
271
+ return ""
272
+
273
+ html = ""
274
+ for element in body.get("content", []):
275
+ html += convert_structural_element(element)
276
+ return html
277
+
278
+
279
+ def _validate_nesting_level_for_html(nesting_level: int) -> int:
280
+ """Validate and clamp nesting level to safe range.
281
+
282
+ Args:
283
+ nesting_level: The nesting level to validate
284
+
285
+ Returns:
286
+ Validated nesting level (0-5)
287
+ """
288
+ if not isinstance(nesting_level, int) or nesting_level < 0:
289
+ return 0
290
+ return nesting_level
@@ -1,14 +1,43 @@
1
+ """
2
+ Google Docs to Markdown converter.
3
+
4
+ File organization:
5
+ 1. Public functions (convert_document_to_markdown, convert_structural_element, etc.)
6
+ 2. Private helper functions (prefixed with _) at the end
7
+ """
8
+
1
9
  import arcade_google_docs.doc_to_html as doc_to_html
2
10
 
3
11
 
4
- def convert_document_to_markdown(document: dict) -> str:
12
def convert_document_to_markdown(document: dict, include_all_tabs: bool = True) -> str:
    """Render a Google Docs document as Markdown with a front-matter header.

    Args:
        document: Document dict from Google Docs API; "title" and
            "documentId" are required, "tabs" and "body" are read if present
        include_all_tabs: Whether to include all tabs (True) or just main body (False)

    Returns:
        Markdown string: a ``---`` delimited title/documentId header followed
        by the converted tabs or body

    Raises:
        KeyError: If "title" or "documentId" is missing from ``document``.
    """
    front_matter = f"---\ntitle: {document['title']}\ndocumentId: {document['documentId']}\n---\n"

    # Tabs are used only when requested and actually present and non-empty.
    tab_list = document.get("tabs")
    if include_all_tabs and tab_list:
        return front_matter + _convert_tabs_to_markdown(tab_list)
    return front_matter + _convert_body_to_markdown(document.get("body", {}))
9
30
 
10
31
 
11
32
  def convert_structural_element(element: dict) -> str:
33
+ """Convert a structural element to markdown.
34
+
35
+ Args:
36
+ element: Structural element dict
37
+
38
+ Returns:
39
+ Markdown string
40
+ """
12
41
  if "sectionBreak" in element or "tableOfContents" in element:
13
42
  return ""
14
43
 
@@ -30,12 +59,29 @@ def convert_structural_element(element: dict) -> str:
30
59
 
31
60
 
32
61
def extract_paragraph_content(text_run: dict) -> str:
    """Render a text run as styled Markdown.

    Args:
        text_run: Text run dict providing "content" and "textStyle"

    Returns:
        The run's content with its text style applied as Markdown

    Raises:
        KeyError: If "content" or "textStyle" is missing from the run.
    """
    return apply_text_style(text_run["content"], text_run["textStyle"])
36
73
 
37
74
 
38
75
  def apply_text_style(content: str, style: dict) -> str:
76
+ """Apply text styling to content.
77
+
78
+ Args:
79
+ content: Text content
80
+ style: Style dict
81
+
82
+ Returns:
83
+ Styled content with markdown formatting
84
+ """
39
85
  append = "\n" if content.endswith("\n") else ""
40
86
  content = content.rstrip("\n")
41
87
  italic = style.get("italic", False)
@@ -48,6 +94,14 @@ def apply_text_style(content: str, style: dict) -> str:
48
94
 
49
95
 
50
96
  def get_paragraph_style_prepend_str(style: dict) -> str:
97
+ """Get markdown prefix for paragraph style.
98
+
99
+ Args:
100
+ style: Paragraph style dict
101
+
102
+ Returns:
103
+ Markdown prefix string (e.g., "# ", "## ", etc.)
104
+ """
51
105
  named_style = style["namedStyleType"]
52
106
  if named_style == "NORMAL_TEXT":
53
107
  return ""
@@ -62,3 +116,127 @@ def get_paragraph_style_prepend_str(style: dict) -> str:
62
116
  except ValueError:
63
117
  return ""
64
118
  return ""
119
+
120
+
121
+ def _flatten_tabs_for_conversion(
122
+ tabs: list[dict], max_depth: int = 4, current_depth: int = 0
123
+ ) -> list[dict]:
124
+ """Flatten tab hierarchy using depth-first traversal for conversion.
125
+
126
+ Args:
127
+ tabs: List of tab dicts with potential childTabs
128
+ max_depth: Maximum recursion depth (Google Docs enforces 3 levels, using 4 for safety)
129
+ current_depth: Current recursion depth
130
+
131
+ Returns:
132
+ Flattened list in depth-first order
133
+ """
134
+ if current_depth >= max_depth:
135
+ return []
136
+
137
+ result: list[dict] = []
138
+ for tab in tabs:
139
+ result.append(tab)
140
+ if tab.get("childTabs"):
141
+ result.extend(
142
+ _flatten_tabs_for_conversion(tab["childTabs"], max_depth, current_depth + 1)
143
+ )
144
+ return result
145
+
146
+
147
def _convert_tabs_to_markdown(tabs: list[dict]) -> str:
    """Render every tab, nested ones included, as concatenated Markdown.

    Args:
        tabs: Top-level tab dicts

    Returns:
        Markdown for all tabs in depth-first order
    """
    ordered_tabs = _flatten_tabs_for_conversion(tabs)
    return "".join(_convert_single_tab_to_markdown(entry) for entry in ordered_tabs)
163
+
164
+
165
+ def _convert_single_tab_to_markdown(tab: dict) -> str:
166
+ """Convert a single tab to markdown.
167
+
168
+ Args:
169
+ tab: Single tab dict
170
+
171
+ Returns:
172
+ Markdown string for the tab
173
+ """
174
+ if "documentTab" not in tab or "tabProperties" not in tab:
175
+ return ""
176
+
177
+ tab_props = tab.get("tabProperties")
178
+ if not tab_props:
179
+ return ""
180
+
181
+ nesting_level = _validate_nesting_level(tab_props.get("nestingLevel", 0))
182
+ tab_title = tab_props.get("title", "Untitled")
183
+ tab_id = tab_props.get("tabId", "")
184
+
185
+ header_prefix = "#" * (nesting_level + 1)
186
+ md = f"\n{header_prefix} {tab_title}\n\n"
187
+ if tab_id:
188
+ md += f"<!-- Tab ID: {tab_id} -->\n\n"
189
+ else:
190
+ md += "<!-- Tab ID: -->\n\n"
191
+
192
+ md += _convert_tab_body_to_markdown(tab.get("documentTab", {}))
193
+
194
+ return md
195
+
196
+
197
+ def _convert_body_to_markdown(body: dict) -> str:
198
+ """Convert document body to markdown.
199
+
200
+ Args:
201
+ body: Body dict with content
202
+
203
+ Returns:
204
+ Markdown string
205
+ """
206
+ md = ""
207
+ for element in body.get("content", []):
208
+ md += convert_structural_element(element)
209
+ return md
210
+
211
+
212
+ def _convert_tab_body_to_markdown(doc_tab: dict) -> str:
213
+ """Convert tab body content to markdown.
214
+
215
+ Args:
216
+ doc_tab: DocumentTab dict
217
+
218
+ Returns:
219
+ Markdown string
220
+ """
221
+ body = doc_tab.get("body")
222
+ if not body:
223
+ return ""
224
+
225
+ md = ""
226
+ for element in body.get("content", []):
227
+ md += convert_structural_element(element)
228
+ return md
229
+
230
+
231
+ def _validate_nesting_level(nesting_level: int) -> int:
232
+ """Validate and clamp nesting level to safe range.
233
+
234
+ Args:
235
+ nesting_level: The nesting level to validate
236
+
237
+ Returns:
238
+ Validated nesting level (0-5)
239
+ """
240
+ if not isinstance(nesting_level, int) or nesting_level < 0:
241
+ return 0
242
+ return min(nesting_level, 5)