google_workspace_mcp-1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. google_workspace_mcp/__init__.py +3 -0
  2. google_workspace_mcp/__main__.py +43 -0
  3. google_workspace_mcp/app.py +8 -0
  4. google_workspace_mcp/auth/__init__.py +7 -0
  5. google_workspace_mcp/auth/gauth.py +62 -0
  6. google_workspace_mcp/config.py +60 -0
  7. google_workspace_mcp/prompts/__init__.py +3 -0
  8. google_workspace_mcp/prompts/calendar.py +36 -0
  9. google_workspace_mcp/prompts/drive.py +18 -0
  10. google_workspace_mcp/prompts/gmail.py +65 -0
  11. google_workspace_mcp/prompts/slides.py +40 -0
  12. google_workspace_mcp/resources/__init__.py +13 -0
  13. google_workspace_mcp/resources/calendar.py +79 -0
  14. google_workspace_mcp/resources/drive.py +93 -0
  15. google_workspace_mcp/resources/gmail.py +58 -0
  16. google_workspace_mcp/resources/sheets_resources.py +92 -0
  17. google_workspace_mcp/resources/slides.py +421 -0
  18. google_workspace_mcp/services/__init__.py +21 -0
  19. google_workspace_mcp/services/base.py +73 -0
  20. google_workspace_mcp/services/calendar.py +256 -0
  21. google_workspace_mcp/services/docs_service.py +388 -0
  22. google_workspace_mcp/services/drive.py +454 -0
  23. google_workspace_mcp/services/gmail.py +676 -0
  24. google_workspace_mcp/services/sheets_service.py +466 -0
  25. google_workspace_mcp/services/slides.py +959 -0
  26. google_workspace_mcp/tools/__init__.py +7 -0
  27. google_workspace_mcp/tools/calendar.py +229 -0
  28. google_workspace_mcp/tools/docs_tools.py +277 -0
  29. google_workspace_mcp/tools/drive.py +221 -0
  30. google_workspace_mcp/tools/gmail.py +344 -0
  31. google_workspace_mcp/tools/sheets_tools.py +322 -0
  32. google_workspace_mcp/tools/slides.py +478 -0
  33. google_workspace_mcp/utils/__init__.py +1 -0
  34. google_workspace_mcp/utils/markdown_slides.py +504 -0
  35. google_workspace_mcp-1.0.0.dist-info/METADATA +547 -0
  36. google_workspace_mcp-1.0.0.dist-info/RECORD +38 -0
  37. google_workspace_mcp-1.0.0.dist-info/WHEEL +4 -0
  38. google_workspace_mcp-1.0.0.dist-info/entry_points.txt +2 -0
google_workspace_mcp/utils/markdown_slides.py
@@ -0,0 +1,504 @@
+"""
+Utilities for converting markdown to Google Slides presentations.
+Provides advanced markdown parsing and formatting for slide creation.
+"""
+
+import logging
+import re
+from typing import Any
+
+# Third-party dependencies for proper markdown parsing
+import markdown
+from bs4 import BeautifulSoup
+
+logger = logging.getLogger(__name__)
+
+
+class MarkdownSlidesConverter:
+    """
+    Utility class for converting markdown content to Google Slides.
+    Handles parsing, element extraction, and formatting for slides.
+    """
+
+    # Define available slide layouts
+    LAYOUTS = {
+        "TITLE": "TITLE",
+        "TITLE_AND_BODY": "TITLE_AND_BODY",
+        "TITLE_AND_TWO_COLUMNS": "TITLE_AND_TWO_COLUMNS",
+        "TITLE_ONLY": "TITLE_ONLY",
+        "BLANK": "BLANK",
+        "SECTION_HEADER": "SECTION_HEADER",
+        "CAPTION_ONLY": "CAPTION_ONLY",
+        "BIG_NUMBER": "BIG_NUMBER",
+    }
+
+    def __init__(self):
+        """Initialize the converter."""
+        # Extensions to enable in the markdown parser
+        self.markdown_extensions = [
+            "extra",  # Tables, attr_list, etc.
+            "codehilite",  # Code highlighting
+            "sane_lists",  # Better list handling
+            "smarty",  # Smart quotes
+            "meta",  # Metadata
+        ]
+
+    def split_slides(self, markdown_content: str) -> list[str]:
+        """
+        Split markdown content into individual slide sections.
+
+        Args:
+            markdown_content: Full markdown content
+
+        Returns:
+            List of slide section strings
+        """
+        # Normalize line endings first
+        normalized_content = markdown_content.replace("\r\n", "\n").replace("\r", "\n")
+
+        # Log input
+        logger.info(f"Splitting markdown of length {len(normalized_content)} into slides")
+        # logger.debug(f"Normalized content preview:\n{normalized_content[:500]}...")  # Optional: log content preview
+
+        # First, try splitting by horizontal rule markers (---, ***) respecting blank lines around them
+        hr_pattern = r"\n\s*[-*]{3,}\s*\n"
+        sections_by_hr = re.split(hr_pattern, normalized_content)
+
+        # Log split results
+        logger.info(f"Split by horizontal rules found {len(sections_by_hr)} sections")
+        for i, section in enumerate(sections_by_hr):
+            logger.info(f"HR Section {i} length: {len(section)}")
+            logger.info(f"HR Section {i} preview: {section[:100]}...")
+
+        # If we found multiple sections using horizontal rules, use them
+        if len(sections_by_hr) > 1:
+            logger.info(f"Split by horizontal rule (---, ***) into {len(sections_by_hr)} sections")
+            return [s.strip() for s in sections_by_hr if s.strip()]
+
+        # If HR split didn't work, try splitting by H2 headers (##)
+        # But we need to keep the # in the content
+        h2_pattern = r"(?:^|\n)(?=## )"  # Look for ## at beginning of a line
+        sections_by_h2 = re.split(h2_pattern, normalized_content)
+
+        # Restore the '## ' prefix to the subsequent sections
+        processed_sections_by_h2 = []
+        if sections_by_h2:
+            # The first section might not start with ##, keep it as is
+            if sections_by_h2[0].strip():
+                processed_sections_by_h2.append(sections_by_h2[0])
+            # For subsequent sections, prepend '## ' if split occurred
+            for section in sections_by_h2[1:]:
+                if section.strip():  # Avoid adding prefix to potentially empty trailing sections
+                    processed_sections_by_h2.append(f"## {section}")
+
+        # If we found multiple sections using h2 headers, use them
+        if len(processed_sections_by_h2) > 1:
+            logger.info(f"Split by H2 headers (##) into {len(processed_sections_by_h2)} sections")
+            return [s.strip() for s in processed_sections_by_h2 if s.strip()]
+
+        # If we get here, we couldn't find multiple slides, so return the whole content as one slide
+        logger.info("Could not find slide separators (HR or H2), treating content as a single slide")
+        return [normalized_content.strip()] if normalized_content.strip() else []
+
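An illustrative usage sketch of the splitting logic above (not part of the diffed file; it assumes the wheel and its markdown/beautifulsoup4 dependencies are installed, and the sample deck string is invented). When both a horizontal rule and an H2 heading are present, the horizontal-rule pattern wins and the H2 fallback is never consulted:

    from google_workspace_mcp.utils.markdown_slides import MarkdownSlidesConverter

    converter = MarkdownSlidesConverter()
    deck = "# Intro\nWelcome\n\n---\n\n## Agenda\n- Item one\n- Item two\n"

    # The "---" line matches hr_pattern, so the deck splits into two sections.
    sections = converter.split_slides(deck)
    print(len(sections))    # 2
    print(sections[1][:9])  # ## Agenda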
+    def parse_slide_markdown(self, markdown_section: str) -> tuple[str, list[dict[str, Any]]]:
+        """
+        Parse a markdown section into slide elements with proper formatting.
+
+        Args:
+            markdown_section: Markdown content for a single slide
+
+        Returns:
+            Tuple of (layout, elements)
+        """
+        logger.info(f"Parsing slide section of length {len(markdown_section)}")
+        logger.info(f"Section preview: {markdown_section[:200]}...")
+
+        try:
+            # Convert markdown to HTML
+            html = markdown.markdown(markdown_section, extensions=self.markdown_extensions)
+
+            # Log the generated HTML
+            logger.info(f"Generated HTML of length {len(html)}")
+            logger.info(f"HTML preview: {html[:200]}...")
+
+            # Parse HTML
+            soup = BeautifulSoup(html, "html.parser")
+
+            # Extract slide elements
+            elements = []
+
+            # Track if we've processed standard elements
+            has_title = False
+            has_subtitle = False
+            has_bullets = False
+            has_image = False
+            has_table = False
+
+            # Track current Y position for element placement
+            current_y = 50  # Start position for first element
+
+            # Process headings
+            h1 = soup.find("h1")
+            if h1:
+                elements.append(
+                    {
+                        "type": "title",
+                        "content": h1.get_text(),
+                        "position": (100, current_y),  # Use tracked Y position
+                        "size": (600, 50),
+                    }
+                )
+                has_title = True
+                current_y += 70  # Increment Y position for next element
+                # Remove the h1 from soup so we don't process it again
+                h1.extract()
+
+            h2 = soup.find("h2")
+            if h2:
+                elements.append(
+                    {
+                        "type": "subtitle",
+                        "content": h2.get_text(),
+                        "position": (100, current_y),  # Use tracked Y position
+                        "size": (600, 40),
+                    }
+                )
+                has_subtitle = True
+                current_y += 50  # Increment Y position for next element
+                h2.extract()
+
+            # Process h3 (subheadings)
+            h3_elements = soup.find_all("h3")
+            for h3 in h3_elements:
+                elements.append(
+                    {
+                        "type": "text",
+                        "content": h3.get_text(),
+                        "position": (100, current_y),  # Use tracked Y position
+                        "size": (600, 40),
+                    }
+                )
+                current_y += 50  # Increment Y position for next element
+                h3.extract()
+
+            # Process lists (ul, ol)
+            lists = soup.find_all(["ul", "ol"])
+            for list_elem in lists:
+                list_items = []
+                list_type = "bullets" if list_elem.name == "ul" else "numbered"
+
+                for li in list_elem.find_all("li", recursive=False):
+                    # Extract text from li, handling nested formatting
+                    item_text = self._extract_formatted_text(li)
+                    list_items.append(item_text)
+
+                    # Handle nested lists
+                    nested_lists = li.find_all(["ul", "ol"], recursive=False)
+                    for nested_list in nested_lists:
+                        nested_items = []
+                        for nested_li in nested_list.find_all("li"):
+                            nested_text = self._extract_formatted_text(nested_li)
+                            nested_items.append(
+                                {"text": nested_text, "level": 1}  # Nested level
+                            )
+                        if nested_items:
+                            list_items[-1] = {
+                                "text": item_text,
+                                "level": 0,
+                                "children": nested_items,
+                            }
+
+                if list_items:
+                    elements.append(
+                        {
+                            "type": list_type,
+                            "items": list_items,
+                            "position": (100, current_y),  # Use tracked Y position
+                            "size": (
+                                600,
+                                max(200, len(list_items) * 25),
+                            ),  # Scale height based on items
+                        }
+                    )
+                    has_bullets = True
+                    current_y += max(200, len(list_items) * 25) + 20  # Add padding between elements
+                list_elem.extract()
+
+            # Process images
+            images = soup.find_all("img")
+            for img in images:
+                elements.append(
+                    {
+                        "type": "image",
+                        "alt": img.get("alt", ""),
+                        "url": img.get("src", ""),
+                        "position": (250, current_y),  # Use tracked Y position
+                        "size": (300, 200),  # Default image size
+                    }
+                )
+                has_image = True
+                current_y += 220  # Increment Y position for next element
+                img.extract()
+
+            # Process tables
+            tables = soup.find_all("table")
+            for table in tables:
+                rows = []
+
+                # Process headers
+                headers = []
+                thead = table.find("thead")
+                if thead:
+                    th_row = thead.find("tr")
+                    if th_row:
+                        for th in th_row.find_all("th"):
+                            headers.append(self._extract_formatted_text(th))
+
+                # Process body rows
+                tbody = table.find("tbody") or table
+                for tr in tbody.find_all("tr"):
+                    row = []
+                    for td in tr.find_all(["td", "th"]):
+                        row.append(self._extract_formatted_text(td))
+                    if row:
+                        rows.append(row)
+
+                if headers or rows:
+                    elements.append(
+                        {
+                            "type": "table",
+                            "headers": headers,
+                            "rows": rows,
+                            "position": (100, current_y),  # Use tracked Y position
+                            "size": (600, 200),
+                        }
+                    )
+                    has_table = True
+                    current_y += 220  # Increment Y position for next element
+                table.extract()
+
+            # Process remaining paragraphs as text
+            paragraphs = []
+            for p in soup.find_all("p"):
+                paragraphs.append(self._extract_formatted_text(p))
+
+            if paragraphs:
+                elements.append(
+                    {
+                        "type": "text",
+                        "content": "\n\n".join(paragraphs),
+                        "position": (100, current_y),  # Use tracked Y position
+                        "size": (600, 100),
+                    }
+                )
+                current_y += 120  # Increment Y position for next element
+
+            # Determine best layout based on content
+            layout = self._determine_layout(has_title, has_subtitle, has_bullets, has_image, has_table)
+
+            # Check for speaker notes (special syntax: <!-- notes: ... -->)
+            notes_match = re.search(r"<!--\s*notes:\s*(.*?)\s*-->", markdown_section, re.DOTALL)
+            if notes_match:
+                elements.append({"type": "notes", "content": notes_match.group(1).strip()})
+
+            return layout, elements
+        except Exception as e:
+            logger.warning(f"Error parsing markdown: {str(e)}")
+            # Return a fallback layout and simple text element
+            return self.LAYOUTS["BLANK"], [
+                {
+                    "type": "text",
+                    "content": markdown_section,
+                    "position": (100, 100),
+                    "size": (600, 300),
+                }
+            ]
+
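Continuing the illustrative sketch (again not part of the diffed file; the sample section is invented): parse_slide_markdown renders one section to HTML, walks the soup, and returns a layout name plus positioned element dictionaries.

    layout, elements = converter.parse_slide_markdown(
        "# Quarterly review\n\n- Revenue up\n- Costs flat\n"
    )
    print(layout)                          # TITLE_AND_BODY
    print([e["type"] for e in elements])   # ['title', 'bullets']
    print(elements[0]["content"])          # Quarterly review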
+    def _extract_formatted_text(self, element) -> str:
+        """
+        Extract text with formatting from BeautifulSoup element.
+
+        Args:
+            element: BeautifulSoup element
+
+        Returns:
+            Formatted text string with formatting markers for later processing
+        """
+        # If element is just a string, return it
+        if isinstance(element, str):
+            return element
+
+        formatted_text = ""
+        logger.info(f"Extracting formatted text from element: {element.name if hasattr(element, 'name') else 'text node'}")
+
+        # Process all nested elements to preserve formatting
+        for child in element.children:
+            if child.name is None:  # Text node
+                formatted_text += child.string or ""
+            elif child.name == "strong" or child.name == "b":
+                child_text = self._extract_formatted_text(child)
+                formatted_text += f"**{child_text}**"
+                logger.info(f"Found BOLD text: '{child_text}'")
+            elif child.name == "em" or child.name == "i":
+                child_text = self._extract_formatted_text(child)
+                formatted_text += f"*{child_text}*"
+                logger.info(f"Found ITALIC text: '{child_text}'")
+            elif child.name == "code":
+                child_text = self._extract_formatted_text(child)
+                formatted_text += f"`{child_text}`"
+            elif child.name == "a":
+                child_text = self._extract_formatted_text(child)
+                formatted_text += child_text
+            elif child.name in ["p", "div", "li", "td", "th"]:
+                formatted_text += self._extract_formatted_text(child)
+            else:
+                # For any other element, just get its text
+                formatted_text += child.get_text()
+
+        return formatted_text.strip()
+
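A quick check of the marker syntax that _extract_formatted_text produces (illustrative only; it pokes a private helper directly, which application code would not normally do):

    from bs4 import BeautifulSoup

    li = BeautifulSoup("<li>Use <strong>bold</strong> and <em>italic</em> text</li>", "html.parser").li
    print(converter._extract_formatted_text(li))  # Use **bold** and *italic* text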
+    def _determine_layout(
+        self,
+        has_title: bool,
+        has_subtitle: bool,
+        has_bullets: bool,
+        has_image: bool,
+        has_table: bool,
+    ) -> str:
+        """
+        Determine the best slide layout based on content.
+
+        Args:
+            has_title: Whether slide has a title
+            has_subtitle: Whether slide has a subtitle
+            has_bullets: Whether slide has bullet points
+            has_image: Whether slide has an image
+            has_table: Whether slide has a table
+
+        Returns:
+            Slide layout name
+        """
+        if has_title:
+            if has_subtitle and (has_bullets or has_table):
+                return self.LAYOUTS["TITLE_AND_BODY"]
+            if has_image and not has_bullets and not has_table:
+                return self.LAYOUTS["CAPTION_ONLY"]
+            if has_bullets or has_table:
+                return self.LAYOUTS["TITLE_AND_BODY"]
+            return self.LAYOUTS["TITLE_ONLY"]
+        return self.LAYOUTS["BLANK"]
+
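The layout choice above is effectively a small truth table; a few illustrative calls (again invoking a private method purely to show the mapping):

    converter._determine_layout(True, False, True, False, False)   # TITLE_AND_BODY (title + bullets)
    converter._determine_layout(True, False, False, True, False)   # CAPTION_ONLY (title + image only)
    converter._determine_layout(False, False, True, False, False)  # BLANK (no title)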
+    def create_text_style_requests(self, formatted_text: str, element_id: str, start_index: int = 0) -> tuple[str, list[dict]]:
+        """
+        Create requests to apply text styling for formatted text.
+
+        Args:
+            formatted_text: Text with markdown-style formatting markers
+            element_id: ID of the text element
+            start_index: Starting character index
+
+        Returns:
+            Tuple of (plain_text, style_requests)
+        """
+        # Don't try to style if no formatting is detected
+        if (
+            "**" not in formatted_text and "*" not in formatted_text  # Only handle bold/italic for now
+            # and "`" not in formatted_text  # Code handling TBD
+        ):
+            return formatted_text, []
+
+        # plain_text will be generated, style_requests will be populated
+        style_requests = []
+
+        try:
+            # First, build the plain text by removing all formatting
+            plain_text = formatted_text
+            # Remove bold markers first (important for correct offset calculation)
+            plain_text = re.sub(r"\*\*(.*?)\*\*", r"\1", plain_text)
+            # Remove italic markers
+            plain_text = re.sub(r"\*(.*?)\*", r"\1", plain_text)
+            # Remove code markers (if implemented)
+            # plain_text = re.sub(r"`(.*?)`", r"\1", plain_text)
+
+            # Helper function to calculate plain text index from original index
+            def get_plain_index(original_index, original_text):
+                text_before = original_text[:original_index]
+                # Count markers removed before this index
+                # bold_markers_removed = len(re.findall(r'\*\*', text_before)) * 2
+                # italic_markers_removed = len(re.findall(r'\*', text_before)) * 2  # Assumes * is only for italic
+                # code_markers_removed = len(re.findall(r'`', text_before)) * 2
+                # Need to be careful about nested/overlapping markers, this simple count might be insufficient
+                # A more robust way is to build the plain text incrementally and map indices
+
+                # Let's try the simplified approach first based on user code:
+                plain_equivalent_before = text_before
+                plain_equivalent_before = re.sub(r"\*\*(.*?)\*\*", r"\1", plain_equivalent_before)
+                plain_equivalent_before = re.sub(r"\*(.*?)\*", r"\1", plain_equivalent_before)
+                # plain_equivalent_before = re.sub(r"`(.*?)`", r"\1", plain_equivalent_before)
+                return len(plain_equivalent_before)
+
+            # Generate style requests for bold formatting
+            bold_matches = list(re.finditer(r"\*\*(.*?)\*\*", formatted_text))
+            for match in bold_matches:
+                content = match.group(1)  # Text inside ** markers
+                # Calculate start index in plain text
+                start_idx = get_plain_index(match.start(), formatted_text)
+                end_idx = start_idx + len(content)
+
+                style_requests.append(
+                    {
+                        "updateTextStyle": {
+                            "objectId": element_id,
+                            "textRange": {"startIndex": start_idx, "endIndex": end_idx},
+                            "style": {"bold": True},
+                            "fields": "bold",
+                        }
+                    }
+                )
+
+            # Generate style requests for italic formatting
+            italic_matches = list(re.finditer(r"\*(.*?)\*", formatted_text))
+            for match in italic_matches:
+                # Skip if this is part of a bold marker (e.g., ** *italic* **) - simplistic check
+                is_part_of_bold = False
+                for bold_match in bold_matches:
+                    # Check if italic is *inside* bold markers
+                    if bold_match.start() < match.start() and match.end() < bold_match.end():
+                        is_part_of_bold = True
+                        break
+
+                if not is_part_of_bold:
+                    content = match.group(1)  # Text inside * markers
+                    # Calculate position in plain text
+                    start_idx = get_plain_index(match.start(), formatted_text)
+                    end_idx = start_idx + len(content)
+
+                    style_requests.append(
+                        {
+                            "updateTextStyle": {
+                                "objectId": element_id,
+                                "textRange": {
+                                    "startIndex": start_idx,
+                                    "endIndex": end_idx,
+                                },
+                                "style": {"italic": True},
+                                "fields": "italic",
+                            }
+                        }
+                    )
+
+            # Add logger info if needed
+            logger.info(f"Generated {len(style_requests)} style requests for element {element_id}")
+            return plain_text, style_requests
+
+        except Exception as e:
+            # If there's any error in formatting, just return the original text
+            logger.warning(f"Error processing text formatting: {str(e)}")
+            return (
+                formatted_text.replace("**", "").replace("*", "").replace("`", ""),
+                [],
+            )
+
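Finally, an illustrative call to the style-request builder (the element id "textbox_1" and the input string are invented): the markers are stripped and the returned dicts carry indices into the plain text, ready to be combined with an insertText request in a Slides batchUpdate call by whatever code drives the API.

    plain, styles = converter.create_text_style_requests("polish *later*", element_id="textbox_1")
    # plain  == "polish later"
    # styles == [{"updateTextStyle": {"objectId": "textbox_1",
    #                                 "textRange": {"startIndex": 7, "endIndex": 12},
    #                                 "style": {"italic": True},
    #                                 "fields": "italic"}}]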
+    def generate_slide_requests(self, presentation_id: str, elements: list[dict]) -> list[dict]:
+        # Implementation of generate_slide_requests method
+        pass