lm-deluge 0.0.67__py3-none-any.whl → 0.0.90__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lm-deluge might be problematic. Click here for more details.

Files changed (108) hide show
  1. lm_deluge/__init__.py +1 -2
  2. lm_deluge/api_requests/anthropic.py +117 -22
  3. lm_deluge/api_requests/base.py +84 -11
  4. lm_deluge/api_requests/bedrock.py +30 -6
  5. lm_deluge/api_requests/chat_reasoning.py +4 -0
  6. lm_deluge/api_requests/gemini.py +166 -20
  7. lm_deluge/api_requests/openai.py +145 -25
  8. lm_deluge/batches.py +15 -45
  9. lm_deluge/client.py +309 -50
  10. lm_deluge/config.py +15 -3
  11. lm_deluge/models/__init__.py +14 -1
  12. lm_deluge/models/anthropic.py +29 -14
  13. lm_deluge/models/arcee.py +16 -0
  14. lm_deluge/models/deepseek.py +36 -4
  15. lm_deluge/models/google.py +42 -0
  16. lm_deluge/models/grok.py +24 -0
  17. lm_deluge/models/kimi.py +36 -0
  18. lm_deluge/models/minimax.py +18 -0
  19. lm_deluge/models/openai.py +100 -0
  20. lm_deluge/models/openrouter.py +133 -7
  21. lm_deluge/models/together.py +11 -0
  22. lm_deluge/models/zai.py +50 -0
  23. lm_deluge/pipelines/gepa/__init__.py +95 -0
  24. lm_deluge/pipelines/gepa/core.py +354 -0
  25. lm_deluge/pipelines/gepa/docs/samples.py +705 -0
  26. lm_deluge/pipelines/gepa/examples/01_synthetic_keywords.py +140 -0
  27. lm_deluge/pipelines/gepa/examples/02_gsm8k_math.py +261 -0
  28. lm_deluge/pipelines/gepa/examples/03_hotpotqa_multihop.py +300 -0
  29. lm_deluge/pipelines/gepa/examples/04_batch_classification.py +271 -0
  30. lm_deluge/pipelines/gepa/examples/simple_qa.py +129 -0
  31. lm_deluge/pipelines/gepa/optimizer.py +435 -0
  32. lm_deluge/pipelines/gepa/proposer.py +235 -0
  33. lm_deluge/pipelines/gepa/util.py +165 -0
  34. lm_deluge/{llm_tools → pipelines}/score.py +2 -2
  35. lm_deluge/{llm_tools → pipelines}/translate.py +5 -3
  36. lm_deluge/prompt.py +537 -88
  37. lm_deluge/request_context.py +7 -2
  38. lm_deluge/server/__init__.py +24 -0
  39. lm_deluge/server/__main__.py +144 -0
  40. lm_deluge/server/adapters.py +369 -0
  41. lm_deluge/server/app.py +388 -0
  42. lm_deluge/server/auth.py +71 -0
  43. lm_deluge/server/model_policy.py +215 -0
  44. lm_deluge/server/models_anthropic.py +172 -0
  45. lm_deluge/server/models_openai.py +175 -0
  46. lm_deluge/tool/__init__.py +1130 -0
  47. lm_deluge/tool/builtin/anthropic/__init__.py +300 -0
  48. lm_deluge/tool/builtin/anthropic/bash.py +0 -0
  49. lm_deluge/tool/builtin/anthropic/computer_use.py +0 -0
  50. lm_deluge/tool/builtin/gemini.py +59 -0
  51. lm_deluge/tool/builtin/openai.py +74 -0
  52. lm_deluge/tool/cua/__init__.py +173 -0
  53. lm_deluge/tool/cua/actions.py +148 -0
  54. lm_deluge/tool/cua/base.py +27 -0
  55. lm_deluge/tool/cua/batch.py +215 -0
  56. lm_deluge/tool/cua/converters.py +466 -0
  57. lm_deluge/tool/cua/kernel.py +702 -0
  58. lm_deluge/tool/cua/trycua.py +989 -0
  59. lm_deluge/tool/prefab/__init__.py +45 -0
  60. lm_deluge/tool/prefab/batch_tool.py +156 -0
  61. lm_deluge/tool/prefab/docs.py +1119 -0
  62. lm_deluge/tool/prefab/email.py +294 -0
  63. lm_deluge/tool/prefab/filesystem.py +1711 -0
  64. lm_deluge/tool/prefab/full_text_search/__init__.py +285 -0
  65. lm_deluge/tool/prefab/full_text_search/tantivy_index.py +396 -0
  66. lm_deluge/tool/prefab/memory.py +458 -0
  67. lm_deluge/tool/prefab/otc/__init__.py +165 -0
  68. lm_deluge/tool/prefab/otc/executor.py +281 -0
  69. lm_deluge/tool/prefab/otc/parse.py +188 -0
  70. lm_deluge/tool/prefab/random.py +212 -0
  71. lm_deluge/tool/prefab/rlm/__init__.py +296 -0
  72. lm_deluge/tool/prefab/rlm/executor.py +349 -0
  73. lm_deluge/tool/prefab/rlm/parse.py +144 -0
  74. lm_deluge/tool/prefab/sandbox/__init__.py +19 -0
  75. lm_deluge/tool/prefab/sandbox/daytona_sandbox.py +483 -0
  76. lm_deluge/tool/prefab/sandbox/docker_sandbox.py +609 -0
  77. lm_deluge/tool/prefab/sandbox/fargate_sandbox.py +546 -0
  78. lm_deluge/tool/prefab/sandbox/modal_sandbox.py +469 -0
  79. lm_deluge/tool/prefab/sandbox/seatbelt_sandbox.py +827 -0
  80. lm_deluge/tool/prefab/sheets.py +385 -0
  81. lm_deluge/tool/prefab/skills.py +0 -0
  82. lm_deluge/tool/prefab/subagents.py +233 -0
  83. lm_deluge/tool/prefab/todos.py +342 -0
  84. lm_deluge/tool/prefab/tool_search.py +169 -0
  85. lm_deluge/tool/prefab/web_search.py +199 -0
  86. lm_deluge/tracker.py +16 -13
  87. lm_deluge/util/schema.py +412 -0
  88. lm_deluge/warnings.py +8 -0
  89. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/METADATA +23 -9
  90. lm_deluge-0.0.90.dist-info/RECORD +132 -0
  91. lm_deluge/built_in_tools/anthropic/__init__.py +0 -128
  92. lm_deluge/built_in_tools/openai.py +0 -28
  93. lm_deluge/presets/cerebras.py +0 -17
  94. lm_deluge/presets/meta.py +0 -13
  95. lm_deluge/tool.py +0 -849
  96. lm_deluge-0.0.67.dist-info/RECORD +0 -72
  97. lm_deluge/{llm_tools → pipelines}/__init__.py +1 -1
  98. /lm_deluge/{llm_tools → pipelines}/classify.py +0 -0
  99. /lm_deluge/{llm_tools → pipelines}/extract.py +0 -0
  100. /lm_deluge/{llm_tools → pipelines}/locate.py +0 -0
  101. /lm_deluge/{llm_tools → pipelines}/ocr.py +0 -0
  102. /lm_deluge/{built_in_tools/anthropic/bash.py → skills/anthropic.py} +0 -0
  103. /lm_deluge/{built_in_tools/anthropic/computer_use.py → skills/compat.py} +0 -0
  104. /lm_deluge/{built_in_tools → tool/builtin}/anthropic/editor.py +0 -0
  105. /lm_deluge/{built_in_tools → tool/builtin}/base.py +0 -0
  106. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/WHEEL +0 -0
  107. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/licenses/LICENSE +0 -0
  108. {lm_deluge-0.0.67.dist-info → lm_deluge-0.0.90.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1119 @@
1
+ """Google Docs manipulation prefab tool."""
2
+
3
+ import json
4
+ import os
5
+ import re
6
+ from typing import Any
7
+
8
+ from lm_deluge.tool import Tool
9
+
10
+
11
# Google Docs named paragraph styles keyed by markdown heading level (1-6).
HEADING_STYLES = {level: f"HEADING_{level}" for level in range(1, 7)}
20
+
21
+
22
+ class DocsManager:
23
+ """
24
+ A prefab tool for manipulating Google Docs.
25
+
26
+ Provides tools to read, write, and edit Google Docs documents.
27
+ Supports markdown-style formatting (bold, italic, underline, headings).
28
+
29
+ Args:
30
+ document_id: Optional. The ID of the Google Doc to manipulate.
31
+ If not provided, a new document will be created on first use.
32
+ credentials_json: Optional. JSON string or dict containing Google service account credentials.
33
+ If not provided, will look for GOOGLE_DOCS_CREDENTIALS env variable.
34
+ credentials_file: Optional. Path to a JSON file containing credentials.
35
+ Only used if credentials_json is not provided.
36
+ document_title: Optional. Title for new document (only used if document_id is not provided)
37
+
38
+ Example:
39
+ ```python
40
+ # Using existing document
41
+ manager = DocsManager(
42
+ document_id="your-doc-id-here",
43
+ credentials_json={"type": "service_account", ...}
44
+ )
45
+
46
+ # Creating a new document
47
+ manager = DocsManager(
48
+ document_title="My New Document",
49
+ credentials_json={"type": "service_account", ...}
50
+ )
51
+
52
+ # Get tools
53
+ tools = manager.get_tools()
54
+ ```
55
+ """
56
+
57
+ def __init__(
58
+ self,
59
+ document_id: str | None = None,
60
+ *,
61
+ credentials_json: str | dict[str, Any] | None = None,
62
+ credentials_file: str | None = None,
63
+ document_title: str = "New Document",
64
+ get_metadata_tool_name: str = "docs_get_metadata",
65
+ read_range_tool_name: str = "docs_read_range",
66
+ grep_tool_name: str = "docs_grep",
67
+ add_paragraph_tool_name: str = "docs_add_paragraph",
68
+ update_paragraph_tool_name: str = "docs_update_paragraph",
69
+ replace_tool_name: str = "docs_replace_text",
70
+ delete_range_tool_name: str = "docs_delete_range",
71
+ ):
72
+ self.document_id = document_id
73
+ self.document_title = document_title
74
+ self.get_metadata_tool_name = get_metadata_tool_name
75
+ self.read_range_tool_name = read_range_tool_name
76
+ self.grep_tool_name = grep_tool_name
77
+ self.add_paragraph_tool_name = add_paragraph_tool_name
78
+ self.update_paragraph_tool_name = update_paragraph_tool_name
79
+ self.replace_tool_name = replace_tool_name
80
+ self.delete_range_tool_name = delete_range_tool_name
81
+
82
+ # Handle credentials
83
+ if credentials_json is not None:
84
+ if isinstance(credentials_json, str):
85
+ self.credentials = json.loads(credentials_json)
86
+ else:
87
+ self.credentials = credentials_json
88
+ elif credentials_file is not None:
89
+ with open(credentials_file, "r") as f:
90
+ self.credentials = json.load(f)
91
+ else:
92
+ # Try to load from environment
93
+ env_creds = os.environ.get("GOOGLE_DOCS_CREDENTIALS")
94
+ if env_creds:
95
+ self.credentials = json.loads(env_creds)
96
+ else:
97
+ raise ValueError(
98
+ "No credentials provided. Please provide credentials_json, "
99
+ "credentials_file, or set GOOGLE_DOCS_CREDENTIALS environment variable."
100
+ )
101
+
102
+ self._service = None
103
+ self._tools: list[Tool] | None = None
104
+
105
+ def _get_service(self):
106
+ """Lazily initialize the Google Docs API service."""
107
+ if self._service is not None:
108
+ return self._service
109
+
110
+ try:
111
+ from google.oauth2 import service_account
112
+ from googleapiclient.discovery import build
113
+ except ImportError:
114
+ raise ImportError(
115
+ "Google Docs API dependencies not installed. "
116
+ "Please install with: pip install google-api-python-client google-auth"
117
+ )
118
+
119
+ # Create credentials from service account info
120
+ creds = service_account.Credentials.from_service_account_info(
121
+ self.credentials, scopes=["https://www.googleapis.com/auth/documents"]
122
+ )
123
+
124
+ # Build the service
125
+ self._service = build("docs", "v1", credentials=creds)
126
+ return self._service
127
+
128
+ def _ensure_document(self) -> str:
129
+ """
130
+ Ensure we have a document ID. If not, create a new document.
131
+
132
+ Returns:
133
+ The document ID
134
+ """
135
+ if self.document_id is not None:
136
+ return self.document_id
137
+
138
+ # Create a new document
139
+ try:
140
+ service = self._get_service()
141
+ document = (
142
+ service.documents()
143
+ .create(body={"title": self.document_title})
144
+ .execute()
145
+ )
146
+ self.document_id = document.get("documentId")
147
+ return self.document_id
148
+ except Exception as e:
149
+ raise RuntimeError(f"Failed to create document: {str(e)}")
150
+
151
+ def _get_document_end_index(self, document: dict[str, Any]) -> int:
152
+ """Get the end index of the document content (before the final newline)."""
153
+ body_content = document.get("body", {}).get("content", [])
154
+ if not body_content:
155
+ return 1
156
+ # endIndex - 1 because docs always end with a mandatory newline
157
+ end_index = body_content[-1].get("endIndex", 2) - 1
158
+ # Ensure we never return less than 1
159
+ return max(1, end_index)
160
+
161
+ def _is_document_empty(self, document: dict[str, Any]) -> bool:
162
+ """Check if the document is empty (only contains the mandatory newline)."""
163
+ body_content = document.get("body", {}).get("content", [])
164
+ if not body_content:
165
+ return True
166
+ # An empty doc has endIndex of 2 (index 1 is the mandatory newline)
167
+ last_end = body_content[-1].get("endIndex", 1)
168
+ return last_end <= 2
169
+
170
+ def _extract_text_with_formatting(
171
+ self,
172
+ document: dict[str, Any],
173
+ start_line: int | None = None,
174
+ end_line: int | None = None,
175
+ ) -> tuple[str, int]:
176
+ """
177
+ Extract text content from a document with markdown formatting.
178
+
179
+ Returns:
180
+ Tuple of (formatted text, total line count)
181
+ """
182
+ content = document.get("body", {}).get("content", [])
183
+ lines: list[str] = []
184
+
185
+ for element in content:
186
+ if "paragraph" in element:
187
+ para = element["paragraph"]
188
+ para_style = para.get("paragraphStyle", {}).get(
189
+ "namedStyleType", "NORMAL_TEXT"
190
+ )
191
+
192
+ # Build the line with inline formatting
193
+ line_parts: list[str] = []
194
+ for para_element in para.get("elements", []):
195
+ text_run = para_element.get("textRun")
196
+ if text_run:
197
+ text = text_run.get("content", "")
198
+ style = text_run.get("textStyle", {})
199
+
200
+ # Apply inline formatting
201
+ formatted = text.rstrip("\n")
202
+ if formatted:
203
+ is_bold = style.get("bold", False)
204
+ is_italic = style.get("italic", False)
205
+ is_underline = style.get("underline", False)
206
+
207
+ # Apply formatting in order: underline wraps innermost, then bold/italic
208
+ if is_bold and is_italic:
209
+ formatted = f"***{formatted}***"
210
+ elif is_bold:
211
+ formatted = f"**{formatted}**"
212
+ elif is_italic:
213
+ formatted = f"*{formatted}*"
214
+
215
+ if is_underline:
216
+ formatted = f"<u>{formatted}</u>"
217
+
218
+ line_parts.append(formatted)
219
+
220
+ line = "".join(line_parts)
221
+
222
+ # Apply heading prefix based on paragraph style
223
+ if para_style == "HEADING_1":
224
+ line = f"# {line}"
225
+ elif para_style == "HEADING_2":
226
+ line = f"## {line}"
227
+ elif para_style == "HEADING_3":
228
+ line = f"### {line}"
229
+ elif para_style == "HEADING_4":
230
+ line = f"#### {line}"
231
+ elif para_style == "HEADING_5":
232
+ line = f"##### {line}"
233
+ elif para_style == "HEADING_6":
234
+ line = f"###### {line}"
235
+ elif para_style == "TITLE":
236
+ line = f"# {line}"
237
+ elif para_style == "SUBTITLE":
238
+ line = f"## {line}"
239
+
240
+ lines.append(line)
241
+
242
+ elif "table" in element:
243
+ # Handle tables - just extract text for now
244
+ table = element["table"]
245
+ for row in table.get("tableRows", []):
246
+ row_parts: list[str] = []
247
+ for cell in row.get("tableCells", []):
248
+ cell_text_parts: list[str] = []
249
+ for cell_content in cell.get("content", []):
250
+ if "paragraph" in cell_content:
251
+ for para_element in cell_content["paragraph"].get(
252
+ "elements", []
253
+ ):
254
+ text_run = para_element.get("textRun")
255
+ if text_run:
256
+ cell_text_parts.append(
257
+ text_run.get("content", "").strip()
258
+ )
259
+ row_parts.append(" ".join(cell_text_parts))
260
+ lines.append(" | ".join(row_parts))
261
+
262
+ total_lines = len(lines)
263
+
264
+ # Apply line range filter
265
+ if start_line is not None or end_line is not None:
266
+ start_idx = (start_line - 1) if start_line else 0
267
+ end_idx = end_line if end_line else len(lines)
268
+ lines = lines[start_idx:end_idx]
269
+
270
+ return "\n".join(lines), total_lines
271
+
272
+ def _parse_markdown_text(self, text: str) -> list[dict[str, Any]]:
273
+ """
274
+ Parse markdown-formatted text into segments with formatting info.
275
+
276
+ Supports:
277
+ - **bold**
278
+ - *italic*
279
+ - <u>underline</u>
280
+ - ***bold italic***
281
+ - # Heading 1 through ###### Heading 6
282
+
283
+ Returns:
284
+ List of dicts with 'text', 'bold', 'italic', 'underline', 'heading_level' keys
285
+ """
286
+ lines = text.split("\n")
287
+ result: list[dict[str, Any]] = []
288
+
289
+ for i, line in enumerate(lines):
290
+ heading_level = 0
291
+
292
+ # Check for heading
293
+ heading_match = re.match(r"^(#{1,6})\s+(.*)$", line)
294
+ if heading_match:
295
+ heading_level = len(heading_match.group(1))
296
+ line = heading_match.group(2)
297
+
298
+ # Parse inline formatting
299
+ segments = self._parse_inline_formatting(line)
300
+
301
+ for seg in segments:
302
+ seg["heading_level"] = heading_level
303
+
304
+ result.extend(segments)
305
+
306
+ # Add newline between lines (except for last line)
307
+ if i < len(lines) - 1:
308
+ result.append(
309
+ {
310
+ "text": "\n",
311
+ "bold": False,
312
+ "italic": False,
313
+ "underline": False,
314
+ "heading_level": 0,
315
+ }
316
+ )
317
+
318
+ return result
319
+
320
+ def _parse_inline_formatting(self, text: str) -> list[dict[str, Any]]:
321
+ """Parse inline formatting (bold, italic, underline) from text."""
322
+ segments: list[dict[str, Any]] = []
323
+
324
+ # Pattern to match formatting markers
325
+ # Order matters: check *** before ** before *
326
+ pattern = r"(\*\*\*(.+?)\*\*\*|\*\*(.+?)\*\*|\*(.+?)\*|<u>(.+?)</u>)"
327
+
328
+ last_end = 0
329
+ for match in re.finditer(pattern, text):
330
+ # Add any text before this match
331
+ if match.start() > last_end:
332
+ segments.append(
333
+ {
334
+ "text": text[last_end : match.start()],
335
+ "bold": False,
336
+ "italic": False,
337
+ "underline": False,
338
+ }
339
+ )
340
+
341
+ # Determine what kind of formatting this is
342
+ full_match = match.group(0)
343
+ if full_match.startswith("***"):
344
+ segments.append(
345
+ {
346
+ "text": match.group(2),
347
+ "bold": True,
348
+ "italic": True,
349
+ "underline": False,
350
+ }
351
+ )
352
+ elif full_match.startswith("**"):
353
+ segments.append(
354
+ {
355
+ "text": match.group(3),
356
+ "bold": True,
357
+ "italic": False,
358
+ "underline": False,
359
+ }
360
+ )
361
+ elif full_match.startswith("*"):
362
+ segments.append(
363
+ {
364
+ "text": match.group(4),
365
+ "bold": False,
366
+ "italic": True,
367
+ "underline": False,
368
+ }
369
+ )
370
+ elif full_match.startswith("<u>"):
371
+ segments.append(
372
+ {
373
+ "text": match.group(5),
374
+ "bold": False,
375
+ "italic": False,
376
+ "underline": True,
377
+ }
378
+ )
379
+
380
+ last_end = match.end()
381
+
382
+ # Add any remaining text
383
+ if last_end < len(text):
384
+ segments.append(
385
+ {
386
+ "text": text[last_end:],
387
+ "bold": False,
388
+ "italic": False,
389
+ "underline": False,
390
+ }
391
+ )
392
+
393
+ # If no formatting found, return the whole text as one segment
394
+ if not segments:
395
+ segments.append(
396
+ {"text": text, "bold": False, "italic": False, "underline": False}
397
+ )
398
+
399
+ return segments
400
+
401
+ def _build_insert_requests(
402
+ self, segments: list[dict[str, Any]], start_index: int
403
+ ) -> tuple[list[dict[str, Any]], int]:
404
+ """
405
+ Build Google Docs API requests to insert formatted text.
406
+
407
+ Returns:
408
+ Tuple of (list of requests, end index after insertion)
409
+ """
410
+ requests: list[dict[str, Any]] = []
411
+ current_index = start_index
412
+
413
+ # Group segments by paragraph (split on heading changes and newlines)
414
+ paragraphs: list[tuple[int, list[dict[str, Any]]]] = []
415
+ current_para: list[dict[str, Any]] = []
416
+ current_heading = 0
417
+
418
+ for seg in segments:
419
+ if seg["text"] == "\n":
420
+ if current_para:
421
+ paragraphs.append((current_heading, current_para))
422
+ current_para = []
423
+ current_heading = 0
424
+ else:
425
+ if seg.get("heading_level", 0) > 0:
426
+ current_heading = seg["heading_level"]
427
+ current_para.append(seg)
428
+
429
+ if current_para:
430
+ paragraphs.append((current_heading, current_para))
431
+
432
+ # Build requests for each paragraph
433
+ for para_idx, (heading_level, para_segments) in enumerate(paragraphs):
434
+ para_start = current_index
435
+
436
+ # Insert all text segments for this paragraph
437
+ for seg in para_segments:
438
+ text = seg["text"]
439
+ if not text:
440
+ continue
441
+
442
+ # Insert the text
443
+ requests.append(
444
+ {"insertText": {"location": {"index": current_index}, "text": text}}
445
+ )
446
+
447
+ text_start = current_index
448
+ text_end = current_index + len(text)
449
+
450
+ # Apply text styling if needed
451
+ style_updates = {}
452
+ if seg.get("bold"):
453
+ style_updates["bold"] = True
454
+ if seg.get("italic"):
455
+ style_updates["italic"] = True
456
+ if seg.get("underline"):
457
+ style_updates["underline"] = True
458
+
459
+ if style_updates:
460
+ requests.append(
461
+ {
462
+ "updateTextStyle": {
463
+ "range": {
464
+ "startIndex": text_start,
465
+ "endIndex": text_end,
466
+ },
467
+ "textStyle": style_updates,
468
+ "fields": ",".join(style_updates.keys()),
469
+ }
470
+ }
471
+ )
472
+
473
+ current_index = text_end
474
+
475
+ # Add newline after paragraph (except for last one if we're appending)
476
+ if para_idx < len(paragraphs) - 1:
477
+ requests.append(
478
+ {"insertText": {"location": {"index": current_index}, "text": "\n"}}
479
+ )
480
+ para_end = current_index + 1
481
+ current_index = para_end
482
+ else:
483
+ para_end = current_index
484
+
485
+ # Apply paragraph style for headings
486
+ if heading_level > 0 and heading_level in HEADING_STYLES:
487
+ requests.append(
488
+ {
489
+ "updateParagraphStyle": {
490
+ "range": {"startIndex": para_start, "endIndex": para_end},
491
+ "paragraphStyle": {
492
+ "namedStyleType": HEADING_STYLES[heading_level]
493
+ },
494
+ "fields": "namedStyleType",
495
+ }
496
+ }
497
+ )
498
+
499
+ return requests, current_index
500
+
501
+ def _get_metadata(self) -> str:
502
+ """
503
+ Get document metadata including title and line count.
504
+
505
+ Returns:
506
+ JSON string with status, title, line count, and URL
507
+ """
508
+ try:
509
+ doc_id = self._ensure_document()
510
+ service = self._get_service()
511
+
512
+ document = service.documents().get(documentId=doc_id).execute()
513
+
514
+ title = document.get("title", "")
515
+ _, total_lines = self._extract_text_with_formatting(document)
516
+
517
+ return json.dumps(
518
+ {
519
+ "status": "success",
520
+ "document_id": doc_id,
521
+ "title": title,
522
+ "total_lines": total_lines,
523
+ "url": f"https://docs.google.com/document/d/{doc_id}",
524
+ }
525
+ )
526
+
527
+ except Exception as e:
528
+ return json.dumps({"status": "error", "error": str(e)})
529
+
530
+ def _read_range(self, start_line: int, end_line: int | None = None) -> str:
531
+ """
532
+ Read a range of lines from the document with markdown formatting.
533
+
534
+ Args:
535
+ start_line: First line to read (1-based)
536
+ end_line: Last line to read (inclusive). If None, reads to end of document.
537
+
538
+ Returns:
539
+ JSON string with status, content, and line info
540
+ """
541
+ try:
542
+ doc_id = self._ensure_document()
543
+ service = self._get_service()
544
+
545
+ document = service.documents().get(documentId=doc_id).execute()
546
+ title = document.get("title", "")
547
+
548
+ # First get total lines
549
+ _, total_lines = self._extract_text_with_formatting(document)
550
+
551
+ # If end_line not specified, read to end
552
+ actual_end_line = end_line if end_line is not None else total_lines
553
+
554
+ text_content, _ = self._extract_text_with_formatting(
555
+ document, start_line=start_line, end_line=actual_end_line
556
+ )
557
+
558
+ return json.dumps(
559
+ {
560
+ "status": "success",
561
+ "document_id": doc_id,
562
+ "title": title,
563
+ "content": text_content,
564
+ "start_line": start_line,
565
+ "end_line": min(actual_end_line, total_lines),
566
+ "total_lines": total_lines,
567
+ "url": f"https://docs.google.com/document/d/{doc_id}",
568
+ }
569
+ )
570
+
571
+ except Exception as e:
572
+ return json.dumps({"status": "error", "error": str(e)})
573
+
574
+ def _grep(self, pattern: str, ignore_case: bool = False) -> str:
575
+ """
576
+ Search for lines matching a pattern in the document.
577
+
578
+ Args:
579
+ pattern: Regular expression pattern to search for
580
+ ignore_case: If True, perform case-insensitive matching
581
+
582
+ Returns:
583
+ JSON string with status and matching lines with their line numbers
584
+ """
585
+ try:
586
+ doc_id = self._ensure_document()
587
+ service = self._get_service()
588
+
589
+ document = service.documents().get(documentId=doc_id).execute()
590
+
591
+ # Get all lines with formatting
592
+ content = document.get("body", {}).get("content", [])
593
+ lines: list[tuple[int, str]] = [] # (line_num, text)
594
+ line_num = 1
595
+
596
+ for element in content:
597
+ if "paragraph" in element:
598
+ para = element["paragraph"]
599
+
600
+ # Build the line text (without formatting for search)
601
+ line_parts: list[str] = []
602
+ for para_element in para.get("elements", []):
603
+ text_run = para_element.get("textRun")
604
+ if text_run:
605
+ text = text_run.get("content", "").rstrip("\n")
606
+ line_parts.append(text)
607
+
608
+ line_text = "".join(line_parts)
609
+ lines.append((line_num, line_text))
610
+ line_num += 1
611
+
612
+ # Compile the regex
613
+ flags = re.IGNORECASE if ignore_case else 0
614
+ try:
615
+ regex = re.compile(pattern, flags)
616
+ except re.error as e:
617
+ return json.dumps(
618
+ {"status": "error", "error": f"Invalid regex pattern: {str(e)}"}
619
+ )
620
+
621
+ # Find matching lines
622
+ matches: list[dict[str, Any]] = []
623
+ for num, text in lines:
624
+ if regex.search(text):
625
+ matches.append({"line": num, "content": text})
626
+
627
+ return json.dumps(
628
+ {
629
+ "status": "success",
630
+ "document_id": doc_id,
631
+ "pattern": pattern,
632
+ "ignore_case": ignore_case,
633
+ "matches": matches,
634
+ "match_count": len(matches),
635
+ "total_lines": len(lines),
636
+ }
637
+ )
638
+
639
+ except Exception as e:
640
+ return json.dumps({"status": "error", "error": str(e)})
641
+
642
+ def _add_paragraph(
643
+ self, text: str, after_line: int | None = None, markdown: bool = True
644
+ ) -> str:
645
+ """
646
+ Add a new paragraph to the document.
647
+
648
+ Args:
649
+ text: The text for the new paragraph
650
+ after_line: Insert after this line number (1-based). If None, appends to end.
651
+ markdown: If True, parse markdown formatting. If False, insert plain text.
652
+
653
+ Returns:
654
+ JSON string with status and result
655
+ """
656
+ try:
657
+ doc_id = self._ensure_document()
658
+ service = self._get_service()
659
+
660
+ document = service.documents().get(documentId=doc_id).execute()
661
+
662
+ if after_line is None:
663
+ # Append to end
664
+ insert_index = self._get_document_end_index(document)
665
+ else:
666
+ # Insert after the specified line
667
+ paragraphs = self._get_paragraph_info(document)
668
+
669
+ if after_line < 0 or after_line > len(paragraphs):
670
+ return json.dumps(
671
+ {
672
+ "status": "error",
673
+ "error": f"Line {after_line} out of range. Document has {len(paragraphs)} lines. Use 0 to insert at beginning.",
674
+ }
675
+ )
676
+
677
+ if after_line == 0:
678
+ # Insert at the very beginning
679
+ insert_index = 1
680
+ else:
681
+ # Insert after the specified paragraph
682
+ para = paragraphs[after_line - 1]
683
+ insert_index = para["end_index"] - 1 # Before the trailing newline
684
+
685
+ # Ensure text starts with newline if we're not at the beginning
686
+ if insert_index > 1 and not text.startswith("\n"):
687
+ text = "\n" + text
688
+
689
+ if markdown:
690
+ segments = self._parse_markdown_text(text)
691
+ requests, _ = self._build_insert_requests(segments, insert_index)
692
+ else:
693
+ requests = [
694
+ {"insertText": {"location": {"index": insert_index}, "text": text}}
695
+ ]
696
+
697
+ if requests:
698
+ service.documents().batchUpdate(
699
+ documentId=doc_id, body={"requests": requests}
700
+ ).execute()
701
+
702
+ position = (
703
+ f"after line {after_line}" if after_line is not None else "at end"
704
+ )
705
+ return json.dumps(
706
+ {
707
+ "status": "success",
708
+ "message": f"Successfully added paragraph {position}",
709
+ "document_id": doc_id,
710
+ "url": f"https://docs.google.com/document/d/{doc_id}",
711
+ }
712
+ )
713
+
714
+ except Exception as e:
715
+ return json.dumps({"status": "error", "error": str(e)})
716
+
717
+ def _replace_text(
718
+ self, search_text: str, replace_text: str, match_case: bool = True
719
+ ) -> str:
720
+ """
721
+ Replace all occurrences of text in the document.
722
+
723
+ Args:
724
+ search_text: The text to search for
725
+ replace_text: The text to replace it with
726
+ match_case: Whether to match case (default: True)
727
+
728
+ Returns:
729
+ JSON string with status and number of replacements
730
+ """
731
+ try:
732
+ doc_id = self._ensure_document()
733
+ service = self._get_service()
734
+
735
+ requests = [
736
+ {
737
+ "replaceAllText": {
738
+ "containsText": {"text": search_text, "matchCase": match_case},
739
+ "replaceText": replace_text,
740
+ }
741
+ }
742
+ ]
743
+
744
+ result = (
745
+ service.documents()
746
+ .batchUpdate(documentId=doc_id, body={"requests": requests})
747
+ .execute()
748
+ )
749
+
750
+ occurrences = (
751
+ result.get("replies", [{}])[0]
752
+ .get("replaceAllText", {})
753
+ .get("occurrencesChanged", 0)
754
+ )
755
+
756
+ return json.dumps(
757
+ {
758
+ "status": "success",
759
+ "replacements": occurrences,
760
+ "message": f"Replaced {occurrences} occurrence(s) of '{search_text}' with '{replace_text}'",
761
+ "document_id": doc_id,
762
+ "url": f"https://docs.google.com/document/d/{doc_id}",
763
+ }
764
+ )
765
+
766
+ except Exception as e:
767
+ return json.dumps({"status": "error", "error": str(e)})
768
+
769
+ def _get_paragraph_info(self, document: dict[str, Any]) -> list[dict[str, Any]]:
770
+ """Get info about each paragraph in the document."""
771
+ content = document.get("body", {}).get("content", [])
772
+ paragraphs: list[dict[str, Any]] = []
773
+ line_num = 1
774
+
775
+ for element in content:
776
+ if "paragraph" in element:
777
+ para = element["paragraph"]
778
+ start_index = element.get("startIndex", 1)
779
+ end_index = element.get("endIndex", start_index)
780
+
781
+ # Extract text
782
+ text_parts: list[str] = []
783
+ for para_element in para.get("elements", []):
784
+ text_run = para_element.get("textRun")
785
+ if text_run:
786
+ text_parts.append(text_run.get("content", ""))
787
+
788
+ text = "".join(text_parts).rstrip("\n")
789
+
790
+ paragraphs.append(
791
+ {
792
+ "line": line_num,
793
+ "start_index": start_index,
794
+ "end_index": end_index,
795
+ "text": text,
796
+ "style": para.get("paragraphStyle", {}).get(
797
+ "namedStyleType", "NORMAL_TEXT"
798
+ ),
799
+ }
800
+ )
801
+ line_num += 1
802
+
803
+ return paragraphs
804
+
805
+ def _update_paragraph(self, line: int, new_text: str, markdown: bool = True) -> str:
806
+ """
807
+ Update the content of a specific paragraph/line.
808
+
809
+ Args:
810
+ line: The line number to update (1-based)
811
+ new_text: The new text for the paragraph
812
+ markdown: If True, parse markdown formatting.
813
+
814
+ Returns:
815
+ JSON string with status and result
816
+ """
817
+ try:
818
+ doc_id = self._ensure_document()
819
+ service = self._get_service()
820
+
821
+ document = service.documents().get(documentId=doc_id).execute()
822
+ paragraphs = self._get_paragraph_info(document)
823
+
824
+ if line < 1 or line > len(paragraphs):
825
+ return json.dumps(
826
+ {
827
+ "status": "error",
828
+ "error": f"Line {line} out of range. Document has {len(paragraphs)} lines.",
829
+ }
830
+ )
831
+
832
+ para = paragraphs[line - 1]
833
+ start_index = para["start_index"]
834
+ end_index = para["end_index"] - 1 # Don't delete the trailing newline
835
+
836
+ requests: list[dict[str, Any]] = []
837
+
838
+ # Delete the old content (if there is any)
839
+ if end_index > start_index:
840
+ requests.append(
841
+ {
842
+ "deleteContentRange": {
843
+ "range": {"startIndex": start_index, "endIndex": end_index}
844
+ }
845
+ }
846
+ )
847
+
848
+ # Insert new content
849
+ if markdown:
850
+ segments = self._parse_markdown_text(new_text)
851
+ insert_requests, _ = self._build_insert_requests(segments, start_index)
852
+ requests.extend(insert_requests)
853
+ else:
854
+ requests.append(
855
+ {
856
+ "insertText": {
857
+ "location": {"index": start_index},
858
+ "text": new_text,
859
+ }
860
+ }
861
+ )
862
+
863
+ if requests:
864
+ service.documents().batchUpdate(
865
+ documentId=doc_id, body={"requests": requests}
866
+ ).execute()
867
+
868
+ return json.dumps(
869
+ {
870
+ "status": "success",
871
+ "message": f"Successfully updated line {line}",
872
+ "document_id": doc_id,
873
+ "url": f"https://docs.google.com/document/d/{doc_id}",
874
+ }
875
+ )
876
+
877
+ except Exception as e:
878
+ return json.dumps({"status": "error", "error": str(e)})
879
+
880
+ def _delete_range(self, start_line: int, end_line: int) -> str:
881
+ """
882
+ Delete a range of lines from the document.
883
+
884
+ Args:
885
+ start_line: First line to delete (1-based)
886
+ end_line: Last line to delete (inclusive)
887
+
888
+ Returns:
889
+ JSON string with status and result
890
+ """
891
+ try:
892
+ doc_id = self._ensure_document()
893
+ service = self._get_service()
894
+
895
+ document = service.documents().get(documentId=doc_id).execute()
896
+ paragraphs = self._get_paragraph_info(document)
897
+
898
+ if not paragraphs:
899
+ return json.dumps(
900
+ {
901
+ "status": "success",
902
+ "message": "Document is already empty",
903
+ "document_id": doc_id,
904
+ }
905
+ )
906
+
907
+ if start_line < 1:
908
+ start_line = 1
909
+ if end_line > len(paragraphs):
910
+ end_line = len(paragraphs)
911
+
912
+ if start_line > end_line or start_line > len(paragraphs):
913
+ return json.dumps(
914
+ {
915
+ "status": "error",
916
+ "error": f"Invalid range. Document has {len(paragraphs)} lines.",
917
+ }
918
+ )
919
+
920
+ # Get the range to delete
921
+ start_para = paragraphs[start_line - 1]
922
+ end_para = paragraphs[end_line - 1]
923
+
924
+ start_index = start_para["start_index"]
925
+ end_index = end_para["end_index"]
926
+
927
+ # Don't delete past the document's final newline
928
+ # The API doesn't allow deleting the mandatory trailing newline
929
+ doc_end = self._get_document_end_index(document)
930
+ if end_index > doc_end:
931
+ end_index = doc_end
932
+
933
+ if end_index <= start_index:
934
+ return json.dumps(
935
+ {
936
+ "status": "success",
937
+ "message": "Nothing to delete",
938
+ "document_id": doc_id,
939
+ }
940
+ )
941
+
942
+ requests = [
943
+ {
944
+ "deleteContentRange": {
945
+ "range": {"startIndex": start_index, "endIndex": end_index}
946
+ }
947
+ }
948
+ ]
949
+
950
+ service.documents().batchUpdate(
951
+ documentId=doc_id, body={"requests": requests}
952
+ ).execute()
953
+
954
+ lines_deleted = end_line - start_line + 1
955
+ return json.dumps(
956
+ {
957
+ "status": "success",
958
+ "message": f"Successfully deleted {lines_deleted} line(s)",
959
+ "document_id": doc_id,
960
+ "url": f"https://docs.google.com/document/d/{doc_id}",
961
+ }
962
+ )
963
+
964
+ except Exception as e:
965
+ return json.dumps({"status": "error", "error": str(e)})
966
+
967
def get_tools(self) -> list[Tool]:
    """
    Return the list of Google Docs tools.

    The list is built on the first call and stored on self._tools; later
    calls return that same stored list.
    """
    if self._tools is not None:
        return self._tools

    def spec(kind: str, description: str) -> dict[str, str]:
        # One JSON-schema style entry for a tool parameter.
        return {"type": kind, "description": description}

    # Read-only tools: metadata, line-range reading and regex search.
    metadata_tool = Tool(
        name=self.get_metadata_tool_name,
        description=(
            "Get metadata about the Google Doc including its title and total line count. "
            "Use this to check the document length before reading or modifying."
        ),
        run=self._get_metadata,
        parameters={},
        required=[],
    )
    read_tool = Tool(
        name=self.read_range_tool_name,
        description=(
            "Read lines from the Google Doc with markdown formatting. "
            "Returns content with formatting preserved (headings as #, bold as **, "
            "italic as *, underline as <u>). "
            "If end_line is omitted, reads from start_line to end of document. "
            "Use start_line=1 without end_line to read the entire document."
        ),
        run=self._read_range,
        parameters={
            "start_line": spec("integer", "First line to read (1-based)"),
            "end_line": spec(
                "integer",
                "Last line to read (inclusive). Omit to read to end of document.",
            ),
        },
        required=["start_line"],
    )
    grep_tool = Tool(
        name=self.grep_tool_name,
        description=(
            "Search for lines matching a pattern in the Google Doc. "
            "Returns matching lines with their line numbers. "
            "Supports regular expressions."
        ),
        run=self._grep,
        parameters={
            "pattern": spec("string", "Regular expression pattern to search for"),
            "ignore_case": spec(
                "boolean",
                "If true, perform case-insensitive matching (default: false)",
            ),
        },
        required=["pattern"],
    )

    # Editing tools: add, update, replace and delete.
    add_tool = Tool(
        name=self.add_paragraph_tool_name,
        description=(
            "Add a new paragraph to the Google Doc. "
            "Supports markdown formatting: # headings, **bold**, *italic*, <u>underline</u>. "
            "Use after_line to insert after a specific line, or omit to append at end. "
            "Use after_line=0 to insert at the very beginning."
        ),
        run=self._add_paragraph,
        parameters={
            "text": spec(
                "string",
                "The text for the new paragraph (with optional markdown formatting)",
            ),
            "after_line": spec(
                "integer",
                "Insert after this line number (1-based). Use 0 to insert at beginning. Omit to append at end.",
            ),
            "markdown": spec(
                "boolean",
                "If true (default), parse markdown formatting. If false, insert as plain text.",
            ),
        },
        required=["text"],
    )
    update_tool = Tool(
        name=self.update_paragraph_tool_name,
        description=(
            "Update the content of a specific line/paragraph in the document. "
            "Replaces the entire line with new content. "
            "Supports markdown formatting."
        ),
        run=self._update_paragraph,
        parameters={
            "line": spec("integer", "The line number to update (1-based)"),
            "new_text": spec(
                "string",
                "The new text for the paragraph (with optional markdown)",
            ),
            "markdown": spec(
                "boolean", "If true (default), parse markdown formatting."
            ),
        },
        required=["line", "new_text"],
    )
    replace_tool = Tool(
        name=self.replace_tool_name,
        description=(
            "Replace all occurrences of text in the Google Doc. "
            "Useful for template-based document generation or bulk edits. "
            "Note: replacement text is inserted as plain text without formatting."
        ),
        run=self._replace_text,
        parameters={
            "search_text": spec("string", "The text to search for"),
            "replace_text": spec("string", "The text to replace it with"),
            "match_case": spec("boolean", "Whether to match case (default: true)"),
        },
        required=["search_text", "replace_text"],
    )
    delete_tool = Tool(
        name=self.delete_range_tool_name,
        description=(
            "Delete a range of lines from the Google Doc. "
            "Use this to remove content from the document."
        ),
        run=self._delete_range,
        parameters={
            "start_line": spec("integer", "First line to delete (1-based)"),
            "end_line": spec("integer", "Last line to delete (inclusive)"),
        },
        required=["start_line", "end_line"],
    )

    self._tools = [
        metadata_tool,
        read_tool,
        grep_tool,
        add_tool,
        update_tool,
        replace_tool,
        delete_tool,
    ]
    return self._tools