onetool-mcp 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. bench/__init__.py +5 -0
  2. bench/cli.py +69 -0
  3. bench/harness/__init__.py +66 -0
  4. bench/harness/client.py +692 -0
  5. bench/harness/config.py +397 -0
  6. bench/harness/csv_writer.py +109 -0
  7. bench/harness/evaluate.py +512 -0
  8. bench/harness/metrics.py +283 -0
  9. bench/harness/runner.py +899 -0
  10. bench/py.typed +0 -0
  11. bench/reporter.py +629 -0
  12. bench/run.py +487 -0
  13. bench/secrets.py +101 -0
  14. bench/utils.py +16 -0
  15. onetool/__init__.py +4 -0
  16. onetool/cli.py +391 -0
  17. onetool/py.typed +0 -0
  18. onetool_mcp-1.0.0b1.dist-info/METADATA +163 -0
  19. onetool_mcp-1.0.0b1.dist-info/RECORD +132 -0
  20. onetool_mcp-1.0.0b1.dist-info/WHEEL +4 -0
  21. onetool_mcp-1.0.0b1.dist-info/entry_points.txt +3 -0
  22. onetool_mcp-1.0.0b1.dist-info/licenses/LICENSE.txt +687 -0
  23. onetool_mcp-1.0.0b1.dist-info/licenses/NOTICE.txt +64 -0
  24. ot/__init__.py +37 -0
  25. ot/__main__.py +6 -0
  26. ot/_cli.py +107 -0
  27. ot/_tui.py +53 -0
  28. ot/config/__init__.py +46 -0
  29. ot/config/defaults/bench.yaml +4 -0
  30. ot/config/defaults/diagram-templates/api-flow.mmd +33 -0
  31. ot/config/defaults/diagram-templates/c4-context.puml +30 -0
  32. ot/config/defaults/diagram-templates/class-diagram.mmd +87 -0
  33. ot/config/defaults/diagram-templates/feature-mindmap.mmd +70 -0
  34. ot/config/defaults/diagram-templates/microservices.d2 +81 -0
  35. ot/config/defaults/diagram-templates/project-gantt.mmd +37 -0
  36. ot/config/defaults/diagram-templates/state-machine.mmd +42 -0
  37. ot/config/defaults/onetool.yaml +25 -0
  38. ot/config/defaults/prompts.yaml +97 -0
  39. ot/config/defaults/servers.yaml +7 -0
  40. ot/config/defaults/snippets.yaml +4 -0
  41. ot/config/defaults/tool_templates/__init__.py +7 -0
  42. ot/config/defaults/tool_templates/extension.py +52 -0
  43. ot/config/defaults/tool_templates/isolated.py +61 -0
  44. ot/config/dynamic.py +121 -0
  45. ot/config/global_templates/__init__.py +2 -0
  46. ot/config/global_templates/bench-secrets-template.yaml +6 -0
  47. ot/config/global_templates/bench.yaml +9 -0
  48. ot/config/global_templates/onetool.yaml +27 -0
  49. ot/config/global_templates/secrets-template.yaml +44 -0
  50. ot/config/global_templates/servers.yaml +18 -0
  51. ot/config/global_templates/snippets.yaml +235 -0
  52. ot/config/loader.py +1087 -0
  53. ot/config/mcp.py +145 -0
  54. ot/config/secrets.py +190 -0
  55. ot/config/tool_config.py +125 -0
  56. ot/decorators.py +116 -0
  57. ot/executor/__init__.py +35 -0
  58. ot/executor/base.py +16 -0
  59. ot/executor/fence_processor.py +83 -0
  60. ot/executor/linter.py +142 -0
  61. ot/executor/pack_proxy.py +260 -0
  62. ot/executor/param_resolver.py +140 -0
  63. ot/executor/pep723.py +288 -0
  64. ot/executor/result_store.py +369 -0
  65. ot/executor/runner.py +496 -0
  66. ot/executor/simple.py +163 -0
  67. ot/executor/tool_loader.py +396 -0
  68. ot/executor/validator.py +398 -0
  69. ot/executor/worker_pool.py +388 -0
  70. ot/executor/worker_proxy.py +189 -0
  71. ot/http_client.py +145 -0
  72. ot/logging/__init__.py +37 -0
  73. ot/logging/config.py +315 -0
  74. ot/logging/entry.py +213 -0
  75. ot/logging/format.py +188 -0
  76. ot/logging/span.py +349 -0
  77. ot/meta.py +1555 -0
  78. ot/paths.py +453 -0
  79. ot/prompts.py +218 -0
  80. ot/proxy/__init__.py +21 -0
  81. ot/proxy/manager.py +396 -0
  82. ot/py.typed +0 -0
  83. ot/registry/__init__.py +189 -0
  84. ot/registry/models.py +57 -0
  85. ot/registry/parser.py +269 -0
  86. ot/registry/registry.py +413 -0
  87. ot/server.py +315 -0
  88. ot/shortcuts/__init__.py +15 -0
  89. ot/shortcuts/aliases.py +87 -0
  90. ot/shortcuts/snippets.py +258 -0
  91. ot/stats/__init__.py +35 -0
  92. ot/stats/html.py +250 -0
  93. ot/stats/jsonl_writer.py +283 -0
  94. ot/stats/reader.py +354 -0
  95. ot/stats/timing.py +57 -0
  96. ot/support.py +63 -0
  97. ot/tools.py +114 -0
  98. ot/utils/__init__.py +81 -0
  99. ot/utils/batch.py +161 -0
  100. ot/utils/cache.py +120 -0
  101. ot/utils/deps.py +403 -0
  102. ot/utils/exceptions.py +23 -0
  103. ot/utils/factory.py +179 -0
  104. ot/utils/format.py +65 -0
  105. ot/utils/http.py +202 -0
  106. ot/utils/platform.py +45 -0
  107. ot/utils/sanitize.py +130 -0
  108. ot/utils/truncate.py +69 -0
  109. ot_tools/__init__.py +4 -0
  110. ot_tools/_convert/__init__.py +12 -0
  111. ot_tools/_convert/excel.py +279 -0
  112. ot_tools/_convert/pdf.py +254 -0
  113. ot_tools/_convert/powerpoint.py +268 -0
  114. ot_tools/_convert/utils.py +358 -0
  115. ot_tools/_convert/word.py +283 -0
  116. ot_tools/brave_search.py +604 -0
  117. ot_tools/code_search.py +736 -0
  118. ot_tools/context7.py +495 -0
  119. ot_tools/convert.py +614 -0
  120. ot_tools/db.py +415 -0
  121. ot_tools/diagram.py +1604 -0
  122. ot_tools/diagram.yaml +167 -0
  123. ot_tools/excel.py +1372 -0
  124. ot_tools/file.py +1348 -0
  125. ot_tools/firecrawl.py +732 -0
  126. ot_tools/grounding_search.py +646 -0
  127. ot_tools/package.py +604 -0
  128. ot_tools/py.typed +0 -0
  129. ot_tools/ripgrep.py +544 -0
  130. ot_tools/scaffold.py +471 -0
  131. ot_tools/transform.py +213 -0
  132. ot_tools/web_fetch.py +384 -0
ot_tools/convert.py ADDED
@@ -0,0 +1,614 @@
1
+ """Document conversion tools for OneTool.
2
+
3
+ Converts PDF, Word, PowerPoint, and Excel documents to Markdown
4
+ with LLM-optimised output including YAML frontmatter and TOC.
5
+
6
+ Supports glob patterns for batch conversion with async parallel processing.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
# Pack name: exposes the tools with dot notation (convert.pdf(), convert.word(), ...)
pack = "convert"

__all__ = [
    "auto",
    "excel",
    "pdf",
    "powerpoint",
    "word",
]

# Library requirements declared for CLI validation.
# Dict entries are used where the import name differs from the package name
# (or where extra flags such as "optional" are attached).
# NOTE(review): the tuple entry looks like (name, install command) shorthand -
# confirm against the code that reads __ot_requires__.
__ot_requires__ = {
    "lib": [
        {
            "name": "pymupdf",
            "import_name": "fitz",
            "install": "pip install pymupdf",
        },
        {
            "name": "python-docx",
            "import_name": "docx",
            "install": "pip install python-docx",
        },
        {
            "name": "python-pptx",
            "import_name": "pptx",
            "install": "pip install python-pptx",
        },
        ("openpyxl", "pip install openpyxl"),
        {
            "name": "Pillow",
            "import_name": "PIL",
            "install": "pip install Pillow",
        },
        {
            "name": "formulas",
            "import_name": "formulas",
            "install": "pip install formulas",
            "optional": True,
        },
    ],
}
28
+
29
+ import asyncio
30
+ import atexit
31
+ import os
32
+ from collections.abc import Callable
33
+ from concurrent.futures import ThreadPoolExecutor
34
+ from pathlib import Path
35
+ from typing import Any
36
+
37
+ from ot.logging import LogSpan
38
+ from ot.paths import resolve_cwd_path
39
+ from ot_tools._convert import (
40
+ convert_excel,
41
+ convert_pdf,
42
+ convert_powerpoint,
43
+ convert_word,
44
+ )
45
+
46
# Call shape shared by the converters: (input file, output directory, source label) -> result dict
ConverterFunc = Callable[[Path, Path, str], dict[str, Any]]

# Thread pool shared by all file conversions; stays None until first requested
_conversion_executor: ThreadPoolExecutor | None = None


def _get_conversion_executor() -> ThreadPoolExecutor:
    """Return the shared conversion thread pool, building it on first use.

    The pool is sized to the CPU count (4 when the count is unavailable)
    and capped at 8 workers; its threads are named with the "convert" prefix.
    """
    global _conversion_executor
    pool = _conversion_executor
    if pool is not None:
        return pool

    # One worker per CPU, but never more than 8
    cpu_total = os.cpu_count() or 4
    pool = ThreadPoolExecutor(
        max_workers=min(cpu_total, 8),
        thread_name_prefix="convert",
    )
    _conversion_executor = pool
    return pool
64
+
65
+
66
def _shutdown_executor() -> None:
    """Shut down the shared conversion pool if one was created.

    The pool is asked to stop without waiting for outstanding work, and the
    module-level reference is cleared so a later request builds a new pool.
    """
    global _conversion_executor
    pool = _conversion_executor
    if pool is None:
        return
    pool.shutdown(wait=False)
    _conversion_executor = None


# Run the shutdown hook when the interpreter exits
atexit.register(_shutdown_executor)
75
+
76
+
77
def _resolve_glob(pattern: str) -> list[Path]:
    """Resolve a path or glob pattern to a sorted list of existing files.

    Uses SDK resolve_cwd_path() for consistent path resolution: relative
    patterns are anchored at resolve_cwd_path("."), after a leading ~ has
    been expanded. Wildcards (*, ?, [...], **) may appear in any path
    component, not only in the file name.

    Args:
        pattern: File path or glob pattern (can include ~, relative, or absolute paths)

    Returns:
        Matching regular files in sorted order. Directories that match the
        pattern are dropped. Empty list when nothing matches.
    """
    cwd = resolve_cwd_path(".")
    expanded = Path(pattern).expanduser()
    path = expanded if expanded.is_absolute() else cwd / expanded

    # A literal path to an existing file needs no globbing. Checking this
    # first also covers file names that legitimately contain glob characters.
    if path.is_file():
        return [path]

    # Split the pattern at its first wildcard component: everything before
    # it is a literal base directory, everything from it on is the glob.
    # Only the caller's pattern is scanned, so glob characters in the cwd
    # itself are never misread as wildcards.
    parts = expanded.parts
    split_at = next(
        (idx for idx, part in enumerate(parts) if any(ch in part for ch in "*?[")),
        None,
    )
    if split_at is None:
        # No wildcard and not an existing file: nothing to convert
        return []

    base = Path(*parts[:split_at])
    if not base.is_absolute():
        base = cwd / base
    if not base.is_dir():
        return []

    # Sorted so batch results are reported in a stable order; is_file()
    # filters out directories matched by patterns such as "docs/*".
    relative_glob = "/".join(parts[split_at:])
    return sorted(match for match in base.glob(relative_glob) if match.is_file())
135
+
136
+
137
def _get_source_rel(path: Path) -> str:
    """Return the text used for the frontmatter source field.

    The path is expressed relative to resolve_cwd_path(".") when it lies
    under that directory; otherwise it is returned unchanged as a string.
    """
    project_dir = resolve_cwd_path(".")
    try:
        relative = path.relative_to(project_dir)
    except ValueError:
        # Not located under the project directory: keep the path as given
        return str(path)
    return str(relative)
144
+
145
+
146
def _resolve_output_dir(output_dir: str) -> Path:
    """Turn the caller-supplied output directory into a resolved path.

    Delegates to the SDK helper resolve_cwd_path() so output locations are
    resolved the same way as input patterns.
    """
    resolved = resolve_cwd_path(output_dir)
    return resolved
152
+
153
+
154
async def _convert_file_async(
    converter: Callable[..., dict[str, Any]],
    input_path: Path,
    output_dir: Path,
    source_rel: str,
    **kwargs: Any,
) -> dict[str, Any]:
    """Run one blocking conversion on the shared thread pool and await it.

    Args:
        converter: Callable invoked as converter(input_path, output_dir, source_rel, **kwargs)
        input_path: File to convert
        output_dir: Directory passed through to the converter
        source_rel: Source label passed through to the converter
        **kwargs: Extra keyword options forwarded to the converter

    Returns:
        Whatever the converter returns.

    Raises:
        Any exception raised by the converter propagates to the awaiting caller.
    """
    # Inside a coroutine a loop is always running; get_running_loop() is the
    # documented way to obtain it and never creates a loop implicitly.
    loop = asyncio.get_running_loop()

    def _run() -> dict[str, Any]:
        return converter(input_path, output_dir, source_rel, **kwargs)

    return await loop.run_in_executor(_get_conversion_executor(), _run)
168
+
169
+
170
async def _convert_batch_async(
    files: list[Path],
    output_dir: Path,
    converter: Any,
    **kwargs: Any,
) -> dict[str, Any]:
    """Convert several files concurrently with a single converter.

    Every file is scheduled through _convert_file_async() and all of them
    are awaited together. A conversion that raises is recorded as a failure
    instead of aborting the batch.

    Returns:
        Dict with "converted" and "failed" counts, the "outputs" reported by
        successful conversions, and "errors" as "<file name>: <exception>" strings.
    """
    pending = [
        _convert_file_async(converter, src, output_dir, _get_source_rel(src), **kwargs)
        for src in files
    ]

    # return_exceptions=True keeps one bad file from cancelling the rest
    settled = await asyncio.gather(*pending, return_exceptions=True)

    outputs: list[str] = []
    errors: list[str] = []
    for src, outcome in zip(files, settled, strict=True):
        if isinstance(outcome, BaseException):
            errors.append(f"{src.name}: {outcome}")
            continue
        outputs.append(outcome["output"])

    return {
        "converted": len(outputs),
        "failed": len(errors),
        "outputs": outputs,
        "errors": errors,
    }
203
+
204
+
205
async def _convert_auto_batch_async(
    files: list[Path],
    output_dir: Path,
    converters: dict[str, ConverterFunc],
) -> dict[str, Any]:
    """Convert several files concurrently, choosing a converter per extension.

    The lower-cased file suffix is looked up in *converters*; files whose
    suffix has no entry are counted as skipped and never scheduled. A
    conversion that raises is recorded as a failure instead of aborting
    the batch.

    Returns:
        Dict with "converted", "failed" and "skipped" counts, the "outputs"
        reported by successful conversions, and "errors" as
        "<file name>: <exception>" strings.
    """
    pending = []
    scheduled: list[Path] = []
    skipped = 0

    for src in files:
        handler = converters.get(src.suffix.lower())
        if handler is None:
            skipped += 1
            continue
        pending.append(_convert_file_async(handler, src, output_dir, _get_source_rel(src)))
        scheduled.append(src)

    outputs: list[str] = []
    errors: list[str] = []

    # Nothing convertible was found: report the skips without awaiting anything
    if not pending:
        return {
            "converted": 0,
            "failed": 0,
            "skipped": skipped,
            "outputs": outputs,
            "errors": errors,
        }

    settled = await asyncio.gather(*pending, return_exceptions=True)

    for src, outcome in zip(scheduled, settled, strict=True):
        if isinstance(outcome, BaseException):
            errors.append(f"{src.name}: {outcome}")
            continue
        outputs.append(outcome["output"])

    return {
        "converted": len(outputs),
        "failed": len(errors),
        "skipped": skipped,
        "outputs": outputs,
        "errors": errors,
    }
257
+
258
+
259
def pdf(
    *,
    pattern: str,
    output_dir: str,
) -> str:
    """Convert PDF documents to Markdown.

    Converts PDF files to Markdown with page-by-page text extraction,
    embedded image export, and outline-based heading structure.
    A single match is converted directly; several matches are converted
    in parallel as a batch.

    Args:
        pattern: Glob pattern for input files (e.g., "docs/*.pdf", "report.pdf")
        output_dir: Directory for output files

    Returns:
        Conversion summary with output paths, or error message

    Example:
        convert.pdf(pattern="docs/report.pdf", output_dir="docs/md")
        convert.pdf(pattern="input/*.pdf", output_dir="output")
    """
    with LogSpan(span="convert.pdf", pattern=pattern, output_dir=output_dir) as span:
        matches = _resolve_glob(pattern)
        if not matches:
            span.add(error="no_match")
            return f"No files matched pattern: {pattern}"

        target_dir = _resolve_output_dir(output_dir)

        if len(matches) > 1:
            # Several inputs: run them in parallel and summarise the outcome
            try:
                summary = asyncio.run(_convert_batch_async(matches, target_dir, convert_pdf))
                span.add(converted=summary["converted"], failed=summary["failed"])

                report = [f"Converted {summary['converted']} files, {summary['failed']} failed"]
                if summary["outputs"]:
                    report.append("\nOutputs:")
                    report.extend(f" {item}" for item in summary["outputs"])
                if summary["errors"]:
                    report.append("\nErrors:")
                    report.extend(f" {item}" for item in summary["errors"])

                return "\n".join(report)
            except Exception as exc:
                span.add(error=str(exc))
                return f"Error: {exc}"

        # Exactly one input: convert it in the calling thread
        source = matches[0]
        try:
            info = convert_pdf(source, target_dir, _get_source_rel(source))
            span.add(converted=1, pages=info["pages"], images=info["images"])
            return f"Converted {source.name}: {info['pages']} pages, {info['images']} images\nOutput: {info['output']}"
        except Exception as exc:
            span.add(error=str(exc))
            return f"Error converting {source.name}: {exc}"
318
+
319
+
320
def word(
    *,
    pattern: str,
    output_dir: str,
) -> str:
    """Convert Word documents to Markdown.

    Converts DOCX files to Markdown with heading style detection,
    table conversion, and embedded image export.
    A single match is converted directly; several matches are converted
    in parallel as a batch.

    Args:
        pattern: Glob pattern for input files (e.g., "docs/*.docx", "spec.docx")
        output_dir: Directory for output files

    Returns:
        Conversion summary with output paths, or error message

    Example:
        convert.word(pattern="specs/design.docx", output_dir="specs/md")
        convert.word(pattern="docs/**/*.docx", output_dir="output")
    """
    with LogSpan(span="convert.word", pattern=pattern, output_dir=output_dir) as span:
        matches = _resolve_glob(pattern)
        if not matches:
            span.add(error="no_match")
            return f"No files matched pattern: {pattern}"

        target_dir = _resolve_output_dir(output_dir)

        if len(matches) > 1:
            # Several inputs: run them in parallel and summarise the outcome
            try:
                summary = asyncio.run(_convert_batch_async(matches, target_dir, convert_word))
                span.add(converted=summary["converted"], failed=summary["failed"])

                report = [f"Converted {summary['converted']} files, {summary['failed']} failed"]
                if summary["outputs"]:
                    report.append("\nOutputs:")
                    report.extend(f" {item}" for item in summary["outputs"])
                if summary["errors"]:
                    report.append("\nErrors:")
                    report.extend(f" {item}" for item in summary["errors"])

                return "\n".join(report)
            except Exception as exc:
                span.add(error=str(exc))
                return f"Error: {exc}"

        # Exactly one input: convert it in the calling thread
        source = matches[0]
        try:
            info = convert_word(source, target_dir, _get_source_rel(source))
            span.add(
                converted=1,
                paragraphs=info["paragraphs"],
                tables=info["tables"],
                images=info["images"],
            )
            return f"Converted {source.name}: {info['paragraphs']} paragraphs, {info['tables']} tables, {info['images']} images\nOutput: {info['output']}"
        except Exception as exc:
            span.add(error=str(exc))
            return f"Error converting {source.name}: {exc}"
382
+
383
+
384
def powerpoint(
    *,
    pattern: str,
    output_dir: str,
    include_notes: bool = False,
) -> str:
    """Convert PowerPoint presentations to Markdown.

    Converts PPTX files to Markdown with slide structure,
    table conversion, and embedded image export.
    A single match is converted directly; several matches are converted
    in parallel as a batch.

    Args:
        pattern: Glob pattern for input files (e.g., "slides/*.pptx")
        output_dir: Directory for output files
        include_notes: Include speaker notes after slide content

    Returns:
        Conversion summary with output paths, or error message

    Example:
        convert.powerpoint(pattern="slides/deck.pptx", output_dir="slides/md")
        convert.powerpoint(pattern="presentations/*.pptx", output_dir="output", include_notes=True)
    """
    with LogSpan(
        span="convert.powerpoint",
        pattern=pattern,
        output_dir=output_dir,
        include_notes=include_notes,
    ) as span:
        matches = _resolve_glob(pattern)
        if not matches:
            span.add(error="no_match")
            return f"No files matched pattern: {pattern}"

        target_dir = _resolve_output_dir(output_dir)

        if len(matches) > 1:
            # Several inputs: run them in parallel and summarise the outcome
            try:
                batch = _convert_batch_async(
                    matches, target_dir, convert_powerpoint, include_notes=include_notes
                )
                summary = asyncio.run(batch)
                span.add(converted=summary["converted"], failed=summary["failed"])

                report = [f"Converted {summary['converted']} files, {summary['failed']} failed"]
                if summary["outputs"]:
                    report.append("\nOutputs:")
                    report.extend(f" {item}" for item in summary["outputs"])
                if summary["errors"]:
                    report.append("\nErrors:")
                    report.extend(f" {item}" for item in summary["errors"])

                return "\n".join(report)
            except Exception as exc:
                span.add(error=str(exc))
                return f"Error: {exc}"

        # Exactly one input: convert it in the calling thread
        source = matches[0]
        try:
            info = convert_powerpoint(
                source,
                target_dir,
                _get_source_rel(source),
                include_notes=include_notes,
            )
            span.add(converted=1, slides=info["slides"], images=info["images"])
            return f"Converted {source.name}: {info['slides']} slides, {info['images']} images\nOutput: {info['output']}"
        except Exception as exc:
            span.add(error=str(exc))
            return f"Error converting {source.name}: {exc}"
454
+
455
+
456
def excel(
    *,
    pattern: str,
    output_dir: str,
    include_formulas: bool = False,
    compute_formulas: bool = False,
) -> str:
    """Convert Excel spreadsheets to Markdown.

    Converts XLSX files to Markdown tables with sheet-based sections.
    Uses streaming for memory-efficient processing of large files.
    A single match is converted directly; several matches are converted
    in parallel as a batch.

    Args:
        pattern: Glob pattern for input files (e.g., "data/*.xlsx")
        output_dir: Directory for output files
        include_formulas: Include cell formulas as comments
        compute_formulas: Evaluate formulas when cached values are missing
            (requires 'formulas' library: pip install formulas)

    Returns:
        Conversion summary with output paths, or error message

    Example:
        convert.excel(pattern="data/report.xlsx", output_dir="data/md")
        convert.excel(pattern="spreadsheets/*.xlsx", output_dir="output", include_formulas=True)
        convert.excel(pattern="data/*.xlsx", output_dir="out", compute_formulas=True)
    """
    with LogSpan(
        span="convert.excel",
        pattern=pattern,
        output_dir=output_dir,
        include_formulas=include_formulas,
        compute_formulas=compute_formulas,
    ) as span:
        matches = _resolve_glob(pattern)
        if not matches:
            span.add(error="no_match")
            return f"No files matched pattern: {pattern}"

        target_dir = _resolve_output_dir(output_dir)

        if len(matches) > 1:
            # Several inputs: run them in parallel and summarise the outcome
            try:
                batch = _convert_batch_async(
                    matches,
                    target_dir,
                    convert_excel,
                    include_formulas=include_formulas,
                    compute_formulas=compute_formulas,
                )
                summary = asyncio.run(batch)
                span.add(converted=summary["converted"], failed=summary["failed"])

                report = [f"Converted {summary['converted']} files, {summary['failed']} failed"]
                if summary["outputs"]:
                    report.append("\nOutputs:")
                    report.extend(f" {item}" for item in summary["outputs"])
                if summary["errors"]:
                    report.append("\nErrors:")
                    report.extend(f" {item}" for item in summary["errors"])

                return "\n".join(report)
            except Exception as exc:
                span.add(error=str(exc))
                return f"Error: {exc}"

        # Exactly one input: convert it in the calling thread
        source = matches[0]
        try:
            info = convert_excel(
                source,
                target_dir,
                _get_source_rel(source),
                include_formulas=include_formulas,
                compute_formulas=compute_formulas,
            )
            span.add(converted=1, sheets=info["sheets"], rows=info["rows"])
            return f"Converted {source.name}: {info['sheets']} sheets, {info['rows']} rows\nOutput: {info['output']}"
        except Exception as exc:
            span.add(error=str(exc))
            return f"Error converting {source.name}: {exc}"
535
+
536
+
537
def auto(
    *,
    pattern: str,
    output_dir: str,
) -> str:
    """Auto-detect format and convert documents to Markdown.

    Detects file format from extension and uses the appropriate converter.
    Supports PDF, DOCX, PPTX, and XLSX formats. Matches with any other
    extension are counted as skipped.

    Args:
        pattern: Glob pattern for input files (e.g., "docs/*", "input/**/*").
            Brace alternation such as "*.{pdf,docx}" is not expanded by the
            underlying glob; use a broader pattern and let unsupported
            files be skipped.
        output_dir: Directory for output files

    Returns:
        Conversion summary with output paths, or error message

    Example:
        convert.auto(pattern="docs/*", output_dir="output")
        convert.auto(pattern="input/**/*", output_dir="converted")
    """
    with LogSpan(span="convert.auto", pattern=pattern, output_dir=output_dir) as s:
        files = _resolve_glob(pattern)
        if not files:
            s.add(error="no_match")
            return f"No files matched pattern: {pattern}"

        out_path = _resolve_output_dir(output_dir)

        # Converters by (lower-cased) file extension
        converters: dict[str, ConverterFunc] = {
            ".pdf": convert_pdf,
            ".docx": convert_word,
            ".pptx": convert_powerpoint,
            ".xlsx": convert_excel,
        }

        supported_files = [f for f in files if f.suffix.lower() in converters]
        skipped = len(files) - len(supported_files)

        if not supported_files:
            s.add(converted=0, failed=0, skipped=skipped)
            return f"No supported files found. {skipped} skipped (unsupported format)"

        # Single supported file - convert directly in the calling thread
        if len(supported_files) == 1:
            path = supported_files[0]
            try:
                source_rel = _get_source_rel(path)
                result = converters[path.suffix.lower()](path, out_path, source_rel)
                s.add(converted=1, failed=0, skipped=skipped)
                msg = f"Converted {path.name}\nOutput: {result['output']}"
                if skipped:
                    msg += f"\n{skipped} skipped (unsupported format)"
                return msg
            except Exception as e:
                s.add(converted=0, failed=1, skipped=skipped, error=str(e))
                return f"Error converting {path.name}: {e}"

        # Batch conversion with async parallel processing. The full match
        # list is passed on purpose: the batch helper does its own filtering
        # and reports the skipped count.
        try:
            result = asyncio.run(_convert_auto_batch_async(files, out_path, converters))
            s.add(converted=result["converted"], failed=result["failed"], skipped=result["skipped"])

            lines = [f"Converted {result['converted']} files, {result['failed']} failed, {result['skipped']} skipped (unsupported format)"]
            if result["outputs"]:
                lines.append("\nOutputs:")
                lines.extend(f" {output}" for output in result["outputs"])
            if result["errors"]:
                lines.append("\nErrors:")
                lines.extend(f" {error}" for error in result["errors"])

            return "\n".join(lines)
        except Exception as e:
            s.add(error=str(e))
            return f"Error: {e}"