onetool-mcp 1.0.0b1__py3-none-any.whl → 1.0.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81)
  1. onetool/cli.py +63 -4
  2. onetool_mcp-1.0.0rc2.dist-info/METADATA +266 -0
  3. onetool_mcp-1.0.0rc2.dist-info/RECORD +129 -0
  4. {onetool_mcp-1.0.0b1.dist-info → onetool_mcp-1.0.0rc2.dist-info}/licenses/LICENSE.txt +1 -1
  5. {onetool_mcp-1.0.0b1.dist-info → onetool_mcp-1.0.0rc2.dist-info}/licenses/NOTICE.txt +54 -64
  6. ot/__main__.py +6 -6
  7. ot/config/__init__.py +48 -46
  8. ot/config/global_templates/__init__.py +2 -2
  9. ot/config/{defaults → global_templates}/diagram-templates/api-flow.mmd +33 -33
  10. ot/config/{defaults → global_templates}/diagram-templates/c4-context.puml +30 -30
  11. ot/config/{defaults → global_templates}/diagram-templates/class-diagram.mmd +87 -87
  12. ot/config/{defaults → global_templates}/diagram-templates/feature-mindmap.mmd +70 -70
  13. ot/config/{defaults → global_templates}/diagram-templates/microservices.d2 +81 -81
  14. ot/config/{defaults → global_templates}/diagram-templates/project-gantt.mmd +37 -37
  15. ot/config/{defaults → global_templates}/diagram-templates/state-machine.mmd +42 -42
  16. ot/config/global_templates/diagram.yaml +167 -0
  17. ot/config/global_templates/onetool.yaml +3 -1
  18. ot/config/{defaults → global_templates}/prompts.yaml +102 -97
  19. ot/config/global_templates/security.yaml +31 -0
  20. ot/config/global_templates/servers.yaml +93 -12
  21. ot/config/global_templates/snippets.yaml +5 -26
  22. ot/config/{defaults → global_templates}/tool_templates/__init__.py +7 -7
  23. ot/config/loader.py +221 -105
  24. ot/config/mcp.py +5 -1
  25. ot/config/secrets.py +192 -190
  26. ot/decorators.py +116 -116
  27. ot/executor/__init__.py +35 -35
  28. ot/executor/base.py +16 -16
  29. ot/executor/fence_processor.py +83 -83
  30. ot/executor/linter.py +142 -142
  31. ot/executor/pep723.py +288 -288
  32. ot/executor/runner.py +20 -6
  33. ot/executor/simple.py +163 -163
  34. ot/executor/validator.py +603 -164
  35. ot/http_client.py +145 -145
  36. ot/logging/__init__.py +37 -37
  37. ot/logging/entry.py +213 -213
  38. ot/logging/format.py +191 -188
  39. ot/logging/span.py +349 -349
  40. ot/meta.py +236 -14
  41. ot/paths.py +32 -49
  42. ot/prompts.py +218 -218
  43. ot/proxy/manager.py +14 -2
  44. ot/registry/__init__.py +189 -189
  45. ot/registry/parser.py +269 -269
  46. ot/server.py +330 -315
  47. ot/shortcuts/__init__.py +15 -15
  48. ot/shortcuts/aliases.py +87 -87
  49. ot/shortcuts/snippets.py +258 -258
  50. ot/stats/__init__.py +35 -35
  51. ot/stats/html.py +2 -2
  52. ot/stats/reader.py +354 -354
  53. ot/stats/timing.py +57 -57
  54. ot/support.py +63 -63
  55. ot/tools.py +1 -1
  56. ot/utils/batch.py +161 -161
  57. ot/utils/cache.py +120 -120
  58. ot/utils/exceptions.py +23 -23
  59. ot/utils/factory.py +178 -179
  60. ot/utils/format.py +65 -65
  61. ot/utils/http.py +202 -202
  62. ot/utils/platform.py +45 -45
  63. ot/utils/truncate.py +69 -69
  64. ot_tools/__init__.py +4 -4
  65. ot_tools/_convert/__init__.py +12 -12
  66. ot_tools/_convert/pdf.py +254 -254
  67. ot_tools/diagram.yaml +167 -167
  68. ot_tools/scaffold.py +2 -2
  69. ot_tools/transform.py +124 -19
  70. ot_tools/web_fetch.py +94 -43
  71. onetool_mcp-1.0.0b1.dist-info/METADATA +0 -163
  72. onetool_mcp-1.0.0b1.dist-info/RECORD +0 -132
  73. ot/config/defaults/bench.yaml +0 -4
  74. ot/config/defaults/onetool.yaml +0 -25
  75. ot/config/defaults/servers.yaml +0 -7
  76. ot/config/defaults/snippets.yaml +0 -4
  77. ot_tools/firecrawl.py +0 -732
  78. {onetool_mcp-1.0.0b1.dist-info → onetool_mcp-1.0.0rc2.dist-info}/WHEEL +0 -0
  79. {onetool_mcp-1.0.0b1.dist-info → onetool_mcp-1.0.0rc2.dist-info}/entry_points.txt +0 -0
  80. ot/config/{defaults → global_templates}/tool_templates/extension.py +0 -0
  81. ot/config/{defaults → global_templates}/tool_templates/isolated.py +0 -0
ot_tools/_convert/pdf.py CHANGED
@@ -1,254 +1,254 @@
1
- """PDF to Markdown converter.
2
-
3
- Converts PDF documents to Markdown with:
4
- - Lazy page loading via PyMuPDF
5
- - Outline-based heading extraction
6
- - Hash-based image naming for diff stability
7
- - YAML frontmatter and TOC generation
8
- """
9
-
10
- from __future__ import annotations
11
-
12
- import io
13
- from pathlib import Path # noqa: TC003 (used at runtime)
14
- from typing import TYPE_CHECKING, Any
15
-
16
- try:
17
- import fitz # type: ignore[import-untyped] # PyMuPDF
18
- except ImportError as e:
19
- raise ImportError(
20
- "pymupdf is required for convert. Install with: pip install pymupdf"
21
- ) from e
22
-
23
- from PIL import Image
24
-
25
- if TYPE_CHECKING:
26
- from PIL.Image import Image as PILImage
27
-
28
- from ot_tools._convert.utils import (
29
- IncrementalWriter,
30
- compute_file_checksum,
31
- compute_image_hash,
32
- get_mtime_iso,
33
- normalise_whitespace,
34
- write_toc_file,
35
- )
36
-
37
-
38
- def _merge_smask(image_bytes: bytes, sm_bytes: bytes) -> bytes:
39
- """Merge soft-mask into image for transparency.
40
-
41
- Args:
42
- image_bytes: Base image bytes
43
- sm_bytes: Soft-mask bytes
44
-
45
- Returns:
46
- PNG bytes with transparency
47
- """
48
- with (
49
- Image.open(io.BytesIO(image_bytes)) as im_file,
50
- Image.open(io.BytesIO(sm_bytes)) as mask_file,
51
- ):
52
- mask: PILImage = mask_file.convert("L")
53
- im: PILImage = im_file.convert("RGBA")
54
- if mask.size != im.size:
55
- mask = mask.resize(im.size)
56
- im.putalpha(mask)
57
- buf = io.BytesIO()
58
- im.save(buf, format="PNG")
59
- return buf.getvalue()
60
-
61
-
62
- def _detect_image_format(image_bytes: bytes) -> str:
63
- """Detect image format from bytes.
64
-
65
- Args:
66
- image_bytes: Image data
67
-
68
- Returns:
69
- File extension (e.g., 'png', 'jpg')
70
- """
71
- try:
72
- with Image.open(io.BytesIO(image_bytes)) as im:
73
- format_map = {
74
- "JPEG": "jpg",
75
- "PNG": "png",
76
- "GIF": "gif",
77
- "BMP": "bmp",
78
- "TIFF": "tiff",
79
- "WEBP": "webp",
80
- }
81
- return format_map.get(im.format or "", "png")
82
- except Exception:
83
- return "png"
84
-
85
-
86
- def _get_outline_headings(doc: fitz.Document) -> list[tuple[int, str, int]]:
87
- """Extract outline/bookmarks from PDF.
88
-
89
- Args:
90
- doc: PyMuPDF document
91
-
92
- Returns:
93
- List of (level, title, page_number) tuples
94
- """
95
- try:
96
- toc = doc.get_toc()
97
- return [(level, title, page) for level, title, page in toc]
98
- except Exception:
99
- return []
100
-
101
-
102
- def _extract_and_save_image(
103
- doc: fitz.Document,
104
- xref: int,
105
- images_dir: Path,
106
- writer: IncrementalWriter,
107
- ) -> bool:
108
- """Extract a single image and save to disk.
109
-
110
- This function encapsulates image processing so that memory (image_bytes)
111
- is freed when the function returns, preventing accumulation.
112
-
113
- Args:
114
- doc: PyMuPDF document
115
- xref: Image xref in the document
116
- images_dir: Directory for saving images
117
- writer: Incremental writer for markdown output
118
-
119
- Returns:
120
- True if image was successfully extracted, False otherwise
121
- """
122
- base_image = doc.extract_image(xref)
123
- image_bytes = base_image.get("image")
124
- smask = base_image.get("smask")
125
-
126
- if not image_bytes:
127
- return False
128
-
129
- # Handle soft-mask (transparency)
130
- if smask:
131
- try:
132
- sm_base = doc.extract_image(smask)
133
- sm_bytes = sm_base.get("image")
134
- if sm_bytes:
135
- image_bytes = _merge_smask(image_bytes, sm_bytes)
136
- extension = "png"
137
- else:
138
- extension = _detect_image_format(image_bytes)
139
- except Exception:
140
- extension = _detect_image_format(image_bytes)
141
- else:
142
- extension = _detect_image_format(image_bytes)
143
-
144
- # Hash-based naming for diff stability
145
- img_hash = compute_image_hash(image_bytes)
146
- img_name = f"img_{img_hash}.{extension}"
147
- img_path = images_dir / img_name
148
-
149
- # Only write if not already extracted (dedup by hash)
150
- if not img_path.exists():
151
- images_dir.mkdir(parents=True, exist_ok=True)
152
- img_path.write_bytes(image_bytes)
153
-
154
- rel_path = f"{images_dir.name}/{img_name}"
155
- writer.write(f"![{img_name}]({rel_path})\n\n")
156
-
157
- return True
158
-
159
-
160
- def convert_pdf(
161
- input_path: Path,
162
- output_dir: Path,
163
- source_rel: str,
164
- ) -> dict[str, Any]:
165
- """Convert PDF to Markdown.
166
-
167
- Args:
168
- input_path: Path to PDF file
169
- output_dir: Directory for output files
170
- source_rel: Relative path to source for frontmatter
171
-
172
- Returns:
173
- Dict with 'output', 'pages', 'images' keys
174
- """
175
- output_dir.mkdir(parents=True, exist_ok=True)
176
-
177
- doc = fitz.open(input_path)
178
- try:
179
- total_pages = len(doc)
180
-
181
- # Get metadata for frontmatter
182
- checksum = compute_file_checksum(input_path)
183
- mtime = get_mtime_iso(input_path)
184
-
185
- # Get outline for heading insertion
186
- outline = _get_outline_headings(doc)
187
- outline_by_page: dict[int, list[tuple[int, str]]] = {}
188
- for level, title, page in outline:
189
- if page not in outline_by_page:
190
- outline_by_page[page] = []
191
- outline_by_page[page].append((level, title))
192
-
193
- # Set up images directory
194
- images_dir = output_dir / f"{input_path.stem}_images"
195
- writer = IncrementalWriter()
196
- images_extracted = 0
197
-
198
- # Process pages with lazy loading
199
- for pageno in range(total_pages):
200
- page = doc[pageno]
201
- page_num = pageno + 1
202
-
203
- # Insert outline headings for this page
204
- if page_num in outline_by_page:
205
- for level, title in outline_by_page[page_num]:
206
- writer.write_heading(min(level, 6), title)
207
- elif not outline:
208
- # No outline - use page numbers as structure
209
- writer.write_heading(1, f"Page {page_num}")
210
-
211
- # Extract text
212
- text = page.get_text("text")
213
- if text.strip():
214
- writer.write(text.rstrip() + "\n\n")
215
-
216
- # Extract images - process one at a time to minimize memory
217
- image_list = page.get_images(full=True)
218
- for img in image_list:
219
- xref = img[0]
220
- try:
221
- result = _extract_and_save_image(
222
- doc, xref, images_dir, writer
223
- )
224
- if result:
225
- images_extracted += 1
226
- except Exception:
227
- # Skip failed image extraction
228
- continue
229
- finally:
230
- doc.close()
231
-
232
- # Write main output (pure content, no frontmatter - line numbers start at 1)
233
- content = normalise_whitespace(writer.get_content())
234
- output_path = output_dir / f"{input_path.stem}.md"
235
- output_path.write_text(content, encoding="utf-8")
236
-
237
- # Write separate TOC file (includes frontmatter)
238
- headings = writer.get_headings()
239
- toc_path = write_toc_file(
240
- headings=headings,
241
- output_dir=output_dir,
242
- stem=input_path.stem,
243
- source=source_rel,
244
- converted=mtime,
245
- pages=total_pages,
246
- checksum=checksum,
247
- )
248
-
249
- return {
250
- "output": str(output_path),
251
- "toc": str(toc_path),
252
- "pages": total_pages,
253
- "images": images_extracted,
254
- }
1
+ """PDF to Markdown converter.
2
+
3
+ Converts PDF documents to Markdown with:
4
+ - Lazy page loading via PyMuPDF
5
+ - Outline-based heading extraction
6
+ - Hash-based image naming for diff stability
7
+ - YAML frontmatter and TOC generation
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import io
13
+ from pathlib import Path # noqa: TC003 (used at runtime)
14
+ from typing import TYPE_CHECKING, Any
15
+
16
+ try:
17
+ import fitz # type: ignore[import-untyped] # PyMuPDF
18
+ except ImportError as e:
19
+ raise ImportError(
20
+ "pymupdf is required for convert. Install with: pip install pymupdf"
21
+ ) from e
22
+
23
+ from PIL import Image
24
+
25
+ if TYPE_CHECKING:
26
+ from PIL.Image import Image as PILImage
27
+
28
+ from ot_tools._convert.utils import (
29
+ IncrementalWriter,
30
+ compute_file_checksum,
31
+ compute_image_hash,
32
+ get_mtime_iso,
33
+ normalise_whitespace,
34
+ write_toc_file,
35
+ )
36
+
37
+
38
+ def _merge_smask(image_bytes: bytes, sm_bytes: bytes) -> bytes:
39
+ """Merge soft-mask into image for transparency.
40
+
41
+ Args:
42
+ image_bytes: Base image bytes
43
+ sm_bytes: Soft-mask bytes
44
+
45
+ Returns:
46
+ PNG bytes with transparency
47
+ """
48
+ with (
49
+ Image.open(io.BytesIO(image_bytes)) as im_file,
50
+ Image.open(io.BytesIO(sm_bytes)) as mask_file,
51
+ ):
52
+ mask: PILImage = mask_file.convert("L")
53
+ im: PILImage = im_file.convert("RGBA")
54
+ if mask.size != im.size:
55
+ mask = mask.resize(im.size)
56
+ im.putalpha(mask)
57
+ buf = io.BytesIO()
58
+ im.save(buf, format="PNG")
59
+ return buf.getvalue()
60
+
61
+
62
+ def _detect_image_format(image_bytes: bytes) -> str:
63
+ """Detect image format from bytes.
64
+
65
+ Args:
66
+ image_bytes: Image data
67
+
68
+ Returns:
69
+ File extension (e.g., 'png', 'jpg')
70
+ """
71
+ try:
72
+ with Image.open(io.BytesIO(image_bytes)) as im:
73
+ format_map = {
74
+ "JPEG": "jpg",
75
+ "PNG": "png",
76
+ "GIF": "gif",
77
+ "BMP": "bmp",
78
+ "TIFF": "tiff",
79
+ "WEBP": "webp",
80
+ }
81
+ return format_map.get(im.format or "", "png")
82
+ except Exception:
83
+ return "png"
84
+
85
+
86
+ def _get_outline_headings(doc: fitz.Document) -> list[tuple[int, str, int]]:
87
+ """Extract outline/bookmarks from PDF.
88
+
89
+ Args:
90
+ doc: PyMuPDF document
91
+
92
+ Returns:
93
+ List of (level, title, page_number) tuples
94
+ """
95
+ try:
96
+ toc = doc.get_toc()
97
+ return [(level, title, page) for level, title, page in toc]
98
+ except Exception:
99
+ return []
100
+
101
+
102
+ def _extract_and_save_image(
103
+ doc: fitz.Document,
104
+ xref: int,
105
+ images_dir: Path,
106
+ writer: IncrementalWriter,
107
+ ) -> bool:
108
+ """Extract a single image and save to disk.
109
+
110
+ This function encapsulates image processing so that memory (image_bytes)
111
+ is freed when the function returns, preventing accumulation.
112
+
113
+ Args:
114
+ doc: PyMuPDF document
115
+ xref: Image xref in the document
116
+ images_dir: Directory for saving images
117
+ writer: Incremental writer for markdown output
118
+
119
+ Returns:
120
+ True if image was successfully extracted, False otherwise
121
+ """
122
+ base_image = doc.extract_image(xref)
123
+ image_bytes = base_image.get("image")
124
+ smask = base_image.get("smask")
125
+
126
+ if not image_bytes:
127
+ return False
128
+
129
+ # Handle soft-mask (transparency)
130
+ if smask:
131
+ try:
132
+ sm_base = doc.extract_image(smask)
133
+ sm_bytes = sm_base.get("image")
134
+ if sm_bytes:
135
+ image_bytes = _merge_smask(image_bytes, sm_bytes)
136
+ extension = "png"
137
+ else:
138
+ extension = _detect_image_format(image_bytes)
139
+ except Exception:
140
+ extension = _detect_image_format(image_bytes)
141
+ else:
142
+ extension = _detect_image_format(image_bytes)
143
+
144
+ # Hash-based naming for diff stability
145
+ img_hash = compute_image_hash(image_bytes)
146
+ img_name = f"img_{img_hash}.{extension}"
147
+ img_path = images_dir / img_name
148
+
149
+ # Only write if not already extracted (dedup by hash)
150
+ if not img_path.exists():
151
+ images_dir.mkdir(parents=True, exist_ok=True)
152
+ img_path.write_bytes(image_bytes)
153
+
154
+ rel_path = f"{images_dir.name}/{img_name}"
155
+ writer.write(f"![{img_name}]({rel_path})\n\n")
156
+
157
+ return True
158
+
159
+
160
+ def convert_pdf(
161
+ input_path: Path,
162
+ output_dir: Path,
163
+ source_rel: str,
164
+ ) -> dict[str, Any]:
165
+ """Convert PDF to Markdown.
166
+
167
+ Args:
168
+ input_path: Path to PDF file
169
+ output_dir: Directory for output files
170
+ source_rel: Relative path to source for frontmatter
171
+
172
+ Returns:
173
+ Dict with 'output', 'pages', 'images' keys
174
+ """
175
+ output_dir.mkdir(parents=True, exist_ok=True)
176
+
177
+ doc = fitz.open(input_path)
178
+ try:
179
+ total_pages = len(doc)
180
+
181
+ # Get metadata for frontmatter
182
+ checksum = compute_file_checksum(input_path)
183
+ mtime = get_mtime_iso(input_path)
184
+
185
+ # Get outline for heading insertion
186
+ outline = _get_outline_headings(doc)
187
+ outline_by_page: dict[int, list[tuple[int, str]]] = {}
188
+ for level, title, page in outline:
189
+ if page not in outline_by_page:
190
+ outline_by_page[page] = []
191
+ outline_by_page[page].append((level, title))
192
+
193
+ # Set up images directory
194
+ images_dir = output_dir / f"{input_path.stem}_images"
195
+ writer = IncrementalWriter()
196
+ images_extracted = 0
197
+
198
+ # Process pages with lazy loading
199
+ for pageno in range(total_pages):
200
+ page = doc[pageno]
201
+ page_num = pageno + 1
202
+
203
+ # Insert outline headings for this page
204
+ if page_num in outline_by_page:
205
+ for level, title in outline_by_page[page_num]:
206
+ writer.write_heading(min(level, 6), title)
207
+ elif not outline:
208
+ # No outline - use page numbers as structure
209
+ writer.write_heading(1, f"Page {page_num}")
210
+
211
+ # Extract text
212
+ text = page.get_text("text")
213
+ if text.strip():
214
+ writer.write(text.rstrip() + "\n\n")
215
+
216
+ # Extract images - process one at a time to minimize memory
217
+ image_list = page.get_images(full=True)
218
+ for img in image_list:
219
+ xref = img[0]
220
+ try:
221
+ result = _extract_and_save_image(
222
+ doc, xref, images_dir, writer
223
+ )
224
+ if result:
225
+ images_extracted += 1
226
+ except Exception:
227
+ # Skip failed image extraction
228
+ continue
229
+ finally:
230
+ doc.close()
231
+
232
+ # Write main output (pure content, no frontmatter - line numbers start at 1)
233
+ content = normalise_whitespace(writer.get_content())
234
+ output_path = output_dir / f"{input_path.stem}.md"
235
+ output_path.write_text(content, encoding="utf-8")
236
+
237
+ # Write separate TOC file (includes frontmatter)
238
+ headings = writer.get_headings()
239
+ toc_path = write_toc_file(
240
+ headings=headings,
241
+ output_dir=output_dir,
242
+ stem=input_path.stem,
243
+ source=source_rel,
244
+ converted=mtime,
245
+ pages=total_pages,
246
+ checksum=checksum,
247
+ )
248
+
249
+ return {
250
+ "output": str(output_path),
251
+ "toc": str(toc_path),
252
+ "pages": total_pages,
253
+ "images": images_extracted,
254
+ }