onetool-mcp 1.0.0b1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- bench/__init__.py +5 -0
- bench/cli.py +69 -0
- bench/harness/__init__.py +66 -0
- bench/harness/client.py +692 -0
- bench/harness/config.py +397 -0
- bench/harness/csv_writer.py +109 -0
- bench/harness/evaluate.py +512 -0
- bench/harness/metrics.py +283 -0
- bench/harness/runner.py +899 -0
- bench/py.typed +0 -0
- bench/reporter.py +629 -0
- bench/run.py +487 -0
- bench/secrets.py +101 -0
- bench/utils.py +16 -0
- onetool/__init__.py +4 -0
- onetool/cli.py +391 -0
- onetool/py.typed +0 -0
- onetool_mcp-1.0.0b1.dist-info/METADATA +163 -0
- onetool_mcp-1.0.0b1.dist-info/RECORD +132 -0
- onetool_mcp-1.0.0b1.dist-info/WHEEL +4 -0
- onetool_mcp-1.0.0b1.dist-info/entry_points.txt +3 -0
- onetool_mcp-1.0.0b1.dist-info/licenses/LICENSE.txt +687 -0
- onetool_mcp-1.0.0b1.dist-info/licenses/NOTICE.txt +64 -0
- ot/__init__.py +37 -0
- ot/__main__.py +6 -0
- ot/_cli.py +107 -0
- ot/_tui.py +53 -0
- ot/config/__init__.py +46 -0
- ot/config/defaults/bench.yaml +4 -0
- ot/config/defaults/diagram-templates/api-flow.mmd +33 -0
- ot/config/defaults/diagram-templates/c4-context.puml +30 -0
- ot/config/defaults/diagram-templates/class-diagram.mmd +87 -0
- ot/config/defaults/diagram-templates/feature-mindmap.mmd +70 -0
- ot/config/defaults/diagram-templates/microservices.d2 +81 -0
- ot/config/defaults/diagram-templates/project-gantt.mmd +37 -0
- ot/config/defaults/diagram-templates/state-machine.mmd +42 -0
- ot/config/defaults/onetool.yaml +25 -0
- ot/config/defaults/prompts.yaml +97 -0
- ot/config/defaults/servers.yaml +7 -0
- ot/config/defaults/snippets.yaml +4 -0
- ot/config/defaults/tool_templates/__init__.py +7 -0
- ot/config/defaults/tool_templates/extension.py +52 -0
- ot/config/defaults/tool_templates/isolated.py +61 -0
- ot/config/dynamic.py +121 -0
- ot/config/global_templates/__init__.py +2 -0
- ot/config/global_templates/bench-secrets-template.yaml +6 -0
- ot/config/global_templates/bench.yaml +9 -0
- ot/config/global_templates/onetool.yaml +27 -0
- ot/config/global_templates/secrets-template.yaml +44 -0
- ot/config/global_templates/servers.yaml +18 -0
- ot/config/global_templates/snippets.yaml +235 -0
- ot/config/loader.py +1087 -0
- ot/config/mcp.py +145 -0
- ot/config/secrets.py +190 -0
- ot/config/tool_config.py +125 -0
- ot/decorators.py +116 -0
- ot/executor/__init__.py +35 -0
- ot/executor/base.py +16 -0
- ot/executor/fence_processor.py +83 -0
- ot/executor/linter.py +142 -0
- ot/executor/pack_proxy.py +260 -0
- ot/executor/param_resolver.py +140 -0
- ot/executor/pep723.py +288 -0
- ot/executor/result_store.py +369 -0
- ot/executor/runner.py +496 -0
- ot/executor/simple.py +163 -0
- ot/executor/tool_loader.py +396 -0
- ot/executor/validator.py +398 -0
- ot/executor/worker_pool.py +388 -0
- ot/executor/worker_proxy.py +189 -0
- ot/http_client.py +145 -0
- ot/logging/__init__.py +37 -0
- ot/logging/config.py +315 -0
- ot/logging/entry.py +213 -0
- ot/logging/format.py +188 -0
- ot/logging/span.py +349 -0
- ot/meta.py +1555 -0
- ot/paths.py +453 -0
- ot/prompts.py +218 -0
- ot/proxy/__init__.py +21 -0
- ot/proxy/manager.py +396 -0
- ot/py.typed +0 -0
- ot/registry/__init__.py +189 -0
- ot/registry/models.py +57 -0
- ot/registry/parser.py +269 -0
- ot/registry/registry.py +413 -0
- ot/server.py +315 -0
- ot/shortcuts/__init__.py +15 -0
- ot/shortcuts/aliases.py +87 -0
- ot/shortcuts/snippets.py +258 -0
- ot/stats/__init__.py +35 -0
- ot/stats/html.py +250 -0
- ot/stats/jsonl_writer.py +283 -0
- ot/stats/reader.py +354 -0
- ot/stats/timing.py +57 -0
- ot/support.py +63 -0
- ot/tools.py +114 -0
- ot/utils/__init__.py +81 -0
- ot/utils/batch.py +161 -0
- ot/utils/cache.py +120 -0
- ot/utils/deps.py +403 -0
- ot/utils/exceptions.py +23 -0
- ot/utils/factory.py +179 -0
- ot/utils/format.py +65 -0
- ot/utils/http.py +202 -0
- ot/utils/platform.py +45 -0
- ot/utils/sanitize.py +130 -0
- ot/utils/truncate.py +69 -0
- ot_tools/__init__.py +4 -0
- ot_tools/_convert/__init__.py +12 -0
- ot_tools/_convert/excel.py +279 -0
- ot_tools/_convert/pdf.py +254 -0
- ot_tools/_convert/powerpoint.py +268 -0
- ot_tools/_convert/utils.py +358 -0
- ot_tools/_convert/word.py +283 -0
- ot_tools/brave_search.py +604 -0
- ot_tools/code_search.py +736 -0
- ot_tools/context7.py +495 -0
- ot_tools/convert.py +614 -0
- ot_tools/db.py +415 -0
- ot_tools/diagram.py +1604 -0
- ot_tools/diagram.yaml +167 -0
- ot_tools/excel.py +1372 -0
- ot_tools/file.py +1348 -0
- ot_tools/firecrawl.py +732 -0
- ot_tools/grounding_search.py +646 -0
- ot_tools/package.py +604 -0
- ot_tools/py.typed +0 -0
- ot_tools/ripgrep.py +544 -0
- ot_tools/scaffold.py +471 -0
- ot_tools/transform.py +213 -0
- ot_tools/web_fetch.py +384 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
"""Word document to Markdown converter.
|
|
2
|
+
|
|
3
|
+
Converts DOCX documents to Markdown with:
|
|
4
|
+
- Heading style detection (Heading 1-6)
|
|
5
|
+
- Table conversion
|
|
6
|
+
- Hash-based image naming for diff stability
|
|
7
|
+
- YAML frontmatter and TOC generation
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
from pathlib import Path # noqa: TC003 (used at runtime)
|
|
13
|
+
from typing import TYPE_CHECKING, Any
|
|
14
|
+
|
|
15
|
+
try:
|
|
16
|
+
from docx import Document
|
|
17
|
+
from docx.oxml.table import CT_Tbl
|
|
18
|
+
from docx.oxml.text.paragraph import CT_P
|
|
19
|
+
from docx.table import Table
|
|
20
|
+
from docx.text.paragraph import Paragraph
|
|
21
|
+
except ImportError as e:
|
|
22
|
+
raise ImportError(
|
|
23
|
+
"python-docx is required for convert. Install with: pip install python-docx"
|
|
24
|
+
) from e
|
|
25
|
+
|
|
26
|
+
from ot_tools._convert.utils import (
|
|
27
|
+
IncrementalWriter,
|
|
28
|
+
compute_file_checksum,
|
|
29
|
+
get_mtime_iso,
|
|
30
|
+
normalise_whitespace,
|
|
31
|
+
save_image,
|
|
32
|
+
write_toc_file,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
if TYPE_CHECKING:
|
|
36
|
+
from docx.document import Document as DocumentType
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def convert_word(
    input_path: Path,
    output_dir: Path,
    source_rel: str,
) -> dict[str, Any]:
    """Render a DOCX file as Markdown and write a companion TOC file.

    Body elements are walked in document order: paragraphs go through
    ``_process_paragraph`` and tables through ``_process_table``. Afterwards
    every image relationship that was not handled inline is passed to
    ``save_image`` so it is still extracted.

    The Markdown body is written to ``<output_dir>/<stem>.md`` with no
    frontmatter; the metadata (source, checksum, estimated page count, ...)
    is handed to ``write_toc_file`` instead.

    Args:
        input_path: Path to DOCX file
        output_dir: Directory for output files (created when missing)
        source_rel: Relative path to source for frontmatter

    Returns:
        Dict with 'output', 'toc', 'paragraphs', 'tables', 'images' keys
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    stem = input_path.stem

    doc: DocumentType = Document(str(input_path))
    try:
        # Metadata destined for the TOC frontmatter.
        checksum = compute_file_checksum(input_path)
        mtime = get_mtime_iso(input_path)

        # DOCX carries no reliable page count, so estimate one page per 40
        # paragraphs; the value is later reported with a "~" prefix.
        page_count_estimate = max(1, len(doc.paragraphs) // 40)

        images_dir = output_dir / f"{stem}_images"
        writer = IncrementalWriter()
        seen_image_rels: set[str] = set()
        paragraph_total = 0
        table_total = 0

        # Walk paragraphs and tables in the order they appear in the body.
        for element in doc.element.body:
            if isinstance(element, CT_Tbl):
                _process_table(Table(element, doc), writer)
                table_total += 1
                continue
            if not isinstance(element, CT_P):
                continue

            paragraph = Paragraph(element, doc)
            _process_paragraph(
                paragraph, writer, doc, images_dir, seen_image_rels
            )
            if paragraph.text.strip():
                paragraph_total += 1

        # Everything recorded so far was extracted inline by the paragraphs.
        image_total = len(seen_image_rels)

        # Sweep up image relationships that no paragraph picked up.
        for rel_id, rel in doc.part.rels.items():
            if "image" not in rel.target_ref or rel_id in seen_image_rels:
                continue
            try:
                save_image(
                    rel.target_part.blob, images_dir, rel.target_part.content_type
                )
            except Exception:
                # Best-effort: an unreadable image must not abort the conversion.
                continue
            image_total += 1
            seen_image_rels.add(rel_id)
    finally:
        # python-docx has no explicit close(); dropping the reference lets the
        # document be garbage-collected as early as possible.
        del doc

    # Main output: pure content, no frontmatter, so line numbers start at 1.
    markdown_path = output_dir / f"{stem}.md"
    markdown_path.write_text(
        normalise_whitespace(writer.get_content()), encoding="utf-8"
    )

    # Separate TOC file, which carries the frontmatter.
    toc_path = write_toc_file(
        headings=writer.get_headings(),
        output_dir=output_dir,
        stem=stem,
        source=source_rel,
        converted=mtime,
        pages=f"~{page_count_estimate}",  # ~ indicates estimated page count
        checksum=checksum,
    )

    return {
        "output": str(markdown_path),
        "toc": str(toc_path),
        "paragraphs": paragraph_total,
        "tables": table_total,
        "images": image_total,
    }
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def _process_paragraph(
    paragraph: Paragraph,
    writer: IncrementalWriter,
    doc: Any,
    images_dir: Path,
    processed_rels: set[str],
) -> None:
    """Write one paragraph to ``writer`` as a heading, quote, list item or text.

    The lower-cased style name selects the Markdown form:

    - ``heading N``  -> heading of level N, clamped to 1-6
    - ``subtitle``   -> level-2 heading
    - ``title``      -> level-1 heading
    - ``quote`` / ``block`` -> block quote
    - ``list``       -> bullet item
    - anything else  -> plain paragraph followed by a blank line

    A paragraph without text is skipped unless it contains drawings; in that
    case only the image references are written.

    Args:
        paragraph: Paragraph to convert.
        writer: Output sink; receives ``write_heading`` / ``write`` calls.
        doc: Owning document, forwarded to the image helpers.
        images_dir: Image directory, forwarded to the image helpers.
        processed_rels: Relationship ids already handled; forwarded to the
            image helpers.
    """
    text = paragraph.text.strip()
    if not text:
        # Image-only paragraphs have no text but must still yield their image
        # references, otherwise the picture silently vanishes from the output.
        try:
            drawings = paragraph._element.xpath(".//w:drawing")
        except Exception:
            # Best-effort: a malformed element must not abort the conversion.
            drawings = []
        image_refs = []
        for drawing in drawings:
            ref = _process_drawing(drawing, doc, images_dir, processed_rels)
            if ref:
                image_refs.append(ref)
        if image_refs:
            writer.write("".join(image_refs) + "\n\n")
        return

    # Get style
    style_name = (
        paragraph.style.name.lower() if paragraph.style and paragraph.style.name else ""
    )

    # Handle headings via style ("Heading 3" -> level 3)
    if "heading" in style_name:
        try:
            level = int(style_name.split()[-1])
        except (ValueError, IndexError):
            # No trailing number (e.g. "TOC Heading"): treat as ordinary text.
            pass
        else:
            writer.write_heading(max(1, min(level, 6)), text)
            return

    # Handle special styles. "subtitle" must be tested first because the
    # substring "title" is contained in it.
    if "subtitle" in style_name:
        writer.write_heading(2, text)
        return
    if "title" in style_name:
        writer.write_heading(1, text)
        return

    # Process formatted text
    formatted = _format_paragraph_runs(paragraph, doc, images_dir, processed_rels)

    # Handle quote/block styles: prefix every line with the quote marker
    if "quote" in style_name or "block" in style_name:
        quoted = "\n".join(f"> {line}" for line in formatted.split("\n"))
        writer.write(quoted + "\n\n")
        return

    # Handle list styles
    if "list" in style_name:
        writer.write(f"- {formatted}\n")
        return

    # Regular paragraph
    writer.write(formatted + "\n\n")
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _format_paragraph_runs(
    paragraph: Paragraph,
    doc: Any,
    images_dir: Path,
    processed_rels: set[str],
) -> str:
    """Build the Markdown text of a paragraph from its drawings and runs.

    Image references for every drawing in the paragraph come first, followed
    by the text of each run. Bold and italic runs are wrapped in ``**`` /
    ``*`` / ``***`` and underlined runs in ``<u>...</u>``.

    Markers are placed around the non-whitespace core of a run only; leading
    and trailing whitespace stays outside them, because Markdown does not
    recognise emphasis such as ``**bold **``. Whitespace-only runs are emitted
    unchanged.

    Args:
        paragraph: Paragraph whose drawings and runs are rendered.
        doc: Owning document, forwarded to ``_process_drawing``.
        images_dir: Image directory, forwarded to ``_process_drawing``.
        processed_rels: Relationship ids already handled, forwarded to
            ``_process_drawing``.

    Returns:
        The concatenated Markdown fragment (may be empty).
    """
    parts: list[str] = []

    # Process inline images
    try:
        for drawing in paragraph._element.xpath(".//w:drawing"):
            img_ref = _process_drawing(drawing, doc, images_dir, processed_rels)
            if img_ref:
                parts.append(img_ref)
    except Exception:
        # Best-effort: a broken drawing must not cost us the paragraph text.
        pass

    # Process text runs
    for run in paragraph.runs:
        text = run.text
        if not text:
            continue

        core = text.strip()
        if not core:
            # Nothing to emphasise; keep the spacing between neighbouring runs.
            parts.append(text)
            continue

        # Split off surrounding whitespace so it ends up outside the markers.
        leading = text[: len(text) - len(text.lstrip())]
        trailing = text[len(text.rstrip()):]

        # Apply formatting
        if run.bold and run.italic:
            core = f"***{core}***"
        elif run.bold:
            core = f"**{core}**"
        elif run.italic:
            core = f"*{core}*"

        if run.underline:
            core = f"<u>{core}</u>"

        parts.append(f"{leading}{core}{trailing}")

    return "".join(parts)
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def _process_drawing(
    drawing_elem: Any,
    doc: Any,
    images_dir: Path,
    processed_rels: set[str],
) -> str:
    """Save the image embedded in a drawing and return its Markdown reference.

    Each ``a:blip`` below ``drawing_elem`` is inspected in turn. The first one
    whose ``r:embed`` relationship has not been handled yet, resolves to an
    image part and is saved successfully decides the result.

    A relationship id is added to ``processed_rels`` only after the save
    succeeded, so a failed image is not recorded as extracted.

    Args:
        drawing_elem: Drawing element; must offer ``xpath``.
        doc: Document whose ``part.rels`` mapping resolves relationship ids.
        images_dir: Directory passed to ``save_image``; its name becomes the
            first path segment of the reference.
        processed_rels: Relationship ids already handled; updated in place.

    Returns:
        ``![<file name>](<images_dir name>/<file name>)`` for the saved image,
        or ``""`` when the drawing yields no new image.
    """
    embed_attr = (
        "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
    )
    try:
        for blip in drawing_elem.xpath(".//a:blip"):
            r_embed = blip.get(embed_attr)
            if not r_embed or r_embed in processed_rels:
                continue

            # Direct dictionary lookup by relationship id.
            rel = doc.part.rels.get(r_embed)
            if rel is None or "image" not in rel.target_ref:
                continue

            try:
                target = rel.target_part
                img_path = save_image(target.blob, images_dir, target.content_type)
            except Exception:
                # Best-effort: skip an unreadable image and try the next blip.
                continue

            processed_rels.add(r_embed)
            rel_path = f"{images_dir.name}/{img_path.name}"
            return f"![{img_path.name}]({rel_path})"
    except Exception:
        # Best-effort: a malformed drawing must not abort the conversion.
        pass
    return ""
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def _process_table(table: Table, writer: IncrementalWriter) -> None:
    """Write ``table`` to ``writer`` as a Markdown pipe table.

    The first row becomes the header, followed by a ``---`` separator row and
    one line per remaining row; a blank line terminates the table. Nothing is
    written when the table has no rows or the first row has no cells.

    Cell text is stripped, ``|`` is escaped and line breaks become ``<br>`` so
    that a cell can never break the row it belongs to. All rows are padded
    with empty cells to the width of the widest row, so no cell is dropped
    when a row has more cells than the header.

    Args:
        table: Table to convert; rows and their cells are read via
            ``table.rows`` / ``row.cells`` / ``cell.text``.
        writer: Output sink; receives one ``write`` call per emitted line.
    """
    rows = [
        [
            cell.text.strip().replace("|", "\\|").replace("\n", "<br>")
            for cell in row.cells
        ]
        for row in table.rows
    ]
    if not rows or not rows[0]:
        return

    width = max(len(cells) for cells in rows)
    # Pad short rows (including the header) instead of truncating long ones.
    padded = [cells + [""] * (width - len(cells)) for cells in rows]

    # Header row and separator
    writer.write("| " + " | ".join(padded[0]) + " |\n")
    writer.write("| " + " | ".join("---" for _ in range(width)) + " |\n")

    # Data rows
    for cells in padded[1:]:
        writer.write("| " + " | ".join(cells) + " |\n")

    writer.write("\n")
|