onetool-mcp 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. bench/__init__.py +5 -0
  2. bench/cli.py +69 -0
  3. bench/harness/__init__.py +66 -0
  4. bench/harness/client.py +692 -0
  5. bench/harness/config.py +397 -0
  6. bench/harness/csv_writer.py +109 -0
  7. bench/harness/evaluate.py +512 -0
  8. bench/harness/metrics.py +283 -0
  9. bench/harness/runner.py +899 -0
  10. bench/py.typed +0 -0
  11. bench/reporter.py +629 -0
  12. bench/run.py +487 -0
  13. bench/secrets.py +101 -0
  14. bench/utils.py +16 -0
  15. onetool/__init__.py +4 -0
  16. onetool/cli.py +391 -0
  17. onetool/py.typed +0 -0
  18. onetool_mcp-1.0.0b1.dist-info/METADATA +163 -0
  19. onetool_mcp-1.0.0b1.dist-info/RECORD +132 -0
  20. onetool_mcp-1.0.0b1.dist-info/WHEEL +4 -0
  21. onetool_mcp-1.0.0b1.dist-info/entry_points.txt +3 -0
  22. onetool_mcp-1.0.0b1.dist-info/licenses/LICENSE.txt +687 -0
  23. onetool_mcp-1.0.0b1.dist-info/licenses/NOTICE.txt +64 -0
  24. ot/__init__.py +37 -0
  25. ot/__main__.py +6 -0
  26. ot/_cli.py +107 -0
  27. ot/_tui.py +53 -0
  28. ot/config/__init__.py +46 -0
  29. ot/config/defaults/bench.yaml +4 -0
  30. ot/config/defaults/diagram-templates/api-flow.mmd +33 -0
  31. ot/config/defaults/diagram-templates/c4-context.puml +30 -0
  32. ot/config/defaults/diagram-templates/class-diagram.mmd +87 -0
  33. ot/config/defaults/diagram-templates/feature-mindmap.mmd +70 -0
  34. ot/config/defaults/diagram-templates/microservices.d2 +81 -0
  35. ot/config/defaults/diagram-templates/project-gantt.mmd +37 -0
  36. ot/config/defaults/diagram-templates/state-machine.mmd +42 -0
  37. ot/config/defaults/onetool.yaml +25 -0
  38. ot/config/defaults/prompts.yaml +97 -0
  39. ot/config/defaults/servers.yaml +7 -0
  40. ot/config/defaults/snippets.yaml +4 -0
  41. ot/config/defaults/tool_templates/__init__.py +7 -0
  42. ot/config/defaults/tool_templates/extension.py +52 -0
  43. ot/config/defaults/tool_templates/isolated.py +61 -0
  44. ot/config/dynamic.py +121 -0
  45. ot/config/global_templates/__init__.py +2 -0
  46. ot/config/global_templates/bench-secrets-template.yaml +6 -0
  47. ot/config/global_templates/bench.yaml +9 -0
  48. ot/config/global_templates/onetool.yaml +27 -0
  49. ot/config/global_templates/secrets-template.yaml +44 -0
  50. ot/config/global_templates/servers.yaml +18 -0
  51. ot/config/global_templates/snippets.yaml +235 -0
  52. ot/config/loader.py +1087 -0
  53. ot/config/mcp.py +145 -0
  54. ot/config/secrets.py +190 -0
  55. ot/config/tool_config.py +125 -0
  56. ot/decorators.py +116 -0
  57. ot/executor/__init__.py +35 -0
  58. ot/executor/base.py +16 -0
  59. ot/executor/fence_processor.py +83 -0
  60. ot/executor/linter.py +142 -0
  61. ot/executor/pack_proxy.py +260 -0
  62. ot/executor/param_resolver.py +140 -0
  63. ot/executor/pep723.py +288 -0
  64. ot/executor/result_store.py +369 -0
  65. ot/executor/runner.py +496 -0
  66. ot/executor/simple.py +163 -0
  67. ot/executor/tool_loader.py +396 -0
  68. ot/executor/validator.py +398 -0
  69. ot/executor/worker_pool.py +388 -0
  70. ot/executor/worker_proxy.py +189 -0
  71. ot/http_client.py +145 -0
  72. ot/logging/__init__.py +37 -0
  73. ot/logging/config.py +315 -0
  74. ot/logging/entry.py +213 -0
  75. ot/logging/format.py +188 -0
  76. ot/logging/span.py +349 -0
  77. ot/meta.py +1555 -0
  78. ot/paths.py +453 -0
  79. ot/prompts.py +218 -0
  80. ot/proxy/__init__.py +21 -0
  81. ot/proxy/manager.py +396 -0
  82. ot/py.typed +0 -0
  83. ot/registry/__init__.py +189 -0
  84. ot/registry/models.py +57 -0
  85. ot/registry/parser.py +269 -0
  86. ot/registry/registry.py +413 -0
  87. ot/server.py +315 -0
  88. ot/shortcuts/__init__.py +15 -0
  89. ot/shortcuts/aliases.py +87 -0
  90. ot/shortcuts/snippets.py +258 -0
  91. ot/stats/__init__.py +35 -0
  92. ot/stats/html.py +250 -0
  93. ot/stats/jsonl_writer.py +283 -0
  94. ot/stats/reader.py +354 -0
  95. ot/stats/timing.py +57 -0
  96. ot/support.py +63 -0
  97. ot/tools.py +114 -0
  98. ot/utils/__init__.py +81 -0
  99. ot/utils/batch.py +161 -0
  100. ot/utils/cache.py +120 -0
  101. ot/utils/deps.py +403 -0
  102. ot/utils/exceptions.py +23 -0
  103. ot/utils/factory.py +179 -0
  104. ot/utils/format.py +65 -0
  105. ot/utils/http.py +202 -0
  106. ot/utils/platform.py +45 -0
  107. ot/utils/sanitize.py +130 -0
  108. ot/utils/truncate.py +69 -0
  109. ot_tools/__init__.py +4 -0
  110. ot_tools/_convert/__init__.py +12 -0
  111. ot_tools/_convert/excel.py +279 -0
  112. ot_tools/_convert/pdf.py +254 -0
  113. ot_tools/_convert/powerpoint.py +268 -0
  114. ot_tools/_convert/utils.py +358 -0
  115. ot_tools/_convert/word.py +283 -0
  116. ot_tools/brave_search.py +604 -0
  117. ot_tools/code_search.py +736 -0
  118. ot_tools/context7.py +495 -0
  119. ot_tools/convert.py +614 -0
  120. ot_tools/db.py +415 -0
  121. ot_tools/diagram.py +1604 -0
  122. ot_tools/diagram.yaml +167 -0
  123. ot_tools/excel.py +1372 -0
  124. ot_tools/file.py +1348 -0
  125. ot_tools/firecrawl.py +732 -0
  126. ot_tools/grounding_search.py +646 -0
  127. ot_tools/package.py +604 -0
  128. ot_tools/py.typed +0 -0
  129. ot_tools/ripgrep.py +544 -0
  130. ot_tools/scaffold.py +471 -0
  131. ot_tools/transform.py +213 -0
  132. ot_tools/web_fetch.py +384 -0
@@ -0,0 +1,283 @@
1
+ """Word document to Markdown converter.
2
+
3
+ Converts DOCX documents to Markdown with:
4
+ - Heading style detection (Heading 1-6)
5
+ - Table conversion
6
+ - Hash-based image naming for diff stability
7
+ - YAML frontmatter and TOC generation
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ from pathlib import Path # noqa: TC003 (used at runtime)
13
+ from typing import TYPE_CHECKING, Any
14
+
15
+ try:
16
+ from docx import Document
17
+ from docx.oxml.table import CT_Tbl
18
+ from docx.oxml.text.paragraph import CT_P
19
+ from docx.table import Table
20
+ from docx.text.paragraph import Paragraph
21
+ except ImportError as e:
22
+ raise ImportError(
23
+ "python-docx is required for convert. Install with: pip install python-docx"
24
+ ) from e
25
+
26
+ from ot_tools._convert.utils import (
27
+ IncrementalWriter,
28
+ compute_file_checksum,
29
+ get_mtime_iso,
30
+ normalise_whitespace,
31
+ save_image,
32
+ write_toc_file,
33
+ )
34
+
35
+ if TYPE_CHECKING:
36
+ from docx.document import Document as DocumentType
37
+
38
+
39
def convert_word(
    input_path: Path,
    output_dir: Path,
    source_rel: str,
) -> dict[str, Any]:
    """Render a DOCX file as Markdown plus a companion TOC file.

    Body elements are visited in document order: paragraphs are handed to
    ``_process_paragraph`` and tables to ``_process_table``. Image
    relationships that were not picked up while processing paragraphs are
    saved afterwards on a best-effort basis.

    Args:
        input_path: Path to the DOCX file.
        output_dir: Directory for output files; created if it does not exist.
        source_rel: Relative path to the source, passed to the TOC writer.

    Returns:
        Dict with 'output', 'toc', 'paragraphs', 'tables' and 'images' keys.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    stem = input_path.stem

    document: DocumentType = Document(str(input_path))
    try:
        # Metadata that ends up in the TOC file's frontmatter.
        checksum = compute_file_checksum(input_path)
        mtime = get_mtime_iso(input_path)

        # Word does not store an exact page count, so one page per 40
        # paragraphs is used as a rough estimate (never less than 1).
        page_count_estimate = max(1, len(document.paragraphs) // 40)

        images_dir = output_dir / f"{stem}_images"
        writer = IncrementalWriter()
        seen_image_rels: set[str] = set()
        paragraph_total = 0
        table_total = 0

        # Walk the body so paragraphs and tables keep their original order.
        for node in document.element.body:
            if isinstance(node, CT_P):
                para = Paragraph(node, document)
                _process_paragraph(
                    para, writer, document, images_dir, seen_image_rels
                )
                if para.text.strip():
                    paragraph_total += 1
                continue

            if isinstance(node, CT_Tbl):
                _process_table(Table(node, document), writer)
                table_total += 1

        # Every relationship id recorded so far came from paragraph processing.
        image_total = len(seen_image_rels)

        # Sweep up image relationships that were not handled inline.
        for rel_id, rel in document.part.rels.items():
            if not ("image" in rel.target_ref and rel_id not in seen_image_rels):
                continue
            try:
                blob = rel.target_part.blob
                save_image(blob, images_dir, rel.target_part.content_type)
            except Exception:
                # Best effort: an image that cannot be saved is skipped.
                continue
            else:
                image_total += 1
                seen_image_rels.add(rel_id)
    finally:
        # python-docx documents have no explicit close(); dropping the
        # reference is the only release step available here.
        del document

    # Main output is pure content (no frontmatter) so line numbers start at 1.
    markdown = normalise_whitespace(writer.get_content())
    output_path = output_dir / f"{stem}.md"
    output_path.write_text(markdown, encoding="utf-8")

    # The TOC lives in a separate file, which also carries the frontmatter.
    toc_path = write_toc_file(
        headings=writer.get_headings(),
        output_dir=output_dir,
        stem=stem,
        source=source_rel,
        converted=mtime,
        pages=f"~{page_count_estimate}",  # "~" marks the count as an estimate
        checksum=checksum,
    )

    summary: dict[str, Any] = {
        "output": str(output_path),
        "toc": str(toc_path),
        "paragraphs": paragraph_total,
        "tables": table_total,
        "images": image_total,
    }
    return summary
132
+
133
+
134
+ def _process_paragraph(
135
+ paragraph: Paragraph,
136
+ writer: IncrementalWriter,
137
+ doc: Any,
138
+ images_dir: Path,
139
+ processed_rels: set[str],
140
+ ) -> None:
141
+ """Process a paragraph, handling headings, text, and images."""
142
+ text = paragraph.text.strip()
143
+ if not text:
144
+ return
145
+
146
+ # Get style
147
+ style_name = (
148
+ paragraph.style.name.lower() if paragraph.style and paragraph.style.name else ""
149
+ )
150
+
151
+ # Handle headings via style
152
+ if "heading" in style_name:
153
+ try:
154
+ level = int(style_name.split()[-1])
155
+ level = min(level, 6)
156
+ writer.write_heading(level, text)
157
+ return
158
+ except (ValueError, IndexError):
159
+ pass
160
+
161
+ # Handle special styles
162
+ if "title" in style_name:
163
+ writer.write_heading(1, text)
164
+ return
165
+ elif "subtitle" in style_name:
166
+ writer.write_heading(2, text)
167
+ return
168
+
169
+ # Process formatted text
170
+ formatted = _format_paragraph_runs(paragraph, doc, images_dir, processed_rels)
171
+
172
+ # Handle quote/block styles
173
+ if "quote" in style_name or "block" in style_name:
174
+ lines = formatted.split("\n")
175
+ formatted = "\n".join(f"> {line}" for line in lines)
176
+ writer.write(formatted + "\n\n")
177
+ return
178
+
179
+ # Handle list styles
180
+ if "list" in style_name:
181
+ writer.write(f"- {formatted}\n")
182
+ return
183
+
184
+ # Regular paragraph
185
+ writer.write(formatted + "\n\n")
186
+
187
+
188
+ def _format_paragraph_runs(
189
+ paragraph: Paragraph,
190
+ doc: Any,
191
+ images_dir: Path,
192
+ processed_rels: set[str],
193
+ ) -> str:
194
+ """Process runs within a paragraph for formatting and images."""
195
+ parts: list[str] = []
196
+
197
+ # Process inline images
198
+ try:
199
+ drawings = paragraph._element.xpath(".//w:drawing")
200
+ for drawing in drawings:
201
+ img_ref = _process_drawing(drawing, doc, images_dir, processed_rels)
202
+ if img_ref:
203
+ parts.append(img_ref)
204
+ except Exception:
205
+ pass
206
+
207
+ # Process text runs
208
+ for run in paragraph.runs:
209
+ text = run.text
210
+ if not text:
211
+ continue
212
+
213
+ # Apply formatting
214
+ if run.bold and run.italic:
215
+ text = f"***{text}***"
216
+ elif run.bold:
217
+ text = f"**{text}**"
218
+ elif run.italic:
219
+ text = f"*{text}*"
220
+
221
+ if run.underline:
222
+ text = f"<u>{text}</u>"
223
+
224
+ parts.append(text)
225
+
226
+ return "".join(parts)
227
+
228
+
229
+ def _process_drawing(
230
+ drawing_elem: Any,
231
+ doc: Any,
232
+ images_dir: Path,
233
+ processed_rels: set[str],
234
+ ) -> str:
235
+ """Extract image from drawing element."""
236
+ try:
237
+ blips = drawing_elem.xpath(".//a:blip")
238
+ for blip in blips:
239
+ r_embed = blip.get(
240
+ "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}embed"
241
+ )
242
+
243
+ if r_embed and r_embed not in processed_rels:
244
+ # Direct dictionary lookup instead of iteration - O(1) vs O(n)
245
+ rel = doc.part.rels.get(r_embed)
246
+ if rel is not None and "image" in rel.target_ref:
247
+ try:
248
+ image_data = rel.target_part.blob
249
+ processed_rels.add(r_embed)
250
+
251
+ img_path = save_image(
252
+ image_data, images_dir, rel.target_part.content_type
253
+ )
254
+ rel_path = f"{images_dir.name}/{img_path.name}"
255
+ return f"![{img_path.name}]({rel_path})"
256
+ except Exception:
257
+ return ""
258
+ except Exception:
259
+ pass
260
+ return ""
261
+
262
+
263
+ def _process_table(table: Table, writer: IncrementalWriter) -> None:
264
+ """Convert table to Markdown format."""
265
+ if not table.rows:
266
+ return
267
+
268
+ # Process header row
269
+ header_cells = [cell.text.strip() for cell in table.rows[0].cells]
270
+ if not header_cells:
271
+ return
272
+
273
+ writer.write("| " + " | ".join(header_cells) + " |\n")
274
+ writer.write("| " + " | ".join("---" for _ in header_cells) + " |\n")
275
+
276
+ # Process data rows
277
+ for row in table.rows[1:]:
278
+ cells = [cell.text.strip() for cell in row.cells]
279
+ while len(cells) < len(header_cells):
280
+ cells.append("")
281
+ writer.write("| " + " | ".join(cells[: len(header_cells)]) + " |\n")
282
+
283
+ writer.write("\n")