onetool-mcp 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132):
  1. bench/__init__.py +5 -0
  2. bench/cli.py +69 -0
  3. bench/harness/__init__.py +66 -0
  4. bench/harness/client.py +692 -0
  5. bench/harness/config.py +397 -0
  6. bench/harness/csv_writer.py +109 -0
  7. bench/harness/evaluate.py +512 -0
  8. bench/harness/metrics.py +283 -0
  9. bench/harness/runner.py +899 -0
  10. bench/py.typed +0 -0
  11. bench/reporter.py +629 -0
  12. bench/run.py +487 -0
  13. bench/secrets.py +101 -0
  14. bench/utils.py +16 -0
  15. onetool/__init__.py +4 -0
  16. onetool/cli.py +391 -0
  17. onetool/py.typed +0 -0
  18. onetool_mcp-1.0.0b1.dist-info/METADATA +163 -0
  19. onetool_mcp-1.0.0b1.dist-info/RECORD +132 -0
  20. onetool_mcp-1.0.0b1.dist-info/WHEEL +4 -0
  21. onetool_mcp-1.0.0b1.dist-info/entry_points.txt +3 -0
  22. onetool_mcp-1.0.0b1.dist-info/licenses/LICENSE.txt +687 -0
  23. onetool_mcp-1.0.0b1.dist-info/licenses/NOTICE.txt +64 -0
  24. ot/__init__.py +37 -0
  25. ot/__main__.py +6 -0
  26. ot/_cli.py +107 -0
  27. ot/_tui.py +53 -0
  28. ot/config/__init__.py +46 -0
  29. ot/config/defaults/bench.yaml +4 -0
  30. ot/config/defaults/diagram-templates/api-flow.mmd +33 -0
  31. ot/config/defaults/diagram-templates/c4-context.puml +30 -0
  32. ot/config/defaults/diagram-templates/class-diagram.mmd +87 -0
  33. ot/config/defaults/diagram-templates/feature-mindmap.mmd +70 -0
  34. ot/config/defaults/diagram-templates/microservices.d2 +81 -0
  35. ot/config/defaults/diagram-templates/project-gantt.mmd +37 -0
  36. ot/config/defaults/diagram-templates/state-machine.mmd +42 -0
  37. ot/config/defaults/onetool.yaml +25 -0
  38. ot/config/defaults/prompts.yaml +97 -0
  39. ot/config/defaults/servers.yaml +7 -0
  40. ot/config/defaults/snippets.yaml +4 -0
  41. ot/config/defaults/tool_templates/__init__.py +7 -0
  42. ot/config/defaults/tool_templates/extension.py +52 -0
  43. ot/config/defaults/tool_templates/isolated.py +61 -0
  44. ot/config/dynamic.py +121 -0
  45. ot/config/global_templates/__init__.py +2 -0
  46. ot/config/global_templates/bench-secrets-template.yaml +6 -0
  47. ot/config/global_templates/bench.yaml +9 -0
  48. ot/config/global_templates/onetool.yaml +27 -0
  49. ot/config/global_templates/secrets-template.yaml +44 -0
  50. ot/config/global_templates/servers.yaml +18 -0
  51. ot/config/global_templates/snippets.yaml +235 -0
  52. ot/config/loader.py +1087 -0
  53. ot/config/mcp.py +145 -0
  54. ot/config/secrets.py +190 -0
  55. ot/config/tool_config.py +125 -0
  56. ot/decorators.py +116 -0
  57. ot/executor/__init__.py +35 -0
  58. ot/executor/base.py +16 -0
  59. ot/executor/fence_processor.py +83 -0
  60. ot/executor/linter.py +142 -0
  61. ot/executor/pack_proxy.py +260 -0
  62. ot/executor/param_resolver.py +140 -0
  63. ot/executor/pep723.py +288 -0
  64. ot/executor/result_store.py +369 -0
  65. ot/executor/runner.py +496 -0
  66. ot/executor/simple.py +163 -0
  67. ot/executor/tool_loader.py +396 -0
  68. ot/executor/validator.py +398 -0
  69. ot/executor/worker_pool.py +388 -0
  70. ot/executor/worker_proxy.py +189 -0
  71. ot/http_client.py +145 -0
  72. ot/logging/__init__.py +37 -0
  73. ot/logging/config.py +315 -0
  74. ot/logging/entry.py +213 -0
  75. ot/logging/format.py +188 -0
  76. ot/logging/span.py +349 -0
  77. ot/meta.py +1555 -0
  78. ot/paths.py +453 -0
  79. ot/prompts.py +218 -0
  80. ot/proxy/__init__.py +21 -0
  81. ot/proxy/manager.py +396 -0
  82. ot/py.typed +0 -0
  83. ot/registry/__init__.py +189 -0
  84. ot/registry/models.py +57 -0
  85. ot/registry/parser.py +269 -0
  86. ot/registry/registry.py +413 -0
  87. ot/server.py +315 -0
  88. ot/shortcuts/__init__.py +15 -0
  89. ot/shortcuts/aliases.py +87 -0
  90. ot/shortcuts/snippets.py +258 -0
  91. ot/stats/__init__.py +35 -0
  92. ot/stats/html.py +250 -0
  93. ot/stats/jsonl_writer.py +283 -0
  94. ot/stats/reader.py +354 -0
  95. ot/stats/timing.py +57 -0
  96. ot/support.py +63 -0
  97. ot/tools.py +114 -0
  98. ot/utils/__init__.py +81 -0
  99. ot/utils/batch.py +161 -0
  100. ot/utils/cache.py +120 -0
  101. ot/utils/deps.py +403 -0
  102. ot/utils/exceptions.py +23 -0
  103. ot/utils/factory.py +179 -0
  104. ot/utils/format.py +65 -0
  105. ot/utils/http.py +202 -0
  106. ot/utils/platform.py +45 -0
  107. ot/utils/sanitize.py +130 -0
  108. ot/utils/truncate.py +69 -0
  109. ot_tools/__init__.py +4 -0
  110. ot_tools/_convert/__init__.py +12 -0
  111. ot_tools/_convert/excel.py +279 -0
  112. ot_tools/_convert/pdf.py +254 -0
  113. ot_tools/_convert/powerpoint.py +268 -0
  114. ot_tools/_convert/utils.py +358 -0
  115. ot_tools/_convert/word.py +283 -0
  116. ot_tools/brave_search.py +604 -0
  117. ot_tools/code_search.py +736 -0
  118. ot_tools/context7.py +495 -0
  119. ot_tools/convert.py +614 -0
  120. ot_tools/db.py +415 -0
  121. ot_tools/diagram.py +1604 -0
  122. ot_tools/diagram.yaml +167 -0
  123. ot_tools/excel.py +1372 -0
  124. ot_tools/file.py +1348 -0
  125. ot_tools/firecrawl.py +732 -0
  126. ot_tools/grounding_search.py +646 -0
  127. ot_tools/package.py +604 -0
  128. ot_tools/py.typed +0 -0
  129. ot_tools/ripgrep.py +544 -0
  130. ot_tools/scaffold.py +471 -0
  131. ot_tools/transform.py +213 -0
  132. ot_tools/web_fetch.py +384 -0
ot_tools/web_fetch.py ADDED
@@ -0,0 +1,384 @@
1
+ """Web content extraction tools using trafilatura.
2
+
3
+ Provides web page fetching with high-quality content extraction,
4
+ supporting single and batch URL processing with configurable output formats.
5
+
6
+ Reference: https://github.com/adbar/trafilatura
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import json
12
+ from urllib.parse import urlparse
13
+
14
+ # Pack for dot notation: web.fetch(), web.fetch_batch()
15
+ pack = "web"
16
+
17
+ __all__ = ["fetch", "fetch_batch"]
18
+
19
+ from typing import Any, Literal
20
+
21
+ import trafilatura
22
+ from pydantic import BaseModel, Field
23
+ from trafilatura.settings import use_config
24
+
25
+ from ot.config import get_tool_config
26
+ from ot.logging import LogSpan
27
+ from ot.utils import (
28
+ batch_execute,
29
+ cache,
30
+ format_batch_results,
31
+ normalize_items,
32
+ truncate,
33
+ )
34
+
35
+
36
class Config(BaseModel):
    """Pack configuration - discovered by registry.

    Loaded via ``get_tool_config("web", Config)``; the per-call ``timeout``
    and ``max_length`` arguments of ``fetch``/``fetch_batch`` override these
    defaults when provided.
    """

    # HTTP download timeout handed to trafilatura (seconds); bounded 1-120.
    timeout: float = Field(
        default=30.0,
        ge=1.0,
        le=120.0,
        description="Request timeout in seconds",
    )
    # Cap on extracted output length; callers may pass 0 at call time to
    # disable truncation entirely.
    max_length: int = Field(
        default=50000,
        ge=1000,
        le=500000,
        description="Maximum content length in characters",
    )
51
+
52
+
53
def _get_config() -> Config:
    """Get web pack configuration.

    Returns:
        Validated ``Config`` for the "web" pack, resolved through the
        shared tool-config loader.
    """
    return get_tool_config("web", Config)
56
+
57
+
58
def _create_config(timeout: float) -> Any:
    """Build a trafilatura configuration with a custom download timeout.

    Args:
        timeout: Request timeout in seconds (truncated to an integer,
            since trafilatura stores the value as a string of an int).

    Returns:
        A trafilatura config object with DOWNLOAD_TIMEOUT applied.
    """
    trafilatura_config = use_config()
    trafilatura_config.set("DEFAULT", "DOWNLOAD_TIMEOUT", str(int(timeout)))
    return trafilatura_config
63
+
64
+
65
+ def _validate_url(url: str) -> None:
66
+ """Validate URL format.
67
+
68
+ Args:
69
+ url: The URL to validate
70
+
71
+ Raises:
72
+ ValueError: If URL is empty or malformed
73
+ """
74
+ if not url or not url.strip():
75
+ raise ValueError("URL cannot be empty")
76
+ parsed = urlparse(url)
77
+ if not parsed.scheme or not parsed.netloc:
78
+ raise ValueError(f"Invalid URL format: {url}")
79
+
80
+
81
+ def _validate_options(favor_precision: bool, favor_recall: bool) -> None:
82
+ """Validate mutually exclusive options.
83
+
84
+ Args:
85
+ favor_precision: Whether precision is favored
86
+ favor_recall: Whether recall is favored
87
+
88
+ Raises:
89
+ ValueError: If both options are True
90
+ """
91
+ if favor_precision and favor_recall:
92
+ raise ValueError(
93
+ "Cannot set both favor_precision and favor_recall to True. "
94
+ "Choose one extraction mode: precision (less text, more accurate) "
95
+ "or recall (more text, may include noise)."
96
+ )
97
+
98
+
99
+ def _format_error(
100
+ url: str,
101
+ error: str,
102
+ message: str,
103
+ output_format: str,
104
+ ) -> str:
105
+ """Format error message, using JSON structure when appropriate.
106
+
107
+ Args:
108
+ url: The URL that failed
109
+ error: Error type identifier
110
+ message: Human-readable error message
111
+ output_format: The requested output format
112
+
113
+ Returns:
114
+ Formatted error string (JSON if output_format is "json")
115
+ """
116
+ if output_format == "json":
117
+ return json.dumps({"error": error, "url": url, "message": message})
118
+ return f"Error: {message}"
119
+
120
+
121
@cache(ttl=300)  # Cache fetched pages for 5 minutes
def _fetch_url_cached(url: str, timeout: float) -> str | None:
    """Download a URL, memoizing the raw response to skip repeat requests.

    Args:
        url: Page to download.
        timeout: Download timeout in seconds.

    Returns:
        The raw downloaded document, or None when the fetch failed.
    """
    with LogSpan(span="web.download", url=url, timeout=timeout) as span:
        downloaded = trafilatura.fetch_url(url, config=_create_config(timeout))
        span.add(success=downloaded is not None)
        if downloaded:
            span.add(responseLen=len(downloaded))
        return downloaded
131
+
132
+
133
def fetch(
    *,
    url: str,
    output_format: Literal["text", "markdown", "json"] = "markdown",
    include_links: bool = False,
    include_images: bool = False,
    include_tables: bool = True,
    include_comments: bool = False,
    include_formatting: bool = True,
    include_metadata: bool = False,
    favor_precision: bool = False,
    favor_recall: bool = False,
    fast: bool = False,
    target_language: str | None = None,
    max_length: int | None = None,
    timeout: float | None = None,
    use_cache: bool = True,
) -> str:
    """Fetch and extract main content from a web page.

    Uses trafilatura to extract the main content, filtering out navigation,
    ads, and boilerplate. Returns clean text optimized for LLM consumption.

    All parameters are keyword-only.

    Args:
        url: The URL to fetch
        output_format: Output format - "text", "markdown" (default), or "json"
        include_links: Include hyperlinks in output (default: False)
        include_images: Include image references (default: False)
        include_tables: Include table content (default: True)
        include_comments: Include comments section (default: False)
        include_formatting: Keep structural elements like headers, lists (default: True)
        include_metadata: Wrap JSON output in an object with a "metadata" key
            holding final_url and content_type (default: False, requires
            output_format="json"). NOTE: final_url echoes the request URL
            (redirects are not tracked) and content_type is always
            "text/html".
        favor_precision: Prefer precision over recall (default: False)
        favor_recall: Prefer recall over precision (default: False)
        fast: Skip fallback extraction for speed (default: False)
        target_language: Filter by ISO 639-1 language code (e.g., "en")
        max_length: Maximum output length in characters (defaults to config, 0 = unlimited)
        timeout: Request timeout in seconds (defaults to config)
        use_cache: Use cached pages if available (default: True)

    Returns:
        Extracted content in the specified format, or error message on failure

    Raises:
        ValueError: If URL is empty/malformed or if both favor_precision and
        favor_recall are True

    Example:
        # Basic usage with defaults
        content = web.fetch(url="https://docs.python.org/3/library/asyncio.html")

        # Get plain text with faster extraction
        content = web.fetch(url=url, output_format="text", fast=True)

        # Include links for research
        content = web.fetch(url=url, include_links=True)

        # Get content with metadata
        content = web.fetch(url=url, output_format="json", include_metadata=True)
    """
    # Validate inputs before starting the span so validation errors raise
    # to the caller instead of being logged as fetch failures.
    _validate_url(url)
    _validate_options(favor_precision, favor_recall)

    with LogSpan(span="web.fetch", url=url, output_format=output_format) as s:
        try:
            # Resolve per-call overrides against pack configuration.
            pack_config = _get_config()

            if timeout is None:
                timeout = pack_config.timeout
            if max_length is None:
                max_length = pack_config.max_length
            config = _create_config(timeout)

            # Fetch the page (with optional caching)
            if use_cache:
                downloaded = _fetch_url_cached(url, timeout)
            else:
                downloaded = trafilatura.fetch_url(url, config=config)

            if downloaded is None:
                s.add(error="fetch_failed")
                return _format_error(
                    url, "fetch_failed", f"Failed to fetch URL: {url}", output_format
                )

            # Map output format to trafilatura format ("text" -> "txt";
            # "markdown" and "json" pass through unchanged).
            trafilatura_format: str = output_format
            if output_format == "text":
                trafilatura_format = "txt"

            # Extract main content; with_metadata is enabled for JSON so
            # trafilatura embeds document metadata in its own output.
            result = trafilatura.extract(
                downloaded,
                url=url,
                output_format=trafilatura_format,
                include_links=include_links,
                include_images=include_images,
                include_tables=include_tables,
                include_comments=include_comments,
                include_formatting=include_formatting,
                favor_precision=favor_precision,
                favor_recall=favor_recall,
                fast=fast,
                target_language=target_language,
                with_metadata=output_format == "json",
                config=config,
            )

            if result is None:
                s.add(error="no_content")
                return _format_error(
                    url,
                    "no_content",
                    f"No content could be extracted from: {url}",
                    output_format,
                )

            # Wrap with metadata if requested (JSON only). If trafilatura's
            # output is not valid JSON, the raw string is embedded as-is.
            # NOTE(review): metadata values are static — final_url is the
            # request URL (no redirect tracking) and content_type is assumed.
            if include_metadata and output_format == "json":
                try:
                    content_data = json.loads(result)
                except json.JSONDecodeError:
                    content_data = result
                result = json.dumps(
                    {
                        "content": content_data,
                        "metadata": {
                            "final_url": url,
                            "content_type": "text/html",
                        },
                    }
                )

            # Truncate if needed (max_length == 0 means unlimited).
            if max_length > 0:
                result = truncate(
                    result, max_length, indicator="\n\n[Content truncated...]"
                )

            # NOTE(review): "cached" records whether caching was enabled,
            # not whether this call actually hit the cache.
            s.add(contentLen=len(result), cached=use_cache)
            return result

        except TimeoutError:
            s.add(error="timeout")
            return _format_error(
                url, "timeout", f"Timeout after {timeout}s fetching: {url}", output_format
            )
        except ConnectionError as e:
            s.add(error="connection_failed")
            return _format_error(
                url, "connection_failed", f"Connection failed for {url}: {e}", output_format
            )
        except Exception as e:
            # Catch-all boundary: errors are logged on the span and returned
            # as formatted strings rather than raised to the tool caller.
            s.add(error=str(e))
            return _format_error(url, "error", f"Error fetching {url}: {e}", output_format)
291
+
292
+
293
def fetch_batch(
    *,
    urls: list[str] | list[tuple[str, str]],
    output_format: Literal["text", "markdown", "json"] = "markdown",
    include_links: bool = False,
    include_images: bool = False,
    include_tables: bool = True,
    include_comments: bool = False,
    include_formatting: bool = True,
    favor_precision: bool = False,
    favor_recall: bool = False,
    fast: bool = False,
    target_language: str | None = None,
    max_length: int | None = None,
    timeout: float | None = None,
    use_cache: bool = True,
    max_workers: int = 5,
) -> str:
    """Fetch multiple URLs concurrently and return concatenated results.

    Fetches all URLs in parallel using threads, then concatenates the results
    with clear section separators. Failed fetches include error messages.

    All parameters are keyword-only.

    Args:
        urls: List of URLs to fetch. Each item can be:
            - A string (URL used as both source and label)
            - A tuple of (url, label) for custom section labels
        output_format: Output format - "text", "markdown" (default), or "json"
        include_links: Include hyperlinks in output (default: False)
        include_images: Include image references (default: False)
        include_tables: Include table content (default: True)
        include_comments: Include comments section (default: False)
        include_formatting: Keep structural elements like headers, lists (default: True)
        favor_precision: Prefer precision over recall (default: False)
        favor_recall: Prefer recall over precision (default: False)
        fast: Skip fallback extraction for speed (default: False)
        target_language: Filter by ISO 639-1 language code (e.g., "en")
        max_length: Max length per URL in characters (defaults to config, 0 = unlimited)
        timeout: Request timeout per URL in seconds (defaults to config)
        use_cache: Use cached pages if available (default: True)
        max_workers: Maximum concurrent fetches (default: 5)

    Returns:
        Concatenated content with section separators

    Raises:
        ValueError: If both favor_precision and favor_recall are True

    Example:
        # Simple list of URLs
        content = web.fetch_batch(urls=[
            "https://docs.python.org/3/library/asyncio.html",
            "https://docs.python.org/3/library/threading.html",
        ])

        # With custom labels
        content = web.fetch_batch(urls=[
            ("https://fastapi.tiangolo.com/tutorial/", "FastAPI Tutorial"),
            ("https://docs.pydantic.dev/latest/", "Pydantic Docs"),
        ])
    """
    # Fail fast on the one invalid option combination before spawning work.
    _validate_options(favor_precision, favor_recall)

    # Expand plain strings / (url, label) tuples into a uniform item list.
    items = normalize_items(urls)

    with LogSpan(span="web.batch", urlCount=len(items), output_format=output_format) as s:

        def _worker(target_url: str, section_label: str) -> tuple[str, str]:
            """Delegate one URL to fetch() and pair the result with its label."""
            content = fetch(
                url=target_url,
                output_format=output_format,
                include_links=include_links,
                include_images=include_images,
                include_tables=include_tables,
                include_comments=include_comments,
                include_formatting=include_formatting,
                favor_precision=favor_precision,
                favor_recall=favor_recall,
                fast=fast,
                target_language=target_language,
                max_length=max_length,
                timeout=timeout,
                use_cache=use_cache,
            )
            return section_label, content

        fetched = batch_execute(_worker, items, max_workers=max_workers)
        combined = format_batch_results(fetched, items)
        s.add(outputLen=len(combined))
        return combined