onetool_mcp-1.0.0b1-py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in their public registries.
Files changed (132)
  1. bench/__init__.py +5 -0
  2. bench/cli.py +69 -0
  3. bench/harness/__init__.py +66 -0
  4. bench/harness/client.py +692 -0
  5. bench/harness/config.py +397 -0
  6. bench/harness/csv_writer.py +109 -0
  7. bench/harness/evaluate.py +512 -0
  8. bench/harness/metrics.py +283 -0
  9. bench/harness/runner.py +899 -0
  10. bench/py.typed +0 -0
  11. bench/reporter.py +629 -0
  12. bench/run.py +487 -0
  13. bench/secrets.py +101 -0
  14. bench/utils.py +16 -0
  15. onetool/__init__.py +4 -0
  16. onetool/cli.py +391 -0
  17. onetool/py.typed +0 -0
  18. onetool_mcp-1.0.0b1.dist-info/METADATA +163 -0
  19. onetool_mcp-1.0.0b1.dist-info/RECORD +132 -0
  20. onetool_mcp-1.0.0b1.dist-info/WHEEL +4 -0
  21. onetool_mcp-1.0.0b1.dist-info/entry_points.txt +3 -0
  22. onetool_mcp-1.0.0b1.dist-info/licenses/LICENSE.txt +687 -0
  23. onetool_mcp-1.0.0b1.dist-info/licenses/NOTICE.txt +64 -0
  24. ot/__init__.py +37 -0
  25. ot/__main__.py +6 -0
  26. ot/_cli.py +107 -0
  27. ot/_tui.py +53 -0
  28. ot/config/__init__.py +46 -0
  29. ot/config/defaults/bench.yaml +4 -0
  30. ot/config/defaults/diagram-templates/api-flow.mmd +33 -0
  31. ot/config/defaults/diagram-templates/c4-context.puml +30 -0
  32. ot/config/defaults/diagram-templates/class-diagram.mmd +87 -0
  33. ot/config/defaults/diagram-templates/feature-mindmap.mmd +70 -0
  34. ot/config/defaults/diagram-templates/microservices.d2 +81 -0
  35. ot/config/defaults/diagram-templates/project-gantt.mmd +37 -0
  36. ot/config/defaults/diagram-templates/state-machine.mmd +42 -0
  37. ot/config/defaults/onetool.yaml +25 -0
  38. ot/config/defaults/prompts.yaml +97 -0
  39. ot/config/defaults/servers.yaml +7 -0
  40. ot/config/defaults/snippets.yaml +4 -0
  41. ot/config/defaults/tool_templates/__init__.py +7 -0
  42. ot/config/defaults/tool_templates/extension.py +52 -0
  43. ot/config/defaults/tool_templates/isolated.py +61 -0
  44. ot/config/dynamic.py +121 -0
  45. ot/config/global_templates/__init__.py +2 -0
  46. ot/config/global_templates/bench-secrets-template.yaml +6 -0
  47. ot/config/global_templates/bench.yaml +9 -0
  48. ot/config/global_templates/onetool.yaml +27 -0
  49. ot/config/global_templates/secrets-template.yaml +44 -0
  50. ot/config/global_templates/servers.yaml +18 -0
  51. ot/config/global_templates/snippets.yaml +235 -0
  52. ot/config/loader.py +1087 -0
  53. ot/config/mcp.py +145 -0
  54. ot/config/secrets.py +190 -0
  55. ot/config/tool_config.py +125 -0
  56. ot/decorators.py +116 -0
  57. ot/executor/__init__.py +35 -0
  58. ot/executor/base.py +16 -0
  59. ot/executor/fence_processor.py +83 -0
  60. ot/executor/linter.py +142 -0
  61. ot/executor/pack_proxy.py +260 -0
  62. ot/executor/param_resolver.py +140 -0
  63. ot/executor/pep723.py +288 -0
  64. ot/executor/result_store.py +369 -0
  65. ot/executor/runner.py +496 -0
  66. ot/executor/simple.py +163 -0
  67. ot/executor/tool_loader.py +396 -0
  68. ot/executor/validator.py +398 -0
  69. ot/executor/worker_pool.py +388 -0
  70. ot/executor/worker_proxy.py +189 -0
  71. ot/http_client.py +145 -0
  72. ot/logging/__init__.py +37 -0
  73. ot/logging/config.py +315 -0
  74. ot/logging/entry.py +213 -0
  75. ot/logging/format.py +188 -0
  76. ot/logging/span.py +349 -0
  77. ot/meta.py +1555 -0
  78. ot/paths.py +453 -0
  79. ot/prompts.py +218 -0
  80. ot/proxy/__init__.py +21 -0
  81. ot/proxy/manager.py +396 -0
  82. ot/py.typed +0 -0
  83. ot/registry/__init__.py +189 -0
  84. ot/registry/models.py +57 -0
  85. ot/registry/parser.py +269 -0
  86. ot/registry/registry.py +413 -0
  87. ot/server.py +315 -0
  88. ot/shortcuts/__init__.py +15 -0
  89. ot/shortcuts/aliases.py +87 -0
  90. ot/shortcuts/snippets.py +258 -0
  91. ot/stats/__init__.py +35 -0
  92. ot/stats/html.py +250 -0
  93. ot/stats/jsonl_writer.py +283 -0
  94. ot/stats/reader.py +354 -0
  95. ot/stats/timing.py +57 -0
  96. ot/support.py +63 -0
  97. ot/tools.py +114 -0
  98. ot/utils/__init__.py +81 -0
  99. ot/utils/batch.py +161 -0
  100. ot/utils/cache.py +120 -0
  101. ot/utils/deps.py +403 -0
  102. ot/utils/exceptions.py +23 -0
  103. ot/utils/factory.py +179 -0
  104. ot/utils/format.py +65 -0
  105. ot/utils/http.py +202 -0
  106. ot/utils/platform.py +45 -0
  107. ot/utils/sanitize.py +130 -0
  108. ot/utils/truncate.py +69 -0
  109. ot_tools/__init__.py +4 -0
  110. ot_tools/_convert/__init__.py +12 -0
  111. ot_tools/_convert/excel.py +279 -0
  112. ot_tools/_convert/pdf.py +254 -0
  113. ot_tools/_convert/powerpoint.py +268 -0
  114. ot_tools/_convert/utils.py +358 -0
  115. ot_tools/_convert/word.py +283 -0
  116. ot_tools/brave_search.py +604 -0
  117. ot_tools/code_search.py +736 -0
  118. ot_tools/context7.py +495 -0
  119. ot_tools/convert.py +614 -0
  120. ot_tools/db.py +415 -0
  121. ot_tools/diagram.py +1604 -0
  122. ot_tools/diagram.yaml +167 -0
  123. ot_tools/excel.py +1372 -0
  124. ot_tools/file.py +1348 -0
  125. ot_tools/firecrawl.py +732 -0
  126. ot_tools/grounding_search.py +646 -0
  127. ot_tools/package.py +604 -0
  128. ot_tools/py.typed +0 -0
  129. ot_tools/ripgrep.py +544 -0
  130. ot_tools/scaffold.py +471 -0
  131. ot_tools/transform.py +213 -0
  132. ot_tools/web_fetch.py +384 -0
bench/run.py ADDED
@@ -0,0 +1,487 @@
+ """Run command for running agent benchmarks with MCP servers."""
+
+ from __future__ import annotations
+
+ import glob
+ import os
+ from pathlib import Path
+
+ import questionary
+ import typer
+ import yaml
+ from pydantic import BaseModel, Field
+ from rich.console import Console
+
+ from bench.cli import app
+ from bench.harness.config import load_config
+ from bench.harness.csv_writer import write_results_csv
+ from bench.harness.runner import AgenticRunner
+ from bench.reporter import ConsoleReporter
+ from bench.utils import run_async
+ from ot._tui import ask_select
+ from ot.logging import LogSpan, configure_logging
+ from ot.paths import get_effective_cwd, get_global_dir
+ from ot.support import get_support_banner, get_version
+
+ # Exit codes
+ EXIT_SUCCESS = 0
+ EXIT_RUNTIME_ERROR = 1
+ EXIT_CONFIG_ERROR = 2
+ EXIT_FILE_NOT_FOUND = 3
+
+
+ def _print_startup_banner(console: Console) -> None:
+     """Print startup message."""
+     version = get_version()
+     console.print(f"[bold cyan]OneTool Benchmark[/bold cyan] [dim]v{version}[/dim]")
+     console.print(get_support_banner())
+     console.print()
+
+
+ class BenchFavorite(BaseModel):
+     """A favorite benchmark entry."""
+
+     name: str = Field(description="Display name in picker")
+     path: str = Field(description="File path or directory")
+
+
+ class BenchConfig(BaseModel):
+     """Configuration for bench CLI."""
+
+     favorites: list[BenchFavorite] = Field(
+         default_factory=list, description="Favorite benchmarks"
+     )
+
+
+ def load_bench_config(config_path: Path | str | None = None) -> BenchConfig:
+     """Load bench configuration from YAML file.
+
+     Resolution order (when config_path is None):
+     1. BENCH_CONFIG env var
+     2. cwd/.onetool/config/bench.yaml
+     3. cwd/.onetool/bench.yaml
+     4. ~/.onetool/bench.yaml
+     5. Built-in defaults
+     """
+     if config_path is None:
+         # Check BENCH_CONFIG env var first
+         env_config = os.getenv("BENCH_CONFIG")
+         if env_config:
+             config_path = Path(env_config)
+         else:
+             cwd = get_effective_cwd()
+             # Try project config: cwd/.onetool/config/bench.yaml (preferred)
+             project_config = cwd / ".onetool" / "config" / "bench.yaml"
+             if project_config.exists():
+                 config_path = project_config
+             else:
+                 # Try legacy location: cwd/.onetool/bench.yaml
+                 legacy_config = cwd / ".onetool" / "bench.yaml"
+                 if legacy_config.exists():
+                     config_path = legacy_config
+                 else:
+                     # Try global config: ~/.onetool/bench.yaml
+                     global_config = get_global_dir() / "bench.yaml"
+                     if global_config.exists():
+                         config_path = global_config
+                     else:
+                         # No config found, use defaults
+                         return BenchConfig()
+     else:
+         config_path = Path(config_path)
+
+     if not config_path.exists():
+         return BenchConfig()
+
+     with config_path.open() as f:
+         raw_data = yaml.safe_load(f) or {}
+
+     return BenchConfig.model_validate(raw_data)
+
+
+ def get_yaml_description(file_path: Path) -> str | None:
+     """Extract description field from a YAML benchmark file."""
+     try:
+         with file_path.open() as f:
+             data = yaml.safe_load(f)
+         if isinstance(data, dict):
+             return data.get("description")
+     except Exception:
+         pass
+     return None
+
+
+ def scan_yaml_files(directory: Path) -> list[Path]:
+     """Recursively scan directory for YAML files, excluding hidden directories."""
+     files = []
+     for path in directory.rglob("*"):
+         # Skip hidden directories
+         if any(part.startswith(".") for part in path.parts):
+             continue
+         if path.is_file() and path.suffix in (".yaml", ".yml"):
+             files.append(path)
+     return sorted(files)
+
+
+ def run_tui_picker(console: Console) -> list[Path] | None:
+     """Run interactive TUI for selecting benchmark file(s).
+
+     Returns:
+         List of paths when a glob pattern matches multiple files.
+         Single-item list for directory browsing or single file favorites.
+         None if user cancels.
+     """
+     import asyncio
+
+     bench_config = load_bench_config()
+
+     if not bench_config.favorites:
+         console.print("[dim]No favorites configured[/dim]")
+         console.print("[dim]Add favorites to .onetool/config/bench.yaml[/dim]")
+         return None
+
+     async def pick_favorite() -> list[Path] | None:
+         favorites = bench_config.favorites
+
+         while True:
+             # Build choices using indices to avoid questionary value issues
+             choices = [
+                 questionary.Choice(fav.name, value=str(i))
+                 for i, fav in enumerate(favorites)
+             ]
+             choices.append(
+                 questionary.Choice("Exit", value="__exit__", shortcut_key="e")
+             )
+
+             selected = await ask_select("Select benchmark:", choices)
+             if not selected or selected == "__exit__" or not selected.isdigit():
+                 return None
+
+             fav = favorites[int(selected)]
+             fav_path_str = fav.path
+
+             # Check if path contains glob characters
+             has_glob = any(c in fav_path_str for c in "*?[")
+
+             if has_glob:
+                 # Expand glob pattern - return ALL matching files
+                 yaml_files = expand_glob_patterns([fav_path_str])
+                 if not yaml_files:
+                     console.print(f"[dim]No files matched: {fav_path_str}[/dim]")
+                     continue  # Go back to favorites picker
+
+                 # Return all matching files (don't prompt to pick one)
+                 return yaml_files
+
+             fav_path = Path(fav_path_str)
+
+             # If it's a file, return it directly (as a list)
+             if fav_path.is_file():
+                 return [fav_path]
+
+             # If it's a directory, scan for YAML files
+             if fav_path.is_dir():
+                 yaml_files = scan_yaml_files(fav_path)
+                 if not yaml_files:
+                     console.print(f"[dim]No YAML files found in {fav_path}[/dim]")
+                     continue  # Go back to favorites picker
+
+                 # Build choices using indices
+                 file_choices = []
+                 for i, f in enumerate(yaml_files):
+                     rel_path = f.relative_to(fav_path)
+                     desc = get_yaml_description(f)
+                     label = f"{rel_path}" + (f" - {desc}" if desc else "")
+                     file_choices.append(questionary.Choice(label, value=str(i)))
+                 file_choices.append(
+                     questionary.Choice("Back", value="__back__", shortcut_key="b")
+                 )
+
+                 file_selected = await ask_select("Select file:", file_choices)
+                 if (
+                     not file_selected
+                     or file_selected == "__back__"
+                     or not file_selected.isdigit()
+                 ):
+                     continue  # Go back to favorites picker
+                 return [yaml_files[int(file_selected)]]
+
+             console.print(f"[red]Path not found: {fav_path}[/red]")
+             continue  # Go back to favorites picker
+
+     return asyncio.run(pick_favorite())
+
+
+ def expand_glob_patterns(patterns: list[str]) -> list[Path]:
+     """Expand glob patterns to list of files, preserving order."""
+     files: list[Path] = []
+     seen: set[Path] = set()
+     for pattern in patterns:
+         # Try glob expansion first
+         expanded = glob.glob(pattern)  # noqa: PTH207 - glob.glob handles string patterns directly
+         if expanded:
+             for f in sorted(expanded):
+                 path = Path(f)
+                 if path.is_file() and path not in seen:
+                     files.append(path)
+                     seen.add(path)
+         else:
+             # No glob match, treat as literal path
+             path = Path(pattern)
+             if path not in seen:
+                 files.append(path)
+                 seen.add(path)
+     return files
+
+
+ def run_single_benchmark(
+     config_file: Path,
+     console: Console,
+     scenario: str | None,
+     task: str | None,
+     tag: list[str] | None,
+     dry_run: bool,
+     verbose: bool,
+     trace: bool,
+     no_color: bool,
+ ) -> tuple[list, bool]:
+     """Run a single benchmark file.
+
+     Returns:
+         Tuple of (results, success) where:
+         - results: List of ScenarioResult objects
+         - success: True if completed without runtime errors or interrupts.
+           Test evaluation failures (PASS/FAIL) don't affect this.
+     """
+     with LogSpan(span="bench.config.load", path=str(config_file)) as span:
+         try:
+             config = load_config(config_file)
+             span.add(scenarios=len(config.scenarios), servers=len(config.servers))
+         except FileNotFoundError as e:
+             span.add(error="file_not_found")
+             console.print(f"[red]Error:[/red] {e}")
+             return [], False
+         except Exception as e:
+             span.add(error=str(e))
+             console.print(f"[red]Configuration error:[/red] {e}")
+             return [], False
+
+     console.print(f"Loaded config: {config_file}")
+     console.print(f"  Scenarios: {len(config.scenarios)}")
+     console.print(f"  Servers: {list(config.servers.keys())}")
+
+     reporter = ConsoleReporter(
+         console=console,
+         config=config,
+         verbose=verbose,
+         trace=trace,
+         no_color=no_color,
+     )
+
+     runner = AgenticRunner(
+         config,
+         dry_run=dry_run,
+         verbose=verbose,
+         on_progress=reporter.on_event,
+     )
+
+     interrupted = False
+     try:
+         results = run_async(
+             runner.run_scenario(scenario_name=scenario, task_name=task, tags=tag)
+         )
+     except KeyboardInterrupt:
+         console.print("\n[yellow]Interrupted by user[/yellow]")
+         results = runner.partial_results
+         if results:
+             task_count = sum(len(s.tasks) for s in results)
+             console.print(f"[dim]Showing {task_count} completed task(s)[/dim]")
+         interrupted = True
+     except Exception as e:
+         console.print(f"[red]Runtime error:[/red] {e}")
+         return [], False
+
+     # Output results for this file (even if interrupted)
+     if results:
+         reporter.print_results_header()
+         for scenario_result in results:
+             reporter.print_results_table(scenario_result)
+         reporter.print_validation_errors()
+
+     return results or [], not interrupted
+
+
+ @app.command()
+ def run(
+     config_files: list[str] = typer.Argument(
+         None,
+         help="Path(s) to YAML config file(s). Supports glob patterns (e.g., *.yaml).",
+     ),
+     scenario: str | None = typer.Option(
+         None,
+         "--scenario",
+         "-s",
+         help="Run only scenarios matching this pattern (supports wildcards).",
+     ),
+     task: str | None = typer.Option(
+         None,
+         "--task",
+         "-t",
+         help="Run only tasks matching this pattern (supports wildcards: direct*, *:sha256:*).",
+     ),
+     tag: list[str] | None = typer.Option(
+         None,
+         "--tag",
+         help="Run only tasks with the specified tag(s). Can be specified multiple times.",
+     ),
+     output: Path | None = typer.Option(
+         None,
+         "--output",
+         "-o",
+         help="Path to write results YAML file.",
+     ),
+     dry_run: bool = typer.Option(
+         False,
+         "--dry-run",
+         help="Validate config without making API calls.",
+     ),
+     verbose: bool = typer.Option(
+         False,
+         "--verbose",
+         "-v",
+         help="Enable verbose output with full content.",
+     ),
+     trace: bool = typer.Option(
+         False,
+         "--trace",
+         help="Show timestamped request/response cycle for debugging timing.",
+     ),
+     no_color: bool = typer.Option(
+         False,
+         "--no-color",
+         help="Disable colored output (for CI/CD compatibility).",
+     ),
+     tui: bool = typer.Option(
+         False,
+         "--tui",
+         help="Interactive TUI mode for selecting from favorites.",
+     ),
+     csv: bool = typer.Option(
+         False,
+         "--csv",
+         help="Write results to CSV file with per-call metrics breakdown.",
+     ),
+ ) -> None:
+     """Run tasks (direct MCP calls or agent benchmarks).
+
+     Task types:
+       type: direct  - Direct MCP tool invocation (no LLM)
+       type: harness - LLM benchmark with MCP servers (default)
+
+     Examples:
+       bench run config.yaml
+       bench run examples/bench/*.yaml
+       bench run file1.yaml file2.yaml
+       bench run config.yaml --scenario "Tool Tests"
+       bench run config.yaml --task "direct*"
+       bench run config.yaml --tag focus
+       bench run config.yaml --verbose --trace
+       bench run config.yaml --dry-run
+       bench run config.yaml --output results.yaml
+       bench run --tui
+     """
+     # Initialize console with no_color option and no auto-highlighting
+     console = Console(no_color=no_color, force_terminal=not no_color, highlight=False)
+
+     # Initialize logging inside command to avoid module-level side effects
+     configure_logging(log_name="bench")
+
+     # Print startup banner
+     _print_startup_banner(console)
+
+     # Handle TUI mode
+     if tui:
+         selected_files = run_tui_picker(console)
+         if not selected_files:
+             raise typer.Exit(EXIT_SUCCESS)
+         files_to_run = selected_files
+     elif not config_files:
+         console.print(
+             "[red]Error:[/red] Missing config file. Use --tui or provide a path."
+         )
+         raise typer.Exit(EXIT_CONFIG_ERROR)
+     else:
+         # Expand glob patterns
+         files_to_run = expand_glob_patterns(config_files)
+         if not files_to_run:
+             console.print("[red]Error:[/red] No files matched the provided pattern(s).")
+             raise typer.Exit(EXIT_FILE_NOT_FOUND)
+
+     # Validate all files exist
+     missing = [f for f in files_to_run if not f.exists()]
+     if missing:
+         for f in missing:
+             console.print(f"[red]Error:[/red] File not found: {f}")
+         raise typer.Exit(EXIT_FILE_NOT_FOUND)
+
+     if dry_run:
+         console.print("[yellow]Dry run mode - no API calls will be made[/yellow]")
+
+     if len(files_to_run) > 1:
+         console.print(f"[cyan]Running {len(files_to_run)} benchmark files[/cyan]\n")
+
+     # Run each benchmark file
+     # Note: runtime_error tracks exceptions/interrupts, NOT test evaluation failures.
+     # Test failures (PASS/FAIL) don't affect exit code - only runtime errors do.
+     all_results = []
+     runtime_error = False
+
+     for i, config_file in enumerate(files_to_run):
+         if len(files_to_run) > 1:
+             console.print(
+                 f"\n[bold cyan]═══ File {i + 1}/{len(files_to_run)}: {config_file} ═══[/bold cyan]\n"
+             )
+
+         results, success = run_single_benchmark(
+             config_file=config_file,
+             console=console,
+             scenario=scenario,
+             task=task,
+             tag=tag,
+             dry_run=dry_run,
+             verbose=verbose,
+             trace=trace,
+             no_color=no_color,
+         )
+         all_results.extend(results)
+         if not success:
+             runtime_error = True
+
+     if not all_results:
+         console.print("[yellow]No results to report[/yellow]")
+         raise typer.Exit(EXIT_SUCCESS if not runtime_error else EXIT_RUNTIME_ERROR)
+
+     # Write aggregated results to file if specified
+     if output:
+         try:
+             output_data = {"results": [r.to_dict() for r in all_results]}
+             with output.open("w") as f:
+                 yaml.dump(output_data, f, default_flow_style=False, sort_keys=False)
+             console.print(f"\nResults written to: {output}")
+         except OSError as e:
+             console.print(f"[red]Error writing results:[/red] {e}")
+             raise typer.Exit(EXIT_RUNTIME_ERROR) from e
+
+     # Write CSV with per-call metrics if requested
+     if csv:
+         try:
+             csv_path = write_results_csv(all_results)
+             console.print(f"CSV results written to: {csv_path}")
+         except OSError as e:
+             console.print(f"[red]Error writing CSV:[/red] {e}")
+             raise typer.Exit(EXIT_RUNTIME_ERROR) from e
+
+     # Exit 1 only for runtime errors (exceptions, config errors, interrupts)
+     # Test evaluation failures (PASS/FAIL) exit 0 - they're not runtime errors
+     if runtime_error:
+         raise typer.Exit(EXIT_RUNTIME_ERROR)
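
Editor's note: the bench.yaml consumed by load_bench_config above is a flat YAML mapping validated by BenchConfig. A minimal sketch of loading one follows; the two entries are invented examples, and only the favorites/name/path keys come from the models in the diff (the import path assumes the bench package layout shown in the file list).

import yaml

from bench.run import BenchConfig  # assumed import path

RAW = """
favorites:
  - name: Smoke tests             # invented example entry
    path: examples/bench/*.yaml   # glob pattern: the TUI picker runs every match
  - name: Core suite              # invented example entry
    path: benchmarks/core.yaml    # single file: returned as a one-item list
"""

config = BenchConfig.model_validate(yaml.safe_load(RAW))
print([fav.name for fav in config.favorites])  # ['Smoke tests', 'Core suite']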
bench/secrets.py ADDED
@@ -0,0 +1,101 @@
+ """Secrets loading for bench.
+
+ Loads bench secrets from bench-secrets.yaml, separate from onetool secrets.
+ """
+
+ from __future__ import annotations
+
+ from pathlib import Path
+
+ import yaml
+
+ from ot.logging import LogSpan
+ from ot.paths import get_effective_cwd, get_global_dir
+
+ # Cached bench secrets
+ _bench_secrets: dict[str, str] | None = None
+
+
+ def _find_bench_secrets_file() -> Path | None:
+     """Find bench-secrets.yaml file.
+
+     Resolution order:
+     1. .onetool/config/bench-secrets.yaml (project-level, preferred)
+     2. .onetool/bench-secrets.yaml (project-level, legacy)
+     3. ~/.onetool/bench-secrets.yaml (global)
+
+     Returns:
+         Path to bench-secrets.yaml if found, None otherwise
+     """
+     cwd = get_effective_cwd()
+
+     # Project-level (preferred location)
+     project_config_path = cwd / ".onetool" / "config" / "bench-secrets.yaml"
+     if project_config_path.exists():
+         return project_config_path
+
+     # Project-level (legacy location)
+     project_path = cwd / ".onetool" / "bench-secrets.yaml"
+     if project_path.exists():
+         return project_path
+
+     # Global
+     global_path = get_global_dir() / "bench-secrets.yaml"
+     if global_path.exists():
+         return global_path
+
+     return None
+
+
+ def load_bench_secrets() -> dict[str, str]:
+     """Load bench secrets from bench-secrets.yaml.
+
+     Returns:
+         Dictionary of secret name -> value
+     """
+     global _bench_secrets
+
+     if _bench_secrets is not None:
+         return _bench_secrets
+
+     secrets_path = _find_bench_secrets_file()
+
+     with LogSpan(
+         span="bench.secrets.load",
+         path=str(secrets_path) if secrets_path else "not_found",
+     ) as span:
+         if secrets_path is None:
+             span.add(error="bench-secrets.yaml not found")
+             _bench_secrets = {}
+             return _bench_secrets
+
+         try:
+             with secrets_path.open() as f:
+                 raw_data = yaml.safe_load(f)
+         except (yaml.YAMLError, OSError) as e:
+             span.add(error=str(e))
+             _bench_secrets = {}
+             return _bench_secrets
+
+         if raw_data is None:
+             span.add(count=0)
+             _bench_secrets = {}
+             return _bench_secrets
+
+         # Convert all values to strings
+         _bench_secrets = {k: str(v) for k, v in raw_data.items() if v is not None}
+         span.add(count=len(_bench_secrets))
+         return _bench_secrets
+
+
+ def get_bench_secret(name: str) -> str:
+     """Get a bench secret by name.
+
+     Args:
+         name: Secret name (e.g., "OPENAI_API_KEY")
+
+     Returns:
+         Secret value, or empty string if not found
+     """
+     secrets = load_bench_secrets()
+     return secrets.get(name, "")
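
Editor's note: a short usage sketch of the module above, assuming it is importable as bench.secrets (the key name is taken from the docstring). Per the code, the file is parsed once and cached at module level, values are coerced to strings, and null entries are dropped.

from bench.secrets import get_bench_secret, load_bench_secrets

secrets = load_bench_secrets()                 # parsed once, then served from the module cache
api_key = get_bench_secret("OPENAI_API_KEY")   # "" when the file or the key is absent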
bench/utils.py ADDED
@@ -0,0 +1,16 @@
+ """Shared utilities for bench CLI commands."""
+
+ from __future__ import annotations
+
+ import asyncio
+ from typing import TYPE_CHECKING, Any, TypeVar
+
+ if TYPE_CHECKING:
+     from collections.abc import Coroutine
+
+ T = TypeVar("T")
+
+
+ def run_async(coro: Coroutine[Any, Any, T]) -> T:
+     """Run an async coroutine synchronously."""
+     return asyncio.new_event_loop().run_until_complete(coro)
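
Editor's note: a minimal usage sketch of run_async; the coroutine here is an invented example.

from bench.utils import run_async

async def fetch_answer() -> int:
    # stand-in for an awaitable benchmark step
    return 42

print(run_async(fetch_answer()))  # 42

Unlike asyncio.run(), a loop created this way is never explicitly closed, so each call leaks its loop; presumably the CLI relies on process exit for cleanup.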
onetool/__init__.py ADDED
@@ -0,0 +1,4 @@
+ """OneTool MCP server CLI package.
+
+ Entry point for the `ot` command that runs the MCP server over stdio.
+ """