onetool-mcp 1.0.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. bench/__init__.py +5 -0
  2. bench/cli.py +69 -0
  3. bench/harness/__init__.py +66 -0
  4. bench/harness/client.py +692 -0
  5. bench/harness/config.py +397 -0
  6. bench/harness/csv_writer.py +109 -0
  7. bench/harness/evaluate.py +512 -0
  8. bench/harness/metrics.py +283 -0
  9. bench/harness/runner.py +899 -0
  10. bench/py.typed +0 -0
  11. bench/reporter.py +629 -0
  12. bench/run.py +487 -0
  13. bench/secrets.py +101 -0
  14. bench/utils.py +16 -0
  15. onetool/__init__.py +4 -0
  16. onetool/cli.py +391 -0
  17. onetool/py.typed +0 -0
  18. onetool_mcp-1.0.0b1.dist-info/METADATA +163 -0
  19. onetool_mcp-1.0.0b1.dist-info/RECORD +132 -0
  20. onetool_mcp-1.0.0b1.dist-info/WHEEL +4 -0
  21. onetool_mcp-1.0.0b1.dist-info/entry_points.txt +3 -0
  22. onetool_mcp-1.0.0b1.dist-info/licenses/LICENSE.txt +687 -0
  23. onetool_mcp-1.0.0b1.dist-info/licenses/NOTICE.txt +64 -0
  24. ot/__init__.py +37 -0
  25. ot/__main__.py +6 -0
  26. ot/_cli.py +107 -0
  27. ot/_tui.py +53 -0
  28. ot/config/__init__.py +46 -0
  29. ot/config/defaults/bench.yaml +4 -0
  30. ot/config/defaults/diagram-templates/api-flow.mmd +33 -0
  31. ot/config/defaults/diagram-templates/c4-context.puml +30 -0
  32. ot/config/defaults/diagram-templates/class-diagram.mmd +87 -0
  33. ot/config/defaults/diagram-templates/feature-mindmap.mmd +70 -0
  34. ot/config/defaults/diagram-templates/microservices.d2 +81 -0
  35. ot/config/defaults/diagram-templates/project-gantt.mmd +37 -0
  36. ot/config/defaults/diagram-templates/state-machine.mmd +42 -0
  37. ot/config/defaults/onetool.yaml +25 -0
  38. ot/config/defaults/prompts.yaml +97 -0
  39. ot/config/defaults/servers.yaml +7 -0
  40. ot/config/defaults/snippets.yaml +4 -0
  41. ot/config/defaults/tool_templates/__init__.py +7 -0
  42. ot/config/defaults/tool_templates/extension.py +52 -0
  43. ot/config/defaults/tool_templates/isolated.py +61 -0
  44. ot/config/dynamic.py +121 -0
  45. ot/config/global_templates/__init__.py +2 -0
  46. ot/config/global_templates/bench-secrets-template.yaml +6 -0
  47. ot/config/global_templates/bench.yaml +9 -0
  48. ot/config/global_templates/onetool.yaml +27 -0
  49. ot/config/global_templates/secrets-template.yaml +44 -0
  50. ot/config/global_templates/servers.yaml +18 -0
  51. ot/config/global_templates/snippets.yaml +235 -0
  52. ot/config/loader.py +1087 -0
  53. ot/config/mcp.py +145 -0
  54. ot/config/secrets.py +190 -0
  55. ot/config/tool_config.py +125 -0
  56. ot/decorators.py +116 -0
  57. ot/executor/__init__.py +35 -0
  58. ot/executor/base.py +16 -0
  59. ot/executor/fence_processor.py +83 -0
  60. ot/executor/linter.py +142 -0
  61. ot/executor/pack_proxy.py +260 -0
  62. ot/executor/param_resolver.py +140 -0
  63. ot/executor/pep723.py +288 -0
  64. ot/executor/result_store.py +369 -0
  65. ot/executor/runner.py +496 -0
  66. ot/executor/simple.py +163 -0
  67. ot/executor/tool_loader.py +396 -0
  68. ot/executor/validator.py +398 -0
  69. ot/executor/worker_pool.py +388 -0
  70. ot/executor/worker_proxy.py +189 -0
  71. ot/http_client.py +145 -0
  72. ot/logging/__init__.py +37 -0
  73. ot/logging/config.py +315 -0
  74. ot/logging/entry.py +213 -0
  75. ot/logging/format.py +188 -0
  76. ot/logging/span.py +349 -0
  77. ot/meta.py +1555 -0
  78. ot/paths.py +453 -0
  79. ot/prompts.py +218 -0
  80. ot/proxy/__init__.py +21 -0
  81. ot/proxy/manager.py +396 -0
  82. ot/py.typed +0 -0
  83. ot/registry/__init__.py +189 -0
  84. ot/registry/models.py +57 -0
  85. ot/registry/parser.py +269 -0
  86. ot/registry/registry.py +413 -0
  87. ot/server.py +315 -0
  88. ot/shortcuts/__init__.py +15 -0
  89. ot/shortcuts/aliases.py +87 -0
  90. ot/shortcuts/snippets.py +258 -0
  91. ot/stats/__init__.py +35 -0
  92. ot/stats/html.py +250 -0
  93. ot/stats/jsonl_writer.py +283 -0
  94. ot/stats/reader.py +354 -0
  95. ot/stats/timing.py +57 -0
  96. ot/support.py +63 -0
  97. ot/tools.py +114 -0
  98. ot/utils/__init__.py +81 -0
  99. ot/utils/batch.py +161 -0
  100. ot/utils/cache.py +120 -0
  101. ot/utils/deps.py +403 -0
  102. ot/utils/exceptions.py +23 -0
  103. ot/utils/factory.py +179 -0
  104. ot/utils/format.py +65 -0
  105. ot/utils/http.py +202 -0
  106. ot/utils/platform.py +45 -0
  107. ot/utils/sanitize.py +130 -0
  108. ot/utils/truncate.py +69 -0
  109. ot_tools/__init__.py +4 -0
  110. ot_tools/_convert/__init__.py +12 -0
  111. ot_tools/_convert/excel.py +279 -0
  112. ot_tools/_convert/pdf.py +254 -0
  113. ot_tools/_convert/powerpoint.py +268 -0
  114. ot_tools/_convert/utils.py +358 -0
  115. ot_tools/_convert/word.py +283 -0
  116. ot_tools/brave_search.py +604 -0
  117. ot_tools/code_search.py +736 -0
  118. ot_tools/context7.py +495 -0
  119. ot_tools/convert.py +614 -0
  120. ot_tools/db.py +415 -0
  121. ot_tools/diagram.py +1604 -0
  122. ot_tools/diagram.yaml +167 -0
  123. ot_tools/excel.py +1372 -0
  124. ot_tools/file.py +1348 -0
  125. ot_tools/firecrawl.py +732 -0
  126. ot_tools/grounding_search.py +646 -0
  127. ot_tools/package.py +604 -0
  128. ot_tools/py.typed +0 -0
  129. ot_tools/ripgrep.py +544 -0
  130. ot_tools/scaffold.py +471 -0
  131. ot_tools/transform.py +213 -0
  132. ot_tools/web_fetch.py +384 -0
@@ -0,0 +1,736 @@
1
+ """Semantic code search using ChunkHound indexes.
2
+
3
+ Queries existing ChunkHound DuckDB databases for semantic code search.
4
+ Requires projects to be indexed externally with `chunkhound index <project>`.
5
+ Requires OPENAI_API_KEY in secrets.yaml for embedding generation.
6
+
7
+ Reference: https://github.com/chunkhound/chunkhound
8
+ """
9
+
10
+ from __future__ import annotations
11
+
12
+ import logging
13
+ import threading
14
+ from functools import lru_cache
15
+ from typing import TYPE_CHECKING, Any
16
+
17
+ # Pack for dot notation: code.search(), code.status()
18
+ pack = "code"
19
+
20
+ __all__ = ["search", "search_batch", "status"]
21
+
22
+ # Dependency declarations for CLI validation
23
+ __ot_requires__ = {
24
+ "lib": [
25
+ ("duckdb", "pip install duckdb"),
26
+ ("openai", "pip install openai"),
27
+ ],
28
+ "secrets": ["OPENAI_API_KEY"],
29
+ }
30
+
31
+ from pydantic import BaseModel, Field
32
+
33
+ from ot.config import get_tool_config
34
+ from ot.config.secrets import get_secret
35
+ from ot.logging import LogSpan
36
+ from ot.paths import resolve_cwd_path
37
+
38
+ if TYPE_CHECKING:
39
+ from pathlib import Path
40
+ from types import ModuleType
41
+
42
+ from openai import OpenAI
43
+
44
+ logger = logging.getLogger(__name__)
45
+
46
+ # Thread lock for connection cache operations
47
+ _connection_lock = threading.Lock()
48
+
49
+
50
class Config(BaseModel):
    """Pack configuration - discovered by registry."""

    # Default result count used by search()/search_batch() when the caller
    # does not pass an explicit limit.
    limit: int = Field(
        default=10,
        ge=1,
        le=100,
        description="Maximum number of search results to return",
    )
    # NOTE(review): default points at OpenRouter, not api.openai.com —
    # confirm it matches the provider used to build the ChunkHound index.
    base_url: str = Field(
        default="https://openrouter.ai/api/v1",
        description="OpenAI-compatible API base URL for embeddings",
    )
    model: str = Field(
        default="text-embedding-3-small",
        description="Embedding model (must match ChunkHound index)",
    )
    # Resolved relative to the project root in _get_db_path().
    db_path: str = Field(
        default=".chunkhound/chunks.db",
        description="Path to ChunkHound DuckDB database relative to project root",
    )
    provider: str = Field(
        default="openai",
        description="Embedding provider stored in ChunkHound index",
    )
    # Also selects the embeddings table name: embeddings_<dimensions>.
    dimensions: int = Field(
        default=1536,
        description="Embedding dimensions (must match model)",
    )
    content_limit: int = Field(
        default=500,
        ge=100,
        le=10000,
        description="Maximum characters of code content to return (without expand)",
    )
    content_limit_expanded: int = Field(
        default=2000,
        ge=500,
        le=20000,
        description="Maximum characters of code content to return (with expand)",
    )
91
+
92
+
93
def _get_config() -> Config:
    """Load the `code` pack configuration through the registry."""
    cfg = get_tool_config("code", Config)
    return cfg
96
+
97
+
98
def _get_db_path(path: str | None = None, db: str | None = None) -> tuple[Path, Path]:
    """Resolve the ChunkHound DuckDB path together with the project root.

    Uses SDK resolve_cwd_path() for consistent path resolution.

    Path resolution follows project conventions:
    - If path is None: uses project directory (OT_CWD)
    - If path provided: resolves with prefix/tilde expansion
    - If db is None: uses config.db_path (default: .chunkhound/chunks.db)
    - If db provided: uses that path relative to project root

    Args:
        path: Path to project root (default: OT_CWD)
        db: Path to database file relative to project root (default: config.db_path)

    Returns:
        Tuple of (db_path, project_root)
    """
    cfg = _get_config()
    root = resolve_cwd_path(path if path is not None else ".")
    relative = db if db is not None else cfg.db_path
    return root / relative, root
121
+
122
+
123
def _get_openai_client() -> OpenAI:
    """Construct the OpenAI client used for embedding generation.

    Raises:
        ImportError: If the openai package is not installed.
        ValueError: If OPENAI_API_KEY is not present in secrets.
    """
    # Import first so a missing package is reported before a missing key.
    try:
        from openai import OpenAI
    except ImportError as e:
        raise ImportError(
            "openai is required for code_search. Install with: pip install openai"
        ) from e

    key = get_secret("OPENAI_API_KEY") or ""
    if not key:
        raise ValueError(
            "OPENAI_API_KEY not configured in secrets.yaml (required for code search embeddings)"
        )
    cfg = _get_config()
    # An empty base_url falls back to the library's default endpoint.
    return OpenAI(api_key=key, base_url=cfg.base_url or None)
139
+
140
+
141
+ def _import_duckdb() -> ModuleType:
142
+ """Lazy import duckdb module."""
143
+ try:
144
+ import duckdb
145
+ except ImportError as e:
146
+ raise ImportError(
147
+ "duckdb is required for code_search. Install with: pip install duckdb"
148
+ ) from e
149
+ return duckdb
150
+
151
+
152
@lru_cache(maxsize=4)
def _get_cached_connection(db_path: str) -> Any:
    """Open (or reuse) a read-only DuckDB connection with the vss extension.

    Connections are cached per path and reused; call _clear_connection_cache()
    after the index is rebuilt so stale handles are dropped.

    Args:
        db_path: Path to the DuckDB database file.

    Returns:
        DuckDB connection with vss extension loaded.

    Raises:
        RuntimeError: If VSS extension cannot be loaded.
    """
    conn = _import_duckdb().connect(db_path, read_only=True)
    try:
        conn.execute("LOAD vss")
    except Exception as e:
        # Never cache a half-initialized connection.
        conn.close()
        lowered = str(e).lower()
        if "vss" in lowered or "extension" in lowered:
            raise RuntimeError(
                "DuckDB VSS extension not available.\n"
                "Install with: pip install duckdb # Version 0.9+ includes vss"
            ) from e
        raise
    return conn
181
+
182
+
183
def _clear_connection_cache() -> None:
    """Drop every cached DuckDB connection (use after an index rebuild)."""
    _connection_lock.acquire()
    try:
        _get_cached_connection.cache_clear()
    finally:
        _connection_lock.release()
187
+
188
+
189
def _validate_and_connect(
    db_path: Path,
    project_root: Path,
    config: Config,
) -> tuple[Any, str]:
    """Check the index database exists and exposes the expected tables.

    Args:
        db_path: Path to the DuckDB database file.
        project_root: Path to the project root directory.
        config: Pack configuration.

    Returns:
        Tuple of (connection, embeddings_table_name).

    Raises:
        ValueError: If validation fails with user-friendly message.
    """
    if not db_path.exists():
        raise ValueError(
            f"Project not indexed. Run: chunkhound index {project_root}\n"
            f"Expected database at: {db_path}"
        )

    with _connection_lock:
        conn = _get_cached_connection(str(db_path))

    existing = {row[0] for row in conn.execute("SHOW TABLES").fetchall()}
    embeddings_table = f"embeddings_{config.dimensions}"

    # Check tables in the same order the original errors were reported.
    for required in ("chunks", embeddings_table):
        if required not in existing:
            raise ValueError(
                f"Database missing '{required}' table. Re-index with: chunkhound index {project_root}"
            )

    return conn, embeddings_table
229
+
230
+
231
+ def _build_search_sql(
232
+ embeddings_table: str,
233
+ dimensions: int,
234
+ provider: str,
235
+ model: str,
236
+ language: str | None = None,
237
+ chunk_type: str | None = None,
238
+ exclude: str | None = None,
239
+ ) -> tuple[str, list[Any]]:
240
+ """Build semantic search SQL query.
241
+
242
+ Args:
243
+ embeddings_table: Name of the embeddings table.
244
+ dimensions: Embedding dimensions.
245
+ provider: Embedding provider.
246
+ model: Embedding model.
247
+ language: Optional language filter.
248
+ chunk_type: Optional chunk type filter.
249
+ exclude: Optional pipe-separated exclude patterns.
250
+
251
+ Returns:
252
+ Tuple of (sql_template, params). Caller must prepend embedding param
253
+ and append limit param.
254
+ """
255
+ sql = f"""
256
+ SELECT
257
+ c.id as chunk_id,
258
+ c.symbol,
259
+ c.code as content,
260
+ c.chunk_type,
261
+ c.start_line,
262
+ c.end_line,
263
+ f.path as file_path,
264
+ f.language,
265
+ array_cosine_similarity(e.embedding, ?::FLOAT[{dimensions}]) as similarity
266
+ FROM {embeddings_table} e
267
+ JOIN chunks c ON e.chunk_id = c.id
268
+ JOIN files f ON c.file_id = f.id
269
+ WHERE e.provider = ? AND e.model = ?
270
+ """
271
+ params: list[Any] = [provider, model]
272
+
273
+ if language:
274
+ sql += " AND LOWER(f.language) = LOWER(?)"
275
+ params.append(language)
276
+
277
+ if chunk_type:
278
+ sql += " AND LOWER(c.chunk_type) = LOWER(?)"
279
+ params.append(chunk_type)
280
+
281
+ if exclude:
282
+ for pattern in (p.strip() for p in exclude.split("|") if p.strip()):
283
+ sql += " AND f.path NOT LIKE ?"
284
+ params.append(f"%{pattern}%")
285
+
286
+ return sql, params
287
+
288
+
289
+ def _row_to_result(row: tuple, matched_query: str | None = None) -> dict[str, Any]:
290
+ """Convert a database row to a result dictionary.
291
+
292
+ Args:
293
+ row: Tuple from database query.
294
+ matched_query: Optional query that matched this result (for batch).
295
+
296
+ Returns:
297
+ Result dictionary with standardized keys.
298
+ """
299
+ result = {
300
+ "chunk_id": row[0],
301
+ "symbol": row[1],
302
+ "content": row[2],
303
+ "chunk_type": row[3],
304
+ "start_line": row[4],
305
+ "end_line": row[5],
306
+ "file_path": row[6],
307
+ "language": row[7],
308
+ "similarity": row[8],
309
+ }
310
+ if matched_query is not None:
311
+ result["matched_query"] = matched_query
312
+ return result
313
+
314
+
315
def _generate_embedding(query: str) -> list[float]:
    """Embed one search query via the configured OpenAI-compatible API."""
    cfg = _get_config()
    with LogSpan(span="code.embedding", model=cfg.model, queryLen=len(query)) as span:
        response = _get_openai_client().embeddings.create(
            model=cfg.model,
            input=query,
        )
        vector = response.data[0].embedding
        span.add(dimensions=len(vector))
        return vector
326
+
327
+
328
def _generate_embeddings_batch(queries: list[str]) -> list[list[float]]:
    """Embed several queries at once — one API call instead of one per query."""
    cfg = _get_config()
    with LogSpan(
        span="code.embedding_batch", model=cfg.model, queryCount=len(queries)
    ) as span:
        client = _get_openai_client()
        data = client.embeddings.create(model=cfg.model, input=queries).data
        vectors = [entry.embedding for entry in data]
        span.add(dimensions=len(vectors[0]) if vectors else 0)
        return vectors
342
+
343
+
344
def _format_result(
    result: dict[str, Any],
    project_root: Path | None = None,
    expand: int | None = None,
) -> dict[str, Any]:
    """Shape a raw search row into the record returned to callers.

    Args:
        result: Raw search result from database
        project_root: Project root for file reading (needed for expand)
        expand: Number of context lines to include around match

    Returns:
        Formatted result dict. Content is truncated to `content_limit` chars
        (default 500) or `content_limit_expanded` chars (default 2000) when
        expand is used. These limits are configurable via pack config.
    """
    cfg = _get_config()
    snippet = result.get("content", "")
    first = result.get("start_line")
    last = result.get("end_line")

    # Optionally widen the snippet by re-reading the source file around the match.
    if expand and project_root and first and last:
        source = project_root / result.get("file_path", "")
        if source.exists():
            try:
                all_lines = source.read_text().splitlines()
                # Convert 1-indexed chunk bounds to a 0-indexed expanded slice.
                lo = max(0, first - 1 - expand)
                hi = min(len(all_lines), last + expand)
                snippet = "\n".join(all_lines[lo:hi])
                first, last = lo + 1, hi
            except Exception as e:
                # Expansion is best-effort; fall back to the stored chunk content.
                logger.debug("Failed to expand content from %s: %s", source, e)

    # Expanded snippets get the larger configured cap.
    cap = cfg.content_limit_expanded if expand else cfg.content_limit

    return {
        "file": result.get("file_path", "unknown"),
        "name": result.get("symbol", ""),
        "type": result.get("chunk_type", ""),
        "language": result.get("language", ""),
        "lines": f"{first or '?'}-{last or '?'}",
        "score": round(result.get("similarity", 0.0), 4),
        "content": snippet[:cap],
    }
394
+
395
+
396
def search(
    *,
    query: str,
    limit: int | None = None,
    language: str | None = None,
    chunk_type: str | None = None,
    expand: int | None = None,
    exclude: str | None = None,
    path: str | None = None,
    db: str | None = None,
) -> str:
    """Search for code semantically in a ChunkHound-indexed project.

    Finds code by meaning rather than exact keyword matches. For example,
    searching for "authentication" can find functions named `verify_jwt_token`.

    Requires the project to be indexed first with:
        chunkhound index /path/to/project

    Args:
        query: Natural language search query (e.g., "error handling", "database connection")
        limit: Maximum number of results to return (defaults to config)
        language: Filter results by language (e.g., "python", "typescript")
        chunk_type: Filter by type (e.g., "function", "class", "method", "comment")
        expand: Number of context lines to include around each match
        exclude: Pipe-separated patterns to exclude (e.g., "test|mock|fixture")
        path: Path to project root (default: cwd)
        db: Path to database file relative to project root (default: .chunkhound/chunks.db)

    Returns:
        Formatted search results with file paths, line numbers, code snippets,
        and relevance scores. Returns error message if project not indexed.

    Example:
        # Search in current directory
        code.search(query="authentication logic")

        # Find Python functions only
        code.search(query="database queries", language="python", chunk_type="function")

        # Get expanded context
        code.search(query="error handling", expand=10)

        # Exclude test files
        code.search(query="validation", exclude="test|mock")
    """
    if limit is None:
        # Consistency fix: use the shared _get_config() helper instead of
        # duplicating the get_tool_config("code", Config) call (same lookup).
        limit = _get_config().limit
    db_path, project_root = _get_db_path(path, db)

    with LogSpan(
        span="code.search",
        project=str(project_root),
        query=query,
        limit=limit,
        language=language,
        chunk_type=chunk_type,
        expand=expand,
        exclude=exclude,
    ) as s:
        try:
            # Validate database and get connection
            config = _get_config()
            conn, embeddings_table = _validate_and_connect(db_path, project_root, config)

            # Generate query embedding
            embedding = _generate_embedding(query)

            # Build semantic search query
            sql, params = _build_search_sql(
                embeddings_table=embeddings_table,
                dimensions=config.dimensions,
                provider=config.provider,
                model=config.model,
                language=language,
                chunk_type=chunk_type,
                exclude=exclude,
            )

            # Prepend embedding (first `?` placeholder) and append limit (last)
            params = [embedding, *params, limit]
            sql += " ORDER BY similarity DESC LIMIT ?"

            # Execute search
            results = conn.execute(sql, params).fetchall()

            if not results:
                s.add("resultCount", 0)
                return f"No results found for: {query}"

            # Format results
            formatted = [
                _format_result(_row_to_result(row), project_root, expand)
                for row in results
            ]

            # Build output
            output_lines = [f"Found {len(formatted)} results for: {query}\n"]
            for i, r in enumerate(formatted, 1):
                output_lines.append(
                    f"{i}. [{r['type']}] {r['name']} ({r['language']})\n"
                    f" File: {r['file']}:{r['lines']}\n"
                    f" Score: {r['score']}\n"
                    f" ```\n{r['content']}\n ```\n"
                )

            output = "\n".join(output_lines)
            s.add("resultCount", len(formatted))
            s.add("outputLen", len(output))
            return output

        except ValueError as e:
            # Validation errors (not indexed, missing tables)
            s.add("error", "validation_failed")
            return f"Error: {e}"
        except Exception as e:
            s.add("error", str(e))
            return f"Error searching code: {e}"
514
+
515
+
516
def search_batch(
    *,
    queries: str,
    limit: int | None = None,
    language: str | None = None,
    chunk_type: str | None = None,
    expand: int | None = None,
    exclude: str | None = None,
    path: str | None = None,
    db: str | None = None,
) -> str:
    """Run multiple semantic searches and return merged, deduplicated results.

    Uses batch embedding API (single call) for efficiency. Results are
    deduplicated by file+lines, keeping the highest score.

    Args:
        queries: Pipe-separated search queries (e.g., "auth logic|token validation|session")
        limit: Maximum results per query (defaults to config)
        language: Filter by language (e.g., "python")
        chunk_type: Filter by type (e.g., "function", "class")
        expand: Number of context lines to include around each match
        exclude: Pipe-separated patterns to exclude (e.g., "test|mock")
        path: Path to project root (default: cwd)
        db: Path to database file relative to project root (default: .chunkhound/chunks.db)

    Returns:
        Merged results sorted by score, with duplicates removed.

    Example:
        # Multiple related queries
        code.search_batch(queries="authentication|login|session handling")

        # Exclude test files
        code.search_batch(queries="error handling|validation", exclude="test|mock")
    """
    if limit is None:
        # Consistency fix: use the shared _get_config() helper instead of
        # duplicating the get_tool_config("code", Config) call (same lookup).
        limit = _get_config().limit
    db_path, project_root = _get_db_path(path, db)

    # Parse pipe-separated queries
    query_list = [q.strip() for q in queries.split("|") if q.strip()]
    if not query_list:
        return "Error: No valid queries provided"

    with LogSpan(
        span="code.search_batch",
        project=str(project_root),
        queryCount=len(query_list),
        limit=limit,
        exclude=exclude,
    ) as s:
        try:
            # Validate database and get connection
            config = _get_config()
            conn, embeddings_table = _validate_and_connect(db_path, project_root, config)

            # Generate all embeddings in a single API call
            embeddings = _generate_embeddings_batch(query_list)

            # Build base SQL query (reused for all queries)
            base_sql, base_params = _build_search_sql(
                embeddings_table=embeddings_table,
                dimensions=config.dimensions,
                provider=config.provider,
                model=config.model,
                language=language,
                chunk_type=chunk_type,
                exclude=exclude,
            )
            base_sql += " ORDER BY similarity DESC LIMIT ?"

            # Collect all results, deduplicated by file + line range
            all_results: dict[str, dict[str, Any]] = {}  # key: file:lines

            for query, embedding in zip(query_list, embeddings, strict=True):
                # Prepend embedding and append limit
                params = [embedding, *base_params, limit]
                results = conn.execute(base_sql, params).fetchall()

                for row in results:
                    result = _row_to_result(row, matched_query=query)
                    # Dedupe key: file path + line range; keep the best score
                    # (row[6]=file_path, row[4]/row[5]=lines, row[8]=similarity)
                    key = f"{row[6]}:{row[4]}-{row[5]}"
                    if key not in all_results or row[8] > all_results[key]["similarity"]:
                        all_results[key] = result

            if not all_results:
                s.add("resultCount", 0)
                return f"No results found for queries: {', '.join(query_list)}"

            # Sort by similarity and format
            sorted_results = sorted(
                all_results.values(), key=lambda x: x["similarity"], reverse=True
            )
            formatted = [
                _format_result(r, project_root, expand) for r in sorted_results
            ]

            # Build output
            output_lines = [
                f"Found {len(formatted)} results for {len(query_list)} queries\n"
            ]
            for i, r in enumerate(formatted, 1):
                output_lines.append(
                    f"{i}. [{r['type']}] {r['name']} ({r['language']})\n"
                    f" File: {r['file']}:{r['lines']}\n"
                    f" Score: {r['score']}\n"
                    f" ```\n{r['content']}\n ```\n"
                )

            output = "\n".join(output_lines)
            s.add("resultCount", len(formatted))
            s.add("outputLen", len(output))
            return output

        except ValueError as e:
            # Validation errors (not indexed, missing tables)
            s.add("error", "validation_failed")
            return f"Error: {e}"
        except Exception as e:
            s.add("error", str(e))
            return f"Error in batch search: {e}"
639
+
640
+
641
def status(*, path: str | None = None, db: str | None = None) -> str:
    """Check if a project has a ChunkHound index and show statistics.

    Args:
        path: Path to project root (default: cwd)
        db: Path to database file relative to project root (default: .chunkhound/chunks.db)

    Returns:
        Index statistics (file count, chunk count, languages) or
        instructions for indexing if not indexed.

    Example:
        # Current directory
        code.status()

        # Explicit path
        code.status(path="/path/to/project")
    """
    db_path, project_root = _get_db_path(path, db)

    with LogSpan(span="code.status", project=str(project_root)) as s:
        # Missing database is a normal state, not an error.
        if not db_path.exists():
            s.add("indexed", False)
            return (
                f"Project not indexed.\n\n"
                f"To enable semantic code search, run:\n"
                f" chunkhound index {project_root}\n\n"
                f"This creates a searchable index at:\n"
                f" {db_path}"
            )

        try:
            # Lock only guards the lru_cache-backed connection lookup.
            with _connection_lock:
                conn = _get_cached_connection(str(db_path))
            tables = [row[0] for row in conn.execute("SHOW TABLES").fetchall()]

            stats: dict[str, object] = {"tables": tables, "indexed": True}

            # Get chunk statistics
            if "chunks" in tables:
                chunk_count = conn.execute("SELECT COUNT(*) FROM chunks").fetchone()[0]
                stats["chunk_count"] = chunk_count

                # Get language distribution
                try:
                    lang_results = conn.execute("""
                        SELECT f.language, COUNT(*) as cnt
                        FROM chunks c
                        JOIN files f ON c.file_id = f.id
                        GROUP BY f.language
                        ORDER BY cnt DESC
                    """).fetchall()
                    stats["languages"] = {row[0]: row[1] for row in lang_results}
                except Exception:
                    pass  # Language stats are optional

            # Get file statistics
            if "files" in tables:
                file_count = conn.execute("SELECT COUNT(*) FROM files").fetchone()[0]
                stats["file_count"] = file_count

            # Get embedding statistics
            # Note: embeddings_table is safe - derived from validated config.dimensions (int)
            config = _get_config()
            embeddings_table = f"embeddings_{config.dimensions}"
            if embeddings_table in tables:
                emb_count = conn.execute(
                    f"SELECT COUNT(*) FROM {embeddings_table}"
                ).fetchone()[0]
                stats["embedding_count"] = emb_count

            # Format output
            output_lines = [
                f"Project indexed: {project_root}\n",
                f"Database: {db_path}\n",
            ]

            if "file_count" in stats:
                output_lines.append(f"Files: {stats['file_count']}")
            if "chunk_count" in stats:
                output_lines.append(f"Chunks: {stats['chunk_count']}")
            if "embedding_count" in stats:
                output_lines.append(f"Embeddings: {stats['embedding_count']}")
            if "languages" in stats:
                langs = ", ".join(f"{k}: {v}" for k, v in stats["languages"].items())
                output_lines.append(f"Languages: {langs}")

            output_lines.append(f"\nTables: {', '.join(tables)}")

            # Mirror every collected stat onto the log span.
            for key, value in stats.items():
                s.add(key, value)
            return "\n".join(output_lines)

        except Exception as e:
            s.add("error", str(e))
            return f"Error reading index: {e}"