codexlr8 0.0.2__tar.gz → 0.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. {codexlr8-0.0.2 → codexlr8-0.0.3}/PKG-INFO +23 -1
  2. {codexlr8-0.0.2 → codexlr8-0.0.3}/README.md +20 -0
  3. {codexlr8-0.0.2 → codexlr8-0.0.3}/pyproject.toml +4 -1
  4. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/__init__.py +1 -1
  5. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/cli.py +136 -0
  6. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/config.py +6 -0
  7. codexlr8-0.0.3/src/codexlr8/embeddings.py +147 -0
  8. codexlr8-0.0.3/src/codexlr8/eval.py +246 -0
  9. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/search.py +157 -4
  10. codexlr8-0.0.3/src/codexlr8/train.py +175 -0
  11. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8.egg-info/PKG-INFO +23 -1
  12. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8.egg-info/SOURCES.txt +4 -0
  13. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8.egg-info/requires.txt +3 -0
  14. codexlr8-0.0.3/tests/test_eval.py +209 -0
  15. {codexlr8-0.0.2 → codexlr8-0.0.3}/tests/test_search.py +24 -0
  16. {codexlr8-0.0.2 → codexlr8-0.0.3}/LICENSE +0 -0
  17. {codexlr8-0.0.2 → codexlr8-0.0.3}/setup.cfg +0 -0
  18. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/mcp_server.py +0 -0
  19. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/meta.py +0 -0
  20. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/scanner.py +0 -0
  21. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8.egg-info/dependency_links.txt +0 -0
  22. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8.egg-info/entry_points.txt +0 -0
  23. {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8.egg-info/top_level.txt +0 -0
  24. {codexlr8-0.0.2 → codexlr8-0.0.3}/tests/test_mcp_server.py +0 -0
  25. {codexlr8-0.0.2 → codexlr8-0.0.3}/tests/test_meta.py +0 -0
  26. {codexlr8-0.0.2 → codexlr8-0.0.3}/tests/test_scanner.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codexlr8
3
- Version: 0.0.2
3
+ Version: 0.0.3
4
4
  Summary: A codebase search engine for LLM coding agents
5
5
  Author-email: Sadig Akhund <sadigaxund@gmail.com>
6
6
  License: Apache-2.0
@@ -25,6 +25,8 @@ Requires-Dist: mcp>=1.0
25
25
  Provides-Extra: dev
26
26
  Requires-Dist: pytest>=7.0; extra == "dev"
27
27
  Requires-Dist: pytest-cov>=4.0; extra == "dev"
28
+ Provides-Extra: embeddings
29
+ Requires-Dist: sentence-transformers>=3.0; extra == "embeddings"
28
30
  Dynamic: license-file
29
31
 
30
32
  # CodeXLR8
@@ -95,6 +97,26 @@ codexlr8 search . "axes not hiding" --explain
95
97
  # Combine both — group, then scope to drill down
96
98
  ```
97
99
 
100
+ ### Search Quality & Fine-Tuning
101
+
102
+ ```bash
103
+ # Measure search accuracy against known queries
104
+ codexlr8 eval . --queries queries.json
105
+ # Precision@1: 67%, MRR: 0.83, Recall@5: 67%
106
+
107
+ # Typos are auto-corrected (fuzzy fallback on zero results)
108
+ codexlr8 search . "funtion" # → corrects to "function"
109
+
110
+ # Opt-in embeddings: hybrid BM25 + semantic search
111
+ # pip install codexlr8[embeddings]
112
+ # set embeddings.enabled: true in .codexlr8.yaml
113
+
114
+ # Fine-tune a model on YOUR codebase vocabulary
115
+ codexlr8 recommend-model . # picks the right model for your size
116
+ codexlr8 train . # TSDAE training, 5-45min on CPU
117
+ codexlr8 eval . # measure improvement
118
+ ```
119
+
98
120
  ## .meta.yaml Sidecars
99
121
 
100
122
  Optional YAML files next to source files, created by `codexlr8 init`:
@@ -66,6 +66,26 @@ codexlr8 search . "axes not hiding" --explain
66
66
  # Combine both — group, then scope to drill down
67
67
  ```
68
68
 
69
+ ### Search Quality & Fine-Tuning
70
+
71
+ ```bash
72
+ # Measure search accuracy against known queries
73
+ codexlr8 eval . --queries queries.json
74
+ # Precision@1: 67%, MRR: 0.83, Recall@5: 67%
75
+
76
+ # Typos are auto-corrected (fuzzy fallback on zero results)
77
+ codexlr8 search . "funtion" # → corrects to "function"
78
+
79
+ # Opt-in embeddings: hybrid BM25 + semantic search
80
+ # pip install codexlr8[embeddings]
81
+ # set embeddings.enabled: true in .codexlr8.yaml
82
+
83
+ # Fine-tune a model on YOUR codebase vocabulary
84
+ codexlr8 recommend-model . # picks the right model for your size
85
+ codexlr8 train . # TSDAE training, 5-45min on CPU
86
+ codexlr8 eval . # measure improvement
87
+ ```
88
+
69
89
  ## .meta.yaml Sidecars
70
90
 
71
91
  Optional YAML files next to source files, created by `codexlr8 init`:
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codexlr8"
7
- version = "0.0.2"
7
+ version = "0.0.3"
8
8
  description = "A codebase search engine for LLM coding agents"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -37,6 +37,9 @@ dev = [
37
37
  "pytest>=7.0",
38
38
  "pytest-cov>=4.0",
39
39
  ]
40
+ embeddings = [
41
+ "sentence-transformers>=3.0",
42
+ ]
40
43
 
41
44
  [project.scripts]
42
45
  codexlr8 = "codexlr8.cli:main"
@@ -1,3 +1,3 @@
1
1
  """CodeXLR8 — A codebase search engine for LLM coding agents."""
2
2
 
3
- __version__ = "0.0.2"
3
+ __version__ = "0.0.3"
@@ -202,6 +202,140 @@ def status(project_path: str):
202
202
  click.secho(f" Warning: {state['warning']}", fg="yellow")
203
203
 
204
204
 
205
+ @main.command()
206
+ @click.argument("project_path", type=click.Path(exists=True, file_okay=False))
207
+ @click.option("--queries", "-q", required=True,
208
+ type=click.Path(exists=True, dir_okay=False),
209
+ help="Path to JSON file with query definitions")
210
+ @click.option("--limit", "-n", default=10,
211
+ help="Max results per query (default: 10)")
212
+ def eval_cmd(project_path: str, queries: str, limit: int):
213
+ """Evaluate search quality against a query set.
214
+
215
+ QUERIES is a JSON file with an array of query objects:
216
+ [{"query": "...", "expected": "path/to/file.py", "min_rank": 1}]
217
+
218
+ Outputs a per-query pass/fail table and aggregate metrics:
219
+ Precision@1, Mean Reciprocal Rank (MRR), Recall@5.
220
+ """
221
+ from .eval import load_queries, run_eval
222
+ import json
223
+
224
+ try:
225
+ query_defs = load_queries(queries)
226
+ except (json.JSONDecodeError, ValueError) as e:
227
+ raise click.ClickException(f"Invalid queries file: {e}")
228
+
229
+ if not query_defs:
230
+ raise click.ClickException("Queries file contains no queries.")
231
+
232
+ metrics = run_eval(project_path, query_defs, limit=limit)
233
+
234
+ # Per-query table
235
+ click.secho(" Query Expected Mode Lines Rank Score Status", fg="cyan", bold=True)
236
+ click.secho(" " + "─" * 105, fg="cyan")
237
+
238
+ for r in metrics["results"]:
239
+ query_str = f'"{r["query"]}"'.ljust(34)
240
+ expected_str = r["expected"].ljust(20)
241
+ mode_str = r.get("assert", "file").ljust(7)
242
+ lines_str = ""
243
+ if r.get("line_start"):
244
+ lines_str = f"{r['line_start']}-{r['line_end']}".ljust(8)
245
+ else:
246
+ lines_str = "—".ljust(8)
247
+ rank_str = str(r["rank"]).ljust(6) if r["rank"] else "— "
248
+ score_str = f'{r["score"]:.2f}'.ljust(8) if r["score"] else "— "
249
+ status = r["status"]
250
+
251
+ if status.startswith("pass"):
252
+ status_style = {"fg": "green"}
253
+ elif "found" in status:
254
+ status_style = {"fg": "yellow"}
255
+ else:
256
+ status_style = {"fg": "red"}
257
+
258
+ click.echo(f" {query_str} {expected_str} {mode_str} {lines_str} {rank_str} {score_str} {click.style(status, **status_style)}")
259
+
260
+ # Aggregate metrics
261
+ click.echo()
262
+ click.echo(click.style(" " + "─" * 40, fg="cyan"))
263
+ click.secho(f" Precision@1: {metrics['precision_at_1']:.2%} "
264
+ f"({metrics['passed']}/{metrics['num_queries']} passed)", fg="green")
265
+ click.secho(f" MRR: {metrics['mrr']:.4f}", fg="green")
266
+ click.secho(f" Recall@5: {metrics['recall_at_5']:.2%}", fg="green")
267
+
268
+
269
+ @main.command()
270
+ @click.argument("project_path", type=click.Path(exists=True, file_okay=False), default=".")
271
+ @click.option("--model", "-m", default="all-MiniLM-L6-v2",
272
+ help="Embedding model to fine-tune")
273
+ @click.option("--epochs", "-e", default=3,
274
+ help="Training epochs (default: 3)")
275
+ @click.option("--incremental", "-i", is_flag=True, default=False,
276
+ help="Fine-tune only on changed files")
277
+ def train(project_path: str, model: str, epochs: int, incremental: bool):
278
+ """Fine-tune an embedding model on this codebase for better search accuracy.
279
+
280
+ Uses TSDAE (denoising auto-encoder) to adapt a pretrained model to
281
+ your codebase's vocabulary. The fine-tuned model is saved to
282
+ .codexlr8_model/ and referenced in .codexlr8.yaml.
283
+
284
+ Requirements: pip install codexlr8[embeddings]
285
+ """
286
+ try:
287
+ from .train import train_model
288
+ except ImportError as e:
289
+ raise click.ClickException(
290
+ "Training requires 'pip install codexlr8[embeddings]'"
291
+ ) from e
292
+
293
+ click.echo()
294
+ click.secho(" Training embedding model on this codebase...", fg="cyan", bold=True)
295
+ click.echo(f" Model: {model}")
296
+ click.echo(f" Epochs: {epochs}")
297
+ click.echo()
298
+
299
+ try:
300
+ result = train_model(project_path, model_name=model,
301
+ epochs=epochs, incremental=incremental)
302
+ except ValueError as e:
303
+ raise click.ClickException(str(e))
304
+
305
+ dur = result["duration_sec"]
306
+ dur_str = f"{dur}s" if dur < 60 else f"{dur // 60}m{dur % 60}s"
307
+
308
+ click.echo()
309
+ click.secho(f" Trained on {result['num_examples']} files in {dur_str}", fg="green")
310
+ click.secho(f" Model saved to {result['model_path']}", fg="green")
311
+ click.secho(f" Embeddings enabled in .codexlr8.yaml", fg="green")
312
+ click.echo()
313
+ click.secho(" Run 'codexlr8 eval .' to measure improvement.", dim=True)
314
+
315
+
316
+ @main.command()
317
+ @click.argument("project_path", type=click.Path(exists=True, file_okay=False), default=".")
318
+ def recommend_model_cmd(project_path: str):
319
+ """Suggest the best embedding model for this codebase size."""
320
+ try:
321
+ from .train import recommend_model
322
+ except ImportError as e:
323
+ raise click.ClickException(
324
+ "Requires 'pip install codexlr8[embeddings]'"
325
+ ) from e
326
+
327
+ rec = recommend_model(project_path)
328
+
329
+ click.echo()
330
+ click.secho(f" Codebase: {rec['num_files']} files, ~{rec['est_tokens']:,} tokens", fg="cyan")
331
+ click.echo()
332
+ click.secho(f" Recommended: {rec['model']} ({rec['param_count']})", fg="green", bold=True)
333
+ click.echo(f" Est. training time: {rec['est_training_time']}")
334
+ click.echo(f" Expected quality gain: {rec.get('quality_gain', '+5-12% MRR')}")
335
+ click.echo()
336
+ click.secho(" Run 'codexlr8 train .' to start training.", dim=True)
337
+
338
+
205
339
  @main.command()
206
340
  @click.argument("project_path", type=click.Path(exists=True, file_okay=False), default=".")
207
341
  def setup(project_path: str):
@@ -702,6 +836,8 @@ Exclude patterns are globs that match file paths. Use `*` for wildcards.
702
836
  | Search within a directory | `codebase_search(query="...", scope="src/")` |
703
837
  | Cluster results by directory | Shell: `codexlr8 search . "query" --grouped` |
704
838
  | Diagnose query terms | Shell: `codexlr8 search . "query" --explain` |
839
+ | Measure search accuracy | Shell: `codexlr8 eval . --queries q.json` |
840
+ | Fine-tune embeddings | Shell: `codexlr8 train .` (needs `[embeddings]` extra) |
705
841
  | Build/update index | `codebase_index(incremental=true)` |
706
842
  | Check metadata coverage | Shell: `codexlr8 status .` |
707
843
  | Bootstrap missing sidecars | Shell: `codexlr8 init .` |
@@ -24,6 +24,12 @@ def load_config(project_path: str) -> dict:
24
24
  def _defaults() -> dict:
25
25
  return {
26
26
  "root": ".",
27
+ "fuzzy": True,
28
+ "embeddings": {
29
+ "enabled": False,
30
+ "model": "all-MiniLM-L6-v2",
31
+ "bm25_weight": 0.6,
32
+ },
27
33
  "include": [],
28
34
  "exclude": [
29
35
  "tests/*",
@@ -0,0 +1,147 @@
1
+ """Embedding provider — optional semantic search layer.
2
+
3
+ Requires optional dependencies: pip install codexlr8[embeddings]
4
+ Provides cosine-similarity reranking on top of BM25 results.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ import math
11
+ import os
12
+ import sqlite3
13
+
14
+ _EMBEDDING_AVAILABLE = False
15
+ _SentenceTransformer = None
16
+
17
+
18
+ def _check_deps() -> bool:
19
+ """Lazy-import sentence-transformers. Returns True if available."""
20
+ global _EMBEDDING_AVAILABLE, _SentenceTransformer
21
+ if _EMBEDDING_AVAILABLE:
22
+ return True
23
+ try:
24
+ from sentence_transformers import SentenceTransformer as ST
25
+ _SentenceTransformer = ST
26
+ _EMBEDDING_AVAILABLE = True
27
+ return True
28
+ except ImportError:
29
+ return False
30
+
31
+
32
+ class EmbeddingProvider:
33
+ """Lazy-loading sentence-transformers model for embedding text."""
34
+
35
+ def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
36
+ self.model_name = model_name
37
+ self._model = None
38
+
39
+ @property
40
+ def model(self):
41
+ if self._model is None:
42
+ if not _check_deps():
43
+ raise ImportError(
44
+ "Embeddings require 'pip install codexlr8[embeddings]' "
45
+ "(installs sentence-transformers)"
46
+ )
47
+ self._model = _SentenceTransformer(self.model_name)
48
+ return self._model
49
+
50
+ @property
51
+ def dims(self) -> int:
52
+ return self.model.get_sentence_embedding_dimension()
53
+
54
+ def embed(self, texts: list[str], batch_size: int = 32) -> list[list[float]]:
55
+ """Encode texts into normalized embedding vectors."""
56
+ if not texts:
57
+ return []
58
+ embeddings = self.model.encode(
59
+ texts,
60
+ batch_size=batch_size,
61
+ normalize_embeddings=True,
62
+ show_progress_bar=False,
63
+ )
64
+ return embeddings.tolist()
65
+
66
+ def embed_file(self, path: str, content: str, summary: str = "",
67
+ tags: list[str] | None = None) -> list[float]:
68
+ """Embed a single file combining path, summary, tags, and content preview."""
69
+ tags_str = " ".join(tags) if tags else ""
70
+ # Combine metadata with first 2000 chars of content for context
71
+ text = f"{path} {summary} {tags_str} {content[:2000]}"
72
+ return self.embed([text])[0]
73
+
74
+
75
+ def cosine_similarity(a: list[float], b: list[float]) -> float:
76
+ """Compute cosine similarity between two normalized vectors."""
77
+ if not a or not b:
78
+ return 0.0
79
+ dot = sum(x * y for x, y in zip(a, b))
80
+ # Clamp to [-1, 1] for floating-point safety
81
+ return max(-1.0, min(1.0, dot))
82
+
83
+
84
+ def init_embeddings_table(conn: sqlite3.Connection):
85
+ """Create the embeddings table if it doesn't exist."""
86
+ conn.execute("""
87
+ CREATE TABLE IF NOT EXISTS embeddings (
88
+ path TEXT PRIMARY KEY,
89
+ vector TEXT,
90
+ dims INTEGER,
91
+ embedded_at TEXT
92
+ )
93
+ """)
94
+
95
+
96
+ def store_embeddings(conn: sqlite3.Connection, path: str,
97
+ vector: list[float], embedded_at: str):
98
+ """Store or update an embedding vector for a file."""
99
+ conn.execute(
100
+ "INSERT OR REPLACE INTO embeddings (path, vector, dims, embedded_at) "
101
+ "VALUES (?, ?, ?, ?)",
102
+ (path, json.dumps(vector), len(vector), embedded_at),
103
+ )
104
+
105
+
106
+ def load_embeddings(conn: sqlite3.Connection) -> dict[str, list[float]]:
107
+ """Load all stored embeddings from the database."""
108
+ rows = conn.execute("SELECT path, vector FROM embeddings").fetchall()
109
+ return {row["path"]: json.loads(row["vector"]) for row in rows}
110
+
111
+
112
+ def hybrid_rerank(
113
+ bm25_results: list[dict],
114
+ embedded_vectors: dict[str, list[float]],
115
+ query_vector: list[float],
116
+ bm25_weight: float = 0.6,
117
+ embed_weight: float = 0.4,
118
+ ) -> list[dict]:
119
+ """Merge BM25 scores with cosine similarity and re-rank results.
120
+
121
+ bm25_weight / embed_weight control the blending.
122
+ Default 0.6/0.4 favors BM25 (precise token matching) with semantic uplift.
123
+ """
124
+ if not query_vector or not embedded_vectors:
125
+ return bm25_results
126
+
127
+ # Normalize BM25 scores to [0, 1] range
128
+ scores = [r["score"] for r in bm25_results]
129
+ if not scores:
130
+ return bm25_results
131
+ max_score = max(scores)
132
+ min_score = min(scores)
133
+ score_range = max_score - min_score if max_score != min_score else 1.0
134
+
135
+ for r in bm25_results:
136
+ bm25_norm = (r["score"] - min_score) / score_range
137
+
138
+ vec = embedded_vectors.get(r["path"])
139
+ cosine = cosine_similarity(query_vector, vec) if vec else 0.0
140
+
141
+ # Weighted blend
142
+ r["score"] = round(bm25_weight * bm25_norm + embed_weight * cosine, 4)
143
+ r["_bm25_norm"] = round(bm25_norm, 4)
144
+ r["_cosine"] = round(cosine, 4)
145
+
146
+ bm25_results.sort(key=lambda r: r["score"], reverse=True)
147
+ return bm25_results
@@ -0,0 +1,246 @@
1
+ """Search quality evaluation — measure query-to-result accuracy."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import os
7
+
8
+ from .search import SearchEngine
9
+
10
+
11
+ def load_queries(path: str) -> list[dict]:
12
+ """Load evaluation queries from a JSON file.
13
+
14
+ Schema:
15
+ [
16
+ {"query": "login auth", "expected": "auth/session.py", "min_rank": 1},
17
+ {"query": "login auth", "expected": "auth/session.py",
18
+ "scope": {"start": 14, "end": 27}, "assert": "scope"}
19
+ ]
20
+
21
+ Fields:
22
+ query — search query string
23
+ expected — file path that should appear in results
24
+ min_rank — required ranking position (default 1)
25
+ scope — line range the result should cover: {start, end}
26
+ assert — "file" (default), "scope", or "exact"
27
+ """
28
+ with open(path, "r", encoding="utf-8") as f:
29
+ queries = json.load(f)
30
+
31
+ if not isinstance(queries, list):
32
+ raise ValueError("Queries file must be a JSON array")
33
+
34
+ for i, q in enumerate(queries):
35
+ for key in ("query", "expected"):
36
+ if key not in q:
37
+ raise ValueError(f"Query item {i}: missing required key '{key}'")
38
+ q.setdefault("min_rank", 1)
39
+ q.setdefault("assert", "file")
40
+ if q["assert"] not in ("file", "scope", "exact"):
41
+ raise ValueError(
42
+ f"Query item {i}: assert must be 'file', 'scope', or 'exact'"
43
+ )
44
+ if q["assert"] in ("scope", "exact") and "scope" not in q:
45
+ raise ValueError(
46
+ f"Query item {i}: assert='{q['assert']}' requires a 'scope' field"
47
+ )
48
+
49
+ return queries
50
+
51
+
52
+ def run_eval(project_path: str, queries: list[dict],
53
+ limit: int = 10, exclude: list[str] | None = None,
54
+ scope: str | None = None) -> dict:
55
+ """Run search evaluation and return per-query results + aggregate metrics.
56
+
57
+ Returns: {
58
+ "project_path": str,
59
+ "num_queries": int,
60
+ "results": [{query, expected, rank, score, status, mode}],
61
+ "precision_at_1": float,
62
+ "recall_at_5": float,
63
+ "mrr": float,
64
+ "passed": int,
65
+ "failed": int,
66
+ }
67
+ """
68
+ engine = SearchEngine(project_path)
69
+
70
+ query_results = []
71
+ for q in queries:
72
+ result = _eval_one(engine, q, limit, exclude, scope)
73
+ query_results.append(result)
74
+
75
+ metrics = _compute_metrics(query_results)
76
+ metrics["project_path"] = project_path
77
+ metrics["num_queries"] = len(queries)
78
+ metrics["results"] = query_results
79
+
80
+ return metrics
81
+
82
+
83
+ def _eval_one(engine: SearchEngine, query_def: dict,
84
+ limit: int, exclude: list[str] | None,
85
+ search_scope: str | None) -> dict:
86
+ """Run a single query and check if expected file/scope appears in results."""
87
+ search_results = engine.search(
88
+ query_def["query"], limit=limit, exclude=exclude, scope=search_scope
89
+ )
90
+
91
+ expected_file = query_def["expected"]
92
+ min_rank = query_def.get("min_rank", 1)
93
+ assert_mode = query_def.get("assert", "file")
94
+ expected_scope = query_def.get("scope")
95
+
96
+ found = False
97
+ rank = None
98
+ score = None
99
+ matched = []
100
+ result_lines = (0, 0)
101
+
102
+ for i, r in enumerate(search_results):
103
+ if r["path"] == expected_file:
104
+ found = True
105
+ rank = i + 1
106
+ score = r["score"]
107
+ matched = r.get("matched_tokens", [])
108
+ result_lines = (r.get("line_start", 0), r.get("line_end", 0))
109
+ break
110
+
111
+ # File-level check
112
+ if not found:
113
+ return {
114
+ "query": query_def["query"],
115
+ "expected": expected_file,
116
+ "assert": assert_mode,
117
+ "scope": expected_scope,
118
+ "min_rank": min_rank,
119
+ "rank": None,
120
+ "score": None,
121
+ "matched_tokens": [],
122
+ "line_start": 0,
123
+ "line_end": 0,
124
+ "status": "fail",
125
+ }
126
+
127
+ if found and rank > min_rank:
128
+ return {
129
+ "query": query_def["query"],
130
+ "expected": expected_file,
131
+ "assert": assert_mode,
132
+ "scope": expected_scope,
133
+ "min_rank": min_rank,
134
+ "rank": rank,
135
+ "score": score,
136
+ "matched_tokens": matched,
137
+ "line_start": result_lines[0],
138
+ "line_end": result_lines[1],
139
+ "status": f"found@{rank} (needed ≤{min_rank})",
140
+ }
141
+
142
+ # File found at correct rank. Check scope if required.
143
+ scope_status = None
144
+ if assert_mode in ("scope", "exact") and expected_scope:
145
+ scope_status = _check_scope_overlap(result_lines, expected_scope, assert_mode)
146
+
147
+ if scope_status:
148
+ return {
149
+ "query": query_def["query"],
150
+ "expected": expected_file,
151
+ "assert": assert_mode,
152
+ "scope": expected_scope,
153
+ "min_rank": min_rank,
154
+ "rank": rank,
155
+ "score": score,
156
+ "matched_tokens": matched,
157
+ "line_start": result_lines[0],
158
+ "line_end": result_lines[1],
159
+ "status": scope_status,
160
+ }
161
+
162
+ # File-level pass
163
+ suffix = f" (top-{min_rank})" if min_rank > 1 else ""
164
+ return {
165
+ "query": query_def["query"],
166
+ "expected": expected_file,
167
+ "assert": assert_mode,
168
+ "scope": expected_scope,
169
+ "min_rank": min_rank,
170
+ "rank": rank,
171
+ "score": score,
172
+ "matched_tokens": matched,
173
+ "line_start": result_lines[0],
174
+ "line_end": result_lines[1],
175
+ "status": f"pass{suffix}",
176
+ }
177
+
178
+
179
+ def _check_scope_overlap(
180
+ result_lines: tuple[int, int],
181
+ expected_scope: dict,
182
+ mode: str,
183
+ ) -> str | None:
184
+ """Check if result line range overlaps expected scope. Returns status or None."""
185
+ r_start, r_end = result_lines
186
+ e_start = expected_scope.get("start", 0)
187
+ e_end = expected_scope.get("end", 0)
188
+
189
+ if r_start == 0 or e_start == 0:
190
+ return "fail (no line data)"
191
+
192
+ overlap_start = max(r_start, e_start)
193
+ overlap_end = min(r_end, e_end)
194
+
195
+ if overlap_end <= overlap_start:
196
+ return "fail (no scope overlap)"
197
+
198
+ overlap_lines = overlap_end - overlap_start
199
+ expected_lines = e_end - e_start
200
+ ratio = overlap_lines / expected_lines if expected_lines > 0 else 0
201
+
202
+ if mode == "exact":
203
+ if ratio >= 0.8:
204
+ return f"pass (scope {ratio:.0%})"
205
+ return f"found (scope {ratio:.0%} < 80%)"
206
+ elif mode == "scope":
207
+ return "pass (scope overlap)"
208
+
209
+ return None
210
+
211
+
212
+ def _compute_metrics(query_results: list[dict]) -> dict:
213
+ """Compute Precision@1, MRR, Recall@5 from per-query results."""
214
+ n = len(query_results)
215
+ if n == 0:
216
+ return {
217
+ "precision_at_1": 0.0,
218
+ "recall_at_5": 0.0,
219
+ "mrr": 0.0,
220
+ "passed": 0,
221
+ "failed": 0,
222
+ }
223
+
224
+ p1_count = 0
225
+ recall5_count = 0
226
+ reciprocal_sum = 0.0
227
+ passed = 0
228
+
229
+ for r in query_results:
230
+ rank = r["rank"]
231
+ if rank == 1:
232
+ p1_count += 1
233
+ if rank is not None and rank <= 5:
234
+ recall5_count += 1
235
+ if rank is not None:
236
+ reciprocal_sum += 1.0 / rank
237
+ if r["status"].startswith("pass"):
238
+ passed += 1
239
+
240
+ return {
241
+ "precision_at_1": round(p1_count / n, 4),
242
+ "recall_at_5": round(recall5_count / n, 4),
243
+ "mrr": round(reciprocal_sum / n, 4),
244
+ "passed": passed,
245
+ "failed": n - passed,
246
+ }