codexlr8 0.0.2__tar.gz → 0.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codexlr8-0.0.2 → codexlr8-0.0.3}/PKG-INFO +23 -1
- {codexlr8-0.0.2 → codexlr8-0.0.3}/README.md +20 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/pyproject.toml +4 -1
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/__init__.py +1 -1
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/cli.py +136 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/config.py +6 -0
- codexlr8-0.0.3/src/codexlr8/embeddings.py +147 -0
- codexlr8-0.0.3/src/codexlr8/eval.py +246 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/search.py +157 -4
- codexlr8-0.0.3/src/codexlr8/train.py +175 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8.egg-info/PKG-INFO +23 -1
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8.egg-info/SOURCES.txt +4 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8.egg-info/requires.txt +3 -0
- codexlr8-0.0.3/tests/test_eval.py +209 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/tests/test_search.py +24 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/LICENSE +0 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/setup.cfg +0 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/mcp_server.py +0 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/meta.py +0 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8/scanner.py +0 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8.egg-info/dependency_links.txt +0 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8.egg-info/entry_points.txt +0 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/src/codexlr8.egg-info/top_level.txt +0 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/tests/test_mcp_server.py +0 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/tests/test_meta.py +0 -0
- {codexlr8-0.0.2 → codexlr8-0.0.3}/tests/test_scanner.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: codexlr8
|
|
3
|
-
Version: 0.0.2
|
|
3
|
+
Version: 0.0.3
|
|
4
4
|
Summary: A codebase search engine for LLM coding agents
|
|
5
5
|
Author-email: Sadig Akhund <sadigaxund@gmail.com>
|
|
6
6
|
License: Apache-2.0
|
|
@@ -25,6 +25,8 @@ Requires-Dist: mcp>=1.0
|
|
|
25
25
|
Provides-Extra: dev
|
|
26
26
|
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
27
27
|
Requires-Dist: pytest-cov>=4.0; extra == "dev"
|
|
28
|
+
Provides-Extra: embeddings
|
|
29
|
+
Requires-Dist: sentence-transformers>=3.0; extra == "embeddings"
|
|
28
30
|
Dynamic: license-file
|
|
29
31
|
|
|
30
32
|
# CodeXLR8
|
|
@@ -95,6 +97,26 @@ codexlr8 search . "axes not hiding" --explain
|
|
|
95
97
|
# Combine both — group, then scope to drill down
|
|
96
98
|
```
|
|
97
99
|
|
|
100
|
+
### Search Quality & Fine-Tuning
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
# Measure search accuracy against known queries
|
|
104
|
+
codexlr8 eval . --queries queries.json
|
|
105
|
+
# Precision@1: 67%, MRR: 0.83, Recall@5: 67%
|
|
106
|
+
|
|
107
|
+
# Typos are auto-corrected (fuzzy fallback on zero results)
|
|
108
|
+
codexlr8 search . "funtion" # → corrects to "function"
|
|
109
|
+
|
|
110
|
+
# Opt-in embeddings: hybrid BM25 + semantic search
|
|
111
|
+
# pip install codexlr8[embeddings]
|
|
112
|
+
# set embeddings.enabled: true in .codexlr8.yaml
|
|
113
|
+
|
|
114
|
+
# Fine-tune a model on YOUR codebase vocabulary
|
|
115
|
+
codexlr8 recommend-model . # picks the right model for your size
|
|
116
|
+
codexlr8 train . # TSDAE training, 5-45min on CPU
|
|
117
|
+
codexlr8 eval . # measure improvement
|
|
118
|
+
```
|
|
119
|
+
|
|
98
120
|
## .meta.yaml Sidecars
|
|
99
121
|
|
|
100
122
|
Optional YAML files next to source files, created by `codexlr8 init`:
|
|
@@ -66,6 +66,26 @@ codexlr8 search . "axes not hiding" --explain
|
|
|
66
66
|
# Combine both — group, then scope to drill down
|
|
67
67
|
```
|
|
68
68
|
|
|
69
|
+
### Search Quality & Fine-Tuning
|
|
70
|
+
|
|
71
|
+
```bash
|
|
72
|
+
# Measure search accuracy against known queries
|
|
73
|
+
codexlr8 eval . --queries queries.json
|
|
74
|
+
# Precision@1: 67%, MRR: 0.83, Recall@5: 67%
|
|
75
|
+
|
|
76
|
+
# Typos are auto-corrected (fuzzy fallback on zero results)
|
|
77
|
+
codexlr8 search . "funtion" # → corrects to "function"
|
|
78
|
+
|
|
79
|
+
# Opt-in embeddings: hybrid BM25 + semantic search
|
|
80
|
+
# pip install codexlr8[embeddings]
|
|
81
|
+
# set embeddings.enabled: true in .codexlr8.yaml
|
|
82
|
+
|
|
83
|
+
# Fine-tune a model on YOUR codebase vocabulary
|
|
84
|
+
codexlr8 recommend-model . # picks the right model for your size
|
|
85
|
+
codexlr8 train . # TSDAE training, 5-45min on CPU
|
|
86
|
+
codexlr8 eval . # measure improvement
|
|
87
|
+
```
|
|
88
|
+
|
|
69
89
|
## .meta.yaml Sidecars
|
|
70
90
|
|
|
71
91
|
Optional YAML files next to source files, created by `codexlr8 init`:
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "codexlr8"
|
|
7
|
-
version = "0.0.2"
|
|
7
|
+
version = "0.0.3"
|
|
8
8
|
description = "A codebase search engine for LLM coding agents"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.10"
|
|
@@ -37,6 +37,9 @@ dev = [
|
|
|
37
37
|
"pytest>=7.0",
|
|
38
38
|
"pytest-cov>=4.0",
|
|
39
39
|
]
|
|
40
|
+
embeddings = [
|
|
41
|
+
"sentence-transformers>=3.0",
|
|
42
|
+
]
|
|
40
43
|
|
|
41
44
|
[project.scripts]
|
|
42
45
|
codexlr8 = "codexlr8.cli:main"
|
|
@@ -202,6 +202,140 @@ def status(project_path: str):
|
|
|
202
202
|
click.secho(f" Warning: {state['warning']}", fg="yellow")
|
|
203
203
|
|
|
204
204
|
|
|
205
|
+
@main.command("eval")
@click.argument("project_path", type=click.Path(exists=True, file_okay=False))
@click.option("--queries", "-q", required=True,
              type=click.Path(exists=True, dir_okay=False),
              help="Path to JSON file with query definitions")
@click.option("--limit", "-n", default=10,
              help="Max results per query (default: 10)")
def eval_cmd(project_path: str, queries: str, limit: int):
    """Evaluate search quality against a query set.

    QUERIES is a JSON file with an array of query objects:
    [{"query": "...", "expected": "path/to/file.py", "min_rank": 1}]

    Outputs a per-query pass/fail table and aggregate metrics:
    Precision@1, Mean Reciprocal Rank (MRR), Recall@5.
    """
    # The command name is given explicitly: older Click releases derive
    # "eval-cmd" from the function name, which would not match the
    # documented `codexlr8 eval` invocation.
    from .eval import load_queries, run_eval

    try:
        query_defs = load_queries(queries)
    except ValueError as e:
        # json.JSONDecodeError subclasses ValueError, so this covers both
        # malformed JSON and schema violations reported by load_queries.
        raise click.ClickException(f"Invalid queries file: {e}") from e

    if not query_defs:
        raise click.ClickException("Queries file contains no queries.")

    metrics = run_eval(project_path, query_defs, limit=limit)

    # Per-query table. The header is generated from the same widths used to
    # pad each row so that titles and values stay aligned.
    columns = (("Query", 34), ("Expected", 20), ("Mode", 7),
               ("Lines", 8), ("Rank", 6), ("Score", 8))
    header = " ".join(title.ljust(width) for title, width in columns)
    click.secho(f" {header} Status", fg="cyan", bold=True)
    click.secho(" " + "─" * 105, fg="cyan")

    for r in metrics["results"]:
        query_str = f'"{r["query"]}"'.ljust(34)
        expected_str = r["expected"].ljust(20)
        mode_str = r.get("assert", "file").ljust(7)

        # line_start of 0 means the result carried no line information.
        if r.get("line_start"):
            lines_str = f"{r['line_start']}-{r['line_end']}".ljust(8)
        else:
            lines_str = "—".ljust(8)

        # Compare against None explicitly: a legitimate score of 0.0 is
        # falsy and must still be printed as a number.
        rank = r["rank"]
        score = r["score"]
        rank_str = (str(rank) if rank is not None else "—").ljust(6)
        score_str = (f"{score:.2f}" if score is not None else "—").ljust(8)

        status = r["status"]
        if status.startswith("pass"):
            status_style = {"fg": "green"}
        elif "found" in status:
            # Located, but at the wrong rank or with insufficient scope overlap.
            status_style = {"fg": "yellow"}
        else:
            status_style = {"fg": "red"}

        click.echo(
            f" {query_str} {expected_str} {mode_str} {lines_str} "
            f"{rank_str} {score_str} {click.style(status, **status_style)}"
        )

    # Aggregate metrics
    click.echo()
    click.echo(click.style(" " + "─" * 40, fg="cyan"))
    click.secho(f" Precision@1: {metrics['precision_at_1']:.2%} "
                f"({metrics['passed']}/{metrics['num_queries']} passed)", fg="green")
    click.secho(f" MRR: {metrics['mrr']:.4f}", fg="green")
    click.secho(f" Recall@5: {metrics['recall_at_5']:.2%}", fg="green")
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
@main.command()
@click.argument("project_path", type=click.Path(exists=True, file_okay=False), default=".")
@click.option("--model", "-m", default="all-MiniLM-L6-v2",
              help="Embedding model to fine-tune")
@click.option("--epochs", "-e", default=3,
              help="Training epochs (default: 3)")
@click.option("--incremental", "-i", is_flag=True, default=False,
              help="Fine-tune only on changed files")
def train(project_path: str, model: str, epochs: int, incremental: bool):
    """Fine-tune an embedding model on this codebase for better search accuracy.

    Uses TSDAE (denoising auto-encoder) to adapt a pretrained model to
    your codebase's vocabulary. The fine-tuned model is saved to
    .codexlr8_model/ and referenced in .codexlr8.yaml.

    Requirements: pip install codexlr8[embeddings]
    """
    # Imported lazily so the rest of the CLI works without the optional
    # embeddings dependencies installed.
    try:
        from .train import train_model
    except ImportError as e:
        raise click.ClickException(
            "Training requires 'pip install codexlr8[embeddings]'"
        ) from e

    click.echo()
    click.secho(" Training embedding model on this codebase...", fg="cyan", bold=True)
    click.echo(f" Model: {model}")
    click.echo(f" Epochs: {epochs}")
    click.echo()

    try:
        result = train_model(project_path, model_name=model,
                             epochs=epochs, incremental=incremental)
    except ValueError as e:
        raise click.ClickException(str(e)) from e

    # Coerce to whole seconds first so a float duration does not render as
    # something like "2.0m3.4s".
    minutes, seconds = divmod(int(round(result["duration_sec"])), 60)
    dur_str = f"{minutes}m{seconds}s" if minutes else f"{seconds}s"

    click.echo()
    click.secho(f" Trained on {result['num_examples']} files in {dur_str}", fg="green")
    click.secho(f" Model saved to {result['model_path']}", fg="green")
    click.secho(" Embeddings enabled in .codexlr8.yaml", fg="green")
    click.echo()
    click.secho(" Run 'codexlr8 eval .' to measure improvement.", dim=True)
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
@main.command("recommend-model")
@click.argument("project_path", type=click.Path(exists=True, file_okay=False), default=".")
def recommend_model_cmd(project_path: str):
    """Suggest the best embedding model for this codebase size."""
    # The command name is given explicitly: older Click releases derive
    # "recommend-model-cmd" from the function name, which would not match
    # the documented `codexlr8 recommend-model` invocation.
    try:
        from .train import recommend_model
    except ImportError as e:
        raise click.ClickException(
            "Requires 'pip install codexlr8[embeddings]'"
        ) from e

    rec = recommend_model(project_path)

    click.echo()
    click.secho(f" Codebase: {rec['num_files']} files, ~{rec['est_tokens']:,} tokens", fg="cyan")
    click.echo()
    click.secho(f" Recommended: {rec['model']} ({rec['param_count']})", fg="green", bold=True)
    click.echo(f" Est. training time: {rec['est_training_time']}")
    # quality_gain is optional in the recommendation; fall back to a generic range.
    click.echo(f" Expected quality gain: {rec.get('quality_gain', '+5-12% MRR')}")
    click.echo()
    click.secho(" Run 'codexlr8 train .' to start training.", dim=True)
|
|
337
|
+
|
|
338
|
+
|
|
205
339
|
@main.command()
|
|
206
340
|
@click.argument("project_path", type=click.Path(exists=True, file_okay=False), default=".")
|
|
207
341
|
def setup(project_path: str):
|
|
@@ -702,6 +836,8 @@ Exclude patterns are globs that match file paths. Use `*` for wildcards.
|
|
|
702
836
|
| Search within a directory | `codebase_search(query="...", scope="src/")` |
|
|
703
837
|
| Cluster results by directory | Shell: `codexlr8 search . "query" --grouped` |
|
|
704
838
|
| Diagnose query terms | Shell: `codexlr8 search . "query" --explain` |
|
|
839
|
+
| Measure search accuracy | Shell: `codexlr8 eval . --queries q.json` |
|
|
840
|
+
| Fine-tune embeddings | Shell: `codexlr8 train .` (needs `[embeddings]` extra) |
|
|
705
841
|
| Build/update index | `codebase_index(incremental=true)` |
|
|
706
842
|
| Check metadata coverage | Shell: `codexlr8 status .` |
|
|
707
843
|
| Bootstrap missing sidecars | Shell: `codexlr8 init .` |
|
|
@@ -24,6 +24,12 @@ def load_config(project_path: str) -> dict:
|
|
|
24
24
|
def _defaults() -> dict:
|
|
25
25
|
return {
|
|
26
26
|
"root": ".",
|
|
27
|
+
"fuzzy": True,
|
|
28
|
+
"embeddings": {
|
|
29
|
+
"enabled": False,
|
|
30
|
+
"model": "all-MiniLM-L6-v2",
|
|
31
|
+
"bm25_weight": 0.6,
|
|
32
|
+
},
|
|
27
33
|
"include": [],
|
|
28
34
|
"exclude": [
|
|
29
35
|
"tests/*",
|
|
@@ -0,0 +1,147 @@
|
|
|
1
|
+
"""Embedding provider — optional semantic search layer.
|
|
2
|
+
|
|
3
|
+
Requires optional dependencies: pip install codexlr8[embeddings]
|
|
4
|
+
Provides cosine-similarity reranking on top of BM25 results.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
import math
|
|
11
|
+
import os
|
|
12
|
+
import sqlite3
|
|
13
|
+
|
|
14
|
+
# Module-level cache for the optional dependency: the flag flips to True and
# the class reference is stored the first time the import succeeds.
_EMBEDDING_AVAILABLE = False
_SentenceTransformer = None


def _check_deps() -> bool:
    """Import sentence-transformers on demand and report whether it is usable.

    A successful import is remembered in the module globals so later calls
    return immediately. A failed import is not cached and is retried on the
    next call.
    """
    global _EMBEDDING_AVAILABLE, _SentenceTransformer
    if not _EMBEDDING_AVAILABLE:
        try:
            from sentence_transformers import SentenceTransformer
        except ImportError:
            return False
        _SentenceTransformer = SentenceTransformer
        _EMBEDDING_AVAILABLE = True
    return True
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class EmbeddingProvider:
    """Wrapper around a sentence-transformers model that loads it on first use."""

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        self.model_name = model_name
        # Filled in by the `model` property the first time it is read.
        self._model = None

    @property
    def model(self):
        """Return the underlying model, instantiating it if necessary.

        Raises:
            ImportError: if sentence-transformers cannot be imported.
        """
        if self._model is not None:
            return self._model
        if not _check_deps():
            raise ImportError(
                "Embeddings require 'pip install codexlr8[embeddings]' "
                "(installs sentence-transformers)"
            )
        self._model = _SentenceTransformer(self.model_name)
        return self._model

    @property
    def dims(self) -> int:
        """Embedding dimension as reported by the loaded model."""
        return self.model.get_sentence_embedding_dimension()

    def embed(self, texts: list[str], batch_size: int = 32) -> list[list[float]]:
        """Encode *texts* and return one vector (as a plain list) per text.

        The model is asked to normalize the embeddings. An empty input
        returns an empty list without loading the model.
        """
        if not texts:
            return []
        encoded = self.model.encode(
            texts,
            batch_size=batch_size,
            normalize_embeddings=True,
            show_progress_bar=False,
        )
        return encoded.tolist()

    def embed_file(self, path: str, content: str, summary: str = "",
                   tags: list[str] | None = None) -> list[float]:
        """Embed one file from its path, summary, tags and a content preview."""
        tags_str = " ".join(tags) if tags else ""
        # Only the first 2000 characters of the content are embedded.
        document = f"{path} {summary} {tags_str} {content[:2000]}"
        vectors = self.embed([document])
        return vectors[0]
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def cosine_similarity(a: list[float], b: list[float]) -> float:
    """Return the cosine similarity of vectors *a* and *b*, clamped to [-1, 1].

    The dot product is divided by the product of the vector norms, so the
    result is correct for unit-length and non-normalized vectors alike.

    Returns 0.0 when either vector is empty, when either has zero length
    (norm), or when the dimensions differ (for example, embeddings stored by
    a different model); such pairs are treated as having no similarity.
    """
    if not a or not b or len(a) != len(b):
        return 0.0

    dot = sum(x * y for x, y in zip(a, b))
    norm_a = math.sqrt(sum(x * x for x in a))
    norm_b = math.sqrt(sum(y * y for y in b))
    if norm_a == 0.0 or norm_b == 0.0:
        return 0.0

    # Clamp to [-1, 1] for floating-point safety
    return max(-1.0, min(1.0, dot / (norm_a * norm_b)))
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def init_embeddings_table(conn: sqlite3.Connection):
    """Ensure the ``embeddings`` table exists on *conn*.

    The statement is idempotent (``IF NOT EXISTS``). No commit is issued here.
    """
    ddl = """
        CREATE TABLE IF NOT EXISTS embeddings (
            path TEXT PRIMARY KEY,
            vector TEXT,
            dims INTEGER,
            embedded_at TEXT
        )
    """
    conn.execute(ddl)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def store_embeddings(conn: sqlite3.Connection, path: str,
                     vector: list[float], embedded_at: str):
    """Insert the embedding for *path*, replacing any existing row.

    The vector is serialized as JSON text and its length is recorded in the
    ``dims`` column. No commit is issued here.
    """
    statement = (
        "INSERT OR REPLACE INTO embeddings (path, vector, dims, embedded_at) "
        "VALUES (?, ?, ?, ?)"
    )
    row = (path, json.dumps(vector), len(vector), embedded_at)
    conn.execute(statement, row)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def load_embeddings(conn: sqlite3.Connection) -> dict[str, list[float]]:
    """Load every stored embedding as a ``{path: vector}`` mapping.

    Rows are unpacked by position rather than by column name, so this works
    both with the default tuple rows and with ``sqlite3.Row`` as the
    connection's row factory.
    """
    rows = conn.execute("SELECT path, vector FROM embeddings").fetchall()
    # Vectors are stored as JSON text; decode them back into lists.
    return {path: json.loads(vector) for path, vector in rows}
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def hybrid_rerank(
    bm25_results: list[dict],
    embedded_vectors: dict[str, list[float]],
    query_vector: list[float],
    bm25_weight: float = 0.6,
    embed_weight: float = 0.4,
) -> list[dict]:
    """Blend BM25 scores with embedding similarity and re-sort the results.

    Each result's ``score`` is replaced by
    ``bm25_weight * normalized_bm25 + embed_weight * cosine`` and the two
    components are recorded under ``_bm25_norm`` and ``_cosine``. The result
    dicts and the list itself are modified in place; the same list is
    returned, sorted by the blended score in descending order.

    With no query vector, no stored vectors, or no results, the input is
    returned untouched.
    """
    if not (query_vector and embedded_vectors):
        return bm25_results

    raw_scores = [hit["score"] for hit in bm25_results]
    if not raw_scores:
        return bm25_results

    # Min-max scale the BM25 scores into [0, 1]; when every score is equal the
    # divisor falls back to 1.0 so each normalized value becomes 0.
    lowest = min(raw_scores)
    highest = max(raw_scores)
    span = 1.0 if highest == lowest else highest - lowest

    for hit in bm25_results:
        lexical = (hit["score"] - lowest) / span

        # A result with no stored vector gets no semantic contribution.
        stored = embedded_vectors.get(hit["path"])
        semantic = cosine_similarity(query_vector, stored) if stored else 0.0

        hit["score"] = round(bm25_weight * lexical + embed_weight * semantic, 4)
        hit["_bm25_norm"] = round(lexical, 4)
        hit["_cosine"] = round(semantic, 4)

    bm25_results.sort(key=lambda hit: hit["score"], reverse=True)
    return bm25_results
|
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
"""Search quality evaluation — measure query-to-result accuracy."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
|
|
8
|
+
from .search import SearchEngine
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def load_queries(path: str) -> list[dict]:
    """Load and validate evaluation queries from a JSON file.

    Schema:
        [
            {"query": "login auth", "expected": "auth/session.py", "min_rank": 1},
            {"query": "login auth", "expected": "auth/session.py",
             "scope": {"start": 14, "end": 27}, "assert": "scope"}
        ]

    Fields:
        query — search query string
        expected — file path that should appear in results
        min_rank — required ranking position (default 1)
        scope — line range the result should cover: {start, end}
        assert — "file" (default), "scope", or "exact"

    Missing ``min_rank`` / ``assert`` keys are filled in with their defaults
    on the loaded dicts.

    Raises:
        ValueError: if the document is not a JSON array, an item is not an
            object, a required key is missing, or a field has an invalid
            value. Malformed JSON raises json.JSONDecodeError, which is a
            ValueError subclass.
    """
    with open(path, "r", encoding="utf-8") as f:
        queries = json.load(f)

    if not isinstance(queries, list):
        raise ValueError("Queries file must be a JSON array")

    for i, q in enumerate(queries):
        # Reject non-object items up front; otherwise the key checks below
        # fail with TypeError/AttributeError instead of a ValueError.
        if not isinstance(q, dict):
            raise ValueError(f"Query item {i}: must be a JSON object")

        for key in ("query", "expected"):
            if key not in q:
                raise ValueError(f"Query item {i}: missing required key '{key}'")

        q.setdefault("min_rank", 1)
        q.setdefault("assert", "file")

        # min_rank is compared against 1-based result ranks, so it has to be
        # a number of at least 1 (bool is excluded although it subclasses int).
        min_rank = q["min_rank"]
        if (isinstance(min_rank, bool)
                or not isinstance(min_rank, (int, float))
                or min_rank < 1):
            raise ValueError(f"Query item {i}: min_rank must be a number >= 1")

        if q["assert"] not in ("file", "scope", "exact"):
            raise ValueError(
                f"Query item {i}: assert must be 'file', 'scope', or 'exact'"
            )
        if q["assert"] in ("scope", "exact") and "scope" not in q:
            raise ValueError(
                f"Query item {i}: assert='{q['assert']}' requires a 'scope' field"
            )

        scope = q.get("scope")
        if scope is not None and not isinstance(scope, dict):
            raise ValueError(
                f"Query item {i}: 'scope' must be an object with 'start' and 'end'"
            )

    return queries
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def run_eval(project_path: str, queries: list[dict],
             limit: int = 10, exclude: list[str] | None = None,
             scope: str | None = None) -> dict:
    """Evaluate every query against the project and summarize the outcome.

    A single SearchEngine is created for *project_path* and reused for all
    queries; *limit*, *exclude* and *scope* are forwarded to each search.

    Returns: {
        "precision_at_1": float,
        "recall_at_5": float,
        "mrr": float,
        "passed": int,
        "failed": int,
        "project_path": str,
        "num_queries": int,
        "results": [per-query result dicts],
    }
    """
    engine = SearchEngine(project_path)
    per_query = [_eval_one(engine, q, limit, exclude, scope) for q in queries]

    # Start from the aggregate metrics and attach the run context to them.
    report = _compute_metrics(per_query)
    report.update(
        project_path=project_path,
        num_queries=len(queries),
        results=per_query,
    )
    return report
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _normalize_path(path: str) -> str:
    """Return *path* with unified separators and redundant segments removed."""
    return os.path.normpath(str(path).replace("\\", "/"))


def _eval_one(engine: SearchEngine, query_def: dict,
              limit: int, exclude: list[str] | None,
              search_scope: str | None) -> dict:
    """Run a single query and check if expected file/scope appears in results.

    Args:
        engine: object whose ``search(query, limit=, exclude=, scope=)``
            returns an ordered list of result dicts with at least ``path``
            and ``score``.
        query_def: one query definition (``query``, ``expected`` and the
            optional ``min_rank``, ``assert``, ``scope`` keys).
        limit, exclude, search_scope: forwarded to ``engine.search``.

    Returns:
        A dict with keys query, expected, assert, scope, min_rank, rank,
        score, matched_tokens, line_start, line_end and status. ``status``
        is "fail" when the file is absent, "found@R (needed ≤M)" when it
        ranks below ``min_rank``, the scope-check status when one applies,
        and otherwise "pass" (with a " (top-M)" suffix when min_rank > 1).
    """
    search_results = engine.search(
        query_def["query"], limit=limit, exclude=exclude, scope=search_scope
    )

    expected_file = query_def["expected"]
    min_rank = query_def.get("min_rank", 1)
    assert_mode = query_def.get("assert", "file")
    expected_scope = query_def.get("scope")

    # Start from the "not found" record and refine it as checks succeed, so
    # every exit path shares a single definition of the result shape.
    record = {
        "query": query_def["query"],
        "expected": expected_file,
        "assert": assert_mode,
        "scope": expected_scope,
        "min_rank": min_rank,
        "rank": None,
        "score": None,
        "matched_tokens": [],
        "line_start": 0,
        "line_end": 0,
        "status": "fail",
    }

    # Compare normalized paths so "./a/b.py" or "a\\b.py" in a query file
    # still matches a result reported as "a/b.py".
    wanted = _normalize_path(expected_file)
    rank, hit = next(
        ((pos, r) for pos, r in enumerate(search_results, start=1)
         if _normalize_path(r["path"]) == wanted),
        (None, None),
    )
    if hit is None:
        return record

    # `or 0` maps a present-but-None line number to the "no line data" value.
    result_lines = (hit.get("line_start") or 0, hit.get("line_end") or 0)
    record.update(
        rank=rank,
        score=hit["score"],
        matched_tokens=hit.get("matched_tokens", []),
        line_start=result_lines[0],
        line_end=result_lines[1],
    )

    if rank > min_rank:
        record["status"] = f"found@{rank} (needed ≤{min_rank})"
        return record

    # File found at correct rank. Check scope if required.
    if assert_mode in ("scope", "exact") and expected_scope:
        scope_status = _check_scope_overlap(result_lines, expected_scope, assert_mode)
        if scope_status:
            record["status"] = scope_status
            return record

    # File-level pass
    suffix = f" (top-{min_rank})" if min_rank > 1 else ""
    record["status"] = f"pass{suffix}"
    return record
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _check_scope_overlap(
    result_lines: tuple[int, int],
    expected_scope: dict,
    mode: str,
) -> str | None:
    """Check if result line range overlaps expected scope. Returns status or None.

    Both ranges are treated as inclusive line ranges, so a result and an
    expected scope that share exactly one line do overlap, and a single-line
    expected scope (start == end) can be satisfied.

    Args:
        result_lines: (line_start, line_end) of the search result; 0 or None
            means no line data.
        expected_scope: dict with ``start`` and ``end`` line numbers.
        mode: "scope" passes on any overlap; "exact" passes when at least
            80% of the expected lines are covered.

    Returns:
        A status string beginning with "pass", "found" or "fail", or None
        when *mode* is neither "scope" nor "exact".
    """
    r_start, r_end = result_lines
    # Treat missing or None bounds as 0, i.e. "no line data".
    r_start = r_start or 0
    r_end = r_end or 0
    e_start = expected_scope.get("start") or 0
    e_end = expected_scope.get("end") or 0

    if r_start == 0 or e_start == 0:
        return "fail (no line data)"

    overlap_start = max(r_start, e_start)
    overlap_end = min(r_end, e_end)

    # Inclusive ranges: equal bounds mean one shared line, not zero.
    if overlap_end < overlap_start:
        return "fail (no scope overlap)"

    overlap_lines = overlap_end - overlap_start + 1
    # An overlap implies e_end >= e_start, so expected_lines is at least 1.
    expected_lines = e_end - e_start + 1
    ratio = overlap_lines / expected_lines

    if mode == "exact":
        if ratio >= 0.8:
            return f"pass (scope {ratio:.0%})"
        return f"found (scope {ratio:.0%} < 80%)"
    elif mode == "scope":
        return "pass (scope overlap)"

    return None
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _compute_metrics(query_results: list[dict]) -> dict:
    """Aggregate per-query results into Precision@1, Recall@5, MRR and pass counts.

    Rank-based metrics use each result's ``rank`` (None = not found) and are
    divided by the total number of queries; ``passed`` counts results whose
    ``status`` starts with "pass". All ratios are rounded to 4 decimals.
    An empty input yields all-zero metrics.
    """
    total = len(query_results)
    if not total:
        return {
            "precision_at_1": 0.0,
            "recall_at_5": 0.0,
            "mrr": 0.0,
            "passed": 0,
            "failed": 0,
        }

    ranks = [entry["rank"] for entry in query_results]
    located = [rank for rank in ranks if rank is not None]

    top1_hits = sum(1 for rank in ranks if rank == 1)
    top5_hits = sum(1 for rank in located if rank <= 5)

    # Accumulated with a plain loop to keep left-to-right float addition.
    reciprocal_total = 0.0
    for rank in located:
        reciprocal_total += 1.0 / rank

    pass_count = sum(
        1 for entry in query_results if entry["status"].startswith("pass")
    )

    return {
        "precision_at_1": round(top1_hits / total, 4),
        "recall_at_5": round(top5_hits / total, 4),
        "mrr": round(reciprocal_total / total, 4),
        "passed": pass_count,
        "failed": total - pass_count,
    }
|