code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_review_graph/__init__.py +20 -0
- code_review_graph/__main__.py +4 -0
- code_review_graph/analysis.py +410 -0
- code_review_graph/changes.py +409 -0
- code_review_graph/cli.py +1255 -0
- code_review_graph/communities.py +874 -0
- code_review_graph/constants.py +23 -0
- code_review_graph/context_savings.py +317 -0
- code_review_graph/custom_languages.py +322 -0
- code_review_graph/daemon.py +1009 -0
- code_review_graph/daemon_cli.py +320 -0
- code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
- code_review_graph/embeddings.py +1006 -0
- code_review_graph/enrich.py +303 -0
- code_review_graph/eval/__init__.py +33 -0
- code_review_graph/eval/benchmarks/__init__.py +1 -0
- code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
- code_review_graph/eval/benchmarks/build_performance.py +60 -0
- code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
- code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
- code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
- code_review_graph/eval/benchmarks/search_quality.py +59 -0
- code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
- code_review_graph/eval/configs/code-review-graph.yaml +50 -0
- code_review_graph/eval/configs/express.yaml +45 -0
- code_review_graph/eval/configs/fastapi.yaml +48 -0
- code_review_graph/eval/configs/flask.yaml +50 -0
- code_review_graph/eval/configs/gin.yaml +51 -0
- code_review_graph/eval/configs/httpx.yaml +48 -0
- code_review_graph/eval/reporter.py +301 -0
- code_review_graph/eval/runner.py +211 -0
- code_review_graph/eval/scorer.py +85 -0
- code_review_graph/eval/token_benchmark.py +182 -0
- code_review_graph/exports.py +409 -0
- code_review_graph/flows.py +698 -0
- code_review_graph/graph.py +1427 -0
- code_review_graph/graph_diff.py +122 -0
- code_review_graph/hints.py +384 -0
- code_review_graph/incremental.py +1245 -0
- code_review_graph/jedi_resolver.py +303 -0
- code_review_graph/main.py +1079 -0
- code_review_graph/memory.py +142 -0
- code_review_graph/migrations.py +284 -0
- code_review_graph/parser.py +6957 -0
- code_review_graph/postprocessing.py +134 -0
- code_review_graph/prompts.py +159 -0
- code_review_graph/refactor.py +852 -0
- code_review_graph/registry.py +319 -0
- code_review_graph/rescript_resolver.py +206 -0
- code_review_graph/search.py +447 -0
- code_review_graph/skills.py +1481 -0
- code_review_graph/spring_resolver.py +200 -0
- code_review_graph/temporal_resolver.py +199 -0
- code_review_graph/token_benchmark.py +125 -0
- code_review_graph/tools/__init__.py +156 -0
- code_review_graph/tools/_common.py +176 -0
- code_review_graph/tools/analysis_tools.py +184 -0
- code_review_graph/tools/build.py +541 -0
- code_review_graph/tools/community_tools.py +246 -0
- code_review_graph/tools/context.py +152 -0
- code_review_graph/tools/docs.py +274 -0
- code_review_graph/tools/flows_tools.py +176 -0
- code_review_graph/tools/query.py +692 -0
- code_review_graph/tools/refactor_tools.py +168 -0
- code_review_graph/tools/registry_tools.py +125 -0
- code_review_graph/tools/review.py +477 -0
- code_review_graph/tsconfig_resolver.py +257 -0
- code_review_graph/visualization.py +2184 -0
- code_review_graph/wiki.py +305 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,303 @@
|
|
|
1
|
+
"""PreToolUse search enrichment for Claude Code hooks.
|
|
2
|
+
|
|
3
|
+
Intercepts Grep/Glob/Bash/Read tool calls and enriches them with
|
|
4
|
+
structural context from the code knowledge graph: callers, callees,
|
|
5
|
+
execution flows, community membership, and test coverage.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import json
|
|
11
|
+
import logging
|
|
12
|
+
import os
|
|
13
|
+
import re
|
|
14
|
+
import sys
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
logger = logging.getLogger(__name__)
|
|
19
|
+
|
|
20
|
+
# Flags that consume the next token in grep/rg commands
|
|
21
|
+
_RG_FLAGS_WITH_VALUES = frozenset({
|
|
22
|
+
"-e", "-f", "-m", "-A", "-B", "-C", "-g", "--glob",
|
|
23
|
+
"-t", "--type", "--include", "--exclude", "--max-count",
|
|
24
|
+
"--max-depth", "--max-filesize", "--color", "--colors",
|
|
25
|
+
"--context-separator", "--field-match-separator",
|
|
26
|
+
"--path-separator", "--replace", "--sort", "--sortr",
|
|
27
|
+
})
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def extract_pattern(tool_name: str, tool_input: dict[str, Any]) -> str | None:
    """Extract a search pattern from a tool call's input.

    Args:
        tool_name: Name of the intercepted tool (``Grep``, ``Glob`` or ``Bash``).
        tool_input: The tool's input payload.

    Returns:
        The pattern string, or None if no meaningful pattern can be extracted.
        For ``Bash`` the command must invoke ``rg``/``grep`` and the pattern
        must be at least three characters long after quote stripping.
    """
    if tool_name == "Grep":
        return tool_input.get("pattern")

    if tool_name == "Glob":
        raw = tool_input.get("pattern", "")
        # Extract meaningful name from glob: "**/auth*.ts" -> "auth"
        # Skip pure extension globs like "**/*.ts"
        match = re.search(r"[*/]([a-zA-Z][a-zA-Z0-9_]{2,})", raw)
        return match.group(1) if match else None

    if tool_name == "Bash":
        cmd = tool_input.get("command", "")
        if not re.search(r"\brg\b|\bgrep\b", cmd):
            return None
        found_cmd = False
        skip_next = False
        expect_pattern = False
        for token in cmd.split():
            if skip_next:
                skip_next = False
                continue
            if not found_cmd:
                # Everything before the rg/grep executable is ignored.
                if re.search(r"\brg$|\bgrep$", token):
                    found_cmd = True
                continue
            if expect_pattern:
                # The token after -e/--regexp is the pattern, even if it
                # happens to start with a dash.
                candidate = token
            elif token in ("-e", "--regexp"):
                # -e introduces the pattern itself; it must not be skipped
                # like the value of an ordinary option.
                expect_pattern = True
                continue
            elif token.startswith("--regexp="):
                candidate = token.partition("=")[2]
            elif token.startswith("-"):
                if token in _RG_FLAGS_WITH_VALUES:
                    skip_next = True
                continue
            else:
                candidate = token
            cleaned = candidate.strip("'\"")
            return cleaned if len(cleaned) >= 3 else None
        return None

    return None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _make_relative(file_path: str, repo_root: str) -> str:
    """Return *file_path* relative to *repo_root* for display.

    When *file_path* is not located under *repo_root* it is returned unchanged.
    """
    target = Path(file_path)
    try:
        relative = target.relative_to(repo_root)
    except ValueError:
        return file_path
    return str(relative)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _get_community_name(conn: Any, community_id: int) -> str:
    """Look up a community's name by its ID.

    Returns an empty string when the ``communities`` table has no such row.
    """
    query = "SELECT name FROM communities WHERE id = ?"
    record = conn.execute(query, (community_id,)).fetchone()
    if not record:
        return ""
    return record["name"]
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _get_flow_names_for_node(conn: Any, node_id: int) -> list[str]:
    """Return the names of at most three execution flows the node belongs to."""
    sql = (
        "SELECT f.name FROM flow_memberships fm "
        "JOIN flows f ON fm.flow_id = f.id "
        "WHERE fm.node_id = ? LIMIT 3"
    )
    names: list[str] = []
    for record in conn.execute(sql, (node_id,)).fetchall():
        names.append(record["name"])
    return names
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def _format_node_context(
    node: Any,
    store: Any,
    conn: Any,
    repo_root: str,
) -> list[str]:
    """Render one node's structural context as plain-text lines.

    The first line is a header ``name (path[:line])`` with the community name
    appended in brackets when one is found. Further lines list callers,
    callees, execution flows and tests, each only when non-empty.
    """
    from .graph import GraphNode
    assert isinstance(node, GraphNode)

    qualified = node.qualified_name

    location = _make_relative(node.file_path, repo_root)
    if node.line_start:
        location = f"{location}:{node.line_start}"
    header = f"{node.name} ({location})"

    # Community: use the ID carried on the node, otherwise ask the nodes table.
    community_id = node.extra.get("community_id")
    if not community_id:
        record = conn.execute(
            "SELECT community_id FROM nodes WHERE id = ?", (node.id,)
        ).fetchone()
        community_id = record["community_id"] if record else None
    if community_id:
        community = _get_community_name(conn, community_id)
        if community:
            header += f" [{community}]"

    output = [header]

    # Callers (max 5, deduplicated by name)
    caller_names: list[str] = []
    for edge in store.get_edges_by_target(qualified):
        if edge.kind != "CALLS" or len(caller_names) >= 5:
            continue
        caller = store.get_node(edge.source_qualified)
        if caller and caller.name not in caller_names:
            caller_names.append(caller.name)
    if caller_names:
        output.append(f" Called by: {', '.join(caller_names)}")

    # Callees (max 5, deduplicated by name)
    callee_names: list[str] = []
    for edge in store.get_edges_by_source(qualified):
        if edge.kind != "CALLS" or len(callee_names) >= 5:
            continue
        callee = store.get_node(edge.target_qualified)
        if callee and callee.name not in callee_names:
            callee_names.append(callee.name)
    if callee_names:
        output.append(f" Calls: {', '.join(callee_names)}")

    # Execution flows
    flows = _get_flow_names_for_node(conn, node.id)
    if flows:
        output.append(f" Flows: {', '.join(flows)}")

    # Tests (max 3, not deduplicated)
    test_names: list[str] = []
    for edge in store.get_edges_by_target(qualified):
        if edge.kind != "TESTED_BY" or len(test_names) >= 3:
            continue
        tester = store.get_node(edge.source_qualified)
        if tester:
            test_names.append(tester.name)
    if test_names:
        output.append(f" Tests: {', '.join(test_names)}")

    return output
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def enrich_search(pattern: str, repo_root: str) -> str:
    """Look *pattern* up in the graph and describe up to five matching symbols.

    Returns an empty string when the graph database is missing, the search
    has no results, or every result is skipped (unknown node or test node).
    """
    from .graph import GraphStore
    from .search import _fts_search

    db_path = Path(repo_root) / ".code-review-graph" / "graph.db"
    if not db_path.exists():
        return ""

    store = GraphStore(db_path)
    try:
        conn = store._conn

        hits = _fts_search(conn, pattern, limit=8)
        if not hits:
            return ""

        sections: list[list[str]] = []
        for node_id, _score in hits:
            if len(sections) >= 5:
                break
            node = store.get_node_by_id(node_id)
            if node and not node.is_test:
                sections.append(
                    _format_node_context(node, store, conn, repo_root)
                )

        if not sections:
            return ""

        # Each symbol's lines are followed by an empty separator line.
        body: list[str] = []
        for section in sections:
            body.extend(section)
            body.append("")

        count = len(sections)
        header = f'[code-review-graph] {count} symbol(s) matching "{pattern}":\n'
        return header + "\n".join(body)
    finally:
        store.close()
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
def enrich_file_read(file_path: str, repo_root: str) -> str:
    """Describe up to ten symbols the graph records for *file_path*.

    The lookup is tried with *file_path* as given and then with its resolved
    form. Returns an empty string when the graph database is missing or no
    Function/Class/Type/Test nodes are found for the file.
    """
    from .graph import GraphStore

    db_path = Path(repo_root) / ".code-review-graph" / "graph.db"
    if not db_path.exists():
        return ""

    store = GraphStore(db_path)
    try:
        conn = store._conn
        nodes = store.get_nodes_by_file(file_path)
        if not nodes:
            # Try with resolved path
            try:
                nodes = store.get_nodes_by_file(str(Path(file_path).resolve()))
            except (OSError, ValueError):
                pass
        if not nodes:
            return ""

        # Keep functions/classes/types/tests (skipping other kinds), first 10.
        wanted_kinds = ("Function", "Class", "Type", "Test")
        symbols = [n for n in nodes if n.kind in wanted_kinds]
        symbols = symbols[:10]
        if not symbols:
            return ""

        body: list[str] = []
        for symbol in symbols:
            body.extend(_format_node_context(symbol, store, conn, repo_root))
            body.append("")

        rel_path = _make_relative(file_path, repo_root)
        header = (
            f"[code-review-graph] {len(symbols)} symbol(s) in {rel_path}:\n"
        )
        return header + "\n".join(body)
    finally:
        store.close()
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def run_hook() -> None:
    """Entry point for the enrich CLI subcommand.

    Reads Claude Code hook JSON from stdin, extracts the search pattern,
    queries the graph, and outputs hookSpecificOutput JSON to stdout.

    The function writes nothing and returns silently whenever it cannot
    produce context: unparsable or non-object input, a missing graph
    database, no usable pattern, or an empty enrichment result.
    """
    try:
        hook_input = json.load(sys.stdin)
    except (json.JSONDecodeError, ValueError):
        return

    # Valid JSON that is not an object (a list, string, number, ...) carries
    # no fields to read; bail out instead of failing on ``.get``.
    if not isinstance(hook_input, dict):
        return

    tool_name = hook_input.get("tool_name", "")
    tool_input = hook_input.get("tool_input", {})
    if not isinstance(tool_input, dict):
        return
    # ``or`` also covers an explicit null/empty "cwd" in the payload.
    cwd = hook_input.get("cwd") or os.getcwd()

    # Find repo root by walking up from cwd
    from .incremental import find_project_root

    repo_root = str(find_project_root(Path(cwd)))
    db_path = Path(repo_root) / ".code-review-graph" / "graph.db"
    if not db_path.exists():
        return

    # Dispatch
    context = ""
    if tool_name == "Read":
        fp = tool_input.get("file_path", "")
        if fp:
            context = enrich_file_read(fp, repo_root)
    else:
        pattern = extract_pattern(tool_name, tool_input)
        if not pattern or len(pattern) < 3:
            return
        context = enrich_search(pattern, repo_root)

    if not context:
        return

    response = {
        "hookSpecificOutput": {
            "hookEventName": "PreToolUse",
            "additionalContext": context,
        }
    }
    json.dump(response, sys.stdout)
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Evaluation framework for code-review-graph.
|
|
2
|
+
|
|
3
|
+
Provides scoring metrics (token efficiency, MRR, precision/recall),
|
|
4
|
+
benchmark runners, and report generators for benchmarking graph-based code reviews.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from .reporter import generate_full_report, generate_markdown_report, generate_readme_tables
|
|
10
|
+
from .scorer import compute_mrr, compute_precision_recall, compute_token_efficiency
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def __getattr__(name: str):
    """Resolve runner functions on first access (the runner requires pyyaml).

    Any other missing attribute raises AttributeError as usual.
    """
    lazy_exports = {"load_all_configs", "load_config", "run_eval", "write_csv"}
    if name not in lazy_exports:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    from . import runner
    return getattr(runner, name)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
__all__ = [
|
|
23
|
+
"compute_mrr",
|
|
24
|
+
"compute_precision_recall",
|
|
25
|
+
"compute_token_efficiency",
|
|
26
|
+
"generate_full_report",
|
|
27
|
+
"generate_markdown_report",
|
|
28
|
+
"generate_readme_tables",
|
|
29
|
+
"load_all_configs",
|
|
30
|
+
"load_config",
|
|
31
|
+
"run_eval",
|
|
32
|
+
"write_csv",
|
|
33
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Benchmark modules for the evaluation framework."""
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
"""Agent baseline benchmark: grep-and-read-top-k versus a graph query.
|
|
2
|
+
|
|
3
|
+
The whole-corpus baseline in the standalone token benchmark is an upper
|
|
4
|
+
bound no real agent pays: a competent agent greps for identifiers from the
|
|
5
|
+
question and reads only the best-matching files. This benchmark measures
|
|
6
|
+
that realistic baseline:
|
|
7
|
+
|
|
8
|
+
1. Derive search terms from the question (identifier-shaped tokens via
|
|
9
|
+
``search.extract_query_identifiers`` plus plain keywords).
|
|
10
|
+
2. Pure-python grep over the corpus (no external ``rg``/``grep`` binary),
|
|
11
|
+
ranking files by total case-insensitive match count.
|
|
12
|
+
3. Read the top-k files (k=3) and token-count them with the chars/4 utility
|
|
13
|
+
(``token_benchmark.estimate_tokens``) as ``baseline_tokens``.
|
|
14
|
+
4. Compare against the graph-query cost for the same question — hybrid
|
|
15
|
+
search hits plus one hop of neighbor edges, the same accounting used by
|
|
16
|
+
``code_review_graph/token_benchmark.py``.
|
|
17
|
+
|
|
18
|
+
Questions come from ``agent_questions:`` in the repo config, falling back to
|
|
19
|
+
the ``search_queries`` query strings when absent.
|
|
20
|
+
|
|
21
|
+
Failure semantics match the other benchmarks: a thrown search is recorded
|
|
22
|
+
with ``status="error"`` and excluded from aggregates; rows where either side
|
|
23
|
+
of the ratio is zero get ``status="no_graph_results"`` /
|
|
24
|
+
``status="no_baseline_match"`` and are likewise excluded.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
from __future__ import annotations
|
|
28
|
+
|
|
29
|
+
import logging
|
|
30
|
+
import statistics
|
|
31
|
+
from collections.abc import Iterator
|
|
32
|
+
from pathlib import Path
|
|
33
|
+
|
|
34
|
+
from code_review_graph.token_benchmark import estimate_tokens
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger(__name__)
|
|
37
|
+
|
|
38
|
+
DEFAULT_TOP_K = 3
|
|
39
|
+
|
|
40
|
+
_SOURCE_EXTS = (
|
|
41
|
+
".py", ".js", ".ts", ".tsx", ".go", ".rs", ".java",
|
|
42
|
+
".c", ".cpp", ".h", ".rb", ".php", ".swift", ".kt",
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
_SKIP_DIRS = {
|
|
46
|
+
".git", ".hg", ".svn", "node_modules", "__pycache__",
|
|
47
|
+
".code-review-graph", ".venv", "venv", "dist", "build",
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
_STOPWORDS = {
|
|
51
|
+
"how", "does", "do", "the", "a", "an", "is", "are", "was", "what",
|
|
52
|
+
"where", "when", "which", "who", "why", "and", "or", "in", "on", "of",
|
|
53
|
+
"to", "for", "with", "via", "into", "from", "this", "that", "it", "its",
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def derive_search_terms(question: str) -> list[str]:
    """Derive grep terms from *question*: identifiers first, then keywords.

    Identifier-shaped tokens come from ``search.extract_query_identifiers``
    and are kept as returned. The question's whitespace-separated words are
    then stripped of surrounding punctuation, lowercased, and appended when
    they have 3+ characters, are not stopwords, and were not already added.
    The result preserves first-seen order.
    """
    from code_review_graph.search import extract_query_identifiers

    # A dict gives insertion-ordered de-duplication in one structure.
    ordered: dict[str, None] = {}
    for ident in extract_query_identifiers(question):
        ordered.setdefault(ident, None)
    for raw_word in question.split():
        keyword = raw_word.strip(".,;:!?\"'()[]{}`").lower()
        if len(keyword) < 3 or keyword in _STOPWORDS:
            continue
        ordered.setdefault(keyword, None)
    return list(ordered)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def iter_source_files(repo_path: Path) -> Iterator[Path]:
    """Yield source files under *repo_path*, skipping vendored/VCS dirs.

    Files are yielded in sorted path order. A file is skipped when its suffix
    is not in ``_SOURCE_EXTS`` or when any path component *below repo_path*
    is listed in ``_SKIP_DIRS``.
    """
    for path in sorted(repo_path.rglob("*")):
        if path.suffix not in _SOURCE_EXTS or not path.is_file():
            continue
        # Only components inside the repo are checked: matching against the
        # absolute path would drop every file of a repo that is itself
        # checked out under a directory named e.g. "build" or "venv".
        inner_parts = path.relative_to(repo_path).parts
        if any(part in _SKIP_DIRS for part in inner_parts):
            continue
        yield path
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def grep_rank(
    repo_path: Path, terms: list[str], k: int = DEFAULT_TOP_K,
) -> list[tuple[str, int]]:
    """Rank source files by total case-insensitive term matches; keep top *k*.

    Implemented in pure python, with no external grep/rg dependency. Each
    entry is ``(path relative to repo_path, match count)``. Entries are
    ordered by descending count, ties broken by relative path; files without
    any match and files that cannot be read are left out.
    """
    needles = [term.lower() for term in terms if term]
    if not needles:
        return []

    ranked: list[tuple[str, int]] = []
    for source in iter_source_files(repo_path):
        try:
            haystack = source.read_text(encoding="utf-8", errors="replace").lower()
        except OSError:
            continue
        hits = 0
        for needle in needles:
            hits += haystack.count(needle)
        if hits > 0:
            ranked.append((str(source.relative_to(repo_path)), hits))

    ranked.sort(key=lambda entry: (-entry[1], entry[0]))
    return ranked[:k]
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def run(repo_path: Path, store, config: dict) -> list[dict]:
    """Run the agent baseline benchmark for one repo.

    Questions come from ``config["agent_questions"]``, falling back to the
    ``query`` of each ``search_queries`` entry. One result row is produced
    per question, comparing the token cost of reading the top grep-ranked
    files against the token cost of the graph search hits and their edges.
    """
    questions = list(config.get("agent_questions") or [])
    if not questions:
        questions = [entry["query"] for entry in config.get("search_queries", [])]

    top_k = int(config.get("agent_baseline_top_k", DEFAULT_TOP_K))
    rows: list[dict] = []

    for question in questions:
        terms = derive_search_terms(question)
        ranked_files = grep_rank(repo_path, terms, k=top_k)

        # Baseline cost: tokens of every top-ranked file that can be read.
        baseline_cost = 0
        for rel_path, _matches in ranked_files:
            try:
                content = (repo_path / rel_path).read_text(
                    encoding="utf-8", errors="replace"
                )
                baseline_cost += estimate_tokens(content)
            except OSError:
                continue

        row: dict = {
            "repo": config["name"],
            "question": question,
            "terms": " ".join(terms),
            "files_matched": len(ranked_files),
            "top_files": ";".join(rel_path for rel_path, _ in ranked_files),
            "baseline_tokens": baseline_cost,
            "graph_tokens": "",
            "baseline_to_graph_ratio": "",
            "status": "ok",
            "error": "",
        }

        try:
            from code_review_graph.search import hybrid_search
            hits = hybrid_search(store, question, limit=5)
        except Exception as exc:
            logger.warning("hybrid_search failed on %r: %s", question, exc)
            row["status"] = "error"
            row["error"] = str(exc)[:200]
            rows.append(row)
            continue

        # Same accounting as the standalone token benchmark: search hits
        # plus up to 5 outgoing edges of neighbor context per hit.
        graph_cost = 0
        for hit in hits:
            graph_cost += estimate_tokens(str(hit))
            neighbors = store.get_edges_by_source(hit.get("qualified_name", ""))[:5]
            for edge in neighbors:
                graph_cost += estimate_tokens(str(edge))

        row["graph_tokens"] = graph_cost
        if graph_cost == 0:
            row["status"] = "no_graph_results"
        elif baseline_cost > 0 and graph_cost > 0:
            row["baseline_to_graph_ratio"] = round(baseline_cost / graph_cost, 1)
        else:
            row["status"] = "no_baseline_match"
        rows.append(row)

    return rows
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def aggregate(results: list[dict]) -> dict:
    """Summarize benchmark rows, using only ``status == "ok"`` rows for ratios.

    Returns the total, ok and error row counts together with the median and
    mean baseline-to-graph ratio rounded to one decimal (None when no row is
    ok).
    """
    ratios = [
        float(row["baseline_to_graph_ratio"])
        for row in results
        if row.get("status") == "ok"
    ]

    error_count = 0
    for row in results:
        if row.get("status") == "error":
            error_count += 1

    if ratios:
        median_ratio = round(statistics.median(ratios), 1)
        mean_ratio = round(statistics.mean(ratios), 1)
    else:
        median_ratio = None
        mean_ratio = None

    return {
        "total_rows": len(results),
        "ok_rows": len(ratios),
        "error_rows": error_count,
        "median_baseline_to_graph_ratio": median_ratio,
        "mean_baseline_to_graph_ratio": mean_ratio,
    }
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
"""Build performance benchmark: measures timing of graph operations."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import time
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
|
|
9
|
+
logger = logging.getLogger(__name__)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def run(repo_path: Path, store, config: dict) -> list[dict]:
    """Run the build performance benchmark and return a single result row.

    Times flow detection, community detection and up to ten search queries
    from ``config["search_queries"]``. A failing detection step is logged as
    a warning and reported with a duration of 0.0 seconds.
    """
    stats = store.get_stats()

    # Time flow detection (trace + store)
    try:
        from code_review_graph.flows import store_flows, trace_flows
        started = time.perf_counter()
        traced = trace_flows(store)
        store_flows(store, traced)
        flow_seconds = time.perf_counter() - started
    except Exception as exc:
        logger.warning("Flow detection failed: %s", exc)
        flow_seconds = 0.0

    # Time community detection (detect + store)
    try:
        from code_review_graph.communities import detect_communities, store_communities
        started = time.perf_counter()
        detected = detect_communities(store)
        store_communities(store, detected)
        community_seconds = time.perf_counter() - started
    except Exception as exc:
        logger.warning("Community detection failed: %s", exc)
        community_seconds = 0.0

    # Time search (average over the first 10 configured queries)
    durations: list[float] = []
    for entry in config.get("search_queries", [])[:10]:
        started = time.perf_counter()
        store.search_nodes(entry["query"], limit=20)
        durations.append(time.perf_counter() - started)

    # max(..., 1) keeps the division defined when no queries are configured.
    sample_count = max(len(durations), 1)
    avg_search_ms = round(sum(durations) / sample_count * 1000, 1)

    # The 0.001 floor avoids dividing by zero when flow timing is 0.0.
    nodes_per_second = round(stats.total_nodes / max(flow_seconds, 0.001))

    row = {
        "repo": config["name"],
        "file_count": stats.files_count,
        "node_count": stats.total_nodes,
        "edge_count": stats.total_edges,
        "flow_detection_seconds": round(flow_seconds, 3),
        "community_detection_seconds": round(community_seconds, 3),
        "search_avg_ms": avg_search_ms,
        "nodes_per_second": nodes_per_second,
    }
    return [row]
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Flow completeness benchmark: evaluates entry point detection and flow tracing."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
logger = logging.getLogger(__name__)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def run(repo_path: Path, store, config: dict) -> list[dict]:
    """Run the flow completeness benchmark and return a single result row.

    Traces and stores flows, then reports how many of the configured
    ``entry_points`` occur as a substring of some detected entry name, along
    with average and maximum flow depth.
    """
    from code_review_graph.flows import store_flows, trace_flows

    flows = trace_flows(store)
    stored_count = store_flows(store, flows)

    # Detected entry point names: "entry_point" if set, else the flow "name".
    detected = {
        flow.get("entry_point") or flow.get("name", "")
        for flow in flows
    }

    known = set(config.get("entry_points", []))
    found = 0
    for expected in known:
        for candidate in detected:
            if expected in candidate:
                found += 1
                break

    depths = [flow.get("depth", 0) for flow in flows]
    # max(..., 1) keeps both ratios defined for empty inputs.
    recall = round(found / max(len(known), 1), 3)
    avg_depth = round(sum(depths) / max(len(depths), 1), 1)

    row = {
        "repo": config["name"],
        "known_entry_points": len(known),
        "detected_entry_points": found,
        "recall": recall,
        "detected_flows": stored_count,
        "avg_flow_depth": avg_depth,
        "max_flow_depth": max(depths, default=0),
    }
    return [row]
|