code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. code_review_graph/__init__.py +20 -0
  2. code_review_graph/__main__.py +4 -0
  3. code_review_graph/analysis.py +410 -0
  4. code_review_graph/changes.py +409 -0
  5. code_review_graph/cli.py +1255 -0
  6. code_review_graph/communities.py +874 -0
  7. code_review_graph/constants.py +23 -0
  8. code_review_graph/context_savings.py +317 -0
  9. code_review_graph/custom_languages.py +322 -0
  10. code_review_graph/daemon.py +1009 -0
  11. code_review_graph/daemon_cli.py +320 -0
  12. code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
  13. code_review_graph/embeddings.py +1006 -0
  14. code_review_graph/enrich.py +303 -0
  15. code_review_graph/eval/__init__.py +33 -0
  16. code_review_graph/eval/benchmarks/__init__.py +1 -0
  17. code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
  18. code_review_graph/eval/benchmarks/build_performance.py +60 -0
  19. code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
  20. code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
  21. code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
  22. code_review_graph/eval/benchmarks/search_quality.py +59 -0
  23. code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
  24. code_review_graph/eval/configs/code-review-graph.yaml +50 -0
  25. code_review_graph/eval/configs/express.yaml +45 -0
  26. code_review_graph/eval/configs/fastapi.yaml +48 -0
  27. code_review_graph/eval/configs/flask.yaml +50 -0
  28. code_review_graph/eval/configs/gin.yaml +51 -0
  29. code_review_graph/eval/configs/httpx.yaml +48 -0
  30. code_review_graph/eval/reporter.py +301 -0
  31. code_review_graph/eval/runner.py +211 -0
  32. code_review_graph/eval/scorer.py +85 -0
  33. code_review_graph/eval/token_benchmark.py +182 -0
  34. code_review_graph/exports.py +409 -0
  35. code_review_graph/flows.py +698 -0
  36. code_review_graph/graph.py +1427 -0
  37. code_review_graph/graph_diff.py +122 -0
  38. code_review_graph/hints.py +384 -0
  39. code_review_graph/incremental.py +1245 -0
  40. code_review_graph/jedi_resolver.py +303 -0
  41. code_review_graph/main.py +1079 -0
  42. code_review_graph/memory.py +142 -0
  43. code_review_graph/migrations.py +284 -0
  44. code_review_graph/parser.py +6957 -0
  45. code_review_graph/postprocessing.py +134 -0
  46. code_review_graph/prompts.py +159 -0
  47. code_review_graph/refactor.py +852 -0
  48. code_review_graph/registry.py +319 -0
  49. code_review_graph/rescript_resolver.py +206 -0
  50. code_review_graph/search.py +447 -0
  51. code_review_graph/skills.py +1481 -0
  52. code_review_graph/spring_resolver.py +200 -0
  53. code_review_graph/temporal_resolver.py +199 -0
  54. code_review_graph/token_benchmark.py +125 -0
  55. code_review_graph/tools/__init__.py +156 -0
  56. code_review_graph/tools/_common.py +176 -0
  57. code_review_graph/tools/analysis_tools.py +184 -0
  58. code_review_graph/tools/build.py +541 -0
  59. code_review_graph/tools/community_tools.py +246 -0
  60. code_review_graph/tools/context.py +152 -0
  61. code_review_graph/tools/docs.py +274 -0
  62. code_review_graph/tools/flows_tools.py +176 -0
  63. code_review_graph/tools/query.py +692 -0
  64. code_review_graph/tools/refactor_tools.py +168 -0
  65. code_review_graph/tools/registry_tools.py +125 -0
  66. code_review_graph/tools/review.py +477 -0
  67. code_review_graph/tsconfig_resolver.py +257 -0
  68. code_review_graph/visualization.py +2184 -0
  69. code_review_graph/wiki.py +305 -0
  70. code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
  71. code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
  72. code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
  73. code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
  74. code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,303 @@
1
+ """PreToolUse search enrichment for Claude Code hooks.
2
+
3
+ Intercepts Grep/Glob/Bash/Read tool calls and enriches them with
4
+ structural context from the code knowledge graph: callers, callees,
5
+ execution flows, community membership, and test coverage.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import json
11
+ import logging
12
+ import os
13
+ import re
14
+ import sys
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
# Flags that consume the next token in grep/rg commands
_RG_FLAGS_WITH_VALUES = frozenset({
    "-e", "-f", "-m", "-A", "-B", "-C", "-g", "--glob",
    "-t", "--type", "--include", "--exclude", "--max-count",
    "--max-depth", "--max-filesize", "--color", "--colors",
    "--context-separator", "--field-match-separator",
    "--path-separator", "--replace", "--sort", "--sortr",
})

# Flags whose value *is* the search pattern (``grep -e PATTERN``), so the
# following token must be returned rather than skipped.
_RG_PATTERN_FLAGS = frozenset({"-e", "--regexp"})


def _tokenize_command(cmd: str) -> list[str]:
    """Split a shell command into tokens, keeping quoted arguments whole."""
    import shlex

    try:
        return shlex.split(cmd)
    except ValueError:
        # Unbalanced quotes: fall back to whitespace splitting and drop any
        # stray quote characters from the token edges.
        return [tok.strip("'\"") for tok in cmd.split()]


def _pattern_from_tokens(tokens: list[str]) -> str | None:
    """Return the first grep/rg pattern argument in *tokens*, if usable."""
    remaining = iter(tokens)
    found_cmd = False
    for token in remaining:
        if not found_cmd:
            # Everything up to and including the grep/rg executable is skipped.
            found_cmd = bool(re.search(r"\brg$|\bgrep$", token))
            continue
        if token.startswith("--regexp="):
            candidate = token[len("--regexp="):]
        elif token in _RG_PATTERN_FLAGS:
            candidate = next(remaining, "")
        elif token.startswith("-"):
            if token in _RG_FLAGS_WITH_VALUES:
                next(remaining, None)  # discard the flag's value
            continue
        else:
            candidate = token
        return candidate if len(candidate) >= 3 else None
    return None


def extract_pattern(tool_name: str, tool_input: dict[str, Any]) -> str | None:
    """Extract a search pattern from a tool call's input.

    * ``Grep``: the ``pattern`` field as given.
    * ``Glob``: the first identifier-like run of 3+ characters that follows
      a ``*`` or ``/`` (``"**/auth*.ts"`` -> ``"auth"``); pure extension
      globs such as ``"**/*.ts"`` yield ``None``.
    * ``Bash``: the pattern argument of the first ``grep``/``rg`` invocation
      in ``command``. Quoted patterns are kept whole, and a pattern supplied
      through ``-e``/``--regexp`` is honoured. Patterns shorter than three
      characters yield ``None``.

    Returns None if no meaningful pattern can be extracted, including when
    the relevant field is missing or is not a string.
    """
    if tool_name == "Grep":
        pattern = tool_input.get("pattern")
        return pattern if isinstance(pattern, str) else None

    if tool_name == "Glob":
        raw = tool_input.get("pattern", "")
        if not isinstance(raw, str):
            return None
        match = re.search(r"[*/]([a-zA-Z][a-zA-Z0-9_]{2,})", raw)
        return match.group(1) if match else None

    if tool_name == "Bash":
        cmd = tool_input.get("command", "")
        if not isinstance(cmd, str) or not re.search(r"\brg\b|\bgrep\b", cmd):
            return None
        return _pattern_from_tokens(_tokenize_command(cmd))

    return None
69
+
70
+
71
def _make_relative(file_path: str, repo_root: str) -> str:
    """Return *file_path* expressed relative to *repo_root*, for display.

    A path that does not live under *repo_root* is handed back unchanged.
    """
    try:
        relative = Path(file_path).relative_to(repo_root)
    except ValueError:
        # Not inside the repo: keep the caller's spelling.
        return file_path
    return str(relative)
77
+
78
+
79
def _get_community_name(conn: Any, community_id: int) -> str:
    """Look up the name of community *community_id*.

    Returns an empty string when the ``communities`` table has no such row.
    """
    cursor = conn.execute(
        "SELECT name FROM communities WHERE id = ?", (community_id,)
    )
    match = cursor.fetchone()
    if not match:
        return ""
    return match["name"]
85
+
86
+
87
def _get_flow_names_for_node(conn: Any, node_id: int) -> list[str]:
    """Return the names of up to three execution flows containing *node_id*."""
    query = (
        "SELECT f.name FROM flow_memberships fm "
        "JOIN flows f ON fm.flow_id = f.id "
        "WHERE fm.node_id = ? LIMIT 3"
    )
    names: list[str] = []
    for record in conn.execute(query, (node_id,)).fetchall():
        names.append(record["name"])
    return names
96
+
97
+
98
def _format_node_context(
    node: Any,
    store: Any,
    conn: Any,
    repo_root: str,
) -> list[str]:
    """Format a single node's structural context as plain text lines.

    The first line is a header of the form ``name (path[:line]) [community]``.
    It is followed by optional indented ``Called by``, ``Calls``, ``Flows``
    and ``Tests`` lines, each emitted only when there is something to report.

    Args:
        node: The ``GraphNode`` to describe.
        store: Graph store used for edge and node lookups.
        conn: Database connection used for community and flow queries.
        repo_root: Repository root used to shorten the node's file path.

    Returns:
        The header line followed by zero or more detail lines.
    """
    from .graph import GraphNode
    assert isinstance(node, GraphNode)

    qn = node.qualified_name
    loc = _make_relative(node.file_path, repo_root)
    if node.line_start:
        loc = f"{loc}:{node.line_start}"

    header = f"{node.name} ({loc})"

    # Community: prefer the id carried on the node, otherwise ask the
    # nodes table directly.
    community_id = node.extra.get("community_id")
    if not community_id:
        row = conn.execute(
            "SELECT community_id FROM nodes WHERE id = ?", (node.id,)
        ).fetchone()
        community_id = row["community_id"] if row else None
    if community_id:
        cname = _get_community_name(conn, community_id)
        if cname:
            header += f" [{cname}]"

    lines = [header]

    # Incoming edges feed both the "Called by" and the "Tests" sections, so
    # fetch them once instead of querying the store twice.
    incoming = list(store.get_edges_by_target(qn))

    # Callers (max 5, deduplicated by name)
    callers: list[str] = []
    seen: set[str] = set()
    for edge in incoming:
        if len(callers) >= 5:
            break  # cap reached; no point resolving further nodes
        if edge.kind != "CALLS":
            continue
        caller = store.get_node(edge.source_qualified)
        if caller and caller.name not in seen:
            seen.add(caller.name)
            callers.append(caller.name)
    if callers:
        lines.append(f"  Called by: {', '.join(callers)}")

    # Callees (max 5, deduplicated by name)
    callees: list[str] = []
    seen.clear()
    for edge in store.get_edges_by_source(qn):
        if len(callees) >= 5:
            break
        if edge.kind != "CALLS":
            continue
        callee = store.get_node(edge.target_qualified)
        if callee and callee.name not in seen:
            seen.add(callee.name)
            callees.append(callee.name)
    if callees:
        lines.append(f"  Calls: {', '.join(callees)}")

    # Execution flows
    flow_names = _get_flow_names_for_node(conn, node.id)
    if flow_names:
        lines.append(f"  Flows: {', '.join(flow_names)}")

    # Tests (max 3)
    tests: list[str] = []
    for edge in incoming:
        if len(tests) >= 3:
            break
        if edge.kind != "TESTED_BY":
            continue
        test_node = store.get_node(edge.source_qualified)
        if test_node:
            tests.append(test_node.name)
    if tests:
        lines.append(f"  Tests: {', '.join(tests)}")

    return lines
172
+
173
+
174
def enrich_search(pattern: str, repo_root: str) -> str:
    """Look *pattern* up in the graph's full-text index and describe the hits.

    At most eight index hits are fetched and at most five non-test symbols
    are reported. Returns an empty string when the graph database is
    missing or nothing reportable matches.
    """
    from .graph import GraphStore
    from .search import _fts_search

    graph_db = Path(repo_root) / ".code-review-graph" / "graph.db"
    if not graph_db.exists():
        return ""

    store = GraphStore(graph_db)
    try:
        conn = store._conn

        hits = _fts_search(conn, pattern, limit=8)
        if not hits:
            return ""

        body: list[str] = []
        shown = 0
        for hit_id, _rank in hits:
            if shown >= 5:
                break
            node = store.get_node_by_id(hit_id)
            if node and not node.is_test:
                body += _format_node_context(node, store, conn, repo_root)
                body.append("")  # blank separator between symbols
                shown += 1

        if not body:
            return ""

        title = f'[code-review-graph] {shown} symbol(s) matching "{pattern}":\n'
        return title + "\n".join(body)
    finally:
        store.close()
211
+
212
+
213
def enrich_file_read(file_path: str, repo_root: str) -> str:
    """Describe the symbols the graph knows about in *file_path*.

    The file is looked up as given and, failing that, by its resolved path.
    Only Function/Class/Type/Test nodes are reported, capped at ten.
    Returns an empty string when the graph database is missing or the file
    has no such symbols.
    """
    from .graph import GraphStore

    graph_db = Path(repo_root) / ".code-review-graph" / "graph.db"
    if not graph_db.exists():
        return ""

    store = GraphStore(graph_db)
    try:
        conn = store._conn
        nodes = store.get_nodes_by_file(file_path)
        if not nodes:
            # Second attempt: the graph may have stored the resolved path.
            try:
                nodes = store.get_nodes_by_file(str(Path(file_path).resolve()))
            except (OSError, ValueError):
                pass
        if not nodes:
            return ""

        # File-level nodes carry no call structure; keep symbol kinds only.
        wanted_kinds = ("Function", "Class", "Type", "Test")
        symbols = []
        for candidate in nodes:
            if candidate.kind in wanted_kinds:
                symbols.append(candidate)
        symbols = symbols[:10]

        if not symbols:
            return ""

        body: list[str] = []
        for symbol in symbols:
            body += _format_node_context(symbol, store, conn, repo_root)
            body.append("")  # blank separator between symbols

        shown_path = _make_relative(file_path, repo_root)
        title = (
            f"[code-review-graph] {len(symbols)} symbol(s) in {shown_path}:\n"
        )
        return title + "\n".join(body)
    finally:
        store.close()
257
+
258
+
259
def run_hook() -> None:
    """Entry point for the enrich CLI subcommand.

    Reads Claude Code hook JSON from stdin, extracts the search pattern,
    queries the graph, and outputs hookSpecificOutput JSON to stdout.

    The hook is best-effort: it writes nothing and returns quietly when
    stdin is not valid JSON, when the payload (or its ``tool_input``) is not
    a JSON object, when no graph database exists for the project, or when
    there is no context to add.
    """
    try:
        hook_input = json.load(sys.stdin)
    except (json.JSONDecodeError, ValueError):
        return
    # Valid JSON is not necessarily an object ("[]", "null", "3" all parse).
    if not isinstance(hook_input, dict):
        return

    tool_name = hook_input.get("tool_name", "")
    tool_input = hook_input.get("tool_input") or {}
    if not isinstance(tool_input, dict):
        return
    cwd = hook_input.get("cwd")
    if not isinstance(cwd, str) or not cwd:
        cwd = os.getcwd()

    # Find repo root by walking up from cwd
    from .incremental import find_project_root

    repo_root = str(find_project_root(Path(cwd)))
    db_path = Path(repo_root) / ".code-review-graph" / "graph.db"
    if not db_path.exists():
        return

    # Dispatch
    context = ""
    if tool_name == "Read":
        fp = tool_input.get("file_path", "")
        if isinstance(fp, str) and fp:
            context = enrich_file_read(fp, repo_root)
    else:
        pattern = extract_pattern(tool_name, tool_input)
        # Very short patterns match too much to be useful.
        if not isinstance(pattern, str) or len(pattern) < 3:
            return
        context = enrich_search(pattern, repo_root)

    if not context:
        return

    response = {
        "hookSpecificOutput": {
            "hookEventName": "PreToolUse",
            "additionalContext": context,
        }
    }
    json.dump(response, sys.stdout)
@@ -0,0 +1,33 @@
1
+ """Evaluation framework for code-review-graph.
2
+
3
+ Provides scoring metrics (token efficiency, MRR, precision/recall),
4
+ benchmark runners, and report generators for benchmarking graph-based code reviews.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from .reporter import generate_full_report, generate_markdown_report, generate_readme_tables
10
+ from .scorer import compute_mrr, compute_precision_recall, compute_token_efficiency
11
+
12
+
13
def __getattr__(name: str):
    """Resolve the runner helpers on first access (they require pyyaml).

    Any other missing attribute raises ``AttributeError`` as usual.
    """
    if name not in {"load_all_configs", "load_config", "run_eval", "write_csv"}:
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    # Imported here, not at module top, so the runner is only loaded on demand.
    from . import runner
    return getattr(runner, name)
20
+
21
+
22
+ __all__ = [
23
+ "compute_mrr",
24
+ "compute_precision_recall",
25
+ "compute_token_efficiency",
26
+ "generate_full_report",
27
+ "generate_markdown_report",
28
+ "generate_readme_tables",
29
+ "load_all_configs",
30
+ "load_config",
31
+ "run_eval",
32
+ "write_csv",
33
+ ]
@@ -0,0 +1 @@
1
+ """Benchmark modules for the evaluation framework."""
@@ -0,0 +1,193 @@
1
+ """Agent baseline benchmark: grep-and-read-top-k versus a graph query.
2
+
3
+ The whole-corpus baseline in the standalone token benchmark is an upper
4
+ bound no real agent pays: a competent agent greps for identifiers from the
5
+ question and reads only the best-matching files. This benchmark measures
6
+ that realistic baseline:
7
+
8
+ 1. Derive search terms from the question (identifier-shaped tokens via
9
+ ``search.extract_query_identifiers`` plus plain keywords).
10
+ 2. Pure-python grep over the corpus (no external ``rg``/``grep`` binary),
11
+ ranking files by total case-insensitive match count.
12
+ 3. Read the top-k files (k=3) and token-count them with the chars/4 utility
13
+ (``token_benchmark.estimate_tokens``) as ``baseline_tokens``.
14
+ 4. Compare against the graph-query cost for the same question — hybrid
15
+ search hits plus one hop of neighbor edges, the same accounting used by
16
+ ``code_review_graph/token_benchmark.py``.
17
+
18
+ Questions come from ``agent_questions:`` in the repo config, falling back to
19
+ the ``search_queries`` query strings when absent.
20
+
21
+ Failure semantics match the other benchmarks: a thrown search is recorded
22
+ with ``status="error"`` and excluded from aggregates; rows where either side
23
+ of the ratio is zero get ``status="no_graph_results"`` /
24
+ ``status="no_baseline_match"`` and are likewise excluded.
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ import logging
30
+ import statistics
31
+ from collections.abc import Iterator
32
+ from pathlib import Path
33
+
34
+ from code_review_graph.token_benchmark import estimate_tokens
35
+
36
+ logger = logging.getLogger(__name__)
37
+
38
# Default number of best-matching files read for the baseline.
DEFAULT_TOP_K = 3

# File suffixes treated as source code by the pure-python grep.
_SOURCE_EXTS = (
    ".py",
    ".js",
    ".ts",
    ".tsx",
    ".go",
    ".rs",
    ".java",
    ".c",
    ".cpp",
    ".h",
    ".rb",
    ".php",
    ".swift",
    ".kt",
)

# Directory names whose contents are never searched.
_SKIP_DIRS = {
    # version control
    ".git", ".hg", ".svn",
    # dependencies, caches and the graph's own data directory
    "node_modules", "__pycache__", ".code-review-graph",
    # virtual environments and build output
    ".venv", "venv", "dist", "build",
}

# Question words that are not used as grep terms.
_STOPWORDS = {
    "how", "does", "do",
    "the", "a", "an",
    "is", "are", "was",
    "what", "where", "when", "which", "who", "why",
    "and", "or",
    "in", "on", "of", "to", "for", "with", "via", "into", "from",
    "this", "that", "it", "its",
}
55
+
56
+
57
def derive_search_terms(question: str) -> list[str]:
    """Derive lowercase grep terms: identifiers first, then plain keywords.

    Identifier-shaped tokens (``Client.request``, ``get_users``, ``APIRoute``)
    are extracted via ``search.extract_query_identifiers``; remaining words of
    3+ characters that are not stopwords are appended. Order is deterministic.

    Every term is lowercased before de-duplication, so an identifier and the
    same word picked up again as a plain keyword are kept only once.

    Args:
        question: Natural-language question to derive terms from.

    Returns:
        Unique lowercase terms in first-seen order.
    """
    from code_review_graph.search import extract_query_identifiers

    terms: list[str] = []
    seen: set[str] = set()
    for ident in extract_query_identifiers(question):
        # NOTE(review): whether extract_query_identifiers already lowercases
        # is not visible from here; normalising is a no-op if it does.
        key = ident.lower()
        if key and key not in seen:
            seen.add(key)
            terms.append(key)
    for word in question.split():
        w = word.strip(".,;:!?\"'()[]{}`").lower()
        if len(w) >= 3 and w not in _STOPWORDS and w not in seen:
            seen.add(w)
            terms.append(w)
    return terms
78
+
79
+
80
def iter_source_files(repo_path: Path) -> Iterator[Path]:
    """Yield source files under *repo_path*, skipping vendored/VCS dirs.

    Files are yielded in sorted path order. A file is skipped when its
    suffix is not in ``_SOURCE_EXTS`` or when any path component *below*
    *repo_path* is listed in ``_SKIP_DIRS``.
    """
    for path in sorted(repo_path.rglob("*")):
        if path.suffix not in _SOURCE_EXTS or not path.is_file():
            continue
        # Look only at components inside the repo. Checking the full path
        # would drop every file when the repo itself is checked out under a
        # directory called e.g. "build" or "venv".
        inside_repo = path.relative_to(repo_path).parts
        if any(part in _SKIP_DIRS for part in inside_repo):
            continue
        yield path
88
+
89
+
90
def grep_rank(
    repo_path: Path, terms: list[str], k: int = DEFAULT_TOP_K,
) -> list[tuple[str, int]]:
    """Return the *k* source files with the most case-insensitive term hits.

    Each entry is ``(path relative to repo_path, total hit count)``. Entries
    are ordered by descending count, then by path. Files without a hit are
    left out, and unreadable files are skipped. Implemented in pure python,
    with no external grep/rg dependency.
    """
    needles = [term.lower() for term in terms if term]
    if not needles:
        return []

    ranked: list[tuple[str, int]] = []
    for source in iter_source_files(repo_path):
        try:
            haystack = source.read_text(encoding="utf-8", errors="replace").lower()
        except OSError:
            continue
        hits = 0
        for needle in needles:
            hits += haystack.count(needle)
        if hits > 0:
            ranked.append((str(source.relative_to(repo_path)), hits))

    # Highest count first; the path breaks ties.
    ranked.sort(key=lambda entry: (-entry[1], entry[0]))
    return ranked[:k]
112
+
113
+
114
def run(repo_path: Path, store, config: dict) -> list[dict]:
    """Run the agent baseline benchmark for one repo.

    Questions come from ``config["agent_questions"]``, falling back to the
    ``query`` of each ``search_queries`` entry. One row is produced per
    question, comparing the token cost of reading the top grep-ranked files
    with the token cost of the graph search hits plus up to five outgoing
    edges per hit.
    """
    questions = list(config.get("agent_questions") or [])
    if not questions:
        questions = [entry["query"] for entry in config.get("search_queries", [])]

    top_k = int(config.get("agent_baseline_top_k", DEFAULT_TOP_K))
    rows: list[dict] = []

    for question in questions:
        terms = derive_search_terms(question)
        ranked = grep_rank(repo_path, terms, k=top_k)

        # Baseline side: token cost of reading every top-ranked file.
        baseline_tokens = 0
        for rel_path, _hits in ranked:
            try:
                baseline_tokens += estimate_tokens(
                    (repo_path / rel_path).read_text(encoding="utf-8", errors="replace")
                )
            except OSError:
                continue

        row: dict = {
            "repo": config["name"],
            "question": question,
            "terms": " ".join(terms),
            "files_matched": len(ranked),
            "top_files": ";".join(rel_path for rel_path, _ in ranked),
            "baseline_tokens": baseline_tokens,
            "graph_tokens": "",
            "baseline_to_graph_ratio": "",
            "status": "ok",
            "error": "",
        }

        try:
            from code_review_graph.search import hybrid_search
            hits = hybrid_search(store, question, limit=5)
        except Exception as exc:
            logger.warning("hybrid_search failed on %r: %s", question, exc)
            row["status"] = "error"
            row["error"] = str(exc)[:200]
            rows.append(row)
            continue

        # Graph side, same accounting as the standalone token benchmark:
        # search hits plus up to 5 outgoing edges of neighbor context per hit.
        graph_tokens = 0
        for hit in hits:
            graph_tokens += estimate_tokens(str(hit))
            neighbors = store.get_edges_by_source(hit.get("qualified_name", ""))[:5]
            graph_tokens += sum(estimate_tokens(str(edge)) for edge in neighbors)

        row["graph_tokens"] = graph_tokens
        both_sides_present = baseline_tokens > 0 and graph_tokens > 0
        if both_sides_present:
            row["baseline_to_graph_ratio"] = round(baseline_tokens / graph_tokens, 1)
        elif graph_tokens == 0:
            row["status"] = "no_graph_results"
        else:
            row["status"] = "no_baseline_match"
        rows.append(row)

    return rows
177
+
178
+
179
def aggregate(results: list[dict]) -> dict:
    """Summarise benchmark rows, using only rows whose status is ``"ok"``.

    Reports the total, ok and error row counts plus the median and mean
    baseline-to-graph ratio (``None`` when no row is ok).
    """
    ratios: list[float] = []
    error_rows = 0
    for row in results:
        status = row.get("status")
        if status == "ok":
            ratios.append(float(row["baseline_to_graph_ratio"]))
        elif status == "error":
            error_rows += 1

    if ratios:
        median_ratio = round(statistics.median(ratios), 1)
        mean_ratio = round(statistics.mean(ratios), 1)
    else:
        median_ratio = None
        mean_ratio = None

    return {
        "total_rows": len(results),
        "ok_rows": len(ratios),
        "error_rows": error_rows,
        "median_baseline_to_graph_ratio": median_ratio,
        "mean_baseline_to_graph_ratio": mean_ratio,
    }
@@ -0,0 +1,60 @@
1
+ """Build performance benchmark: measures timing of graph operations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import time
7
+ from pathlib import Path
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
def run(repo_path: Path, store, config: dict) -> list[dict]:
    """Run build performance benchmark.

    Times flow detection, community detection and up to ten of the
    configured search queries against *store*, and returns a single-row
    list with those timings alongside the store's file/node/edge counts.

    A failing flow or community step is logged and reported as ``0.0``
    seconds. ``nodes_per_second`` is derived from the flow-detection time
    and is reported as ``0`` when flow detection failed, so that a failure
    is not presented as a throughput figure.
    """
    stats = store.get_stats()

    # Time flow detection
    flow_time = 0.0
    flow_ok = False
    try:
        from code_review_graph.flows import store_flows, trace_flows
        t0 = time.perf_counter()
        flows = trace_flows(store)
        store_flows(store, flows)
        flow_time = time.perf_counter() - t0
        flow_ok = True
    except Exception as exc:
        logger.warning("Flow detection failed: %s", exc)

    # Time community detection
    try:
        from code_review_graph.communities import detect_communities, store_communities
        t0 = time.perf_counter()
        comms = detect_communities(store)
        store_communities(store, comms)
        community_time = time.perf_counter() - t0
    except Exception as exc:
        logger.warning("Community detection failed: %s", exc)
        community_time = 0.0

    # Time search (average of queries)
    search_times: list[float] = []
    for sq in config.get("search_queries", [])[:10]:
        t0 = time.perf_counter()
        store.search_nodes(sq["query"], limit=20)
        search_times.append(time.perf_counter() - t0)

    avg_search_ms = round(
        sum(search_times) / max(len(search_times), 1) * 1000, 1
    )

    # Clamp the divisor so a sub-millisecond run cannot divide by zero.
    nodes_per_second = (
        round(stats.total_nodes / max(flow_time, 0.001)) if flow_ok else 0
    )

    return [{
        "repo": config["name"],
        "file_count": stats.files_count,
        "node_count": stats.total_nodes,
        "edge_count": stats.total_edges,
        "flow_detection_seconds": round(flow_time, 3),
        "community_detection_seconds": round(community_time, 3),
        "search_avg_ms": avg_search_ms,
        "nodes_per_second": nodes_per_second,
    }]
@@ -0,0 +1,36 @@
1
+ """Flow completeness benchmark: evaluates entry point detection and flow tracing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from pathlib import Path
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
def run(repo_path: Path, store, config: dict) -> list[dict]:
    """Run flow completeness benchmark.

    Traces and stores flows for *store*, then reports how many of the
    configured ``entry_points`` appear as a substring of a detected entry
    point, together with flow count and depth statistics.
    """
    from code_review_graph.flows import store_flows, trace_flows

    flows = trace_flows(store)
    stored_count = store_flows(store, flows)

    # A flow is identified by its entry point, or by its name if that is unset.
    detected = {
        flow.get("entry_point") or flow.get("name", "") for flow in flows
    }

    expected = set(config.get("entry_points", []))
    matched = 0
    for entry_point in expected:
        if any(entry_point in candidate for candidate in detected):
            matched += 1

    depths = [flow.get("depth", 0) for flow in flows]
    # max(..., 1) guards both averages against an empty denominator.
    recall = round(matched / max(len(expected), 1), 3)
    avg_depth = round(sum(depths) / max(len(depths), 1), 1)

    return [{
        "repo": config["name"],
        "known_entry_points": len(expected),
        "detected_entry_points": matched,
        "recall": recall,
        "detected_flows": stored_count,
        "avg_flow_depth": avg_depth,
        "max_flow_depth": max(depths, default=0),
    }]