code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. code_review_graph/__init__.py +20 -0
  2. code_review_graph/__main__.py +4 -0
  3. code_review_graph/analysis.py +410 -0
  4. code_review_graph/changes.py +409 -0
  5. code_review_graph/cli.py +1255 -0
  6. code_review_graph/communities.py +874 -0
  7. code_review_graph/constants.py +23 -0
  8. code_review_graph/context_savings.py +317 -0
  9. code_review_graph/custom_languages.py +322 -0
  10. code_review_graph/daemon.py +1009 -0
  11. code_review_graph/daemon_cli.py +320 -0
  12. code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
  13. code_review_graph/embeddings.py +1006 -0
  14. code_review_graph/enrich.py +303 -0
  15. code_review_graph/eval/__init__.py +33 -0
  16. code_review_graph/eval/benchmarks/__init__.py +1 -0
  17. code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
  18. code_review_graph/eval/benchmarks/build_performance.py +60 -0
  19. code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
  20. code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
  21. code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
  22. code_review_graph/eval/benchmarks/search_quality.py +59 -0
  23. code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
  24. code_review_graph/eval/configs/code-review-graph.yaml +50 -0
  25. code_review_graph/eval/configs/express.yaml +45 -0
  26. code_review_graph/eval/configs/fastapi.yaml +48 -0
  27. code_review_graph/eval/configs/flask.yaml +50 -0
  28. code_review_graph/eval/configs/gin.yaml +51 -0
  29. code_review_graph/eval/configs/httpx.yaml +48 -0
  30. code_review_graph/eval/reporter.py +301 -0
  31. code_review_graph/eval/runner.py +211 -0
  32. code_review_graph/eval/scorer.py +85 -0
  33. code_review_graph/eval/token_benchmark.py +182 -0
  34. code_review_graph/exports.py +409 -0
  35. code_review_graph/flows.py +698 -0
  36. code_review_graph/graph.py +1427 -0
  37. code_review_graph/graph_diff.py +122 -0
  38. code_review_graph/hints.py +384 -0
  39. code_review_graph/incremental.py +1245 -0
  40. code_review_graph/jedi_resolver.py +303 -0
  41. code_review_graph/main.py +1079 -0
  42. code_review_graph/memory.py +142 -0
  43. code_review_graph/migrations.py +284 -0
  44. code_review_graph/parser.py +6957 -0
  45. code_review_graph/postprocessing.py +134 -0
  46. code_review_graph/prompts.py +159 -0
  47. code_review_graph/refactor.py +852 -0
  48. code_review_graph/registry.py +319 -0
  49. code_review_graph/rescript_resolver.py +206 -0
  50. code_review_graph/search.py +447 -0
  51. code_review_graph/skills.py +1481 -0
  52. code_review_graph/spring_resolver.py +200 -0
  53. code_review_graph/temporal_resolver.py +199 -0
  54. code_review_graph/token_benchmark.py +125 -0
  55. code_review_graph/tools/__init__.py +156 -0
  56. code_review_graph/tools/_common.py +176 -0
  57. code_review_graph/tools/analysis_tools.py +184 -0
  58. code_review_graph/tools/build.py +541 -0
  59. code_review_graph/tools/community_tools.py +246 -0
  60. code_review_graph/tools/context.py +152 -0
  61. code_review_graph/tools/docs.py +274 -0
  62. code_review_graph/tools/flows_tools.py +176 -0
  63. code_review_graph/tools/query.py +692 -0
  64. code_review_graph/tools/refactor_tools.py +168 -0
  65. code_review_graph/tools/registry_tools.py +125 -0
  66. code_review_graph/tools/review.py +477 -0
  67. code_review_graph/tsconfig_resolver.py +257 -0
  68. code_review_graph/visualization.py +2184 -0
  69. code_review_graph/wiki.py +305 -0
  70. code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
  71. code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
  72. code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
  73. code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
  74. code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,200 @@
1
+ """Post-build Spring DI call resolver.
2
+
3
+ After tree-sitter parsing, Java CALLS edges whose target is a bare method
4
+ name (e.g. ``calculate``) carry ``extra.receiver`` naming the local variable
5
+ that was called on (e.g. ``invoiceCalculationService``). This module
6
+ resolves those receivers through the INJECTS map to their declared type, then
7
+ optionally to the unique concrete implementation via INHERITS edges.
8
+
9
+ Resolution chain:
10
+ receiver variable name
11
+ → injected interface/class (from INJECTS.extra.field_name)
12
+ → concrete implementation (from INHERITS, when unique)
13
+
14
+ Only Java files are processed. Edges already marked ``spring_resolved`` or
15
+ with no ``receiver`` extra key are skipped; qualified targets are re-resolved.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import logging
22
+ from typing import TYPE_CHECKING
23
+
24
+ if TYPE_CHECKING:
25
+ from .graph import GraphStore
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
def _parse_extra(raw: str | None) -> dict:
    """Decode a JSON ``extra`` column into a dict.

    Returns an empty dict when the value is NULL, malformed, or valid JSON
    that is not an object, so callers can always use ``.get`` on the result.
    """
    try:
        data = json.loads(raw or "{}")
    except (json.JSONDecodeError, TypeError):
        return {}
    return data if isinstance(data, dict) else {}


def _bare_method_name(target: str) -> str:
    """Return the method name of a CALLS target, dropping any ``file::Class.`` prefix."""
    _, sep, tail = target.partition("::")
    if not sep:
        # Unqualified targets are used verbatim.
        return target
    return tail.rsplit(".", 1)[-1]


def _enclosing_class(source_qual: str) -> str | None:
    """Return ``file::Class`` for a ``file::Class.method`` source.

    A source with no ``.`` after ``::`` is returned unchanged; a source with
    no ``::`` at all yields ``None``.
    """
    prefix, sep, tail = source_qual.partition("::")
    if not sep:
        return None
    if "." not in tail:
        return source_qual
    return f"{prefix}::{tail.split('.', 1)[0]}"


def resolve_spring_di_calls(store: GraphStore) -> dict:
    """Resolve Java CALLS edges whose receiver is a Spring-injected field.

    For every CALLS edge in a Java file that carries ``extra.receiver``, the
    receiver is looked up among the INJECTS edges of the enclosing class
    (matched on ``extra.field_name``).  When the injected type has exactly one
    INHERITS source, the call is pointed at that class's method; otherwise it
    is pointed at the injected type's method.  The edge is rewritten in place
    and tagged with ``extra.spring_resolved`` and ``extra.injected_type``.

    Safe to call multiple times: edges already tagged ``spring_resolved`` are
    skipped.  Targets that already contain ``::`` are NOT skipped; their method
    name is extracted and the edge is re-resolved through the receiver.

    Args:
        store: Graph store; only its ``_conn`` connection is used, and rows
            are read by column name.

    Returns:
        ``{"files_indexed": <number of Java files>, "calls_resolved": <edges
        rewritten in this pass>}``.
    """
    conn = store._conn

    # Only process Java files
    java_files: set[str] = {
        row["file_path"]
        for row in conn.execute(
            "SELECT DISTINCT file_path FROM nodes WHERE language = 'java'"
        ).fetchall()
    }
    if not java_files:
        return {"files_indexed": 0, "calls_resolved": 0}

    # field_map: (source_qualified_class, field_name) → injected_type,
    # from INJECTS edges that carry extra.field_name
    field_map: dict[tuple[str, str], str] = {}
    for row in conn.execute(
        "SELECT source_qualified, target_qualified, extra FROM edges WHERE kind = 'INJECTS'"
    ).fetchall():
        fname = _parse_extra(row["extra"]).get("field_name")
        if fname:
            field_map[(row["source_qualified"], fname)] = row["target_qualified"]

    if not field_map:
        logger.info("Spring resolver: no INJECTS edges with field_name found, skipping")
        return {"files_indexed": len(java_files), "calls_resolved": 0}

    # method_to_qual: (class_name, method_name) → qualified_name of the method node
    method_to_qual: dict[tuple[str, str], str] = {
        (row["parent_name"], row["name"]): row["qualified_name"]
        for row in conn.execute(
            "SELECT name, qualified_name, parent_name FROM nodes "
            "WHERE kind IN ('Function', 'Test') AND language = 'java' AND parent_name IS NOT NULL"
        ).fetchall()
    }

    # implementors: INHERITS target → list of INHERITS sources
    # (Java uses INHERITS for both extends and implements)
    java_prefixes = tuple(java_files)
    implementors: dict[str, list[str]] = {}
    for row in conn.execute(
        "SELECT source_qualified, target_qualified FROM edges WHERE kind = 'INHERITS'"
    ).fetchall():
        impl = row["source_qualified"]
        # Cheap substring test first; the prefix scan only runs for sources
        # that carry no "::" separator.
        if "::" in impl or impl.startswith(java_prefixes):
            implementors.setdefault(row["target_qualified"], []).append(impl)

    calls_rows = conn.execute(
        "SELECT id, source_qualified, target_qualified, extra, file_path "
        "FROM edges WHERE kind = 'CALLS'"
    ).fetchall()

    resolved = 0
    for row in calls_rows:
        if row["file_path"] not in java_files:
            continue

        extra = _parse_extra(row["extra"])
        receiver = extra.get("receiver")
        # No receiver means nothing to resolve; spring_resolved marks an edge
        # already rewritten by a previous pass.
        if not receiver or extra.get("spring_resolved"):
            continue

        source_qual = row["source_qualified"]
        enclosing_class_qual = _enclosing_class(source_qual)
        if not enclosing_class_qual:
            continue

        injected_type = field_map.get((enclosing_class_qual, receiver))
        if not injected_type:
            continue

        # Strip any prior qualification: "file::ClassName.method" → "method"
        method_name = _bare_method_name(row["target_qualified"])

        # Prefer the unique concrete implementation; otherwise stay on the
        # injected type itself.
        impls = implementors.get(injected_type, [])
        if len(impls) == 1:
            owner_qual = impls[0]
            owner_bare = owner_qual.split("::")[-1]
        else:
            owner_qual = injected_type
            owner_bare = injected_type.rsplit(".", 1)[-1]
        new_target = (
            method_to_qual.get((owner_bare, method_name))
            or f"{owner_qual}.{method_name}"
        )

        extra["spring_resolved"] = True
        extra["injected_type"] = injected_type

        conn.execute(
            "UPDATE edges SET target_qualified = ?, extra = ? WHERE id = ?",
            (new_target, json.dumps(extra), row["id"]),
        )
        resolved += 1
        logger.debug(
            "Spring resolved: %s → %s (was %s, receiver=%s)",
            source_qual, new_target, method_name, receiver,
        )

    if resolved:
        conn.commit()

    logger.info("Spring DI resolver: resolved %d CALLS edges in %d Java files",
                resolved, len(java_files))
    return {"files_indexed": len(java_files), "calls_resolved": resolved}
@@ -0,0 +1,199 @@
1
+ """Post-build Temporal workflow/activity call resolver.
2
+
3
+ After tree-sitter parsing, Java CALLS edges whose target is a bare method
4
+ name carry ``extra.receiver`` naming the local variable called on. This
5
+ module resolves those receivers through the TEMPORAL_STUB map to their
6
+ declared Temporal interface type, then optionally to the unique concrete
7
+ implementation via INHERITS edges.
8
+
9
+ Resolution chain:
10
+ receiver variable name
11
+ → temporal stub field type (from TEMPORAL_STUB.extra.field_name)
12
+ → concrete implementation (from INHERITS, when unique)
13
+
14
+ Only Java files are processed. TEMPORAL_STUB edges whose target is not a
15
+ node with ``temporal_role`` in extra are silently skipped (they may be
16
+ non-Temporal types that happen to end in 'Activity'/'Workflow').
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import logging
23
+ from typing import TYPE_CHECKING
24
+
25
+ if TYPE_CHECKING:
26
+ from .graph import GraphStore
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
def _parse_extra(raw: str | None) -> dict:
    """Decode a JSON ``extra`` column into a dict.

    Returns an empty dict when the value is NULL, malformed, or valid JSON
    that is not an object, so callers can always use ``.get`` on the result.
    """
    try:
        data = json.loads(raw or "{}")
    except (json.JSONDecodeError, TypeError):
        return {}
    return data if isinstance(data, dict) else {}


def _bare_method_name(target: str) -> str:
    """Return the method name of a CALLS target, dropping any ``file::Class.`` prefix."""
    _, sep, tail = target.partition("::")
    if not sep:
        # Unqualified targets are used verbatim.
        return target
    return tail.rsplit(".", 1)[-1]


def _enclosing_class(source_qual: str) -> str | None:
    """Return ``file::Class`` for a ``file::Class.method`` source.

    A source with no ``.`` after ``::`` is returned unchanged; a source with
    no ``::`` at all yields ``None``.
    """
    prefix, sep, tail = source_qual.partition("::")
    if not sep:
        return None
    if "." not in tail:
        return source_qual
    return f"{prefix}::{tail.split('.', 1)[0]}"


def resolve_temporal_calls(store: GraphStore) -> dict:
    """Resolve Java CALLS edges whose receiver is a Temporal activity/workflow stub.

    For every CALLS edge in a Java file that carries ``extra.receiver``, the
    receiver is looked up among the TEMPORAL_STUB edges of the enclosing class
    (matched on ``extra.field_name``).  Only stubs whose target names a node
    with ``temporal_role`` of ``workflow_interface`` or ``activity_interface``
    are considered.  When the interface has exactly one INHERITS source, the
    call is pointed at that class's method; otherwise it is pointed at the
    interface's method.  The edge is rewritten in place and tagged with
    ``extra.temporal_resolved`` and ``extra.temporal_interface``.

    Safe to call multiple times: edges already tagged ``temporal_resolved``
    (or ``spring_resolved``) are skipped.

    Args:
        store: Graph store; only its ``_conn`` connection is used, and rows
            are read by column name.

    Returns:
        ``{"files_indexed": <number of Java files>, "calls_resolved": <edges
        rewritten in this pass>}``.
    """
    conn = store._conn

    java_files: set[str] = {
        row["file_path"]
        for row in conn.execute(
            "SELECT DISTINCT file_path FROM nodes WHERE language = 'java'"
        ).fetchall()
    }
    if not java_files:
        return {"files_indexed": 0, "calls_resolved": 0}

    # Temporal interface nodes: bare name → qualified_name.  The LIKE filter is
    # only a pre-filter; the role is confirmed from the decoded JSON.
    temporal_interfaces: dict[str, str] = {}
    for row in conn.execute(
        "SELECT name, qualified_name, extra FROM nodes "
        "WHERE language = 'java' AND extra IS NOT NULL AND extra LIKE '%temporal_role%'"
    ).fetchall():
        role = _parse_extra(row["extra"]).get("temporal_role")
        if role in ("workflow_interface", "activity_interface"):
            temporal_interfaces[row["name"]] = row["qualified_name"]

    if not temporal_interfaces:
        logger.info("Temporal resolver: no Workflow/ActivityInterface nodes, skipping")
        return {"files_indexed": len(java_files), "calls_resolved": 0}

    # field_map: (source_qualified_class, field_name) → bare interface name,
    # from TEMPORAL_STUB edges whose target is a known Temporal interface
    field_map: dict[tuple[str, str], str] = {}
    for row in conn.execute(
        "SELECT source_qualified, target_qualified, extra FROM edges WHERE kind = 'TEMPORAL_STUB'"
    ).fetchall():
        bare_target = row["target_qualified"]
        if bare_target not in temporal_interfaces:
            continue
        fname = _parse_extra(row["extra"]).get("field_name")
        if fname:
            field_map[(row["source_qualified"], fname)] = bare_target

    if not field_map:
        logger.info("Temporal resolver: no TEMPORAL_STUB edges found, skipping")
        return {"files_indexed": len(java_files), "calls_resolved": 0}

    # method_to_qual: (class_name, method_name) → full qualified_name
    method_to_qual: dict[tuple[str, str], str] = {
        (row["parent_name"], row["name"]): row["qualified_name"]
        for row in conn.execute(
            "SELECT name, qualified_name, parent_name FROM nodes "
            "WHERE kind IN ('Function', 'Test') AND language = 'java' AND parent_name IS NOT NULL"
        ).fetchall()
    }

    # implementors: INHERITS target → list of INHERITS sources
    java_prefixes = tuple(java_files)
    implementors: dict[str, list[str]] = {}
    for row in conn.execute(
        "SELECT source_qualified, target_qualified FROM edges WHERE kind = 'INHERITS'"
    ).fetchall():
        impl = row["source_qualified"]
        # Cheap substring test first; the prefix scan only runs for sources
        # that carry no "::" separator.
        if "::" in impl or impl.startswith(java_prefixes):
            implementors.setdefault(row["target_qualified"], []).append(impl)

    calls_rows = conn.execute(
        "SELECT id, source_qualified, target_qualified, extra, file_path "
        "FROM edges WHERE kind = 'CALLS'"
    ).fetchall()

    resolved = 0
    for row in calls_rows:
        if row["file_path"] not in java_files:
            continue

        extra = _parse_extra(row["extra"])
        receiver = extra.get("receiver")
        if not receiver:
            continue
        # Leave edges alone once either resolver has claimed them.
        if extra.get("temporal_resolved") or extra.get("spring_resolved"):
            continue

        source_qual = row["source_qualified"]
        enclosing_class_qual = _enclosing_class(source_qual)
        if not enclosing_class_qual:
            continue

        interface_bare = field_map.get((enclosing_class_qual, receiver))
        if not interface_bare:
            continue

        method_name = _bare_method_name(row["target_qualified"])
        interface_qual = temporal_interfaces.get(interface_bare, interface_bare)

        # INHERITS targets may be stored qualified or as the bare interface
        # name, so try the qualified key first and fall back to the bare one.
        impls = implementors.get(interface_qual) or implementors.get(interface_bare, [])
        if len(impls) == 1:
            owner_qual = impls[0]
            owner_bare = owner_qual.split("::")[-1]
        else:
            owner_qual = interface_qual
            owner_bare = interface_bare
        new_target = (
            method_to_qual.get((owner_bare, method_name))
            or f"{owner_qual}.{method_name}"
        )

        extra["temporal_resolved"] = True
        extra["temporal_interface"] = interface_bare

        conn.execute(
            "UPDATE edges SET target_qualified = ?, extra = ? WHERE id = ?",
            (new_target, json.dumps(extra), row["id"]),
        )
        resolved += 1
        logger.debug(
            "Temporal resolved: %s → %s (receiver=%s, interface=%s)",
            source_qual, new_target, receiver, interface_bare,
        )

    if resolved:
        conn.commit()

    logger.info("Temporal resolver: resolved %d CALLS edges in %d Java files",
                resolved, len(java_files))
    return {"files_indexed": len(java_files), "calls_resolved": resolved}
@@ -0,0 +1,125 @@
1
+ """Token reduction benchmark -- measures graph query efficiency vs naive file reading."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import sqlite3
7
+ from pathlib import Path
8
+ from typing import Any
9
+
10
+ from .graph import GraphStore
11
+ from .search import hybrid_search
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+ # Sample questions for benchmarking
16
+ _SAMPLE_QUESTIONS = [
17
+ "how does authentication work",
18
+ "what is the main entry point",
19
+ "how are database connections managed",
20
+ "what error handling patterns are used",
21
+ "how do tests verify core functionality",
22
+ ]
23
+
24
+
25
def estimate_tokens(text: str) -> int:
    """Approximate the token count of *text* at four characters per token.

    The result is never less than 1, even for an empty string.
    """
    approx = len(text) // 4
    return approx if approx >= 1 else 1
28
+
29
+
30
def compute_naive_tokens(repo_root: Path) -> int:
    """Sum the estimated token count of every source file under *repo_root*.

    A path counts as a source file when its name ends with one of the
    extensions below.  The tree is walked once rather than once per
    extension.  Files are decoded as UTF-8 with undecodable bytes replaced,
    so the total does not depend on the machine's locale encoding.  Paths that
    cannot be read (including directories whose name matches) are skipped.

    Args:
        repo_root: Directory to scan recursively.

    Returns:
        Total of ``estimate_tokens`` over all matching, readable files.
    """
    import os  # local import: only needed for case-normalising file names

    exts = (
        ".py", ".js", ".ts", ".go", ".rs", ".java",
        ".c", ".cpp", ".rb", ".php", ".swift", ".kt",
    )
    total = 0
    for path in repo_root.rglob("*"):
        # normcase keeps matching case-insensitive on Windows, as glob
        # patterns are there, and is a no-op on POSIX.
        if not os.path.normcase(path.name).endswith(exts):
            continue
        try:
            text = path.read_text(encoding="utf-8", errors="replace")
        except OSError:
            continue
        total += estimate_tokens(text)
    return total
46
+
47
+
48
def run_token_benchmark(
    store: GraphStore,
    repo_root: Path,
    questions: list[str] | None = None,
) -> dict[str, Any]:
    """Measure how many tokens graph queries cost versus reading the whole repo.

    For each question, the graph cost is the estimated token size of the top
    five ``hybrid_search`` hits plus up to five outgoing edges per hit.  That
    cost is compared against the estimated token size of every source file
    under *repo_root*.

    When the built-in sample questions are in use and the graph holds no
    embeddings, a warning is logged: those questions are natural language and
    are unlikely to match by keyword alone, so every ratio would come out as 0.

    Returns a dict with ``naive_corpus_tokens``, ``per_question``,
    ``average_reduction_ratio`` and a human-readable ``summary``.
    """
    active_questions = _SAMPLE_QUESTIONS if questions is None else questions
    defaults_in_use = active_questions is _SAMPLE_QUESTIONS

    # A missing embeddings table is treated the same as an empty one.
    try:
        embedding_count = store._conn.execute(
            "SELECT count(*) FROM embeddings"
        ).fetchone()[0]
    except sqlite3.OperationalError:
        embedding_count = 0

    if defaults_in_use and embedding_count == 0:
        logger.warning(
            "No embeddings found in this graph. The default sample questions "
            "are natural language and will not match via FTS5/LIKE alone — "
            "every reduction ratio is likely to be 0. Run "
            "`code-review-graph embed` first, or pass keyword-matching `questions=`."
        )

    naive_total = compute_naive_tokens(repo_root)

    per_question = []
    for question in active_questions:
        hits = hybrid_search(store, question, limit=5)

        # Simulated graph context: each hit plus a handful of its neighbors.
        graph_tokens = 0
        for hit in hits:
            graph_tokens += estimate_tokens(str(hit))
            neighbours = store.get_edges_by_source(hit.get("qualified_name", ""))[:5]
            for edge in neighbours:
                graph_tokens += estimate_tokens(str(edge))

        ratio = naive_total / graph_tokens if graph_tokens > 0 else 0
        per_question.append({
            "question": question,
            "naive_tokens": naive_total,
            "graph_tokens": graph_tokens,
            "reduction_ratio": round(ratio, 1),
        })

    if per_question:
        ratio_sum = sum(entry["reduction_ratio"] for entry in per_question)
        avg_ratio = float(ratio_sum) / len(per_question)
    else:
        avg_ratio = 0.0

    summary = (
        f"Graph queries use ~{avg_ratio:.0f}x fewer tokens "
        f"than reading all source files"
    )
    return {
        "naive_corpus_tokens": naive_total,
        "per_question": per_question,
        "average_reduction_ratio": round(avg_ratio, 1),
        "summary": summary,
    }
@@ -0,0 +1,156 @@
1
+ """MCP tool definitions for the Code Review Graph server.
2
+
3
+ Exposes 28 tools:
4
+ 1. build_or_update_graph - full or incremental build
5
+ 2. get_impact_radius - blast radius from changed files
6
+ 3. query_graph - predefined graph queries
7
+ 4. get_review_context - focused subgraph + review prompt
8
+ 5. semantic_search_nodes - keyword + vector search across nodes
9
+ 6. list_graph_stats - aggregate statistics
10
+ 7. embed_graph - compute vector embeddings for semantic search
11
+ 8. get_docs_section - token-optimized documentation retrieval
12
+ 9. find_large_functions - find oversized functions/classes by line count
13
+ 10. list_flows - list execution flows sorted by criticality
14
+ 11. get_flow - get details of a single execution flow
15
+ 12. get_affected_flows - find flows affected by changed files
16
+ 13. list_communities - list detected code communities
17
+ 14. get_community - get details of a single community
18
+ 15. get_architecture_overview - architecture overview from community structure
19
+ 16. detect_changes - risk-scored change impact analysis for code review
20
+ 17. refactor_tool - unified refactoring (rename preview, dead code, suggestions)
21
+ 18. apply_refactor_tool - apply a previously previewed refactoring
22
+ 19. generate_wiki - generate markdown wiki from community structure
23
+ 20. get_wiki_page - retrieve a specific wiki page
24
+ 21. list_repos - list registered repositories
25
+ 22. cross_repo_search - search across all registered repositories
26
+ 23. get_hub_nodes - find most connected nodes (architectural hotspots)
27
+ 24. get_bridge_nodes - find architectural chokepoints (betweenness centrality)
28
+ 25. get_knowledge_gaps - identify structural weaknesses
29
+ 26. get_surprising_connections - find unexpected architectural coupling
30
+ 27. get_suggested_questions - auto-generated review questions from graph analysis
31
+ 28. traverse_graph - BFS/DFS traversal from best-matching node
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ # Re-export names that external code may patch via "code_review_graph.tools.*"
37
+ from ..changes import parse_diff_ranges as parse_diff_ranges
38
+ from ..changes import parse_git_diff_ranges as parse_git_diff_ranges
39
+ from ..changes import parse_svn_diff_ranges as parse_svn_diff_ranges
40
+ from ..incremental import (
41
+ get_changed_files as get_changed_files,
42
+ )
43
+ from ..incremental import (
44
+ get_staged_and_unstaged as get_staged_and_unstaged,
45
+ )
46
+
47
+ # -- _common ----------------------------------------------------------------
48
+ from ._common import (
49
+ _BUILTIN_CALL_NAMES,
50
+ _get_store,
51
+ _validate_repo_root,
52
+ )
53
+
54
+ # -- analysis_tools ---------------------------------------------------------
55
+ from .analysis_tools import (
56
+ get_bridge_nodes_func,
57
+ get_hub_nodes_func,
58
+ get_knowledge_gaps_func,
59
+ get_suggested_questions_func,
60
+ get_surprising_connections_func,
61
+ )
62
+
63
+ # -- build ------------------------------------------------------------------
64
+ from .build import build_or_update_graph, run_postprocess
65
+
66
+ # -- community_tools --------------------------------------------------------
67
+ from .community_tools import (
68
+ get_architecture_overview_func,
69
+ get_community_func,
70
+ list_communities_func,
71
+ )
72
+
73
+ # -- context ----------------------------------------------------------------
74
+ from .context import get_minimal_context
75
+
76
+ # -- docs -------------------------------------------------------------------
77
+ from .docs import embed_graph, generate_wiki_func, get_docs_section, get_wiki_page_func
78
+
79
+ # -- flows_tools ------------------------------------------------------------
80
+ from .flows_tools import get_flow, list_flows
81
+
82
+ # -- query ------------------------------------------------------------------
83
+ from .query import (
84
+ find_large_functions,
85
+ get_impact_radius,
86
+ list_graph_stats,
87
+ query_graph,
88
+ semantic_search_nodes,
89
+ traverse_graph_func,
90
+ )
91
+
92
+ # -- refactor_tools ---------------------------------------------------------
93
+ from .refactor_tools import apply_refactor_func, refactor_func
94
+
95
+ # -- registry_tools ---------------------------------------------------------
96
+ from .registry_tools import cross_repo_search_func, list_repos_func
97
+
98
+ # -- review -----------------------------------------------------------------
99
+ from .review import (
100
+ detect_changes_func,
101
+ get_affected_flows_func,
102
+ get_review_context,
103
+ )
104
+
105
+ __all__ = [
106
+ # _common
107
+ "_BUILTIN_CALL_NAMES",
108
+ "_get_store",
109
+ "_validate_repo_root",
110
+ # build
111
+ "build_or_update_graph",
112
+ "run_postprocess",
113
+ # context
114
+ "get_minimal_context",
115
+ # community_tools
116
+ "get_architecture_overview_func",
117
+ "get_community_func",
118
+ "list_communities_func",
119
+ # docs
120
+ "embed_graph",
121
+ "generate_wiki_func",
122
+ "get_docs_section",
123
+ "get_wiki_page_func",
124
+ # flows_tools
125
+ "get_flow",
126
+ "list_flows",
127
+ # query
128
+ "find_large_functions",
129
+ "get_impact_radius",
130
+ "list_graph_stats",
131
+ "query_graph",
132
+ "semantic_search_nodes",
133
+ "traverse_graph_func",
134
+ # refactor_tools
135
+ "apply_refactor_func",
136
+ "refactor_func",
137
+ # registry_tools
138
+ "cross_repo_search_func",
139
+ "list_repos_func",
140
+ # review
141
+ "detect_changes_func",
142
+ "get_affected_flows_func",
143
+ "get_review_context",
144
+ # analysis_tools
145
+ "get_bridge_nodes_func",
146
+ "get_hub_nodes_func",
147
+ "get_knowledge_gaps_func",
148
+ "get_suggested_questions_func",
149
+ "get_surprising_connections_func",
150
+ # re-exported for backward compat (used in test patches)
151
+ "get_changed_files",
152
+ "get_staged_and_unstaged",
153
+ "parse_git_diff_ranges",
154
+ "parse_svn_diff_ranges",
155
+ "parse_diff_ranges",
156
+ ]