code-review-graph-codeblackwell 2.3.6.post1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- code_review_graph/__init__.py +20 -0
- code_review_graph/__main__.py +4 -0
- code_review_graph/analysis.py +410 -0
- code_review_graph/changes.py +409 -0
- code_review_graph/cli.py +1255 -0
- code_review_graph/communities.py +874 -0
- code_review_graph/constants.py +23 -0
- code_review_graph/context_savings.py +317 -0
- code_review_graph/custom_languages.py +322 -0
- code_review_graph/daemon.py +1009 -0
- code_review_graph/daemon_cli.py +320 -0
- code_review_graph/docs/LLM-OPTIMIZED-REFERENCE.md +71 -0
- code_review_graph/embeddings.py +1006 -0
- code_review_graph/enrich.py +303 -0
- code_review_graph/eval/__init__.py +33 -0
- code_review_graph/eval/benchmarks/__init__.py +1 -0
- code_review_graph/eval/benchmarks/agent_baseline.py +193 -0
- code_review_graph/eval/benchmarks/build_performance.py +60 -0
- code_review_graph/eval/benchmarks/flow_completeness.py +36 -0
- code_review_graph/eval/benchmarks/impact_accuracy.py +220 -0
- code_review_graph/eval/benchmarks/multi_hop_retrieval.py +125 -0
- code_review_graph/eval/benchmarks/search_quality.py +59 -0
- code_review_graph/eval/benchmarks/token_efficiency.py +143 -0
- code_review_graph/eval/configs/code-review-graph.yaml +50 -0
- code_review_graph/eval/configs/express.yaml +45 -0
- code_review_graph/eval/configs/fastapi.yaml +48 -0
- code_review_graph/eval/configs/flask.yaml +50 -0
- code_review_graph/eval/configs/gin.yaml +51 -0
- code_review_graph/eval/configs/httpx.yaml +48 -0
- code_review_graph/eval/reporter.py +301 -0
- code_review_graph/eval/runner.py +211 -0
- code_review_graph/eval/scorer.py +85 -0
- code_review_graph/eval/token_benchmark.py +182 -0
- code_review_graph/exports.py +409 -0
- code_review_graph/flows.py +698 -0
- code_review_graph/graph.py +1427 -0
- code_review_graph/graph_diff.py +122 -0
- code_review_graph/hints.py +384 -0
- code_review_graph/incremental.py +1245 -0
- code_review_graph/jedi_resolver.py +303 -0
- code_review_graph/main.py +1079 -0
- code_review_graph/memory.py +142 -0
- code_review_graph/migrations.py +284 -0
- code_review_graph/parser.py +6957 -0
- code_review_graph/postprocessing.py +134 -0
- code_review_graph/prompts.py +159 -0
- code_review_graph/refactor.py +852 -0
- code_review_graph/registry.py +319 -0
- code_review_graph/rescript_resolver.py +206 -0
- code_review_graph/search.py +447 -0
- code_review_graph/skills.py +1481 -0
- code_review_graph/spring_resolver.py +200 -0
- code_review_graph/temporal_resolver.py +199 -0
- code_review_graph/token_benchmark.py +125 -0
- code_review_graph/tools/__init__.py +156 -0
- code_review_graph/tools/_common.py +176 -0
- code_review_graph/tools/analysis_tools.py +184 -0
- code_review_graph/tools/build.py +541 -0
- code_review_graph/tools/community_tools.py +246 -0
- code_review_graph/tools/context.py +152 -0
- code_review_graph/tools/docs.py +274 -0
- code_review_graph/tools/flows_tools.py +176 -0
- code_review_graph/tools/query.py +692 -0
- code_review_graph/tools/refactor_tools.py +168 -0
- code_review_graph/tools/registry_tools.py +125 -0
- code_review_graph/tools/review.py +477 -0
- code_review_graph/tsconfig_resolver.py +257 -0
- code_review_graph/visualization.py +2184 -0
- code_review_graph/wiki.py +305 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/METADATA +718 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/RECORD +74 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/WHEEL +4 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/entry_points.txt +3 -0
- code_review_graph_codeblackwell-2.3.6.post1.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,200 @@
|
|
|
1
|
+
"""Post-build Spring DI call resolver.
|
|
2
|
+
|
|
3
|
+
After tree-sitter parsing, Java CALLS edges whose target is a bare method
|
|
4
|
+
name (e.g. ``calculate``) carry ``extra.receiver`` naming the local variable
|
|
5
|
+
that was called on (e.g. ``invoiceCalculationService``). This module
|
|
6
|
+
resolves those receivers through the INJECTS map to their declared type, then
|
|
7
|
+
optionally to the unique concrete implementation via INHERITS edges.
|
|
8
|
+
|
|
9
|
+
Resolution chain:
|
|
10
|
+
receiver variable name
|
|
11
|
+
→ injected interface/class (from INJECTS.extra.field_name)
|
|
12
|
+
→ concrete implementation (from INHERITS, when unique)
|
|
13
|
+
|
|
14
|
+
Only Java files are processed.  CALLS edges with no ``receiver`` extra key, or
|
|
15
|
+
already flagged ``spring_resolved`` by an earlier run, are skipped.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import json
|
|
21
|
+
import logging
|
|
22
|
+
from typing import TYPE_CHECKING
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from .graph import GraphStore
|
|
26
|
+
|
|
27
|
+
logger = logging.getLogger(__name__)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _load_extra(raw: str | None) -> dict:
    """Decode an ``extra`` JSON column; NULL, malformed or non-object data yields ``{}``."""
    try:
        decoded = json.loads(raw or "{}")
    except (json.JSONDecodeError, TypeError):
        return {}
    # Callers use ``.get``; a JSON list/number/null would otherwise raise AttributeError.
    return decoded if isinstance(decoded, dict) else {}


def _bare_method_name(target: str) -> str:
    """Return the method name of a CALLS target, dropping any ``file::Class.`` prefix."""
    if "::" not in target:
        return target
    return target.split("::", 1)[1].split(".")[-1]


def _enclosing_class(source_qual: str) -> str | None:
    """Return ``file::Class`` for a ``file::Class.method`` source, or None without ``::``."""
    if "::" not in source_qual:
        return None
    prefix, member = source_qual.split("::", 1)
    if "." not in member:
        # No member separator: the source is used as the class key unchanged.
        return source_qual
    class_name = member.split(".")[0]
    return f"{prefix}::{class_name}"


def resolve_spring_di_calls(store: GraphStore) -> dict:
    """Resolve Java CALLS edges whose receiver is a Spring-injected field.

    For every CALLS edge recorded in a Java file that carries ``extra.receiver``,
    the receiver is looked up among the INJECTS edges of the enclosing class
    (matched on ``extra.field_name``).  When exactly one INHERITS edge points at
    the injected type, the call is retargeted to that implementing class;
    otherwise it is retargeted to the injected type itself.

    Safe to call multiple times: rewritten edges are flagged with
    ``extra.spring_resolved`` and skipped on later runs.  A target that already
    contains ``::`` is *not* skipped; its qualification is stripped back to the
    bare method name and resolved again through the receiver.

    Args:
        store: Graph store; only its ``_conn`` SQLite connection is used, and
            rows are read by column name.

    Returns:
        ``{"files_indexed": <number of Java files>, "calls_resolved": <edges rewritten>}``.
    """
    conn = store._conn

    # Only process Java files
    java_files: set[str] = {
        row["file_path"]
        for row in conn.execute(
            "SELECT DISTINCT file_path FROM nodes WHERE language = 'java'"
        ).fetchall()
    }
    if not java_files:
        return {"files_indexed": 0, "calls_resolved": 0}

    # field_map: (source_qualified_class, field_name) -> injected_type,
    # built from INJECTS edges that carry extra.field_name.
    field_map: dict[tuple[str, str], str] = {}
    for row in conn.execute(
        "SELECT source_qualified, target_qualified, extra FROM edges WHERE kind = 'INJECTS'"
    ).fetchall():
        fname = _load_extra(row["extra"]).get("field_name")
        if fname:
            field_map[(row["source_qualified"], fname)] = row["target_qualified"]

    if not field_map:
        logger.info("Spring resolver: no INJECTS edges with field_name found, skipping")
        return {"files_indexed": len(java_files), "calls_resolved": 0}

    # method_to_qual: (class_name, method_name) -> qualified_name of the method node.
    # Keyed by bare parent name, so a later row with the same pair overwrites an earlier one.
    method_to_qual: dict[tuple[str, str], str] = {
        (row["parent_name"], row["name"]): row["qualified_name"]
        for row in conn.execute(
            "SELECT name, qualified_name, parent_name FROM nodes "
            "WHERE kind IN ('Function', 'Test') AND language = 'java' AND parent_name IS NOT NULL"
        ).fetchall()
    }

    # implementors: INHERITS target -> list of implementing class qualified names
    # (the INHERITS edges here cover both extends and implements).
    implementors: dict[str, list[str]] = {}
    for row in conn.execute(
        "SELECT source_qualified, target_qualified FROM edges WHERE kind = 'INHERITS'"
    ).fetchall():
        impl = row["source_qualified"]
        # Cheap substring test first; the per-file prefix scan only runs when it fails.
        if "::" in impl or any(impl.startswith(f) for f in java_files):
            implementors.setdefault(row["target_qualified"], []).append(impl)

    calls_rows = conn.execute(
        "SELECT id, source_qualified, target_qualified, extra, file_path "
        "FROM edges WHERE kind = 'CALLS'"
    ).fetchall()

    resolved = 0
    for row in calls_rows:
        if row["file_path"] not in java_files:
            continue

        extra = _load_extra(row["extra"])
        receiver = extra.get("receiver")
        # Nothing to resolve without a receiver; the flag marks edges rewritten earlier.
        if not receiver or extra.get("spring_resolved"):
            continue

        # Strip any prior (possibly wrong) qualification: "file::ClassName.method" -> "method".
        method_name = _bare_method_name(row["target_qualified"])
        source_qual = row["source_qualified"]

        enclosing_class_qual = _enclosing_class(source_qual)
        if not enclosing_class_qual:
            continue

        injected_type = field_map.get((enclosing_class_qual, receiver))
        if not injected_type:
            continue

        # Prefer the concrete implementation, but only when it is unambiguous.
        impls = implementors.get(injected_type, [])
        if len(impls) == 1:
            owner_qual = impls[0]
            owner_bare = owner_qual.split("::")[-1]
        else:
            owner_qual = injected_type
            owner_bare = injected_type.rsplit(".", 1)[-1]
        # Use the indexed method node when there is one; otherwise synthesize "Owner.method".
        new_target = (
            method_to_qual.get((owner_bare, method_name))
            or f"{owner_qual}.{method_name}"
        )

        extra["spring_resolved"] = True
        extra["injected_type"] = injected_type

        conn.execute(
            "UPDATE edges SET target_qualified = ?, extra = ? WHERE id = ?",
            (new_target, json.dumps(extra), row["id"]),
        )
        resolved += 1
        logger.debug(
            "Spring resolved: %s → %s (was %s, receiver=%s)",
            source_qual, new_target, method_name, receiver,
        )

    if resolved:
        conn.commit()

    logger.info("Spring DI resolver: resolved %d CALLS edges in %d Java files",
                resolved, len(java_files))
    return {"files_indexed": len(java_files), "calls_resolved": resolved}
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""Post-build Temporal workflow/activity call resolver.
|
|
2
|
+
|
|
3
|
+
After tree-sitter parsing, Java CALLS edges whose target is a bare method
|
|
4
|
+
name carry ``extra.receiver`` naming the local variable called on. This
|
|
5
|
+
module resolves those receivers through the TEMPORAL_STUB map to their
|
|
6
|
+
declared Temporal interface type, then optionally to the unique concrete
|
|
7
|
+
implementation via INHERITS edges.
|
|
8
|
+
|
|
9
|
+
Resolution chain:
|
|
10
|
+
receiver variable name
|
|
11
|
+
→ temporal stub field type (from TEMPORAL_STUB.extra.field_name)
|
|
12
|
+
→ concrete implementation (from INHERITS, when unique)
|
|
13
|
+
|
|
14
|
+
Only Java files are processed. TEMPORAL_STUB edges whose target is not a
|
|
15
|
+
node with ``temporal_role`` in extra are silently skipped (they may be
|
|
16
|
+
non-Temporal types that happen to end in 'Activity'/'Workflow').
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
import json
|
|
22
|
+
import logging
|
|
23
|
+
from typing import TYPE_CHECKING
|
|
24
|
+
|
|
25
|
+
if TYPE_CHECKING:
|
|
26
|
+
from .graph import GraphStore
|
|
27
|
+
|
|
28
|
+
logger = logging.getLogger(__name__)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _load_extra(raw: str | None) -> dict:
    """Decode an ``extra`` JSON column; NULL, malformed or non-object data yields ``{}``."""
    try:
        decoded = json.loads(raw or "{}")
    except (json.JSONDecodeError, TypeError):
        return {}
    # Callers use ``.get``; a JSON list/number/null would otherwise raise AttributeError.
    return decoded if isinstance(decoded, dict) else {}


def _bare_method_name(target: str) -> str:
    """Return the method name of a CALLS target, dropping any ``file::Class.`` prefix."""
    if "::" not in target:
        return target
    return target.split("::", 1)[1].split(".")[-1]


def _enclosing_class(source_qual: str) -> str | None:
    """Return ``file::Class`` for a ``file::Class.method`` source, or None without ``::``."""
    if "::" not in source_qual:
        return None
    prefix, member = source_qual.split("::", 1)
    if "." not in member:
        # No member separator: the source is used as the class key unchanged.
        return source_qual
    class_name = member.split(".")[0]
    return f"{prefix}::{class_name}"


def resolve_temporal_calls(store: GraphStore) -> dict:
    """Resolve Java CALLS edges whose receiver is a Temporal activity/workflow stub.

    For every CALLS edge recorded in a Java file that carries ``extra.receiver``,
    the receiver is looked up among the TEMPORAL_STUB edges of the enclosing
    class (matched on ``extra.field_name``).  Only stubs whose target names a
    node with ``temporal_role`` of ``workflow_interface`` or
    ``activity_interface`` are considered.  When exactly one INHERITS edge points
    at the interface, the call is retargeted to that implementing class;
    otherwise it is retargeted to the interface itself.

    Safe to call multiple times: rewritten edges are flagged with
    ``extra.temporal_resolved`` and skipped on later runs, as are edges already
    flagged ``extra.spring_resolved``.

    Args:
        store: Graph store; only its ``_conn`` SQLite connection is used, and
            rows are read by column name.

    Returns:
        ``{"files_indexed": <number of Java files>, "calls_resolved": <edges rewritten>}``.
    """
    conn = store._conn

    java_files: set[str] = {
        row["file_path"]
        for row in conn.execute(
            "SELECT DISTINCT file_path FROM nodes WHERE language = 'java'"
        ).fetchall()
    }
    if not java_files:
        return {"files_indexed": 0, "calls_resolved": 0}

    # temporal_interfaces: bare name -> qualified_name of workflow/activity interface nodes.
    # The LIKE is a coarse pre-filter; the decoded role is checked exactly below.
    temporal_interfaces: dict[str, str] = {}
    for row in conn.execute(
        "SELECT name, qualified_name, extra FROM nodes "
        "WHERE language = 'java' AND extra IS NOT NULL AND extra LIKE '%temporal_role%'"
    ).fetchall():
        role = _load_extra(row["extra"]).get("temporal_role")
        if role in ("workflow_interface", "activity_interface"):
            temporal_interfaces[row["name"]] = row["qualified_name"]

    if not temporal_interfaces:
        logger.info("Temporal resolver: no Workflow/ActivityInterface nodes, skipping")
        return {"files_indexed": len(java_files), "calls_resolved": 0}

    # field_map: (source_qualified_class, field_name) -> bare interface name,
    # from TEMPORAL_STUB edges whose target is a known Temporal interface.
    field_map: dict[tuple[str, str], str] = {}
    for row in conn.execute(
        "SELECT source_qualified, target_qualified, extra FROM edges WHERE kind = 'TEMPORAL_STUB'"
    ).fetchall():
        bare_target = row["target_qualified"]
        if bare_target not in temporal_interfaces:
            continue
        fname = _load_extra(row["extra"]).get("field_name")
        if fname:
            field_map[(row["source_qualified"], fname)] = bare_target

    if not field_map:
        logger.info("Temporal resolver: no TEMPORAL_STUB edges found, skipping")
        return {"files_indexed": len(java_files), "calls_resolved": 0}

    # method_to_qual: (class_name, method_name) -> qualified_name of the method node.
    # Keyed by bare parent name, so a later row with the same pair overwrites an earlier one.
    method_to_qual: dict[tuple[str, str], str] = {
        (row["parent_name"], row["name"]): row["qualified_name"]
        for row in conn.execute(
            "SELECT name, qualified_name, parent_name FROM nodes "
            "WHERE kind IN ('Function', 'Test') AND language = 'java' AND parent_name IS NOT NULL"
        ).fetchall()
    }

    # implementors: INHERITS target -> list of implementing class qualified names.
    implementors: dict[str, list[str]] = {}
    for row in conn.execute(
        "SELECT source_qualified, target_qualified FROM edges WHERE kind = 'INHERITS'"
    ).fetchall():
        impl = row["source_qualified"]
        # Cheap substring test first; the per-file prefix scan only runs when it fails.
        if "::" in impl or any(impl.startswith(f) for f in java_files):
            implementors.setdefault(row["target_qualified"], []).append(impl)

    calls_rows = conn.execute(
        "SELECT id, source_qualified, target_qualified, extra, file_path "
        "FROM edges WHERE kind = 'CALLS'"
    ).fetchall()

    resolved = 0
    for row in calls_rows:
        if row["file_path"] not in java_files:
            continue

        extra = _load_extra(row["extra"])
        receiver = extra.get("receiver")
        if not receiver:
            continue
        # Leave edges alone once either resolver has rewritten them.
        if extra.get("temporal_resolved") or extra.get("spring_resolved"):
            continue

        method_name = _bare_method_name(row["target_qualified"])
        source_qual = row["source_qualified"]

        enclosing_class_qual = _enclosing_class(source_qual)
        if not enclosing_class_qual:
            continue

        interface_bare = field_map.get((enclosing_class_qual, receiver))
        if not interface_bare:
            continue

        interface_qual = temporal_interfaces.get(interface_bare, interface_bare)

        # INHERITS targets may be stored qualified or as the bare interface name.
        # Try the qualified key first (the original behaviour), then the bare name,
        # so bare-name edges still lead to the concrete implementation.
        impls = implementors.get(interface_qual) or implementors.get(interface_bare) or []
        if len(impls) == 1:
            owner_qual = impls[0]
            owner_bare = owner_qual.split("::")[-1]
        else:
            owner_qual = interface_qual
            owner_bare = interface_bare
        # Use the indexed method node when there is one; otherwise synthesize "Owner.method".
        new_target = (
            method_to_qual.get((owner_bare, method_name))
            or f"{owner_qual}.{method_name}"
        )

        extra["temporal_resolved"] = True
        extra["temporal_interface"] = interface_bare

        conn.execute(
            "UPDATE edges SET target_qualified = ?, extra = ? WHERE id = ?",
            (new_target, json.dumps(extra), row["id"]),
        )
        resolved += 1
        logger.debug(
            "Temporal resolved: %s → %s (receiver=%s, interface=%s)",
            source_qual, new_target, receiver, interface_bare,
        )

    if resolved:
        conn.commit()

    logger.info("Temporal resolver: resolved %d CALLS edges in %d Java files",
                resolved, len(java_files))
    return {"files_indexed": len(java_files), "calls_resolved": resolved}
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
"""Token reduction benchmark -- measures graph query efficiency vs naive file reading."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import sqlite3
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any
|
|
9
|
+
|
|
10
|
+
from .graph import GraphStore
|
|
11
|
+
from .search import hybrid_search
|
|
12
|
+
|
|
13
|
+
logger = logging.getLogger(__name__)
|
|
14
|
+
|
|
15
|
+
# Sample questions for benchmarking
|
|
16
|
+
_SAMPLE_QUESTIONS = [
|
|
17
|
+
"how does authentication work",
|
|
18
|
+
"what is the main entry point",
|
|
19
|
+
"how are database connections managed",
|
|
20
|
+
"what error handling patterns are used",
|
|
21
|
+
"how do tests verify core functionality",
|
|
22
|
+
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def estimate_tokens(text: str) -> int:
    """Approximate the token count of *text* at about four characters per token.

    The result is never below 1, even for an empty string.
    """
    approx = len(text) // 4
    return approx if approx > 1 else 1
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def compute_naive_tokens(repo_root: Path) -> int:
    """Estimate the token cost of reading every recognised source file under *repo_root*.

    The tree is walked once and each entry whose name ends in one of the known
    source extensions is read and passed to ``estimate_tokens``.  Files are
    decoded as UTF-8 with undecodable bytes replaced, so the count does not
    depend on the platform's locale encoding.  Entries that cannot be read
    (including directories that happen to match) are skipped.

    Args:
        repo_root: Directory to scan recursively.

    Returns:
        Sum of the per-file token estimates; 0 when nothing matches.
    """
    import os

    exts = (
        ".py", ".js", ".ts", ".go", ".rs", ".java",
        ".c", ".cpp", ".rb", ".php", ".swift", ".kt",
    )
    total = 0
    # One traversal instead of one ``rglob`` per extension.
    for f in repo_root.rglob("*"):
        # normcase keeps the platform's glob case rules: it lowercases on Windows
        # and is a no-op on POSIX.
        if not os.path.normcase(f.name).endswith(exts):
            continue
        try:
            total += estimate_tokens(
                f.read_text(encoding="utf-8", errors="replace")
            )
        except OSError:
            continue
    return total
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def run_token_benchmark(
    store: GraphStore,
    repo_root: Path,
    questions: list[str] | None = None,
) -> dict[str, Any]:
    """Compare the token cost of graph queries against reading the whole corpus.

    For each question the top five ``hybrid_search`` hits are collected, plus up
    to five outgoing edges per hit, and their string forms are token-estimated.
    That figure is set against the naive cost of every source file under
    *repo_root*.

    The built-in sample questions are natural language and rely on semantic
    search.  Without embeddings, ``hybrid_search`` falls back to FTS5/LIKE
    matching on node names, which finds nothing for such questions, so every
    ratio comes out as 0.  A warning is logged in that situation so callers know
    to run ``embed_graph`` first or to supply keyword-style questions.

    Args:
        store: Graph store to query.
        repo_root: Repository root used for the naive baseline.
        questions: Questions to benchmark; defaults to the built-in samples.

    Returns:
        Dict with ``naive_corpus_tokens``, ``per_question`` (one entry per
        question), ``average_reduction_ratio`` and a human-readable ``summary``.
    """
    questions = _SAMPLE_QUESTIONS if questions is None else questions
    using_default_questions = questions is _SAMPLE_QUESTIONS

    # A query failure (e.g. no embeddings table) is treated like an empty table.
    try:
        embedding_count = store._conn.execute(
            "SELECT count(*) FROM embeddings"
        ).fetchone()[0]
    except sqlite3.OperationalError:
        embedding_count = 0

    if using_default_questions and embedding_count == 0:
        logger.warning(
            "No embeddings found in this graph. The default sample questions "
            "are natural language and will not match via FTS5/LIKE alone — "
            "every reduction ratio is likely to be 0. Run "
            "`code-review-graph embed` first, or pass keyword-matching `questions=`."
        )

    naive_total = compute_naive_tokens(repo_root)

    per_question = []
    for question in questions:
        # Simulated graph context: each search hit plus a handful of its neighbours.
        graph_tokens = 0
        for hit in hybrid_search(store, question, limit=5):
            graph_tokens += estimate_tokens(str(hit))
            neighbours = store.get_edges_by_source(hit.get("qualified_name", ""))[:5]
            graph_tokens += sum(estimate_tokens(str(edge)) for edge in neighbours)

        # No hits at all means no meaningful ratio; report 0 rather than dividing by zero.
        ratio = naive_total / graph_tokens if graph_tokens > 0 else 0
        per_question.append({
            "question": question,
            "naive_tokens": naive_total,
            "graph_tokens": graph_tokens,
            "reduction_ratio": round(ratio, 1),
        })

    if per_question:
        ratio_sum = sum(entry["reduction_ratio"] for entry in per_question)
        avg_ratio = float(ratio_sum) / len(per_question)
    else:
        avg_ratio = 0.0

    return {
        "naive_corpus_tokens": naive_total,
        "per_question": per_question,
        "average_reduction_ratio": round(avg_ratio, 1),
        "summary": (
            f"Graph queries use ~{avg_ratio:.0f}x fewer tokens "
            f"than reading all source files"
        ),
    }
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""MCP tool definitions for the Code Review Graph server.
|
|
2
|
+
|
|
3
|
+
Exposes 28 tools:
|
|
4
|
+
1. build_or_update_graph - full or incremental build
|
|
5
|
+
2. get_impact_radius - blast radius from changed files
|
|
6
|
+
3. query_graph - predefined graph queries
|
|
7
|
+
4. get_review_context - focused subgraph + review prompt
|
|
8
|
+
5. semantic_search_nodes - keyword + vector search across nodes
|
|
9
|
+
6. list_graph_stats - aggregate statistics
|
|
10
|
+
7. embed_graph - compute vector embeddings for semantic search
|
|
11
|
+
8. get_docs_section - token-optimized documentation retrieval
|
|
12
|
+
9. find_large_functions - find oversized functions/classes by line count
|
|
13
|
+
10. list_flows - list execution flows sorted by criticality
|
|
14
|
+
11. get_flow - get details of a single execution flow
|
|
15
|
+
12. get_affected_flows - find flows affected by changed files
|
|
16
|
+
13. list_communities - list detected code communities
|
|
17
|
+
14. get_community - get details of a single community
|
|
18
|
+
15. get_architecture_overview - architecture overview from community structure
|
|
19
|
+
16. detect_changes - risk-scored change impact analysis for code review
|
|
20
|
+
17. refactor_tool - unified refactoring (rename preview, dead code, suggestions)
|
|
21
|
+
18. apply_refactor_tool - apply a previously previewed refactoring
|
|
22
|
+
19. generate_wiki - generate markdown wiki from community structure
|
|
23
|
+
20. get_wiki_page - retrieve a specific wiki page
|
|
24
|
+
21. list_repos - list registered repositories
|
|
25
|
+
22. cross_repo_search - search across all registered repositories
|
|
26
|
+
23. get_hub_nodes - find most connected nodes (architectural hotspots)
|
|
27
|
+
24. get_bridge_nodes - find architectural chokepoints (betweenness centrality)
|
|
28
|
+
25. get_knowledge_gaps - identify structural weaknesses
|
|
29
|
+
26. get_surprising_connections - find unexpected architectural coupling
|
|
30
|
+
27. get_suggested_questions - auto-generated review questions from graph analysis
|
|
31
|
+
28. traverse_graph - BFS/DFS traversal from best-matching node
|
|
32
|
+
"""
|
|
33
|
+
|
|
34
|
+
from __future__ import annotations
|
|
35
|
+
|
|
36
|
+
# Re-export names that external code may patch via "code_review_graph.tools.*"
|
|
37
|
+
from ..changes import parse_diff_ranges as parse_diff_ranges
|
|
38
|
+
from ..changes import parse_git_diff_ranges as parse_git_diff_ranges
|
|
39
|
+
from ..changes import parse_svn_diff_ranges as parse_svn_diff_ranges
|
|
40
|
+
from ..incremental import (
|
|
41
|
+
get_changed_files as get_changed_files,
|
|
42
|
+
)
|
|
43
|
+
from ..incremental import (
|
|
44
|
+
get_staged_and_unstaged as get_staged_and_unstaged,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
# -- _common ----------------------------------------------------------------
|
|
48
|
+
from ._common import (
|
|
49
|
+
_BUILTIN_CALL_NAMES,
|
|
50
|
+
_get_store,
|
|
51
|
+
_validate_repo_root,
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
# -- analysis_tools ---------------------------------------------------------
|
|
55
|
+
from .analysis_tools import (
|
|
56
|
+
get_bridge_nodes_func,
|
|
57
|
+
get_hub_nodes_func,
|
|
58
|
+
get_knowledge_gaps_func,
|
|
59
|
+
get_suggested_questions_func,
|
|
60
|
+
get_surprising_connections_func,
|
|
61
|
+
)
|
|
62
|
+
|
|
63
|
+
# -- build ------------------------------------------------------------------
|
|
64
|
+
from .build import build_or_update_graph, run_postprocess
|
|
65
|
+
|
|
66
|
+
# -- community_tools --------------------------------------------------------
|
|
67
|
+
from .community_tools import (
|
|
68
|
+
get_architecture_overview_func,
|
|
69
|
+
get_community_func,
|
|
70
|
+
list_communities_func,
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
# -- context ----------------------------------------------------------------
|
|
74
|
+
from .context import get_minimal_context
|
|
75
|
+
|
|
76
|
+
# -- docs -------------------------------------------------------------------
|
|
77
|
+
from .docs import embed_graph, generate_wiki_func, get_docs_section, get_wiki_page_func
|
|
78
|
+
|
|
79
|
+
# -- flows_tools ------------------------------------------------------------
|
|
80
|
+
from .flows_tools import get_flow, list_flows
|
|
81
|
+
|
|
82
|
+
# -- query ------------------------------------------------------------------
|
|
83
|
+
from .query import (
|
|
84
|
+
find_large_functions,
|
|
85
|
+
get_impact_radius,
|
|
86
|
+
list_graph_stats,
|
|
87
|
+
query_graph,
|
|
88
|
+
semantic_search_nodes,
|
|
89
|
+
traverse_graph_func,
|
|
90
|
+
)
|
|
91
|
+
|
|
92
|
+
# -- refactor_tools ---------------------------------------------------------
|
|
93
|
+
from .refactor_tools import apply_refactor_func, refactor_func
|
|
94
|
+
|
|
95
|
+
# -- registry_tools ---------------------------------------------------------
|
|
96
|
+
from .registry_tools import cross_repo_search_func, list_repos_func
|
|
97
|
+
|
|
98
|
+
# -- review -----------------------------------------------------------------
|
|
99
|
+
from .review import (
|
|
100
|
+
detect_changes_func,
|
|
101
|
+
get_affected_flows_func,
|
|
102
|
+
get_review_context,
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
# Names exported by ``from code_review_graph.tools import *``.
# Entries are grouped by the submodule each name is imported from above.
__all__ = [
    # _common
    "_BUILTIN_CALL_NAMES",
    "_get_store",
    "_validate_repo_root",
    # build
    "build_or_update_graph",
    "run_postprocess",
    # context
    "get_minimal_context",
    # community_tools
    "get_architecture_overview_func",
    "get_community_func",
    "list_communities_func",
    # docs
    "embed_graph",
    "generate_wiki_func",
    "get_docs_section",
    "get_wiki_page_func",
    # flows_tools
    "get_flow",
    "list_flows",
    # query
    "find_large_functions",
    "get_impact_radius",
    "list_graph_stats",
    "query_graph",
    "semantic_search_nodes",
    "traverse_graph_func",
    # refactor_tools
    "apply_refactor_func",
    "refactor_func",
    # registry_tools
    "cross_repo_search_func",
    "list_repos_func",
    # review
    "detect_changes_func",
    "get_affected_flows_func",
    "get_review_context",
    # analysis_tools
    "get_bridge_nodes_func",
    "get_hub_nodes_func",
    "get_knowledge_gaps_func",
    "get_suggested_questions_func",
    "get_surprising_connections_func",
    # re-exported for backward compat (used in test patches)
    "get_changed_files",
    "get_staged_and_unstaged",
    "parse_git_diff_ranges",
    "parse_svn_diff_ranges",
    "parse_diff_ranges",
]
|