codespine 0.5.0__tar.gz → 0.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.5.0 → codespine-0.5.1}/PKG-INFO +1 -1
- {codespine-0.5.0 → codespine-0.5.1}/codespine/__init__.py +1 -1
- codespine-0.5.1/codespine/analysis/crossmodule.py +173 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/deadcode.py +113 -53
- {codespine-0.5.0 → codespine-0.5.1}/codespine/mcp/server.py +87 -13
- {codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.5.0 → codespine-0.5.1}/pyproject.toml +1 -1
- codespine-0.5.0/codespine/analysis/crossmodule.py +0 -230
- {codespine-0.5.0 → codespine-0.5.1}/LICENSE +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/README.md +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/__init__.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/community.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/context.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/coupling.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/flow.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/impact.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/cli.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/config.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/db/__init__.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/db/schema.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/db/store.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/diff/__init__.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/__init__.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/engine.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/mcp/__init__.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/noise/__init__.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/noise/blocklist.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/search/__init__.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/search/bm25.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/search/fuzzy.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/search/hybrid.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/search/rrf.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/search/vector.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/watch/__init__.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine/watch/watcher.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/gindex.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/setup.cfg +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/tests/test_call_resolver.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/tests/test_java_parser.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/tests/test_multimodule_index.py +0 -0
- {codespine-0.5.0 → codespine-0.5.1}/tests/test_search_ranking.py +0 -0
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
"""Cross-module call edge linker.
|
|
2
|
+
|
|
3
|
+
After all modules in a workspace have been individually indexed, each module's
|
|
4
|
+
call resolver only sees methods within that module. This module fills the gap
|
|
5
|
+
by scanning the graph for cross-project class references (REFERENCES_TYPE and
|
|
6
|
+
IMPLEMENTS edges) and creating CALLS edges between methods where the call is
|
|
7
|
+
plausible.
|
|
8
|
+
|
|
9
|
+
Strategy A — Name + arity match (confidence 0.7)
|
|
10
|
+
If src_class references dst_class (cross-project) and both have a method
|
|
11
|
+
with the same name and same parameter count, create a CALLS edge. This
|
|
12
|
+
catches delegation, interface-implementation forwarding, and adapter
|
|
13
|
+
patterns.
|
|
14
|
+
|
|
15
|
+
Strategy B — Type-reference fallback (confidence 0.4)
|
|
16
|
+
For each non-private, non-constructor method in dst_class that received NO name-match edge,
|
|
17
|
+
create ONE low-confidence edge from a representative src method (preferring
|
|
18
|
+
one with zero outgoing calls). This prevents methods that are genuinely
|
|
19
|
+
used cross-module from appearing as dead code.
|
|
20
|
+
"""
|
|
21
|
+
from __future__ import annotations
|
|
22
|
+
|
|
23
|
+
import logging
|
|
24
|
+
from collections import defaultdict
|
|
25
|
+
|
|
26
|
+
LOGGER = logging.getLogger(__name__)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _param_count(sig: str) -> int:
|
|
30
|
+
"""Count parameters from a method signature string."""
|
|
31
|
+
if not sig or "(" not in sig or ")" not in sig:
|
|
32
|
+
return 0
|
|
33
|
+
arg_str = sig[sig.find("(") + 1: sig.rfind(")")]
|
|
34
|
+
return 0 if not arg_str.strip() else arg_str.count(",") + 1
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
    """Create heuristic CALLS edges between methods of classes in different projects.

    Class pairs are collected from REFERENCES_TYPE and IMPLEMENTS edges whose
    two endpoints live in files of different projects.  For each pair two
    strategies are applied:

    * Strategy A: every src/dst method pair with the same name and the same
      parameter count gets an edge (confidence 0.7, reason
      ``cross_module_name_match``).
    * Strategy B: every dst method left unmatched by Strategy A that is
      neither a constructor nor ``private`` gets one edge (confidence 0.4,
      reason ``cross_module_type_ref``) from a single representative src
      method.  The representative is the first src method with zero outgoing
      CALLS edges, or the first src method if none qualifies.

    Pairs are processed in sorted order so the outcome does not depend on set
    iteration order (the Strategy B representative depends on edges added for
    earlier pairs).

    Args:
        store: Graph store exposing ``query_records(query, params)`` and
            ``add_call(src_id, dst_id, confidence, reason)``.
        project_ids: Known project ids.  Fetched from the store when ``None``.
            Only the number of ids is used (linking is skipped for fewer than
            two); the graph queries themselves are not restricted to them.

    Returns:
        The number of edges for which ``store.add_call`` succeeded.
    """
    if project_ids is None:
        proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
        project_ids = [r["id"] for r in proj_recs]

    if len(project_ids) < 2:
        LOGGER.info(
            "Only %d project(s) indexed — skipping cross-module linking.",
            len(project_ids),
        )
        return 0

    # ── 1. Collect cross-project class pairs ──────────────────────────
    ref_pairs = store.query_records(
        """
        MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class), (sf:File), (df:File)
        WHERE src.file_id = sf.id AND dst.file_id = df.id
          AND sf.project_id <> df.project_id
        RETURN DISTINCT src.id as src_cid, dst.id as dst_cid
        """
    )
    impl_pairs = store.query_records(
        """
        MATCH (src:Class)-[:IMPLEMENTS]->(dst:Class), (sf:File), (df:File)
        WHERE src.file_id = sf.id AND dst.file_id = df.id
          AND sf.project_id <> df.project_id
        RETURN DISTINCT src.id as src_cid, dst.id as dst_cid
        """
    )

    all_pairs: set[tuple[str, str]] = set()
    for p in ref_pairs:
        all_pairs.add((p["src_cid"], p["dst_cid"]))
    for p in impl_pairs:
        all_pairs.add((p["src_cid"], p["dst_cid"]))

    if not all_pairs:
        LOGGER.info("No cross-project class references found.")
        return 0

    LOGGER.info(
        "Cross-module: %d cross-project class pair(s) to process.",
        len(all_pairs),
    )

    # ── 2. Process each class pair ────────────────────────────────────
    new_edges = 0
    # (src_method_id, dst_method_id) pairs already attempted in this run.
    seen: set[tuple[str, str]] = set()

    # Sorted for reproducibility; str() keeps the sort safe for any id type.
    ordered_pairs = sorted(all_pairs, key=lambda pr: (str(pr[0]), str(pr[1])))

    for src_cid, dst_cid in ordered_pairs:
        src_methods = store.query_records(
            """MATCH (m:Method) WHERE m.class_id = $cid
               RETURN m.id as mid, m.name as name, m.signature as sig""",
            {"cid": src_cid},
        )
        dst_methods = store.query_records(
            """MATCH (m:Method) WHERE m.class_id = $cid
               RETURN m.id as mid, m.name as name, m.signature as sig,
                      m.modifiers as modifiers, m.is_constructor as is_ctor""",
            {"cid": dst_cid},
        )
        if not src_methods or not dst_methods:
            continue

        # Build name → methods index for src class
        src_by_name: dict[str, list[dict]] = defaultdict(list)
        for sm in src_methods:
            src_by_name[sm["name"]].append(sm)

        # ── Strategy A: name + arity matching ─────────────────────────
        matched_dst_mids: set[str] = set()

        for dm in dst_methods:
            dm_pc = _param_count(dm.get("sig") or "")
            for sm in src_by_name.get(dm["name"], []):
                if _param_count(sm.get("sig") or "") != dm_pc:
                    continue
                pair = (sm["mid"], dm["mid"])
                if pair in seen:
                    # Already linked (or attempted) earlier; still counts as matched.
                    matched_dst_mids.add(dm["mid"])
                    continue
                seen.add(pair)
                try:
                    store.add_call(
                        sm["mid"], dm["mid"], 0.7, "cross_module_name_match",
                    )
                    new_edges += 1
                    matched_dst_mids.add(dm["mid"])
                except Exception as exc:
                    # Best-effort linking: one failed edge must not abort the run.
                    LOGGER.debug("Name-match edge failed: %s", exc)

        # ── Strategy B: fallback for unmatched non-private dst methods ─
        # Work out the targets first so the per-method queries below are
        # skipped entirely when there is nothing left to link.
        fallback_targets = []
        for dm in dst_methods:
            if dm["mid"] in matched_dst_mids:
                continue
            # Skip constructors and private methods
            if dm.get("is_ctor"):
                continue
            mods = dm.get("modifiers") or []
            mod_strs = {str(m).strip() for m in mods} if mods else set()
            if "private" in mod_strs:
                continue
            fallback_targets.append(dm)

        if not fallback_targets:
            continue

        # Find a representative caller: prefer src methods with 0 outgoing calls
        fallback_src = None
        for sm in src_methods:
            out = store.query_records(
                "MATCH (m:Method {id: $mid})-[:CALLS]->(:Method) RETURN count(*) as n",
                {"mid": sm["mid"]},
            )
            if out and out[0]["n"] == 0:
                fallback_src = sm
                break
        if fallback_src is None:
            fallback_src = src_methods[0]

        for dm in fallback_targets:
            pair = (fallback_src["mid"], dm["mid"])
            if pair in seen:
                continue
            seen.add(pair)
            try:
                store.add_call(
                    fallback_src["mid"], dm["mid"], 0.4, "cross_module_type_ref",
                )
                new_edges += 1
            except Exception as exc:
                # Best-effort, same as Strategy A.
                LOGGER.debug("Fallback edge failed: %s", exc)

    LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
    return new_edges
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
from collections import defaultdict
|
|
4
|
+
|
|
5
|
+
# ── Annotation sets ──────────────────────────────────────────────────
|
|
6
|
+
# Entry-point annotations — exempt even in strict mode. These represent
|
|
7
|
+
# actual runtime entry points that the framework calls reflectively.
|
|
8
|
+
ENTRY_POINT_ANNOTATIONS = {
|
|
6
9
|
# JUnit / testing
|
|
7
10
|
"Test",
|
|
8
11
|
"ParameterizedTest",
|
|
@@ -10,21 +13,6 @@ EXEMPT_ANNOTATIONS = {
|
|
|
10
13
|
"AfterEach",
|
|
11
14
|
"BeforeAll",
|
|
12
15
|
"AfterAll",
|
|
13
|
-
# Spring – component model (class-level; methods inside are never "dead")
|
|
14
|
-
"Component",
|
|
15
|
-
"Service",
|
|
16
|
-
"Repository",
|
|
17
|
-
"Controller",
|
|
18
|
-
"RestController",
|
|
19
|
-
"Configuration",
|
|
20
|
-
"Bean",
|
|
21
|
-
"Aspect",
|
|
22
|
-
# Spring – lifecycle / event hooks
|
|
23
|
-
"PostConstruct",
|
|
24
|
-
"PreDestroy",
|
|
25
|
-
"EventListener",
|
|
26
|
-
"TransactionalEventListener",
|
|
27
|
-
"Scheduled",
|
|
28
16
|
# Spring – web entry points
|
|
29
17
|
"RequestMapping",
|
|
30
18
|
"GetMapping",
|
|
@@ -33,12 +21,35 @@ EXEMPT_ANNOTATIONS = {
|
|
|
33
21
|
"DeleteMapping",
|
|
34
22
|
"PatchMapping",
|
|
35
23
|
"MessageMapping",
|
|
36
|
-
# Spring – messaging / async
|
|
24
|
+
# Spring – messaging / async entry points
|
|
37
25
|
"KafkaListener",
|
|
38
26
|
"RabbitListener",
|
|
39
27
|
"JmsListener",
|
|
40
28
|
"SqsListener",
|
|
41
29
|
"StreamListener",
|
|
30
|
+
# Spring – lifecycle / event hooks
|
|
31
|
+
"PostConstruct",
|
|
32
|
+
"PreDestroy",
|
|
33
|
+
"EventListener",
|
|
34
|
+
"TransactionalEventListener",
|
|
35
|
+
"Scheduled",
|
|
36
|
+
}
|
|
37
|
+
|
|
38
|
+
# Broad annotations — exempt only in normal mode. These indicate the
|
|
39
|
+
# method is *likely* used via DI / serialisation / reflection, but in a
|
|
40
|
+
# strict audit the user may want to verify that manually.
|
|
41
|
+
BROAD_ANNOTATIONS = {
|
|
42
|
+
# Java standard
|
|
43
|
+
"Override",
|
|
44
|
+
# Spring – component model (class-level; methods inside are never "dead")
|
|
45
|
+
"Component",
|
|
46
|
+
"Service",
|
|
47
|
+
"Repository",
|
|
48
|
+
"Controller",
|
|
49
|
+
"RestController",
|
|
50
|
+
"Configuration",
|
|
51
|
+
"Bean",
|
|
52
|
+
"Aspect",
|
|
42
53
|
# Spring Data / persistence
|
|
43
54
|
"Query",
|
|
44
55
|
"Modifying",
|
|
@@ -48,18 +59,21 @@ EXEMPT_ANNOTATIONS = {
|
|
|
48
59
|
"Singleton",
|
|
49
60
|
"Named",
|
|
50
61
|
"Qualifier",
|
|
51
|
-
# Jakarta / javax DI
|
|
62
|
+
# Jakarta / javax DI
|
|
52
63
|
"ApplicationScoped",
|
|
53
64
|
"RequestScoped",
|
|
54
65
|
"SessionScoped",
|
|
55
66
|
"Dependent",
|
|
56
|
-
# Jackson / serialization
|
|
67
|
+
# Jackson / serialization
|
|
57
68
|
"JsonCreator",
|
|
58
69
|
"JsonProperty",
|
|
59
70
|
"JsonDeserialize",
|
|
60
71
|
"JsonSerialize",
|
|
61
72
|
}
|
|
62
73
|
|
|
74
|
+
# Full set used in normal mode
|
|
75
|
+
EXEMPT_ANNOTATIONS = ENTRY_POINT_ANNOTATIONS | BROAD_ANNOTATIONS
|
|
76
|
+
|
|
63
77
|
EXEMPT_CONTRACT_METHODS = {
|
|
64
78
|
"toString",
|
|
65
79
|
"hashCode",
|
|
@@ -74,6 +88,15 @@ def _modifier_tokens(modifiers) -> set[str]:
|
|
|
74
88
|
return {str(m).strip() for m in modifiers}
|
|
75
89
|
|
|
76
90
|
|
|
91
|
+
def _matched_annotation(mods: set[str], annotation_set: set[str]) -> str | None:
|
|
92
|
+
"""Return the first annotation in *mods* that appears in *annotation_set*, or None."""
|
|
93
|
+
for m in mods:
|
|
94
|
+
bare = m.lstrip("@")
|
|
95
|
+
if bare in annotation_set:
|
|
96
|
+
return bare
|
|
97
|
+
return None
|
|
98
|
+
|
|
99
|
+
|
|
77
100
|
def _assign_confidence(candidate: dict, strict: bool) -> str:
|
|
78
101
|
"""Assign a confidence level (high / medium / low) to each dead method.
|
|
79
102
|
|
|
@@ -101,16 +124,17 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
|
|
|
101
124
|
limit – Max results to return.
|
|
102
125
|
project – Scope to a single module.
|
|
103
126
|
strict – When True, only exempt main()/@Test methods and explicit
|
|
104
|
-
entry-point annotations
|
|
105
|
-
contract-method,
|
|
127
|
+
entry-point annotations (RequestMapping, KafkaListener, etc.).
|
|
128
|
+
Skips the broad bean-getter/setter, contract-method,
|
|
129
|
+
constructor, Override, and DI annotation exemptions.
|
|
106
130
|
|
|
107
131
|
Returns a list of dead method dicts, each with:
|
|
108
132
|
method_id, name, signature, class_fqcn, file_path, reason, confidence.
|
|
109
133
|
|
|
110
134
|
The return value is augmented with a ``_stats`` entry (a sentinel dict
|
|
111
|
-
with key ``_stats``) containing pre/post-exemption counts
|
|
112
|
-
|
|
113
|
-
|
|
135
|
+
with key ``_stats``) containing pre/post-exemption counts, a breakdown
|
|
136
|
+
of exemption reasons, and a sample of exempted methods so callers can
|
|
137
|
+
validate that the exemption logic is working correctly.
|
|
114
138
|
"""
|
|
115
139
|
if project:
|
|
116
140
|
candidates = store.query_records(
|
|
@@ -153,43 +177,56 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
|
|
|
153
177
|
return []
|
|
154
178
|
|
|
155
179
|
n_candidates = len(candidates)
|
|
156
|
-
exempt: set[str] = set()
|
|
157
180
|
|
|
158
|
-
#
|
|
181
|
+
# Track exemptions as {method_id: reason} instead of a plain set
|
|
182
|
+
exempt: dict[str, str] = {}
|
|
183
|
+
|
|
184
|
+
# Choose annotation set based on mode
|
|
185
|
+
annotations_to_check = ENTRY_POINT_ANNOTATIONS if strict else EXEMPT_ANNOTATIONS
|
|
186
|
+
|
|
187
|
+
# ── Exemption passes ──────────────────────────────────────────────
|
|
159
188
|
for c in candidates:
|
|
189
|
+
mid = c["method_id"]
|
|
190
|
+
if mid in exempt:
|
|
191
|
+
continue
|
|
160
192
|
sig = (c.get("signature") or "").lower()
|
|
161
193
|
name = c.get("name") or ""
|
|
162
194
|
mods = _modifier_tokens(c.get("modifiers"))
|
|
163
195
|
|
|
164
196
|
# Always exempt test methods and main()
|
|
165
197
|
if c.get("is_test"):
|
|
166
|
-
exempt
|
|
198
|
+
exempt[mid] = "test_method"
|
|
199
|
+
continue
|
|
167
200
|
if name == "main" and "string[]" in sig:
|
|
168
|
-
exempt
|
|
201
|
+
exempt[mid] = "main_method"
|
|
202
|
+
continue
|
|
169
203
|
|
|
170
|
-
#
|
|
171
|
-
|
|
172
|
-
|
|
204
|
+
# Exempt methods with entry-point (strict) or all framework (normal) annotations
|
|
205
|
+
matched = _matched_annotation(mods, annotations_to_check)
|
|
206
|
+
if matched:
|
|
207
|
+
exempt[mid] = f"annotation:{matched}"
|
|
208
|
+
continue
|
|
173
209
|
|
|
174
|
-
# Broad exemptions (only in normal mode
|
|
210
|
+
# ── Broad exemptions (only in normal mode) ────────────────────
|
|
175
211
|
if not strict:
|
|
176
212
|
if c.get("is_constructor"):
|
|
177
|
-
exempt
|
|
213
|
+
exempt[mid] = "constructor"
|
|
214
|
+
continue
|
|
178
215
|
if name in EXEMPT_CONTRACT_METHODS:
|
|
179
|
-
exempt
|
|
216
|
+
exempt[mid] = f"contract_method:{name}"
|
|
217
|
+
continue
|
|
180
218
|
# Java bean-ish APIs often rely on reflection/serialization.
|
|
181
|
-
if "public" in mods and (
|
|
182
|
-
|
|
219
|
+
if "public" in mods and (
|
|
220
|
+
name.startswith("get") or name.startswith("set") or name.startswith("is")
|
|
221
|
+
):
|
|
222
|
+
exempt[mid] = "bean_accessor"
|
|
223
|
+
continue
|
|
183
224
|
# Reflection-style hooks
|
|
184
225
|
if name in {"valueOf", "fromString", "builder"}:
|
|
185
|
-
exempt
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
#
|
|
189
|
-
# NOTE: we intentionally do NOT use the class-level IMPLEMENTS relation here
|
|
190
|
-
# because that would exempt ALL methods of every class that implements ANY
|
|
191
|
-
# interface — in a typical Spring project that wipes out almost everything
|
|
192
|
-
# and produces 0 dead code results.
|
|
226
|
+
exempt[mid] = f"reflection_hook:{name}"
|
|
227
|
+
continue
|
|
228
|
+
|
|
229
|
+
# Exempt methods that DIRECTLY override another method.
|
|
193
230
|
# In strict mode, overrides are NOT exempted — if nobody calls the method,
|
|
194
231
|
# it's flagged regardless of whether it overrides a parent.
|
|
195
232
|
if not strict:
|
|
@@ -199,8 +236,12 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
|
|
|
199
236
|
RETURN DISTINCT m.id as method_id
|
|
200
237
|
"""
|
|
201
238
|
)
|
|
202
|
-
|
|
239
|
+
for r in override_methods:
|
|
240
|
+
mid = r["method_id"]
|
|
241
|
+
if mid not in exempt:
|
|
242
|
+
exempt[mid] = "method_override"
|
|
203
243
|
|
|
244
|
+
# ── Build dead list ───────────────────────────────────────────────
|
|
204
245
|
dead = []
|
|
205
246
|
for c in candidates:
|
|
206
247
|
if c["method_id"] in exempt:
|
|
@@ -219,14 +260,31 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
|
|
|
219
260
|
|
|
220
261
|
result = dead[:limit]
|
|
221
262
|
|
|
222
|
-
#
|
|
223
|
-
|
|
224
|
-
|
|
263
|
+
# ── Stats with exemption breakdown ────────────────────────────────
|
|
264
|
+
reason_counts: dict[str, int] = defaultdict(int)
|
|
265
|
+
for reason in exempt.values():
|
|
266
|
+
# Group annotation reasons by prefix for readability
|
|
267
|
+
key = reason.split(":")[0] if ":" in reason else reason
|
|
268
|
+
reason_counts[key] += 1
|
|
269
|
+
|
|
270
|
+
# Sample of exempted methods (up to 10) for user inspection
|
|
271
|
+
exempted_sample = []
|
|
272
|
+
for mid, reason in list(exempt.items())[:10]:
|
|
273
|
+
candidate = next((c for c in candidates if c["method_id"] == mid), None)
|
|
274
|
+
if candidate:
|
|
275
|
+
exempted_sample.append({
|
|
276
|
+
"name": candidate.get("name"),
|
|
277
|
+
"signature": candidate.get("signature"),
|
|
278
|
+
"class_fqcn": candidate.get("class_fqcn"),
|
|
279
|
+
"exemption_reason": reason,
|
|
280
|
+
})
|
|
281
|
+
|
|
225
282
|
if strict:
|
|
226
283
|
exemption_note = (
|
|
227
|
-
"STRICT MODE: Only test methods, main(), and
|
|
228
|
-
"annotations
|
|
229
|
-
"
|
|
284
|
+
"STRICT MODE: Only test methods, main(), and entry-point "
|
|
285
|
+
"annotations (RequestMapping, KafkaListener, Scheduled, etc.) "
|
|
286
|
+
"are exempted. Constructors, getters/setters, @Override, DI "
|
|
287
|
+
"annotations, and contract methods are NOT exempt."
|
|
230
288
|
)
|
|
231
289
|
else:
|
|
232
290
|
exemption_note = (
|
|
@@ -242,6 +300,8 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
|
|
|
242
300
|
"dead_returned": len(result),
|
|
243
301
|
"mode": "strict" if strict else "normal",
|
|
244
302
|
"note": exemption_note,
|
|
303
|
+
"exemptions_breakdown": dict(reason_counts),
|
|
304
|
+
"exempted_sample": exempted_sample,
|
|
245
305
|
}
|
|
246
306
|
})
|
|
247
307
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import json as _json_mod
|
|
3
4
|
import subprocess
|
|
4
5
|
import sys
|
|
5
6
|
import tempfile
|
|
@@ -18,6 +19,16 @@ from codespine.diff.branch_diff import compare_branches as compare_branches_anal
|
|
|
18
19
|
from codespine.search.hybrid import hybrid_search
|
|
19
20
|
|
|
20
21
|
|
|
22
|
+
def _json(data: dict) -> str:
|
|
23
|
+
"""Serialize response dict to a JSON string.
|
|
24
|
+
|
|
25
|
+
FastMCP double-serialises dict return values on many transports (SSE,
|
|
26
|
+
stdio) producing duplicate JSON payloads that waste ~50 K tokens/session.
|
|
27
|
+
Returning a pre-serialised string guarantees a single TextContent block.
|
|
28
|
+
"""
|
|
29
|
+
return _json_mod.dumps(data, separators=(",", ":"))
|
|
30
|
+
|
|
31
|
+
|
|
21
32
|
def _git_available(path: str) -> bool:
|
|
22
33
|
"""Return True if path is inside a git repository."""
|
|
23
34
|
try:
|
|
@@ -44,14 +55,27 @@ def _resolve_repo_path(store, project: str | None, repo_path_provider) -> str:
|
|
|
44
55
|
return repo_path_provider()
|
|
45
56
|
|
|
46
57
|
|
|
47
|
-
def _no_symbols_response(note: str = "No symbols indexed. Run 'codespine analyse <path>' first.") ->
|
|
48
|
-
return {"available": False, "note": note}
|
|
58
|
+
def _no_symbols_response(note: str = "No symbols indexed. Run 'codespine analyse <path>' first.") -> str:
    """Build the serialised "nothing indexed" reply.

    Returns the JSON string for ``{"available": False, "note": note}``.
    """
    payload = {"available": False, "note": note}
    return _json(payload)
|
|
49
60
|
|
|
50
61
|
|
|
51
|
-
def
|
|
52
|
-
"""
|
|
62
|
+
def _parse_indexed_at(raw) -> int:
|
|
63
|
+
"""Robustly parse an indexed_at value that may be str, int, float, or None."""
|
|
64
|
+
if raw is None:
|
|
65
|
+
return 0
|
|
66
|
+
try:
|
|
67
|
+
val = int(float(str(raw)))
|
|
68
|
+
# Sanity check: must look like a Unix timestamp (> year 2000)
|
|
69
|
+
return val if val > 946684800 else 0
|
|
70
|
+
except (ValueError, TypeError):
|
|
71
|
+
return 0
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def _staleness_meta(store, response: dict, project: str | None = None) -> str:
|
|
75
|
+
"""Inject index staleness metadata into every tool response and serialise.
|
|
53
76
|
|
|
54
77
|
Adds ``index_age_seconds`` and ``stale_warning`` when the index is old.
|
|
78
|
+
Returns a JSON string (not a dict) to avoid FastMCP double-serialisation.
|
|
55
79
|
"""
|
|
56
80
|
try:
|
|
57
81
|
if project:
|
|
@@ -64,10 +88,11 @@ def _staleness_meta(store, response: dict, project: str | None = None) -> dict:
|
|
|
64
88
|
"MATCH (p:Project) RETURN p.indexed_at as ts ORDER BY p.indexed_at ASC LIMIT 1"
|
|
65
89
|
)
|
|
66
90
|
if recs:
|
|
67
|
-
ts =
|
|
91
|
+
ts = _parse_indexed_at(recs[0].get("ts"))
|
|
68
92
|
if ts:
|
|
69
93
|
age = int(time.time()) - ts
|
|
70
94
|
response["index_age_seconds"] = age
|
|
95
|
+
response["indexed_at_epoch"] = ts
|
|
71
96
|
if age > 3600:
|
|
72
97
|
response["stale_warning"] = (
|
|
73
98
|
f"Index is {age // 3600}h {(age % 3600) // 60}m old. "
|
|
@@ -75,11 +100,40 @@ def _staleness_meta(store, response: dict, project: str | None = None) -> dict:
|
|
|
75
100
|
)
|
|
76
101
|
except Exception:
|
|
77
102
|
pass
|
|
78
|
-
return response
|
|
103
|
+
return _json(response)
|
|
79
104
|
|
|
80
105
|
|
|
81
106
|
def build_mcp_server(store, repo_path_provider):
|
|
82
|
-
|
|
107
|
+
_raw_mcp = FastMCP("codespine")
|
|
108
|
+
|
|
109
|
+
# ── Anti-duplicate-JSON wrapper ────────────────────────────────────
|
|
110
|
+
# FastMCP double-serialises dict return values on many transports,
|
|
111
|
+
# producing duplicate JSON payloads that waste ~50 K tokens/session.
|
|
112
|
+
# We intercept tool registration so every tool's dict return is
|
|
113
|
+
# pre-serialised to a JSON string (single TextContent block).
|
|
114
|
+
import functools as _functools
|
|
115
|
+
|
|
116
|
+
class _JsonMCP:
|
|
117
|
+
"""Thin proxy that wraps tool functions to return JSON strings."""
|
|
118
|
+
def __getattr__(self, name):
|
|
119
|
+
return getattr(_raw_mcp, name)
|
|
120
|
+
|
|
121
|
+
def tool(self, *args, **kwargs):
|
|
122
|
+
original_decorator = _raw_mcp.tool(*args, **kwargs)
|
|
123
|
+
def wrapper(fn):
|
|
124
|
+
@_functools.wraps(fn)
|
|
125
|
+
def json_fn(*a, **kw):
|
|
126
|
+
result = fn(*a, **kw)
|
|
127
|
+
if isinstance(result, dict):
|
|
128
|
+
return _json(result)
|
|
129
|
+
return result
|
|
130
|
+
return original_decorator(json_fn)
|
|
131
|
+
return wrapper
|
|
132
|
+
|
|
133
|
+
def run(self):
|
|
134
|
+
return _raw_mcp.run()
|
|
135
|
+
|
|
136
|
+
mcp = _JsonMCP()
|
|
83
137
|
|
|
84
138
|
# Background job state (per-server-instance, persists across tool calls)
|
|
85
139
|
_watch: dict = {"proc": None, "path": None, "started_at": None, "interval": 30}
|
|
@@ -92,7 +146,7 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
92
146
|
@mcp.tool()
|
|
93
147
|
def ping():
|
|
94
148
|
"""Verify the MCP server is alive. Call this first to confirm connectivity."""
|
|
95
|
-
return {"status": "ok", "version": __version__}
|
|
149
|
+
return _json({"status": "ok", "version": __version__})
|
|
96
150
|
|
|
97
151
|
@mcp.tool()
|
|
98
152
|
def get_capabilities():
|
|
@@ -1243,21 +1297,41 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
1243
1297
|
|
|
1244
1298
|
proj_path = proj_recs[0]["path"]
|
|
1245
1299
|
|
|
1246
|
-
# Run incremental index via subprocess to avoid read-only DB constraint
|
|
1300
|
+
# Run incremental index via subprocess to avoid read-only DB constraint.
|
|
1301
|
+
# Use Popen + communicate() with a timeout so that a hang never crashes
|
|
1302
|
+
# the MCP server process — the subprocess is killed gracefully instead.
|
|
1247
1303
|
cmd = [
|
|
1248
1304
|
sys.executable, "-m", "codespine.cli",
|
|
1249
1305
|
"analyse", proj_path,
|
|
1250
1306
|
"--incremental", "--no-embed", "--allow-running",
|
|
1251
1307
|
]
|
|
1252
1308
|
t0 = time.time()
|
|
1253
|
-
|
|
1254
|
-
|
|
1309
|
+
try:
|
|
1310
|
+
proc = subprocess.Popen(
|
|
1311
|
+
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
|
|
1312
|
+
)
|
|
1313
|
+
stdout, stderr = proc.communicate(timeout=30)
|
|
1314
|
+
elapsed = round(time.time() - t0, 2)
|
|
1315
|
+
except subprocess.TimeoutExpired:
|
|
1316
|
+
proc.kill()
|
|
1317
|
+
proc.communicate() # reap zombie
|
|
1318
|
+
elapsed = round(time.time() - t0, 2)
|
|
1319
|
+
return {
|
|
1320
|
+
"available": False,
|
|
1321
|
+
"note": f"Re-index timed out after {elapsed}s. The project may be too large for single-file re-index. Use analyse_project() instead.",
|
|
1322
|
+
}
|
|
1323
|
+
except Exception as exc:
|
|
1324
|
+
elapsed = round(time.time() - t0, 2)
|
|
1325
|
+
return {
|
|
1326
|
+
"available": False,
|
|
1327
|
+
"note": f"Re-index error: {exc}",
|
|
1328
|
+
}
|
|
1255
1329
|
|
|
1256
1330
|
if proc.returncode != 0:
|
|
1257
1331
|
return {
|
|
1258
1332
|
"available": False,
|
|
1259
1333
|
"note": f"Re-index failed (code {proc.returncode})",
|
|
1260
|
-
"error":
|
|
1334
|
+
"error": (stderr or stdout or "").strip()[:500],
|
|
1261
1335
|
}
|
|
1262
1336
|
|
|
1263
1337
|
return {
|
|
@@ -1278,4 +1352,4 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
1278
1352
|
records = store.query_records(query)
|
|
1279
1353
|
return {"available": True, "records": records, "count": len(records)}
|
|
1280
1354
|
|
|
1281
|
-
return
|
|
1355
|
+
return _raw_mcp
|
|
@@ -1,230 +0,0 @@
|
|
|
1
|
-
"""Cross-module call edge linker.
|
|
2
|
-
|
|
3
|
-
After all modules in a workspace have been individually indexed, each module's
|
|
4
|
-
call resolver only sees methods within that module. This module fills the gap
|
|
5
|
-
by scanning the graph for unresolved outgoing calls from one module that match
|
|
6
|
-
method signatures in another module, then creating CALLS edges between them.
|
|
7
|
-
|
|
8
|
-
The algorithm:
|
|
9
|
-
1. Build a global method catalog (method_id → name, param_count, class_fqcn)
|
|
10
|
-
from the DB across ALL projects.
|
|
11
|
-
2. Build a per-project import map: for each file, record which FQCNs are
|
|
12
|
-
imported (from the class nodes + extends/implements relations).
|
|
13
|
-
3. For each method M in project A, find its outgoing calls that did NOT
|
|
14
|
-
resolve to any target. These are method invocations that tree-sitter
|
|
15
|
-
parsed but call_resolver.py could not match (because the target was in a
|
|
16
|
-
different module).
|
|
17
|
-
4. For each unresolved call, use the file's import list + the global class
|
|
18
|
-
catalog to find candidate target methods in OTHER projects.
|
|
19
|
-
5. Create CALLS edges with confidence 0.6 and reason "cross_module_import".
|
|
20
|
-
|
|
21
|
-
Because ParsedCall data is transient (not stored in the DB), we use a simpler
|
|
22
|
-
heuristic: find methods in module A that have ZERO outgoing CALLS edges but
|
|
23
|
-
are known to reference classes from other modules (via REFERENCES_TYPE or
|
|
24
|
-
import analysis). Then attempt to link them by matching method names against
|
|
25
|
-
the global catalog.
|
|
26
|
-
|
|
27
|
-
A faster fallback strategy (implemented below):
|
|
28
|
-
- Collect all class FQCNs per project.
|
|
29
|
-
- For each project pair (A, B), find classes in A that IMPLEMENT/extend
|
|
30
|
-
classes in B — these already have edges.
|
|
31
|
-
- For method-level cross-module calls: scan for methods with 0 outgoing
|
|
32
|
-
edges, match their name+arity against methods in other projects, and
|
|
33
|
-
only link when the target class is imported (appears in the same file's
|
|
34
|
-
import set via REFERENCES_TYPE edges).
|
|
35
|
-
"""
|
|
36
|
-
from __future__ import annotations
|
|
37
|
-
|
|
38
|
-
import logging
|
|
39
|
-
from collections import defaultdict
|
|
40
|
-
|
|
41
|
-
LOGGER = logging.getLogger(__name__)
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
def _param_count(signature: str) -> int:
    """Return the number of top-level parameters in a method signature string.

    The argument list is taken as the text between the first ``(`` and the
    last ``)``.  Commas nested inside ``<>``, ``()``, ``[]`` or ``{}`` are
    ignored so that a generic parameter such as ``Map<String, Integer> m``
    counts as ONE parameter rather than two.

    Returns 0 when the signature has no parenthesised argument list or the
    list is empty/whitespace.
    """
    if "(" not in signature or ")" not in signature:
        return 0
    args = signature[signature.find("(") + 1: signature.rfind(")")]
    if not args.strip():
        return 0

    depth = 0
    count = 1
    for ch in args:
        if ch in "<([{":
            depth += 1
        elif ch in ">)]}":
            # Clamp at zero so a stray closer cannot hide later separators.
            depth = max(depth - 1, 0)
        elif ch == "," and depth == 0:
            count += 1
    return count


def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
    """Create CALLS edges between methods in different projects.

    For every ``REFERENCES_TYPE`` edge whose source and destination classes
    live in files of different projects, each method of the source class is
    linked to each method of the destination class whose ``(name, arity)``
    is unambiguous within that destination class.  Overloads that share a
    name and arity are skipped because the target cannot be told apart.
    Edges are written with confidence 0.6 and reason
    ``"cross_module_import"``.

    Args:
        store: Graph store exposing ``query_records(query, params=None)`` and
            ``add_call(src_method_id, dst_method_id, confidence, reason)``.
        project_ids: Project ids to consider.  When ``None`` they are read
            from the store.  NOTE(review): only the *number* of ids is used
            (to skip work when fewer than two projects exist); the class-pair
            query below is not restricted to these ids.

    Returns:
        The number of new cross-module call edges created.  Edges whose
        ``add_call`` raised are logged at DEBUG level and not counted.
    """
    if project_ids is None:
        proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
        project_ids = [r["id"] for r in proj_recs]

    if len(project_ids) < 2:
        LOGGER.info("Only %d project(s) indexed — skipping cross-module linking.", len(project_ids))
        return 0

    # Class-level references that cross a project boundary.
    xmod_class_pairs = store.query_records(
        """
        MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class), (sf:File), (df:File)
        WHERE src.file_id = sf.id AND dst.file_id = df.id
          AND sf.project_id <> df.project_id
        RETURN src.id as src_cid, dst.id as dst_cid,
               sf.project_id as src_pid, df.project_id as dst_pid
        """
    )

    # A class usually takes part in many pairs; fetch its methods only once.
    methods_by_class: dict = {}
    targets_by_class: dict = {}

    def methods_of(class_id) -> list:
        if class_id not in methods_by_class:
            methods_by_class[class_id] = list(
                store.query_records(
                    "MATCH (m:Method) WHERE m.class_id = $cid RETURN m.id as mid, m.name as name, m.signature as sig",
                    {"cid": class_id},
                )
            )
        return methods_by_class[class_id]

    def unique_targets_of(class_id) -> list:
        """Method ids of the class whose (name, arity) key occurs exactly once."""
        if class_id not in targets_by_class:
            by_name_arity: dict = {}
            for dm in methods_of(class_id):
                key = (dm["name"], _param_count(dm.get("sig") or ""))
                by_name_arity.setdefault(key, []).append(dm["mid"])
            targets_by_class[class_id] = [
                ids[0] for ids in by_name_arity.values() if len(ids) == 1
            ]
        return targets_by_class[class_id]

    new_edges = 0
    # Guards against duplicate edges when the same class pair (or the same
    # method pair) is reported more than once.
    seen_pairs: set = set()

    for pair in xmod_class_pairs:
        dst_ids = unique_targets_of(pair["dst_cid"])
        if not dst_ids:
            continue
        # Every method of the referencing class is linked, including methods
        # that have no other outgoing CALLS edges.
        for sm in methods_of(pair["src_cid"]):
            src_mid = sm["mid"]
            for dst_mid in dst_ids:
                edge_pair = (src_mid, dst_mid)
                if edge_pair in seen_pairs:
                    continue
                seen_pairs.add(edge_pair)
                try:
                    store.add_call(src_mid, dst_mid, 0.6, "cross_module_import")
                except Exception as exc:  # best-effort: one bad edge must not abort linking
                    LOGGER.debug("Cross-module edge failed: %s", exc)
                else:
                    new_edges += 1

    LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
    return new_edges
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|