codespine 0.5.1__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.5.1 → codespine-0.5.2}/PKG-INFO +1 -1
- {codespine-0.5.1 → codespine-0.5.2}/codespine/__init__.py +1 -1
- codespine-0.5.2/codespine/analysis/crossmodule.py +185 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/db/store.py +25 -10
- {codespine-0.5.1 → codespine-0.5.2}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.5.1 → codespine-0.5.2}/pyproject.toml +1 -1
- codespine-0.5.1/codespine/analysis/crossmodule.py +0 -173
- {codespine-0.5.1 → codespine-0.5.2}/LICENSE +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/README.md +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/analysis/__init__.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/analysis/community.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/analysis/context.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/analysis/coupling.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/analysis/flow.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/analysis/impact.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/cli.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/config.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/db/__init__.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/db/schema.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/diff/__init__.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/indexer/__init__.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/indexer/engine.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/mcp/__init__.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/mcp/server.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/noise/__init__.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/noise/blocklist.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/search/__init__.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/search/bm25.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/search/fuzzy.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/search/hybrid.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/search/rrf.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/search/vector.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/watch/__init__.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine/watch/watcher.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/gindex.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/setup.cfg +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/tests/test_call_resolver.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/tests/test_java_parser.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/tests/test_multimodule_index.py +0 -0
- {codespine-0.5.1 → codespine-0.5.2}/tests/test_search_ranking.py +0 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
"""Cross-module call edge linker.
|
|
2
|
+
|
|
3
|
+
After all modules in a workspace have been individually indexed, each module's
|
|
4
|
+
call resolver only sees methods *within that module* (the class/method catalogs
|
|
5
|
+
are project-scoped). This module fills the gap by:
|
|
6
|
+
|
|
7
|
+
1. Building a **global** class-name index across ALL projects.
|
|
8
|
+
2. Scanning every method's signature and return type for class names that
|
|
9
|
+
belong to a DIFFERENT project.
|
|
10
|
+
3. Creating CALLS edges between the referencing method and the methods of
|
|
11
|
+
the referenced class.
|
|
12
|
+
|
|
13
|
+
Two linking strategies are applied:
|
|
14
|
+
|
|
15
|
+
Strategy A — Name + arity match (confidence 0.7)
|
|
16
|
+
The referencing method M_src itself has the same name AND
|
|
17
|
+
parameter count as a method M_dst in the referenced class. This catches
|
|
18
|
+
delegation, interface-implementation forwarding, and adapter patterns.
|
|
19
|
+
|
|
20
|
+
Strategy B — Type-reference fallback (confidence 0.4)
|
|
21
|
+
For every *non-private, non-constructor* method in the referenced class that
|
|
22
|
+
received NO name-match edge, create ONE low-confidence edge from the
|
|
23
|
+
referencing method. This prevents methods that are genuinely used
|
|
24
|
+
cross-module from appearing as dead code.
|
|
25
|
+
"""
|
|
26
|
+
from __future__ import annotations
|
|
27
|
+
|
|
28
|
+
import logging
|
|
29
|
+
import re
|
|
30
|
+
from collections import defaultdict
|
|
31
|
+
|
|
32
|
+
LOGGER = logging.getLogger(__name__)
|
|
33
|
+
|
|
34
|
+
# Very short class names produce too many false-positive matches when scanned
|
|
35
|
+
# as substrings of method signatures. Skip names ≤ this length.
|
|
36
|
+
_MIN_CLASS_NAME_LEN = 4
|
|
37
|
+
|
|
38
|
+
# Regex to split a Java signature into word tokens (class names, keywords, etc.)
|
|
39
|
+
_TOKEN_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _param_count(sig: str) -> int:
    """Count the parameters declared in a method signature string.

    The parameter list is taken as the text between the first ``(`` and the
    last ``)``.  Only commas at nesting depth zero separate parameters, so
    commas inside generic arguments (``Map<String, Integer> m``), annotation
    arguments (``@Ann(a, b) int x``) or array/brace literals are ignored.

    Returns 0 for an empty string, a string without both parentheses, or an
    empty parameter list.
    """
    if not sig:
        return 0
    open_idx = sig.find("(")
    close_idx = sig.rfind(")")
    if open_idx == -1 or close_idx == -1:
        return 0

    arg_str = sig[open_idx + 1:close_idx]
    if not arg_str.strip():
        return 0

    depth = 0
    count = 1
    for ch in arg_str:
        if ch in "<([{":
            depth += 1
        elif ch in ">)]}":
            # Clamp at zero so a stray closer cannot hide later separators.
            depth = max(depth - 1, 0)
        elif ch == "," and depth == 0:
            count += 1
    return count
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
    """Create CALLS edges between methods in different projects.

    A source method is linked to a class of another project when that class's
    simple name appears as a word token in the source method's signature or
    return type.  For each such class:

    * Strategy A: every destination method whose name and parameter count
      equal those of the source method gets an edge with confidence 0.7.
    * Strategy B: every remaining destination method that is neither a
      constructor nor ``private`` gets an edge with confidence 0.4.

    Args:
        store: Graph store exposing ``query_records(query, params)`` and
            ``add_call(src_id, dst_id, confidence, reason)``.
        project_ids: Projects to link.  When ``None``, all ``Project`` node
            ids are read from the store.

    Returns:
        The number of ``add_call`` invocations that completed without raising.
    """
    if project_ids is None:
        proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
        project_ids = [r["id"] for r in proj_recs]

    if len(project_ids) < 2:
        LOGGER.info(
            "Only %d project(s) indexed — skipping cross-module linking.",
            len(project_ids),
        )
        return 0

    # ── 1. Global class index ─────────────────────────────────────────
    all_classes = store.query_records(
        """
        MATCH (c:Class), (f:File)
        WHERE c.file_id = f.id
        RETURN c.id as cid, c.name as name, c.fqcn as fqcn, f.project_id as pid
        """
    )

    # class_name → [(class_id, project_id)]
    name_to_classes: dict[str, list[tuple[str, str]]] = defaultdict(list)
    # project_id → class names long enough to be scanned for
    classes_per_project: dict[str, set[str]] = defaultdict(set)
    for c in all_classes:
        cname = c["name"]
        name_to_classes[cname].append((c["cid"], c["pid"]))
        # Guard against a missing name before measuring its length.
        if cname and len(cname) > _MIN_CLASS_NAME_LEN:
            classes_per_project[c["pid"]].add(cname)

    # ── 2. Destination-method cache ───────────────────────────────────
    # The method list of a class depends only on the class id, so fetch it
    # once and keep each row together with its pre-computed arity.
    dst_cache: dict[str, list[tuple[dict, int]]] = {}

    def _dst_methods(cid: str) -> list[tuple[dict, int]]:
        cached = dst_cache.get(cid)
        if cached is None:
            rows = store.query_records(
                """MATCH (m:Method) WHERE m.class_id = $cid
                   RETURN m.id as mid, m.name as name, m.signature as sig,
                          m.modifiers as modifiers, m.is_constructor as is_ctor""",
                {"cid": cid},
            )
            cached = [(row, _param_count(row.get("sig") or "")) for row in rows]
            dst_cache[cid] = cached
        return cached

    # ── 3. Scan methods for cross-project type references ─────────────
    new_edges = 0
    seen: set[tuple[str, str]] = set()

    for src_pid in project_ids:
        # Class names that belong to any OTHER project in the list.
        other_class_names: set[str] = set()
        for other_pid in project_ids:
            if other_pid != src_pid:
                other_class_names |= classes_per_project.get(other_pid, set())

        if not other_class_names:
            continue

        src_methods = store.query_records(
            """
            MATCH (m:Method), (c:Class), (f:File)
            WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
            RETURN m.id as mid, m.name as name, m.signature as sig,
                   m.return_type as rtype, c.id as cid
            """,
            {"pid": src_pid},
        )

        for sm in src_methods:
            sig = sm.get("sig") or ""
            rtype = sm.get("rtype") or ""
            tokens = set(_TOKEN_RE.findall(sig + " " + rtype))
            matched_class_names = tokens & other_class_names
            if not matched_class_names:
                continue

            sm_mid = sm["mid"]
            sm_name = sm["name"]
            sm_pc = _param_count(sig)

            for class_name in matched_class_names:
                for dst_cid, dst_pid in name_to_classes.get(class_name, []):
                    if dst_pid == src_pid:
                        continue  # same project — not cross-module

                    dst_methods = _dst_methods(dst_cid)
                    if not dst_methods:
                        continue

                    # Strategy A: name + arity match
                    matched_dst_mids: set[str] = set()
                    for dm, dm_pc in dst_methods:
                        if dm["name"] != sm_name or dm_pc != sm_pc:
                            continue
                        pair = (sm_mid, dm["mid"])
                        if pair not in seen:
                            # Marked before the attempt, so a failed edge is
                            # not retried for the same pair.
                            seen.add(pair)
                            try:
                                store.add_call(
                                    sm_mid, dm["mid"],
                                    0.7, "cross_module_name_match",
                                )
                                new_edges += 1
                            except Exception as exc:
                                LOGGER.debug("Name-match edge failed: %s", exc)
                        matched_dst_mids.add(dm["mid"])

                    # Strategy B: fallback for unmatched non-private dst methods
                    for dm, _dm_pc in dst_methods:
                        if dm["mid"] in matched_dst_mids:
                            continue
                        if dm.get("is_ctor"):
                            continue
                        mods = dm.get("modifiers") or []
                        mod_strs = {str(m).strip() for m in mods} if mods else set()
                        if "private" in mod_strs:
                            continue

                        pair = (sm_mid, dm["mid"])
                        if pair in seen:
                            continue
                        seen.add(pair)
                        try:
                            store.add_call(
                                sm_mid, dm["mid"],
                                0.4, "cross_module_type_ref",
                            )
                            new_edges += 1
                        except Exception as exc:
                            LOGGER.debug("Fallback edge failed: %s", exc)

    LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
    return new_edges
|
|
@@ -17,7 +17,7 @@ from codespine.db.schema import ensure_schema
|
|
|
17
17
|
|
|
18
18
|
LOGGER = logging.getLogger(__name__)
|
|
19
19
|
|
|
20
|
-
_BUFFER_POOL_SIZE =
|
|
20
|
+
_BUFFER_POOL_SIZE = 512 * 1024 * 1024 # 512 MB – room for large community detection
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
@dataclass
|
|
@@ -298,15 +298,23 @@ class GraphStore:
|
|
|
298
298
|
)
|
|
299
299
|
self.execute(query, {"src_id": src_id, "dst_id": dst_id, "confidence": confidence})
|
|
300
300
|
|
|
301
|
+
def _recycle_conn(self) -> None:
    """Drop the current thread's cached connection, if there is one.

    Only the reference held in ``self._tls.conn`` is cleared here; nothing is
    re-opened in this method.  NOTE(review): a fresh connection is presumably
    created lazily by whatever accessor populates ``self._tls.conn`` — confirm.
    """
    # Explicit guards replace a blanket ``except Exception: pass``: a store
    # without thread-local state, or a thread that never opened a connection,
    # is simply a no-op.
    tls = getattr(self, "_tls", None)
    if tls is not None and getattr(tls, "conn", None) is not None:
        tls.conn = None
|
|
308
|
+
|
|
301
309
|
def set_community(self, community_id: str, label: str, cohesion: float, symbol_ids: list[str]) -> None:
|
|
302
310
|
self.execute(
|
|
303
311
|
"MERGE (c:Community {id: $id}) SET c.label = $label, c.cohesion = $cohesion",
|
|
304
312
|
{"id": community_id, "label": label, "cohesion": cohesion},
|
|
305
313
|
)
|
|
306
|
-
# Commit in batches of
|
|
307
|
-
# communities.
|
|
308
|
-
#
|
|
309
|
-
_BATCH =
|
|
314
|
+
# Commit in batches of 500 to keep Kuzu's buffer pool from OOMing on
|
|
315
|
+
# large communities. After each batch, recycle the connection so Kuzu
|
|
316
|
+
# can release buffer pages accumulated during the transaction.
|
|
317
|
+
_BATCH = 500
|
|
310
318
|
for i in range(0, len(symbol_ids), _BATCH):
|
|
311
319
|
batch = symbol_ids[i : i + _BATCH]
|
|
312
320
|
with self.transaction():
|
|
@@ -315,17 +323,24 @@ class GraphStore:
|
|
|
315
323
|
"MATCH (s:Symbol {id: $sid}), (c:Community {id: $cid}) MERGE (s)-[:IN_COMMUNITY]->(c)",
|
|
316
324
|
{"sid": sid, "cid": community_id},
|
|
317
325
|
)
|
|
326
|
+
# Recycle connection after each batch to let Kuzu free buffer pages
|
|
327
|
+
self._recycle_conn()
|
|
318
328
|
|
|
319
329
|
def set_flow(self, flow_id: str, entry_symbol_id: str, kind: str, symbols_at_depth: list[tuple[str, int]]) -> None:
    """Upsert a Flow node and attach symbols to it with their depth.

    Args:
        flow_id: Id of the Flow node to merge.
        entry_symbol_id: Stored on the node as ``entry_symbol_id``.
        kind: Stored on the node as ``kind``.
        symbols_at_depth: ``(symbol_id, depth)`` pairs; each becomes an
            ``IN_FLOW`` relationship whose ``depth`` is coerced with ``int``.
    """
    flow_params = {"id": flow_id, "entry": entry_symbol_id, "kind": kind}
    self.execute(
        "MERGE (f:Flow {id: $id}) SET f.entry_symbol_id = $entry, f.kind = $kind",
        flow_params,
    )

    # Symbols are linked in chunks of 500, one transaction per chunk, and the
    # connection is recycled once each chunk's transaction has finished.
    chunk_size = 500
    link_query = "MATCH (s:Symbol {id: $sid}), (f:Flow {id: $fid}) MERGE (s)-[:IN_FLOW {depth: $depth}]->(f)"
    total = len(symbols_at_depth)
    offset = 0
    while offset < total:
        chunk = symbols_at_depth[offset : offset + chunk_size]
        with self.transaction():
            for symbol_id, level in chunk:
                self.execute(
                    link_query,
                    {"sid": symbol_id, "fid": flow_id, "depth": int(level)},
                )
        self._recycle_conn()
        offset += chunk_size
|
|
329
344
|
|
|
330
345
|
def upsert_coupling(self, file_a: str, file_b: str, strength: float, cochanges: int, months: int) -> None:
|
|
331
346
|
self.execute(
|
|
@@ -1,173 +0,0 @@
|
|
|
1
|
-
"""Cross-module call edge linker.
|
|
2
|
-
|
|
3
|
-
After all modules in a workspace have been individually indexed, each module's
|
|
4
|
-
call resolver only sees methods within that module. This module fills the gap
|
|
5
|
-
by scanning the graph for cross-project class references (REFERENCES_TYPE and
|
|
6
|
-
IMPLEMENTS edges) and creating CALLS edges between methods where the call is
|
|
7
|
-
plausible.
|
|
8
|
-
|
|
9
|
-
Strategy A — Name + arity match (confidence 0.7)
|
|
10
|
-
If src_class references dst_class (cross-project) and both have a method
|
|
11
|
-
with the same name and same parameter count, create a CALLS edge. This
|
|
12
|
-
catches delegation, interface-implementation forwarding, and adapter
|
|
13
|
-
patterns.
|
|
14
|
-
|
|
15
|
-
Strategy B — Type-reference fallback (confidence 0.4)
|
|
16
|
-
For each *public* method in dst_class that received NO name-match edge,
|
|
17
|
-
create ONE low-confidence edge from a representative src method (preferring
|
|
18
|
-
one with zero outgoing calls). This prevents methods that are genuinely
|
|
19
|
-
used cross-module from appearing as dead code.
|
|
20
|
-
"""
|
|
21
|
-
from __future__ import annotations
|
|
22
|
-
|
|
23
|
-
import logging
|
|
24
|
-
from collections import defaultdict
|
|
25
|
-
|
|
26
|
-
LOGGER = logging.getLogger(__name__)
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def _param_count(sig: str) -> int:
|
|
30
|
-
"""Count parameters from a method signature string."""
|
|
31
|
-
if not sig or "(" not in sig or ")" not in sig:
|
|
32
|
-
return 0
|
|
33
|
-
arg_str = sig[sig.find("(") + 1: sig.rfind(")")]
|
|
34
|
-
return 0 if not arg_str.strip() else arg_str.count(",") + 1
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
|
|
38
|
-
"""Create CALLS edges between methods in different projects.
|
|
39
|
-
|
|
40
|
-
Returns the number of new cross-module call edges created.
|
|
41
|
-
"""
|
|
42
|
-
if project_ids is None:
|
|
43
|
-
proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
|
|
44
|
-
project_ids = [r["id"] for r in proj_recs]
|
|
45
|
-
|
|
46
|
-
if len(project_ids) < 2:
|
|
47
|
-
LOGGER.info(
|
|
48
|
-
"Only %d project(s) indexed — skipping cross-module linking.",
|
|
49
|
-
len(project_ids),
|
|
50
|
-
)
|
|
51
|
-
return 0
|
|
52
|
-
|
|
53
|
-
# ── 1. Collect cross-project class pairs ──────────────────────────
|
|
54
|
-
ref_pairs = store.query_records(
|
|
55
|
-
"""
|
|
56
|
-
MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class), (sf:File), (df:File)
|
|
57
|
-
WHERE src.file_id = sf.id AND dst.file_id = df.id
|
|
58
|
-
AND sf.project_id <> df.project_id
|
|
59
|
-
RETURN DISTINCT src.id as src_cid, dst.id as dst_cid
|
|
60
|
-
"""
|
|
61
|
-
)
|
|
62
|
-
impl_pairs = store.query_records(
|
|
63
|
-
"""
|
|
64
|
-
MATCH (src:Class)-[:IMPLEMENTS]->(dst:Class), (sf:File), (df:File)
|
|
65
|
-
WHERE src.file_id = sf.id AND dst.file_id = df.id
|
|
66
|
-
AND sf.project_id <> df.project_id
|
|
67
|
-
RETURN DISTINCT src.id as src_cid, dst.id as dst_cid
|
|
68
|
-
"""
|
|
69
|
-
)
|
|
70
|
-
|
|
71
|
-
all_pairs: set[tuple[str, str]] = set()
|
|
72
|
-
for p in ref_pairs:
|
|
73
|
-
all_pairs.add((p["src_cid"], p["dst_cid"]))
|
|
74
|
-
for p in impl_pairs:
|
|
75
|
-
all_pairs.add((p["src_cid"], p["dst_cid"]))
|
|
76
|
-
|
|
77
|
-
if not all_pairs:
|
|
78
|
-
LOGGER.info("No cross-project class references found.")
|
|
79
|
-
return 0
|
|
80
|
-
|
|
81
|
-
LOGGER.info(
|
|
82
|
-
"Cross-module: %d cross-project class pair(s) to process.",
|
|
83
|
-
len(all_pairs),
|
|
84
|
-
)
|
|
85
|
-
|
|
86
|
-
# ── 2. Process each class pair ────────────────────────────────────
|
|
87
|
-
new_edges = 0
|
|
88
|
-
seen: set[tuple[str, str]] = set()
|
|
89
|
-
|
|
90
|
-
for src_cid, dst_cid in all_pairs:
|
|
91
|
-
src_methods = store.query_records(
|
|
92
|
-
"""MATCH (m:Method) WHERE m.class_id = $cid
|
|
93
|
-
RETURN m.id as mid, m.name as name, m.signature as sig""",
|
|
94
|
-
{"cid": src_cid},
|
|
95
|
-
)
|
|
96
|
-
dst_methods = store.query_records(
|
|
97
|
-
"""MATCH (m:Method) WHERE m.class_id = $cid
|
|
98
|
-
RETURN m.id as mid, m.name as name, m.signature as sig,
|
|
99
|
-
m.modifiers as modifiers, m.is_constructor as is_ctor""",
|
|
100
|
-
{"cid": dst_cid},
|
|
101
|
-
)
|
|
102
|
-
if not src_methods or not dst_methods:
|
|
103
|
-
continue
|
|
104
|
-
|
|
105
|
-
# Build name → methods index for src class
|
|
106
|
-
src_by_name: dict[str, list[dict]] = defaultdict(list)
|
|
107
|
-
for sm in src_methods:
|
|
108
|
-
src_by_name[sm["name"]].append(sm)
|
|
109
|
-
|
|
110
|
-
# ── Strategy A: name + arity matching ─────────────────────────
|
|
111
|
-
matched_dst_mids: set[str] = set()
|
|
112
|
-
|
|
113
|
-
for dm in dst_methods:
|
|
114
|
-
dm_name = dm["name"]
|
|
115
|
-
dm_pc = _param_count(dm.get("sig") or "")
|
|
116
|
-
candidates = src_by_name.get(dm_name, [])
|
|
117
|
-
for sm in candidates:
|
|
118
|
-
sm_pc = _param_count(sm.get("sig") or "")
|
|
119
|
-
if sm_pc == dm_pc:
|
|
120
|
-
pair = (sm["mid"], dm["mid"])
|
|
121
|
-
if pair in seen:
|
|
122
|
-
matched_dst_mids.add(dm["mid"])
|
|
123
|
-
continue
|
|
124
|
-
seen.add(pair)
|
|
125
|
-
try:
|
|
126
|
-
store.add_call(
|
|
127
|
-
sm["mid"], dm["mid"], 0.7, "cross_module_name_match",
|
|
128
|
-
)
|
|
129
|
-
new_edges += 1
|
|
130
|
-
matched_dst_mids.add(dm["mid"])
|
|
131
|
-
except Exception as exc:
|
|
132
|
-
LOGGER.debug("Name-match edge failed: %s", exc)
|
|
133
|
-
|
|
134
|
-
# ── Strategy B: fallback for unmatched public dst methods ─────
|
|
135
|
-
# Find a representative caller: prefer src methods with 0 outgoing calls
|
|
136
|
-
fallback_src = None
|
|
137
|
-
for sm in src_methods:
|
|
138
|
-
out = store.query_records(
|
|
139
|
-
"MATCH (m:Method {id: $mid})-[:CALLS]->(:Method) RETURN count(*) as n",
|
|
140
|
-
{"mid": sm["mid"]},
|
|
141
|
-
)
|
|
142
|
-
if out and out[0]["n"] == 0:
|
|
143
|
-
fallback_src = sm
|
|
144
|
-
break
|
|
145
|
-
if fallback_src is None and src_methods:
|
|
146
|
-
fallback_src = src_methods[0]
|
|
147
|
-
|
|
148
|
-
if fallback_src:
|
|
149
|
-
for dm in dst_methods:
|
|
150
|
-
if dm["mid"] in matched_dst_mids:
|
|
151
|
-
continue
|
|
152
|
-
# Skip constructors and private methods
|
|
153
|
-
if dm.get("is_ctor"):
|
|
154
|
-
continue
|
|
155
|
-
mods = dm.get("modifiers") or []
|
|
156
|
-
mod_strs = {str(m).strip() for m in mods} if mods else set()
|
|
157
|
-
if "private" in mod_strs:
|
|
158
|
-
continue
|
|
159
|
-
|
|
160
|
-
pair = (fallback_src["mid"], dm["mid"])
|
|
161
|
-
if pair in seen:
|
|
162
|
-
continue
|
|
163
|
-
seen.add(pair)
|
|
164
|
-
try:
|
|
165
|
-
store.add_call(
|
|
166
|
-
fallback_src["mid"], dm["mid"], 0.4, "cross_module_type_ref",
|
|
167
|
-
)
|
|
168
|
-
new_edges += 1
|
|
169
|
-
except Exception as exc:
|
|
170
|
-
LOGGER.debug("Fallback edge failed: %s", exc)
|
|
171
|
-
|
|
172
|
-
LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
|
|
173
|
-
return new_edges
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|