codespine 0.4.3__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.4.3 → codespine-0.5.0}/PKG-INFO +1 -1
- {codespine-0.4.3 → codespine-0.5.0}/codespine/__init__.py +1 -1
- codespine-0.5.0/codespine/analysis/crossmodule.py +230 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/deadcode.py +73 -27
- {codespine-0.4.3 → codespine-0.5.0}/codespine/cli.py +11 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/mcp/server.py +275 -14
- {codespine-0.4.3 → codespine-0.5.0}/codespine/search/hybrid.py +30 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.4.3 → codespine-0.5.0}/codespine.egg-info/SOURCES.txt +1 -0
- {codespine-0.4.3 → codespine-0.5.0}/pyproject.toml +1 -1
- {codespine-0.4.3 → codespine-0.5.0}/LICENSE +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/README.md +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/__init__.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/community.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/context.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/coupling.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/flow.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/impact.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/config.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/db/__init__.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/db/schema.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/db/store.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/diff/__init__.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/indexer/__init__.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/indexer/engine.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/mcp/__init__.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/noise/__init__.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/noise/blocklist.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/search/__init__.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/search/bm25.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/search/fuzzy.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/search/rrf.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/search/vector.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/watch/__init__.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine/watch/watcher.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/gindex.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/setup.cfg +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/tests/test_call_resolver.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/tests/test_java_parser.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/tests/test_multimodule_index.py +0 -0
- {codespine-0.4.3 → codespine-0.5.0}/tests/test_search_ranking.py +0 -0
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
"""Cross-module call edge linker.
|
|
2
|
+
|
|
3
|
+
After all modules in a workspace have been individually indexed, each module's
|
|
4
|
+
call resolver only sees methods within that module. This module fills the gap
|
|
5
|
+
by scanning the graph for unresolved outgoing calls from one module that match
|
|
6
|
+
method signatures in another module, then creating CALLS edges between them.
|
|
7
|
+
|
|
8
|
+
The algorithm:
|
|
9
|
+
1. Build a global method catalog (method_id → name, param_count, class_fqcn)
|
|
10
|
+
from the DB across ALL projects.
|
|
11
|
+
2. Build a per-project import map: for each file, record which FQCNs are
|
|
12
|
+
imported (from the class nodes + extends/implements relations).
|
|
13
|
+
3. For each method M in project A, find its outgoing calls that did NOT
|
|
14
|
+
resolve to any target. These are method invocations that tree-sitter
|
|
15
|
+
parsed but call_resolver.py could not match (because the target was in a
|
|
16
|
+
different module).
|
|
17
|
+
4. For each unresolved call, use the file's import list + the global class
|
|
18
|
+
catalog to find candidate target methods in OTHER projects.
|
|
19
|
+
5. Create CALLS edges with confidence 0.6 and reason "cross_module_import".
|
|
20
|
+
|
|
21
|
+
Because ParsedCall data is transient (not stored in the DB), we use a simpler
|
|
22
|
+
heuristic: find methods in module A that have ZERO outgoing CALLS edges but
|
|
23
|
+
are known to reference classes from other modules (via REFERENCES_TYPE or
|
|
24
|
+
import analysis). Then attempt to link them by matching method names against
|
|
25
|
+
the global catalog.
|
|
26
|
+
|
|
27
|
+
A faster fallback strategy (implemented below):
|
|
28
|
+
- Collect all class FQCNs per project.
|
|
29
|
+
- For each project pair (A, B), find classes in A that IMPLEMENT/extend
|
|
30
|
+
classes in B — these already have edges.
|
|
31
|
+
- For method-level cross-module calls: scan for methods with 0 outgoing
|
|
32
|
+
edges, match their name+arity against methods in other projects, and
|
|
33
|
+
only link when the target class is imported (appears in the same file's
|
|
34
|
+
import set via REFERENCES_TYPE edges).
|
|
35
|
+
"""
|
|
36
|
+
from __future__ import annotations
|
|
37
|
+
|
|
38
|
+
import logging
|
|
39
|
+
from collections import defaultdict
|
|
40
|
+
|
|
41
|
+
LOGGER = logging.getLogger(__name__)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _param_count(signature: str | None) -> int:
    """Return the number of parameters declared in a method signature string.

    The parameter list is taken to be the text between the first ``(`` and the
    last ``)``. Only commas at the top nesting level are counted, so a generic
    type such as ``Map<String, Integer>`` or an annotation argument list counts
    as part of a single parameter.

    Returns 0 when the signature is empty, has no parenthesised parameter list,
    or the list is blank.
    """
    sig = signature or ""
    start = sig.find("(")
    end = sig.rfind(")")
    if start == -1 or end <= start:
        return 0
    args = sig[start + 1:end]
    if not args.strip():
        return 0

    depth = 0
    count = 1
    for ch in args:
        if ch in "<([":
            depth += 1
        elif ch in ">)]":
            depth = max(0, depth - 1)
        elif ch == "," and depth == 0:
            count += 1
    return count


def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
    """Create CALLS edges between methods in different projects.

    For every pair of classes joined by a REFERENCES_TYPE edge whose files
    belong to different projects, each method of the referencing class is
    linked to each method of the referenced class whose (name, arity) is
    unique within that class. Edges are written with confidence 0.6 and
    reason ``"cross_module_import"``.

    Parameters:
        store       – Graph store exposing ``query_records(query, params)``
                      and ``add_call(src_id, dst_id, confidence, reason)``.
        project_ids – Projects to link between. When None, every Project
                      node in the store is used. Class pairs whose source or
                      destination project is not in this list are ignored.

    Returns the number of new cross-module call edges created. Returns 0
    without querying the graph further when fewer than two projects are given.
    """
    if project_ids is None:
        proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
        project_ids = [r["id"] for r in proj_recs]

    if len(project_ids) < 2:
        LOGGER.info("Only %d project(s) indexed — skipping cross-module linking.", len(project_ids))
        return 0

    scope = set(project_ids)

    # Class-level references that cross a project boundary.
    xmod_class_pairs = store.query_records(
        """
        MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class), (sf:File), (df:File)
        WHERE src.file_id = sf.id AND dst.file_id = df.id
          AND sf.project_id <> df.project_id
        RETURN src.id as src_cid, dst.id as dst_cid,
               sf.project_id as src_pid, df.project_id as dst_pid
        """
    )

    methods_query = (
        "MATCH (m:Method) WHERE m.class_id = $cid "
        "RETURN m.id as mid, m.name as name, m.signature as sig"
    )
    methods_cache: dict[str, list[dict]] = {}
    targets_cache: dict[str, list[str]] = {}

    def class_methods(class_id: str) -> list[dict]:
        # A class usually takes part in many pairs; fetch its methods only once.
        if class_id not in methods_cache:
            methods_cache[class_id] = list(store.query_records(methods_query, {"cid": class_id}))
        return methods_cache[class_id]

    def unique_targets(class_id: str) -> list[str]:
        # Method ids whose (name, arity) is unambiguous inside the class.
        # Overloads sharing both name and arity are skipped because there is
        # no call-site information to choose between them.
        if class_id not in targets_cache:
            by_name_arity: dict[tuple[str, int], list[str]] = defaultdict(list)
            for dm in class_methods(class_id):
                by_name_arity[(dm["name"], _param_count(dm.get("sig")))].append(dm["mid"])
            targets_cache[class_id] = [ids[0] for ids in by_name_arity.values() if len(ids) == 1]
        return targets_cache[class_id]

    new_edges = 0
    # Only pairs for which an edge write was actually attempted are recorded
    # here, so a pair is never suppressed without having been linked.
    seen_pairs: set[tuple[str, str]] = set()

    for pair in xmod_class_pairs:
        if pair["src_pid"] not in scope or pair["dst_pid"] not in scope:
            continue

        targets = unique_targets(pair["dst_cid"])
        if not targets:
            continue

        # NOTE(review): parsed call sites are not available here, so every
        # source method is linked to every unambiguous destination method.
        # No name/signature filtering is applied — confirm this breadth is
        # acceptable for downstream impact and dead-code analysis.
        for sm in class_methods(pair["src_cid"]):
            for dst_mid in targets:
                edge_pair = (sm["mid"], dst_mid)
                if edge_pair in seen_pairs:
                    continue
                seen_pairs.add(edge_pair)
                try:
                    store.add_call(sm["mid"], dst_mid, 0.6, "cross_module_import")
                    new_edges += 1
                except Exception as exc:
                    # Best effort: one failed edge must not abort the whole pass.
                    LOGGER.debug("Cross-module edge failed: %s", exc)

    LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
    return new_edges
|
|
@@ -74,11 +74,38 @@ def _modifier_tokens(modifiers) -> set[str]:
|
|
|
74
74
|
return {str(m).strip() for m in modifiers}
|
|
75
75
|
|
|
76
76
|
|
|
77
|
-
def
|
|
77
|
+
def _assign_confidence(candidate: dict, strict: bool) -> str:
    """Rate how confident we are that a caller-less method is really dead.

    The rating is one of ``"high"``, ``"medium"`` or ``"low"``:
      - strict mode: always ``"high"``; visibility is not inspected.
      - ``private`` modifier present: ``"high"``.
      - ``public`` modifier present (and not private): ``"low"`` — such a
        method could still be reached via reflection or an external JAR.
      - anything else (protected / package-private): ``"medium"``.
    """
    if not strict:
        modifiers = _modifier_tokens(candidate.get("modifiers"))
        # Order matters: "private" is checked before "public".
        for keyword, level in (("private", "high"), ("public", "low")):
            if keyword in modifiers:
                return level
        return "medium"
    return "high"
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def detect_dead_code(store, limit: int = 200, project: str | None = None, strict: bool = False) -> list[dict] | None:
|
|
78
98
|
"""Java-aware dead code detection with exemption passes.
|
|
79
99
|
|
|
100
|
+
Parameters:
|
|
101
|
+
limit – Max results to return.
|
|
102
|
+
project – Scope to a single module.
|
|
103
|
+
strict – When True, only exempt main()/@Test methods and explicit
|
|
104
|
+
entry-point annotations. Skips the broad bean-getter/setter,
|
|
105
|
+
contract-method, and constructor exemptions.
|
|
106
|
+
|
|
80
107
|
Returns a list of dead method dicts, each with:
|
|
81
|
-
method_id, name, signature, class_fqcn, file_path, reason.
|
|
108
|
+
method_id, name, signature, class_fqcn, file_path, reason, confidence.
|
|
82
109
|
|
|
83
110
|
The return value is augmented with a ``_stats`` entry (a sentinel dict
|
|
84
111
|
with key ``_stats``) containing pre/post-exemption counts so callers can
|
|
@@ -128,27 +155,34 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None) -> lis
|
|
|
128
155
|
n_candidates = len(candidates)
|
|
129
156
|
exempt: set[str] = set()
|
|
130
157
|
|
|
131
|
-
#
|
|
158
|
+
# Minimal exemptions (apply in both normal and strict mode)
|
|
132
159
|
for c in candidates:
|
|
133
160
|
sig = (c.get("signature") or "").lower()
|
|
134
161
|
name = c.get("name") or ""
|
|
135
162
|
mods = _modifier_tokens(c.get("modifiers"))
|
|
136
|
-
|
|
137
|
-
|
|
163
|
+
|
|
164
|
+
# Always exempt test methods and main()
|
|
138
165
|
if c.get("is_test"):
|
|
139
166
|
exempt.add(c["method_id"])
|
|
140
167
|
if name == "main" and "string[]" in sig:
|
|
141
168
|
exempt.add(c["method_id"])
|
|
142
|
-
|
|
143
|
-
|
|
169
|
+
|
|
170
|
+
# Always exempt explicit entry-point annotations (@Test, @RequestMapping, etc.)
|
|
144
171
|
if any(m.lstrip("@") in EXEMPT_ANNOTATIONS for m in mods):
|
|
145
172
|
exempt.add(c["method_id"])
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
173
|
+
|
|
174
|
+
# Broad exemptions (only in normal mode, skipped in strict mode)
|
|
175
|
+
if not strict:
|
|
176
|
+
if c.get("is_constructor"):
|
|
177
|
+
exempt.add(c["method_id"])
|
|
178
|
+
if name in EXEMPT_CONTRACT_METHODS:
|
|
179
|
+
exempt.add(c["method_id"])
|
|
180
|
+
# Java bean-ish APIs often rely on reflection/serialization.
|
|
181
|
+
if "public" in mods and (name.startswith("get") or name.startswith("set") or name.startswith("is")):
|
|
182
|
+
exempt.add(c["method_id"])
|
|
183
|
+
# Reflection-style hooks
|
|
184
|
+
if name in {"valueOf", "fromString", "builder"}:
|
|
185
|
+
exempt.add(c["method_id"])
|
|
152
186
|
|
|
153
187
|
# Exempt methods that DIRECTLY override another method (precise: only the
|
|
154
188
|
# specific overriding method is exempted, not the entire implementing class).
|
|
@@ -156,13 +190,16 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None) -> lis
|
|
|
156
190
|
# because that would exempt ALL methods of every class that implements ANY
|
|
157
191
|
# interface — in a typical Spring project that wipes out almost everything
|
|
158
192
|
# and produces 0 dead code results.
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
193
|
+
# In strict mode, overrides are NOT exempted — if nobody calls the method,
|
|
194
|
+
# it's flagged regardless of whether it overrides a parent.
|
|
195
|
+
if not strict:
|
|
196
|
+
override_methods = store.query_records(
|
|
197
|
+
"""
|
|
198
|
+
MATCH (m:Method)-[:OVERRIDES]->(:Method)
|
|
199
|
+
RETURN DISTINCT m.id as method_id
|
|
200
|
+
"""
|
|
201
|
+
)
|
|
202
|
+
exempt.update(r["method_id"] for r in override_methods)
|
|
166
203
|
|
|
167
204
|
dead = []
|
|
168
205
|
for c in candidates:
|
|
@@ -175,6 +212,7 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None) -> lis
|
|
|
175
212
|
"signature": c.get("signature"),
|
|
176
213
|
"class_fqcn": c.get("class_fqcn"),
|
|
177
214
|
"file_path": c.get("file_path"),
|
|
215
|
+
"confidence": _assign_confidence(c, strict),
|
|
178
216
|
"reason": "no_incoming_calls_after_exemptions",
|
|
179
217
|
}
|
|
180
218
|
)
|
|
@@ -184,18 +222,26 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None) -> lis
|
|
|
184
222
|
# Append stats as a sentinel entry so the MCP layer can surface them
|
|
185
223
|
# without changing the return type. Callers should strip entries that
|
|
186
224
|
# have a "_stats" key when iterating over method results.
|
|
225
|
+
if strict:
|
|
226
|
+
exemption_note = (
|
|
227
|
+
"STRICT MODE: Only test methods, main(), and explicit entry-point "
|
|
228
|
+
"annotations are exempted. Constructors, getters/setters, "
|
|
229
|
+
"contract methods, and overrides are NOT exempt."
|
|
230
|
+
)
|
|
231
|
+
else:
|
|
232
|
+
exemption_note = (
|
|
233
|
+
"Exemptions cover: constructors, test methods, main(), "
|
|
234
|
+
"toString/hashCode/equals/compareTo, public getters/setters, "
|
|
235
|
+
"methods with DI/framework annotations, and direct method overrides. "
|
|
236
|
+
"Use strict=True for minimal exemptions."
|
|
237
|
+
)
|
|
187
238
|
result.append({
|
|
188
239
|
"_stats": {
|
|
189
240
|
"candidates_with_no_callers": n_candidates,
|
|
190
241
|
"exempted": len(exempt),
|
|
191
242
|
"dead_returned": len(result),
|
|
192
|
-
"
|
|
193
|
-
|
|
194
|
-
"toString/hashCode/equals/compareTo, public getters/setters, "
|
|
195
|
-
"methods with DI/framework annotations, and direct method overrides. "
|
|
196
|
-
"The class-level IMPLEMENTS exemption has been removed — only "
|
|
197
|
-
"methods with direct OVERRIDES relations are now exempted."
|
|
198
|
-
),
|
|
243
|
+
"mode": "strict" if strict else "normal",
|
|
244
|
+
"note": exemption_note,
|
|
199
245
|
}
|
|
200
246
|
})
|
|
201
247
|
|
|
@@ -14,6 +14,7 @@ import psutil
|
|
|
14
14
|
from codespine.analysis.community import detect_communities, symbol_community
|
|
15
15
|
from codespine.analysis.context import build_symbol_context
|
|
16
16
|
from codespine.analysis.coupling import compute_coupling, get_coupling
|
|
17
|
+
from codespine.analysis.crossmodule import link_cross_module_calls
|
|
17
18
|
from codespine.analysis.deadcode import detect_dead_code
|
|
18
19
|
from codespine.analysis.flow import trace_execution_flows
|
|
19
20
|
from codespine.analysis.impact import analyze_impact
|
|
@@ -216,6 +217,16 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
|
|
|
216
217
|
elif parse_state["indexed"] < parse_state["total"]:
|
|
217
218
|
_phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
|
|
218
219
|
|
|
220
|
+
# ── Cross-module call linking ──────────────────────────────────────
|
|
221
|
+
# When multiple modules/projects are indexed, attempt to resolve call
|
|
222
|
+
# edges that span module boundaries using import + REFERENCES_TYPE info.
|
|
223
|
+
if is_multi and len(modules_with_ids) > 1:
|
|
224
|
+
xmod_pids = [pid for _, pid in modules_with_ids]
|
|
225
|
+
xmod_edges = link_cross_module_calls(store, project_ids=xmod_pids)
|
|
226
|
+
_phase("Cross-module linking...", f"{xmod_edges} cross-module call edges")
|
|
227
|
+
else:
|
|
228
|
+
_phase("Cross-module linking...", "skipped (single module)")
|
|
229
|
+
|
|
219
230
|
communities: list[dict] = []
|
|
220
231
|
flows: list[dict] = []
|
|
221
232
|
dead: list[dict] = []
|
|
@@ -48,6 +48,36 @@ def _no_symbols_response(note: str = "No symbols indexed. Run 'codespine analyse
|
|
|
48
48
|
return {"available": False, "note": note}
|
|
49
49
|
|
|
50
50
|
|
|
51
|
+
def _staleness_meta(store, response: dict, project: str | None = None) -> dict:
    """Inject index staleness metadata into a tool response.

    Looks up ``indexed_at`` for the given project, or — when no project is
    given — for the first Project ordered by ``indexed_at`` ascending, and
    compares it with the current time.

    Parameters:
        store    – Graph store exposing ``query_records(query, params)``.
        response – Response dict to annotate; it is mutated in place.
        project  – Optional project id to scope the lookup.

    Adds ``index_age_seconds`` when a non-zero timestamp is found, and
    ``stale_warning`` when that age exceeds one hour. This is best effort:
    if the lookup fails the response is returned unchanged and the failure
    is logged at debug level.

    Returns the same ``response`` object.
    """
    # Local imports keep this helper self-contained; the lookup previously
    # swallowed every error, which would also have hidden a missing import.
    import logging
    import time

    try:
        if project:
            recs = store.query_records(
                "MATCH (p:Project) WHERE p.id = $pid RETURN p.indexed_at as ts",
                {"pid": project},
            )
        else:
            recs = store.query_records(
                "MATCH (p:Project) RETURN p.indexed_at as ts ORDER BY p.indexed_at ASC LIMIT 1"
            )
        ts = int(recs[0].get("ts") or 0) if recs else 0
    except Exception as exc:
        # Staleness info is optional; never let it break the actual tool result.
        logging.getLogger(__name__).debug("Index staleness lookup failed: %s", exc)
        return response

    if not ts:
        return response

    # presumably indexed_at is stored as epoch seconds — verify against the indexer
    age = int(time.time()) - ts
    response["index_age_seconds"] = age
    if age > 3600:
        response["stale_warning"] = (
            f"Index is {age // 3600}h {(age % 3600) // 60}m old. "
            "Run analyse_project() or start_watch() to refresh."
        )
    return response
|
|
79
|
+
|
|
80
|
+
|
|
51
81
|
def build_mcp_server(store, repo_path_provider):
|
|
52
82
|
mcp = FastMCP("codespine")
|
|
53
83
|
|
|
@@ -159,6 +189,8 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
159
189
|
"git_log": git_ok,
|
|
160
190
|
"git_diff": git_ok,
|
|
161
191
|
"compare_branches": git_ok,
|
|
192
|
+
"get_neighborhood": n_sym > 0,
|
|
193
|
+
"reindex_file": True,
|
|
162
194
|
"watch_mode": True,
|
|
163
195
|
"analyse_project": True,
|
|
164
196
|
},
|
|
@@ -235,7 +267,7 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
235
267
|
results = hybrid_search(store, query, k=k, project=project)
|
|
236
268
|
if not results:
|
|
237
269
|
return _no_symbols_response()
|
|
238
|
-
return {"available": True, "results": results}
|
|
270
|
+
return _staleness_meta(store, {"available": True, "results": results}, project)
|
|
239
271
|
|
|
240
272
|
@mcp.tool()
|
|
241
273
|
def get_impact(symbol: str, max_depth: int = 4, project: str | None = None):
|
|
@@ -246,20 +278,30 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
246
278
|
result = analyze_impact(store, symbol, max_depth=max_depth, project=project)
|
|
247
279
|
if not result.get("targets_resolved"):
|
|
248
280
|
return {"available": False, "note": f"Symbol '{symbol}' not found in the index."}
|
|
249
|
-
return {"available": True, **result}
|
|
281
|
+
return _staleness_meta(store, {"available": True, **result}, project)
|
|
250
282
|
|
|
251
283
|
@mcp.tool()
|
|
252
|
-
def detect_dead_code(limit: int = 200, project: str | None = None):
|
|
284
|
+
def detect_dead_code(limit: int = 200, project: str | None = None, strict: bool = False):
|
|
253
285
|
"""
|
|
254
286
|
Detect methods with no incoming calls (after Java-aware exemptions).
|
|
255
287
|
Pass project to scope to a single module.
|
|
256
288
|
|
|
289
|
+
Parameters:
|
|
290
|
+
strict – When True, only main()/@Test and explicit entry-point
|
|
291
|
+
annotations are exempted. Constructors, getters/setters,
|
|
292
|
+
contract methods (toString, hashCode, equals), and method
|
|
293
|
+
overrides are NOT exempt. Use this for a thorough audit.
|
|
294
|
+
Each result includes a confidence level (high/medium/low):
|
|
295
|
+
high = private method, almost certainly dead
|
|
296
|
+
medium = package-private or protected
|
|
297
|
+
low = public method, could be called via reflection
|
|
298
|
+
|
|
257
299
|
Returns dead_code list, count, and an exemption_stats dict showing
|
|
258
300
|
how many candidates were found and how many were filtered out by the
|
|
259
301
|
exemption rules — useful for validating that the feature is working
|
|
260
302
|
even when the dead list is empty.
|
|
261
303
|
"""
|
|
262
|
-
raw = detect_dead_code_analysis(store, limit=limit, project=project)
|
|
304
|
+
raw = detect_dead_code_analysis(store, limit=limit, project=project, strict=strict)
|
|
263
305
|
if raw is None:
|
|
264
306
|
return _no_symbols_response()
|
|
265
307
|
|
|
@@ -272,12 +314,12 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
272
314
|
else:
|
|
273
315
|
dead.append(entry)
|
|
274
316
|
|
|
275
|
-
return {
|
|
317
|
+
return _staleness_meta(store, {
|
|
276
318
|
"available": True,
|
|
277
319
|
"dead_code": dead,
|
|
278
320
|
"count": len(dead),
|
|
279
321
|
"exemption_stats": stats,
|
|
280
|
-
}
|
|
322
|
+
}, project)
|
|
281
323
|
|
|
282
324
|
@mcp.tool()
|
|
283
325
|
def trace_execution_flows(entry_symbol: str | None = None, max_depth: int = 6, project: str | None = None):
|
|
@@ -288,7 +330,7 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
288
330
|
flows = trace_flows_analysis(store, entry_symbol=entry_symbol, max_depth=max_depth, project=project)
|
|
289
331
|
if not flows:
|
|
290
332
|
return _no_symbols_response("No entry points found. Run 'codespine analyse --deep' or provide entry_symbol.")
|
|
291
|
-
return {"available": True, "flows": flows}
|
|
333
|
+
return _staleness_meta(store, {"available": True, "flows": flows}, project)
|
|
292
334
|
|
|
293
335
|
@mcp.tool()
|
|
294
336
|
def get_symbol_community(symbol: str):
|
|
@@ -300,7 +342,7 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
300
342
|
result = symbol_community(store, symbol)
|
|
301
343
|
if not result.get("matches"):
|
|
302
344
|
return {"available": False, "note": "No community data yet. Run 'codespine analyse --deep'."}
|
|
303
|
-
return {"available": True, **result}
|
|
345
|
+
return _staleness_meta(store, {"available": True, **result})
|
|
304
346
|
|
|
305
347
|
@mcp.tool()
|
|
306
348
|
def get_change_coupling(
|
|
@@ -319,7 +361,7 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
319
361
|
"available": False,
|
|
320
362
|
"note": "No coupling data. Run 'codespine analyse --deep' with a git repository.",
|
|
321
363
|
}
|
|
322
|
-
return {"available": True, "coupling": result}
|
|
364
|
+
return _staleness_meta(store, {"available": True, "coupling": result})
|
|
323
365
|
|
|
324
366
|
@mcp.tool()
|
|
325
367
|
def get_symbol_context(query: str, max_depth: int = 3, project: str | None = None):
|
|
@@ -330,7 +372,7 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
330
372
|
result = build_symbol_context(store, query, max_depth=max_depth, project=project)
|
|
331
373
|
if not result.get("search_candidates"):
|
|
332
374
|
return _no_symbols_response()
|
|
333
|
-
return {"available": True, **result}
|
|
375
|
+
return _staleness_meta(store, {"available": True, **result}, project)
|
|
334
376
|
|
|
335
377
|
@mcp.tool()
|
|
336
378
|
def get_codebase_stats():
|
|
@@ -496,7 +538,7 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
496
538
|
by_project.setdefault(pid, {"classes": [], "methods": []})
|
|
497
539
|
by_project[pid]["methods"].append(m)
|
|
498
540
|
|
|
499
|
-
return {
|
|
541
|
+
return _staleness_meta(store, {
|
|
500
542
|
"available": True,
|
|
501
543
|
"query": name,
|
|
502
544
|
"total_matches": total,
|
|
@@ -505,7 +547,7 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
505
547
|
f"Found {total} match(es). If multiple projects contain the same name, "
|
|
506
548
|
"pass project=<project_id> to subsequent tools to avoid cross-project ambiguity."
|
|
507
549
|
) if total > 1 else None,
|
|
508
|
-
}
|
|
550
|
+
}, project)
|
|
509
551
|
|
|
510
552
|
@mcp.tool()
|
|
511
553
|
def list_packages(project: str | None = None, limit: int = 200):
|
|
@@ -548,11 +590,11 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
548
590
|
"class_count": r.get("class_count", 0),
|
|
549
591
|
})
|
|
550
592
|
|
|
551
|
-
return {
|
|
593
|
+
return _staleness_meta(store, {
|
|
552
594
|
"available": True,
|
|
553
595
|
"total_packages": len(recs),
|
|
554
596
|
"by_project": by_project,
|
|
555
|
-
}
|
|
597
|
+
}, project)
|
|
556
598
|
|
|
557
599
|
# ------------------------------------------------------------------
|
|
558
600
|
# Git tools
|
|
@@ -1007,6 +1049,225 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
1007
1049
|
),
|
|
1008
1050
|
}
|
|
1009
1051
|
|
|
1052
|
+
# ------------------------------------------------------------------
|
|
1053
|
+
# Neighborhood exploration
|
|
1054
|
+
# ------------------------------------------------------------------
|
|
1055
|
+
|
|
1056
|
+
@mcp.tool()
|
|
1057
|
+
def get_neighborhood(symbol: str, project: str | None = None):
|
|
1058
|
+
"""
|
|
1059
|
+
One-shot structural context for a symbol: callers (upstream), callees
|
|
1060
|
+
(downstream), sibling methods in the same class, and override /
|
|
1061
|
+
implements relationships.
|
|
1062
|
+
|
|
1063
|
+
This is the tool to call when you want to understand a method's
|
|
1064
|
+
immediate surroundings in the call graph without traversing the
|
|
1065
|
+
full impact tree.
|
|
1066
|
+
|
|
1067
|
+
Parameters:
|
|
1068
|
+
symbol – Method name, signature fragment, or fully-qualified name.
|
|
1069
|
+
project – Optional project_id to scope the symbol lookup.
|
|
1070
|
+
"""
|
|
1071
|
+
from codespine.analysis.impact import _resolve_method_metadata
|
|
1072
|
+
|
|
1073
|
+
project_clause = "AND f.project_id = $proj" if project else ""
|
|
1074
|
+
params: dict = {"q": symbol}
|
|
1075
|
+
if project:
|
|
1076
|
+
params["proj"] = project
|
|
1077
|
+
|
|
1078
|
+
# 1. Resolve the symbol to method IDs
|
|
1079
|
+
method_recs = store.query_records(
|
|
1080
|
+
f"""
|
|
1081
|
+
MATCH (m:Method), (c:Class), (f:File)
|
|
1082
|
+
WHERE m.class_id = c.id AND c.file_id = f.id {project_clause}
|
|
1083
|
+
AND (m.id = $q OR lower(m.name) = lower($q)
|
|
1084
|
+
OR lower(m.signature) CONTAINS lower($q))
|
|
1085
|
+
RETURN m.id as id, m.name as name, m.signature as signature,
|
|
1086
|
+
c.id as class_id, c.fqcn as class_fqcn,
|
|
1087
|
+
f.path as file_path, f.project_id as project_id
|
|
1088
|
+
LIMIT 5
|
|
1089
|
+
""",
|
|
1090
|
+
params,
|
|
1091
|
+
)
|
|
1092
|
+
if not method_recs:
|
|
1093
|
+
return {"available": False, "note": f"Symbol '{symbol}' not found. Try find_symbol or search_hybrid."}
|
|
1094
|
+
|
|
1095
|
+
target = method_recs[0]
|
|
1096
|
+
mid = target["id"]
|
|
1097
|
+
cid = target["class_id"]
|
|
1098
|
+
|
|
1099
|
+
# 2. Callers (upstream)
|
|
1100
|
+
callers = store.query_records(
|
|
1101
|
+
"""
|
|
1102
|
+
MATCH (caller:Method)-[r:CALLS]->(m:Method {id: $mid})
|
|
1103
|
+
RETURN caller.id as id, coalesce(r.confidence, 0.5) as confidence,
|
|
1104
|
+
coalesce(r.reason, 'unknown') as reason
|
|
1105
|
+
""",
|
|
1106
|
+
{"mid": mid},
|
|
1107
|
+
)
|
|
1108
|
+
|
|
1109
|
+
# 3. Callees (downstream)
|
|
1110
|
+
callees = store.query_records(
|
|
1111
|
+
"""
|
|
1112
|
+
MATCH (m:Method {id: $mid})-[r:CALLS]->(callee:Method)
|
|
1113
|
+
RETURN callee.id as id, coalesce(r.confidence, 0.5) as confidence,
|
|
1114
|
+
coalesce(r.reason, 'unknown') as reason
|
|
1115
|
+
""",
|
|
1116
|
+
{"mid": mid},
|
|
1117
|
+
)
|
|
1118
|
+
|
|
1119
|
+
# 4. Siblings (same class, excluding self)
|
|
1120
|
+
siblings = store.query_records(
|
|
1121
|
+
"""
|
|
1122
|
+
MATCH (m:Method)
|
|
1123
|
+
WHERE m.class_id = $cid AND m.id <> $mid
|
|
1124
|
+
RETURN m.id as id, m.name as name, m.signature as signature
|
|
1125
|
+
""",
|
|
1126
|
+
{"cid": cid, "mid": mid},
|
|
1127
|
+
)
|
|
1128
|
+
|
|
1129
|
+
# 5. Override / implements relationships
|
|
1130
|
+
overrides_up = store.query_records(
|
|
1131
|
+
"MATCH (m:Method {id: $mid})-[:OVERRIDES]->(parent:Method) RETURN parent.id as id",
|
|
1132
|
+
{"mid": mid},
|
|
1133
|
+
)
|
|
1134
|
+
overrides_down = store.query_records(
|
|
1135
|
+
"MATCH (child:Method)-[:OVERRIDES]->(m:Method {id: $mid}) RETURN child.id as id",
|
|
1136
|
+
{"mid": mid},
|
|
1137
|
+
)
|
|
1138
|
+
|
|
1139
|
+
# Bulk-resolve all referenced method IDs for human-readable output
|
|
1140
|
+
all_ids = (
|
|
1141
|
+
[c["id"] for c in callers]
|
|
1142
|
+
+ [c["id"] for c in callees]
|
|
1143
|
+
+ [o["id"] for o in overrides_up]
|
|
1144
|
+
+ [o["id"] for o in overrides_down]
|
|
1145
|
+
)
|
|
1146
|
+
meta = _resolve_method_metadata(store, all_ids) if all_ids else {}
|
|
1147
|
+
|
|
1148
|
+
def _enrich(items, extra_keys=None):
|
|
1149
|
+
enriched = []
|
|
1150
|
+
for item in items:
|
|
1151
|
+
m = meta.get(item["id"], {})
|
|
1152
|
+
entry = {
|
|
1153
|
+
"id": item["id"],
|
|
1154
|
+
"name": m.get("name") or item.get("name"),
|
|
1155
|
+
"fqname": m.get("fqname") or item.get("signature"),
|
|
1156
|
+
"class_fqcn": m.get("class_fqcn"),
|
|
1157
|
+
"file_path": m.get("file_path"),
|
|
1158
|
+
"project_id": m.get("project_id"),
|
|
1159
|
+
}
|
|
1160
|
+
if extra_keys:
|
|
1161
|
+
for k in extra_keys:
|
|
1162
|
+
if k in item:
|
|
1163
|
+
entry[k] = item[k]
|
|
1164
|
+
enriched.append(entry)
|
|
1165
|
+
return enriched
|
|
1166
|
+
|
|
1167
|
+
result = {
|
|
1168
|
+
"available": True,
|
|
1169
|
+
"target": {
|
|
1170
|
+
"id": mid,
|
|
1171
|
+
"name": target["name"],
|
|
1172
|
+
"signature": target["signature"],
|
|
1173
|
+
"class_fqcn": target["class_fqcn"],
|
|
1174
|
+
"file_path": target["file_path"],
|
|
1175
|
+
"project_id": target["project_id"],
|
|
1176
|
+
},
|
|
1177
|
+
"callers": _enrich(callers, extra_keys=["confidence", "reason"]),
|
|
1178
|
+
"callees": _enrich(callees, extra_keys=["confidence", "reason"]),
|
|
1179
|
+
"siblings": [
|
|
1180
|
+
{"name": s["name"], "signature": s["signature"]}
|
|
1181
|
+
for s in siblings
|
|
1182
|
+
],
|
|
1183
|
+
"overrides": _enrich(overrides_up),
|
|
1184
|
+
"overridden_by": _enrich(overrides_down),
|
|
1185
|
+
"summary": {
|
|
1186
|
+
"callers": len(callers),
|
|
1187
|
+
"callees": len(callees),
|
|
1188
|
+
"siblings": len(siblings),
|
|
1189
|
+
"overrides": len(overrides_up),
|
|
1190
|
+
"overridden_by": len(overrides_down),
|
|
1191
|
+
},
|
|
1192
|
+
}
|
|
1193
|
+
return _staleness_meta(store, result)
|
|
1194
|
+
|
|
1195
|
+
# ------------------------------------------------------------------
|
|
1196
|
+
# Single-file re-index
|
|
1197
|
+
# ------------------------------------------------------------------
|
|
1198
|
+
|
|
1199
|
+
@mcp.tool()
|
|
1200
|
+
def reindex_file(file_path: str, project: str | None = None):
|
|
1201
|
+
"""
|
|
1202
|
+
Incrementally re-index a single Java file (<1 s for typical files).
|
|
1203
|
+
|
|
1204
|
+
Use this after editing a file to immediately refresh the graph without
|
|
1205
|
+
waiting for watch mode or running a full analysis.
|
|
1206
|
+
|
|
1207
|
+
Parameters:
|
|
1208
|
+
file_path – Absolute path to the .java file.
|
|
1209
|
+
project – Optional project_id. If omitted, the tool infers the
|
|
1210
|
+
project by matching the file path against indexed projects.
|
|
1211
|
+
"""
|
|
1212
|
+
import os as _os
|
|
1213
|
+
|
|
1214
|
+
abs_fp = _os.path.abspath(file_path)
|
|
1215
|
+
if not _os.path.isfile(abs_fp) or not abs_fp.endswith(".java"):
|
|
1216
|
+
return {"available": False, "note": f"Not a valid .java file: {abs_fp}"}
|
|
1217
|
+
|
|
1218
|
+
# Resolve project from indexed projects if not given
|
|
1219
|
+
if not project:
|
|
1220
|
+
projects = store.query_records(
|
|
1221
|
+
"MATCH (p:Project) RETURN p.id as id, p.path as path"
|
|
1222
|
+
)
|
|
1223
|
+
for p in projects:
|
|
1224
|
+
if abs_fp.startswith(p["path"] + _os.sep):
|
|
1225
|
+
project = p["id"]
|
|
1226
|
+
break
|
|
1227
|
+
if not project:
|
|
1228
|
+
return {
|
|
1229
|
+
"available": False,
|
|
1230
|
+
"note": (
|
|
1231
|
+
"Cannot determine project for this file. "
|
|
1232
|
+
"Pass project=<project_id> explicitly."
|
|
1233
|
+
),
|
|
1234
|
+
}
|
|
1235
|
+
|
|
1236
|
+
# Find the project path to use as root for indexing
|
|
1237
|
+
proj_recs = store.query_records(
|
|
1238
|
+
"MATCH (p:Project) WHERE p.id = $pid RETURN p.path as path LIMIT 1",
|
|
1239
|
+
{"pid": project},
|
|
1240
|
+
)
|
|
1241
|
+
if not proj_recs:
|
|
1242
|
+
return {"available": False, "note": f"Project '{project}' not found in index."}
|
|
1243
|
+
|
|
1244
|
+
proj_path = proj_recs[0]["path"]
|
|
1245
|
+
|
|
1246
|
+
# Run incremental index via subprocess to avoid read-only DB constraint
|
|
1247
|
+
cmd = [
|
|
1248
|
+
sys.executable, "-m", "codespine.cli",
|
|
1249
|
+
"analyse", proj_path,
|
|
1250
|
+
"--incremental", "--no-embed", "--allow-running",
|
|
1251
|
+
]
|
|
1252
|
+
t0 = time.time()
|
|
1253
|
+
proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
|
|
1254
|
+
elapsed = round(time.time() - t0, 2)
|
|
1255
|
+
|
|
1256
|
+
if proc.returncode != 0:
|
|
1257
|
+
return {
|
|
1258
|
+
"available": False,
|
|
1259
|
+
"note": f"Re-index failed (code {proc.returncode})",
|
|
1260
|
+
"error": proc.stderr.strip() or proc.stdout.strip(),
|
|
1261
|
+
}
|
|
1262
|
+
|
|
1263
|
+
return {
|
|
1264
|
+
"available": True,
|
|
1265
|
+
"file": abs_fp,
|
|
1266
|
+
"project": project,
|
|
1267
|
+
"elapsed_s": elapsed,
|
|
1268
|
+
"note": f"Re-indexed project {project} incrementally in {elapsed}s.",
|
|
1269
|
+
}
|
|
1270
|
+
|
|
1010
1271
|
# ------------------------------------------------------------------
|
|
1011
1272
|
# Advanced / raw access
|
|
1012
1273
|
# ------------------------------------------------------------------
|
|
@@ -1,11 +1,31 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import os
|
|
4
|
+
|
|
3
5
|
from codespine.search.bm25 import rank_bm25
|
|
4
6
|
from codespine.search.fuzzy import rank_fuzzy
|
|
5
7
|
from codespine.search.rrf import reciprocal_rank_fusion
|
|
6
8
|
from codespine.search.vector import _load_model, rank_semantic
|
|
7
9
|
|
|
8
10
|
_LOW_CONFIDENCE_THRESHOLD = 0.05
|
|
11
|
+
_SNIPPET_CONTEXT_LINES = 2 # lines above and below the symbol declaration
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _read_snippet(file_path: str, line: int, context: int = _SNIPPET_CONTEXT_LINES) -> str | None:
|
|
15
|
+
"""Best-effort extraction of source lines around a symbol declaration."""
|
|
16
|
+
if not file_path or not line or line < 1:
|
|
17
|
+
return None
|
|
18
|
+
try:
|
|
19
|
+
if not os.path.isfile(file_path):
|
|
20
|
+
return None
|
|
21
|
+
with open(file_path, "r", encoding="utf-8", errors="replace") as fh:
|
|
22
|
+
all_lines = fh.readlines()
|
|
23
|
+
start = max(0, line - 1 - context)
|
|
24
|
+
end = min(len(all_lines), line + context)
|
|
25
|
+
snippet_lines = all_lines[start:end]
|
|
26
|
+
return "".join(snippet_lines).rstrip("\n")
|
|
27
|
+
except Exception:
|
|
28
|
+
return None
|
|
9
29
|
|
|
10
30
|
|
|
11
31
|
def hybrid_search(store, query: str, k: int = 20, project: str | None = None) -> list[dict]:
|
|
@@ -26,6 +46,7 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
|
|
|
26
46
|
s.name as name,
|
|
27
47
|
s.fqname as fqname,
|
|
28
48
|
s.embedding as embedding,
|
|
49
|
+
s.line as line,
|
|
29
50
|
f.path as file_path,
|
|
30
51
|
f.is_test as is_test
|
|
31
52
|
""",
|
|
@@ -73,6 +94,7 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
|
|
|
73
94
|
"name": rec.get("name"),
|
|
74
95
|
"fqname": rec.get("fqname"),
|
|
75
96
|
"file_path": rec.get("file_path"),
|
|
97
|
+
"line": rec.get("line"),
|
|
76
98
|
"score": score * multiplier,
|
|
77
99
|
}
|
|
78
100
|
)
|
|
@@ -94,6 +116,14 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
|
|
|
94
116
|
)
|
|
95
117
|
item["context"] = ctx
|
|
96
118
|
|
|
119
|
+
# Attach source code snippets (3–5 lines around the declaration) to the
|
|
120
|
+
# top results so agents have immediate context without reading the file.
|
|
121
|
+
for item in top_k:
|
|
122
|
+
if isinstance(item, dict) and item.get("file_path") and item.get("line"):
|
|
123
|
+
snippet = _read_snippet(item["file_path"], int(item["line"]))
|
|
124
|
+
if snippet:
|
|
125
|
+
item["snippet"] = snippet
|
|
126
|
+
|
|
97
127
|
# Warn when all scores are near zero — the results are likely noise.
|
|
98
128
|
# The threshold 0.05 is calibrated for embedding mode. Without sentence-
|
|
99
129
|
# transformers the hash-fallback vector and BM25/fuzzy signals produce lower
|
|
@@ -15,6 +15,7 @@ codespine/analysis/__init__.py
|
|
|
15
15
|
codespine/analysis/community.py
|
|
16
16
|
codespine/analysis/context.py
|
|
17
17
|
codespine/analysis/coupling.py
|
|
18
|
+
codespine/analysis/crossmodule.py
|
|
18
19
|
codespine/analysis/deadcode.py
|
|
19
20
|
codespine/analysis/flow.py
|
|
20
21
|
codespine/analysis/impact.py
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|