codespine 0.4.3__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. {codespine-0.4.3 → codespine-0.5.0}/PKG-INFO +1 -1
  2. {codespine-0.4.3 → codespine-0.5.0}/codespine/__init__.py +1 -1
  3. codespine-0.5.0/codespine/analysis/crossmodule.py +230 -0
  4. {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/deadcode.py +73 -27
  5. {codespine-0.4.3 → codespine-0.5.0}/codespine/cli.py +11 -0
  6. {codespine-0.4.3 → codespine-0.5.0}/codespine/mcp/server.py +275 -14
  7. {codespine-0.4.3 → codespine-0.5.0}/codespine/search/hybrid.py +30 -0
  8. {codespine-0.4.3 → codespine-0.5.0}/codespine.egg-info/PKG-INFO +1 -1
  9. {codespine-0.4.3 → codespine-0.5.0}/codespine.egg-info/SOURCES.txt +1 -0
  10. {codespine-0.4.3 → codespine-0.5.0}/pyproject.toml +1 -1
  11. {codespine-0.4.3 → codespine-0.5.0}/LICENSE +0 -0
  12. {codespine-0.4.3 → codespine-0.5.0}/README.md +0 -0
  13. {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/__init__.py +0 -0
  14. {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/community.py +0 -0
  15. {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/context.py +0 -0
  16. {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/coupling.py +0 -0
  17. {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/flow.py +0 -0
  18. {codespine-0.4.3 → codespine-0.5.0}/codespine/analysis/impact.py +0 -0
  19. {codespine-0.4.3 → codespine-0.5.0}/codespine/config.py +0 -0
  20. {codespine-0.4.3 → codespine-0.5.0}/codespine/db/__init__.py +0 -0
  21. {codespine-0.4.3 → codespine-0.5.0}/codespine/db/schema.py +0 -0
  22. {codespine-0.4.3 → codespine-0.5.0}/codespine/db/store.py +0 -0
  23. {codespine-0.4.3 → codespine-0.5.0}/codespine/diff/__init__.py +0 -0
  24. {codespine-0.4.3 → codespine-0.5.0}/codespine/diff/branch_diff.py +0 -0
  25. {codespine-0.4.3 → codespine-0.5.0}/codespine/indexer/__init__.py +0 -0
  26. {codespine-0.4.3 → codespine-0.5.0}/codespine/indexer/call_resolver.py +0 -0
  27. {codespine-0.4.3 → codespine-0.5.0}/codespine/indexer/engine.py +0 -0
  28. {codespine-0.4.3 → codespine-0.5.0}/codespine/indexer/java_parser.py +0 -0
  29. {codespine-0.4.3 → codespine-0.5.0}/codespine/indexer/symbol_builder.py +0 -0
  30. {codespine-0.4.3 → codespine-0.5.0}/codespine/mcp/__init__.py +0 -0
  31. {codespine-0.4.3 → codespine-0.5.0}/codespine/noise/__init__.py +0 -0
  32. {codespine-0.4.3 → codespine-0.5.0}/codespine/noise/blocklist.py +0 -0
  33. {codespine-0.4.3 → codespine-0.5.0}/codespine/search/__init__.py +0 -0
  34. {codespine-0.4.3 → codespine-0.5.0}/codespine/search/bm25.py +0 -0
  35. {codespine-0.4.3 → codespine-0.5.0}/codespine/search/fuzzy.py +0 -0
  36. {codespine-0.4.3 → codespine-0.5.0}/codespine/search/rrf.py +0 -0
  37. {codespine-0.4.3 → codespine-0.5.0}/codespine/search/vector.py +0 -0
  38. {codespine-0.4.3 → codespine-0.5.0}/codespine/watch/__init__.py +0 -0
  39. {codespine-0.4.3 → codespine-0.5.0}/codespine/watch/watcher.py +0 -0
  40. {codespine-0.4.3 → codespine-0.5.0}/codespine.egg-info/dependency_links.txt +0 -0
  41. {codespine-0.4.3 → codespine-0.5.0}/codespine.egg-info/entry_points.txt +0 -0
  42. {codespine-0.4.3 → codespine-0.5.0}/codespine.egg-info/requires.txt +0 -0
  43. {codespine-0.4.3 → codespine-0.5.0}/codespine.egg-info/top_level.txt +0 -0
  44. {codespine-0.4.3 → codespine-0.5.0}/gindex.py +0 -0
  45. {codespine-0.4.3 → codespine-0.5.0}/setup.cfg +0 -0
  46. {codespine-0.4.3 → codespine-0.5.0}/tests/test_branch_diff_normalize.py +0 -0
  47. {codespine-0.4.3 → codespine-0.5.0}/tests/test_call_resolver.py +0 -0
  48. {codespine-0.4.3 → codespine-0.5.0}/tests/test_index_and_hybrid.py +0 -0
  49. {codespine-0.4.3 → codespine-0.5.0}/tests/test_java_parser.py +0 -0
  50. {codespine-0.4.3 → codespine-0.5.0}/tests/test_multimodule_index.py +0 -0
  51. {codespine-0.4.3 → codespine-0.5.0}/tests/test_search_ranking.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.4.3
3
+ Version: 0.5.0
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.4.3"
4
+ __version__ = "0.5.0"
@@ -0,0 +1,230 @@
1
+ """Cross-module call edge linker.
2
+
3
+ After all modules in a workspace have been individually indexed, each module's
4
+ call resolver only sees methods within that module. This module fills the gap
5
+ by scanning the graph for unresolved outgoing calls from one module that match
6
+ method signatures in another module, then creating CALLS edges between them.
7
+
8
+ The algorithm:
9
+ 1. Build a global method catalog (method_id → name, param_count, class_fqcn)
10
+ from the DB across ALL projects.
11
+ 2. Build a per-project import map: for each file, record which FQCNs are
12
+ imported (from the class nodes + extends/implements relations).
13
+ 3. For each method M in project A, find its outgoing calls that did NOT
14
+ resolve to any target. These are method invocations that tree-sitter
15
+ parsed but call_resolver.py could not match (because the target was in a
16
+ different module).
17
+ 4. For each unresolved call, use the file's import list + the global class
18
+ catalog to find candidate target methods in OTHER projects.
19
+ 5. Create CALLS edges with confidence 0.6 and reason "cross_module_import".
20
+
21
+ Because ParsedCall data is transient (not stored in the DB), we use a simpler
22
+ heuristic: find methods in module A that have ZERO outgoing CALLS edges but
23
+ are known to reference classes from other modules (via REFERENCES_TYPE or
24
+ import analysis). Then attempt to link them by matching method names against
25
+ the global catalog.
26
+
27
+ A faster fallback strategy (implemented below):
28
+ - Collect all class FQCNs per project.
29
+ - For each project pair (A, B), find classes in A that IMPLEMENT/extend
30
+ classes in B — these already have edges.
31
+ - For method-level cross-module calls: scan for methods with 0 outgoing
32
+ edges, match their name+arity against methods in other projects, and
33
+ only link when the target class is imported (appears in the same file's
34
+ import set via REFERENCES_TYPE edges).
35
+ """
36
+ from __future__ import annotations
37
+
38
+ import logging
39
+ from collections import defaultdict
40
+
41
+ LOGGER = logging.getLogger(__name__)
42
+
43
+
44
def _param_count(signature) -> int:
    """Return the number of parameters declared in a method signature string.

    Only commas at the top nesting level of the parameter list are counted, so
    a generic type such as ``Map<String, Integer>`` or an annotation argument
    list stays part of a single parameter.  A missing, malformed or empty
    parameter list yields 0.
    """
    if not signature:
        return 0
    start = signature.find("(")
    end = signature.rfind(")")
    if start < 0 or end < 0:
        return 0
    params = signature[start + 1:end]
    if not params.strip():
        return 0

    depth = 0
    count = 1
    for ch in params:
        if ch in "<(":
            depth += 1
        elif ch in ">)":
            depth = max(depth - 1, 0)
        elif ch == "," and depth == 0:
            count += 1
    return count


def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
    """Create CALLS edges between methods in different projects.

    For every ``REFERENCES_TYPE`` edge whose source and destination classes
    live in different projects, each method of the source class is linked to
    each method of the destination class whose (name, parameter count) is
    unique within that class.  Overloads that share name and arity are skipped
    because the target would be ambiguous.  Edges are written through
    ``store.add_call`` with confidence 0.6 and reason ``"cross_module_import"``.

    This is a coarse, class-level heuristic: the actual call sites are not
    available here, so an edge means "may call", not "does call".

    Parameters:
        store       – Graph store exposing ``query_records(query, params)``
                      and ``add_call(src_id, dst_id, confidence, reason)``.
        project_ids – Projects to link.  Only class pairs whose source AND
                      destination projects are both listed are considered.
                      ``None`` means every project in the store.

    Returns the number of new cross-module call edges created (failed
    ``add_call`` attempts are logged at debug level and not counted).
    """
    log = logging.getLogger(__name__)

    if project_ids is None:
        proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
        project_ids = [r["id"] for r in proj_recs]

    # De-duplicate so that ["a", "a"] is not mistaken for two projects.
    scope = set(project_ids)
    if len(scope) < 2:
        log.info("Only %d project(s) indexed — skipping cross-module linking.", len(scope))
        return 0

    xmod_class_pairs = store.query_records(
        """
        MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class), (sf:File), (df:File)
        WHERE src.file_id = sf.id AND dst.file_id = df.id
        AND sf.project_id <> df.project_id
        RETURN src.id as src_cid, dst.id as dst_cid,
        sf.project_id as src_pid, df.project_id as dst_pid
        """
    )

    # A class usually takes part in many pairs; fetch its methods only once.
    methods_by_class: dict[str, list[dict]] = {}

    def methods_of(class_id: str) -> list[dict]:
        if class_id not in methods_by_class:
            methods_by_class[class_id] = store.query_records(
                "MATCH (m:Method) WHERE m.class_id = $cid RETURN m.id as mid, m.name as name, m.signature as sig",
                {"cid": class_id},
            )
        return methods_by_class[class_id]

    # Per destination class: ids of methods whose (name, arity) is unambiguous.
    targets_by_class: dict[str, list[str]] = {}

    def unique_targets(class_id: str) -> list[str]:
        if class_id not in targets_by_class:
            by_name_arity: dict[tuple[str, int], list[str]] = defaultdict(list)
            for method in methods_of(class_id):
                key = (method["name"], _param_count(method.get("sig")))
                by_name_arity[key].append(method["mid"])
            targets_by_class[class_id] = [
                ids[0] for ids in by_name_arity.values() if len(ids) == 1
            ]
        return targets_by_class[class_id]

    new_edges = 0
    # Only pairs for which an edge was actually attempted are recorded here,
    # so nothing is suppressed without having been tried.
    seen_pairs: set[tuple[str, str]] = set()

    for pair in xmod_class_pairs:
        # Honour the caller's project scope; the query itself is store-wide.
        if pair["src_pid"] not in scope or pair["dst_pid"] not in scope:
            continue

        src_methods = methods_of(pair["src_cid"])
        dst_ids = unique_targets(pair["dst_cid"])

        for sm in src_methods:
            for dst_mid in dst_ids:
                edge_pair = (sm["mid"], dst_mid)
                if edge_pair in seen_pairs:
                    continue
                seen_pairs.add(edge_pair)
                try:
                    store.add_call(sm["mid"], dst_mid, 0.6, "cross_module_import")
                    new_edges += 1
                except Exception as exc:
                    # Best effort: one bad edge must not abort the whole pass.
                    log.debug("Cross-module edge failed: %s", exc)

    log.info("Cross-module linking: created %d new call edges.", new_edges)
    return new_edges
@@ -74,11 +74,38 @@ def _modifier_tokens(modifiers) -> set[str]:
74
74
  return {str(m).strip() for m in modifiers}
75
75
 
76
76
 
77
- def detect_dead_code(store, limit: int = 200, project: str | None = None) -> list[dict] | None:
77
def _assign_confidence(candidate: dict, strict: bool) -> str:
    """Return the confidence level (high / medium / low) for one dead-method candidate.

    Rules, checked in this order:
    - strict mode: always 'high'; the modifiers are not inspected at all.
    - 'private' among the modifiers: 'high' (almost certainly dead).
    - 'public' among the modifiers: 'low' (could be reached via reflection
      or from an external JAR).
    - anything else (protected / package-private): 'medium'.
    """
    if strict:
        return "high"

    modifier_set = _modifier_tokens(candidate.get("modifiers"))
    # 'private' is tested before 'public', so it wins if both are present.
    for keyword, level in (("private", "high"), ("public", "low")):
        if keyword in modifier_set:
            return level
    return "medium"
95
+
96
+
97
+ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict: bool = False) -> list[dict] | None:
78
98
  """Java-aware dead code detection with exemption passes.
79
99
 
100
+ Parameters:
101
+ limit – Max results to return.
102
+ project – Scope to a single module.
103
+ strict – When True, only exempt main()/@Test methods and explicit
104
+ entry-point annotations. Skips the broad bean-getter/setter,
105
+ contract-method, and constructor exemptions.
106
+
80
107
  Returns a list of dead method dicts, each with:
81
- method_id, name, signature, class_fqcn, file_path, reason.
108
+ method_id, name, signature, class_fqcn, file_path, reason, confidence.
82
109
 
83
110
  The return value is augmented with a ``_stats`` entry (a sentinel dict
84
111
  with key ``_stats``) containing pre/post-exemption counts so callers can
@@ -128,27 +155,34 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None) -> lis
128
155
  n_candidates = len(candidates)
129
156
  exempt: set[str] = set()
130
157
 
131
- # Exempt constructors, test methods, and Java main entrypoints.
158
+ # Minimal exemptions (apply in both normal and strict mode)
132
159
  for c in candidates:
133
160
  sig = (c.get("signature") or "").lower()
134
161
  name = c.get("name") or ""
135
162
  mods = _modifier_tokens(c.get("modifiers"))
136
- if c.get("is_constructor"):
137
- exempt.add(c["method_id"])
163
+
164
+ # Always exempt test methods and main()
138
165
  if c.get("is_test"):
139
166
  exempt.add(c["method_id"])
140
167
  if name == "main" and "string[]" in sig:
141
168
  exempt.add(c["method_id"])
142
- if name in EXEMPT_CONTRACT_METHODS:
143
- exempt.add(c["method_id"])
169
+
170
+ # Always exempt explicit entry-point annotations (@Test, @RequestMapping, etc.)
144
171
  if any(m.lstrip("@") in EXEMPT_ANNOTATIONS for m in mods):
145
172
  exempt.add(c["method_id"])
146
- # Java bean-ish APIs often rely on reflection/serialization.
147
- if "public" in mods and (name.startswith("get") or name.startswith("set") or name.startswith("is")):
148
- exempt.add(c["method_id"])
149
- # Reflection-style hooks
150
- if name in {"valueOf", "fromString", "builder"}:
151
- exempt.add(c["method_id"])
173
+
174
+ # Broad exemptions (only in normal mode, skipped in strict mode)
175
+ if not strict:
176
+ if c.get("is_constructor"):
177
+ exempt.add(c["method_id"])
178
+ if name in EXEMPT_CONTRACT_METHODS:
179
+ exempt.add(c["method_id"])
180
+ # Java bean-ish APIs often rely on reflection/serialization.
181
+ if "public" in mods and (name.startswith("get") or name.startswith("set") or name.startswith("is")):
182
+ exempt.add(c["method_id"])
183
+ # Reflection-style hooks
184
+ if name in {"valueOf", "fromString", "builder"}:
185
+ exempt.add(c["method_id"])
152
186
 
153
187
  # Exempt methods that DIRECTLY override another method (precise: only the
154
188
  # specific overriding method is exempted, not the entire implementing class).
@@ -156,13 +190,16 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None) -> lis
156
190
  # because that would exempt ALL methods of every class that implements ANY
157
191
  # interface — in a typical Spring project that wipes out almost everything
158
192
  # and produces 0 dead code results.
159
- override_methods = store.query_records(
160
- """
161
- MATCH (m:Method)-[:OVERRIDES]->(:Method)
162
- RETURN DISTINCT m.id as method_id
163
- """
164
- )
165
- exempt.update(r["method_id"] for r in override_methods)
193
+ # In strict mode, overrides are NOT exempted — if nobody calls the method,
194
+ # it's flagged regardless of whether it overrides a parent.
195
+ if not strict:
196
+ override_methods = store.query_records(
197
+ """
198
+ MATCH (m:Method)-[:OVERRIDES]->(:Method)
199
+ RETURN DISTINCT m.id as method_id
200
+ """
201
+ )
202
+ exempt.update(r["method_id"] for r in override_methods)
166
203
 
167
204
  dead = []
168
205
  for c in candidates:
@@ -175,6 +212,7 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None) -> lis
175
212
  "signature": c.get("signature"),
176
213
  "class_fqcn": c.get("class_fqcn"),
177
214
  "file_path": c.get("file_path"),
215
+ "confidence": _assign_confidence(c, strict),
178
216
  "reason": "no_incoming_calls_after_exemptions",
179
217
  }
180
218
  )
@@ -184,18 +222,26 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None) -> lis
184
222
  # Append stats as a sentinel entry so the MCP layer can surface them
185
223
  # without changing the return type. Callers should strip entries that
186
224
  # have a "_stats" key when iterating over method results.
225
+ if strict:
226
+ exemption_note = (
227
+ "STRICT MODE: Only test methods, main(), and explicit entry-point "
228
+ "annotations are exempted. Constructors, getters/setters, "
229
+ "contract methods, and overrides are NOT exempt."
230
+ )
231
+ else:
232
+ exemption_note = (
233
+ "Exemptions cover: constructors, test methods, main(), "
234
+ "toString/hashCode/equals/compareTo, public getters/setters, "
235
+ "methods with DI/framework annotations, and direct method overrides. "
236
+ "Use strict=True for minimal exemptions."
237
+ )
187
238
  result.append({
188
239
  "_stats": {
189
240
  "candidates_with_no_callers": n_candidates,
190
241
  "exempted": len(exempt),
191
242
  "dead_returned": len(result),
192
- "note": (
193
- "Exemptions cover: constructors, test methods, main(), "
194
- "toString/hashCode/equals/compareTo, public getters/setters, "
195
- "methods with DI/framework annotations, and direct method overrides. "
196
- "The class-level IMPLEMENTS exemption has been removed — only "
197
- "methods with direct OVERRIDES relations are now exempted."
198
- ),
243
+ "mode": "strict" if strict else "normal",
244
+ "note": exemption_note,
199
245
  }
200
246
  })
201
247
 
@@ -14,6 +14,7 @@ import psutil
14
14
  from codespine.analysis.community import detect_communities, symbol_community
15
15
  from codespine.analysis.context import build_symbol_context
16
16
  from codespine.analysis.coupling import compute_coupling, get_coupling
17
+ from codespine.analysis.crossmodule import link_cross_module_calls
17
18
  from codespine.analysis.deadcode import detect_dead_code
18
19
  from codespine.analysis.flow import trace_execution_flows
19
20
  from codespine.analysis.impact import analyze_impact
@@ -216,6 +217,16 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
216
217
  elif parse_state["indexed"] < parse_state["total"]:
217
218
  _phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
218
219
 
220
+ # ── Cross-module call linking ──────────────────────────────────────
221
+ # When multiple modules/projects are indexed, attempt to resolve call
222
+ # edges that span module boundaries using import + REFERENCES_TYPE info.
223
+ if is_multi and len(modules_with_ids) > 1:
224
+ xmod_pids = [pid for _, pid in modules_with_ids]
225
+ xmod_edges = link_cross_module_calls(store, project_ids=xmod_pids)
226
+ _phase("Cross-module linking...", f"{xmod_edges} cross-module call edges")
227
+ else:
228
+ _phase("Cross-module linking...", "skipped (single module)")
229
+
219
230
  communities: list[dict] = []
220
231
  flows: list[dict] = []
221
232
  dead: list[dict] = []
@@ -48,6 +48,36 @@ def _no_symbols_response(note: str = "No symbols indexed. Run 'codespine analyse
48
48
  return {"available": False, "note": note}
49
49
 
50
50
 
51
def _staleness_meta(
    store,
    response: dict,
    project: str | None = None,
    stale_after_seconds: int = 3600,
) -> dict:
    """Inject index staleness metadata into a tool response.

    Looks up ``indexed_at`` for ``project`` or, when no project is given, the
    smallest ``indexed_at`` across all projects.  When a timestamp is found,
    ``index_age_seconds`` is always added; ``stale_warning`` is added as well
    once the age exceeds ``stale_after_seconds``.

    Parameters:
        store               – Graph store exposing ``query_records``.
        response            – Response dict; it is mutated in place.
        project             – Optional project id to scope the lookup.
        stale_after_seconds – Age above which the warning is attached
                              (default: one hour).

    Returns the same ``response`` object.  This helper is best-effort: any
    failure while reading the timestamp is logged at debug level and the
    response is returned without staleness fields.
    """
    import logging

    try:
        if project:
            recs = store.query_records(
                "MATCH (p:Project) WHERE p.id = $pid RETURN p.indexed_at as ts",
                {"pid": project},
            )
        else:
            recs = store.query_records(
                "MATCH (p:Project) RETURN p.indexed_at as ts ORDER BY p.indexed_at ASC LIMIT 1"
            )
        ts = int(recs[0].get("ts") or 0) if recs else 0
        if ts:
            # Clamp so a timestamp slightly in the future never reports a negative age.
            age = max(0, int(time.time()) - ts)
            response["index_age_seconds"] = age
            if age > stale_after_seconds:
                response["stale_warning"] = (
                    f"Index is {age // 3600}h {(age % 3600) // 60}m old. "
                    "Run analyse_project() or start_watch() to refresh."
                )
    except Exception as exc:
        # Staleness is advisory only; never let it break the actual tool result.
        logging.getLogger(__name__).debug("Could not compute index staleness: %s", exc)
    return response
79
+
80
+
51
81
  def build_mcp_server(store, repo_path_provider):
52
82
  mcp = FastMCP("codespine")
53
83
 
@@ -159,6 +189,8 @@ def build_mcp_server(store, repo_path_provider):
159
189
  "git_log": git_ok,
160
190
  "git_diff": git_ok,
161
191
  "compare_branches": git_ok,
192
+ "get_neighborhood": n_sym > 0,
193
+ "reindex_file": True,
162
194
  "watch_mode": True,
163
195
  "analyse_project": True,
164
196
  },
@@ -235,7 +267,7 @@ def build_mcp_server(store, repo_path_provider):
235
267
  results = hybrid_search(store, query, k=k, project=project)
236
268
  if not results:
237
269
  return _no_symbols_response()
238
- return {"available": True, "results": results}
270
+ return _staleness_meta(store, {"available": True, "results": results}, project)
239
271
 
240
272
  @mcp.tool()
241
273
  def get_impact(symbol: str, max_depth: int = 4, project: str | None = None):
@@ -246,20 +278,30 @@ def build_mcp_server(store, repo_path_provider):
246
278
  result = analyze_impact(store, symbol, max_depth=max_depth, project=project)
247
279
  if not result.get("targets_resolved"):
248
280
  return {"available": False, "note": f"Symbol '{symbol}' not found in the index."}
249
- return {"available": True, **result}
281
+ return _staleness_meta(store, {"available": True, **result}, project)
250
282
 
251
283
  @mcp.tool()
252
- def detect_dead_code(limit: int = 200, project: str | None = None):
284
+ def detect_dead_code(limit: int = 200, project: str | None = None, strict: bool = False):
253
285
  """
254
286
  Detect methods with no incoming calls (after Java-aware exemptions).
255
287
  Pass project to scope to a single module.
256
288
 
289
+ Parameters:
290
+ strict – When True, only main()/@Test and explicit entry-point
291
+ annotations are exempted. Constructors, getters/setters,
292
+ contract methods (toString, hashCode, equals), and method
293
+ overrides are NOT exempt. Use this for a thorough audit.
294
+ Each result includes a confidence level (high/medium/low):
295
+ high = private method, almost certainly dead
296
+ medium = package-private or protected
297
+ low = public method, could be called via reflection
298
+
257
299
  Returns dead_code list, count, and an exemption_stats dict showing
258
300
  how many candidates were found and how many were filtered out by the
259
301
  exemption rules — useful for validating that the feature is working
260
302
  even when the dead list is empty.
261
303
  """
262
- raw = detect_dead_code_analysis(store, limit=limit, project=project)
304
+ raw = detect_dead_code_analysis(store, limit=limit, project=project, strict=strict)
263
305
  if raw is None:
264
306
  return _no_symbols_response()
265
307
 
@@ -272,12 +314,12 @@ def build_mcp_server(store, repo_path_provider):
272
314
  else:
273
315
  dead.append(entry)
274
316
 
275
- return {
317
+ return _staleness_meta(store, {
276
318
  "available": True,
277
319
  "dead_code": dead,
278
320
  "count": len(dead),
279
321
  "exemption_stats": stats,
280
- }
322
+ }, project)
281
323
 
282
324
  @mcp.tool()
283
325
  def trace_execution_flows(entry_symbol: str | None = None, max_depth: int = 6, project: str | None = None):
@@ -288,7 +330,7 @@ def build_mcp_server(store, repo_path_provider):
288
330
  flows = trace_flows_analysis(store, entry_symbol=entry_symbol, max_depth=max_depth, project=project)
289
331
  if not flows:
290
332
  return _no_symbols_response("No entry points found. Run 'codespine analyse --deep' or provide entry_symbol.")
291
- return {"available": True, "flows": flows}
333
+ return _staleness_meta(store, {"available": True, "flows": flows}, project)
292
334
 
293
335
  @mcp.tool()
294
336
  def get_symbol_community(symbol: str):
@@ -300,7 +342,7 @@ def build_mcp_server(store, repo_path_provider):
300
342
  result = symbol_community(store, symbol)
301
343
  if not result.get("matches"):
302
344
  return {"available": False, "note": "No community data yet. Run 'codespine analyse --deep'."}
303
- return {"available": True, **result}
345
+ return _staleness_meta(store, {"available": True, **result})
304
346
 
305
347
  @mcp.tool()
306
348
  def get_change_coupling(
@@ -319,7 +361,7 @@ def build_mcp_server(store, repo_path_provider):
319
361
  "available": False,
320
362
  "note": "No coupling data. Run 'codespine analyse --deep' with a git repository.",
321
363
  }
322
- return {"available": True, "coupling": result}
364
+ return _staleness_meta(store, {"available": True, "coupling": result})
323
365
 
324
366
  @mcp.tool()
325
367
  def get_symbol_context(query: str, max_depth: int = 3, project: str | None = None):
@@ -330,7 +372,7 @@ def build_mcp_server(store, repo_path_provider):
330
372
  result = build_symbol_context(store, query, max_depth=max_depth, project=project)
331
373
  if not result.get("search_candidates"):
332
374
  return _no_symbols_response()
333
- return {"available": True, **result}
375
+ return _staleness_meta(store, {"available": True, **result}, project)
334
376
 
335
377
  @mcp.tool()
336
378
  def get_codebase_stats():
@@ -496,7 +538,7 @@ def build_mcp_server(store, repo_path_provider):
496
538
  by_project.setdefault(pid, {"classes": [], "methods": []})
497
539
  by_project[pid]["methods"].append(m)
498
540
 
499
- return {
541
+ return _staleness_meta(store, {
500
542
  "available": True,
501
543
  "query": name,
502
544
  "total_matches": total,
@@ -505,7 +547,7 @@ def build_mcp_server(store, repo_path_provider):
505
547
  f"Found {total} match(es). If multiple projects contain the same name, "
506
548
  "pass project=<project_id> to subsequent tools to avoid cross-project ambiguity."
507
549
  ) if total > 1 else None,
508
- }
550
+ }, project)
509
551
 
510
552
  @mcp.tool()
511
553
  def list_packages(project: str | None = None, limit: int = 200):
@@ -548,11 +590,11 @@ def build_mcp_server(store, repo_path_provider):
548
590
  "class_count": r.get("class_count", 0),
549
591
  })
550
592
 
551
- return {
593
+ return _staleness_meta(store, {
552
594
  "available": True,
553
595
  "total_packages": len(recs),
554
596
  "by_project": by_project,
555
- }
597
+ }, project)
556
598
 
557
599
  # ------------------------------------------------------------------
558
600
  # Git tools
@@ -1007,6 +1049,225 @@ def build_mcp_server(store, repo_path_provider):
1007
1049
  ),
1008
1050
  }
1009
1051
 
1052
+ # ------------------------------------------------------------------
1053
+ # Neighborhood exploration
1054
+ # ------------------------------------------------------------------
1055
+
1056
@mcp.tool()
def get_neighborhood(symbol: str, project: str | None = None):
    """
    One-shot structural context for a symbol: callers (upstream), callees
    (downstream), sibling methods in the same class, and override /
    implements relationships.

    This is the tool to call when you want to understand a method's
    immediate surroundings in the call graph without traversing the
    full impact tree.

    When several methods match, an exact id match is preferred, then an
    exact (case-insensitive) name match, then a signature-substring match.
    The candidates that were not chosen are listed under "other_matches".

    Parameters:
      symbol  – Method name, signature fragment, or fully-qualified name.
      project – Optional project_id to scope the symbol lookup.
    """
    from codespine.analysis.impact import _resolve_method_metadata

    project_clause = "AND f.project_id = $proj" if project else ""
    params: dict = {"q": symbol}
    if project:
        params["proj"] = project

    # 1. Resolve the symbol to method IDs
    method_recs = store.query_records(
        f"""
        MATCH (m:Method), (c:Class), (f:File)
        WHERE m.class_id = c.id AND c.file_id = f.id {project_clause}
          AND (m.id = $q OR lower(m.name) = lower($q)
               OR lower(m.signature) CONTAINS lower($q))
        RETURN m.id as id, m.name as name, m.signature as signature,
               c.id as class_id, c.fqcn as class_fqcn,
               f.path as file_path, f.project_id as project_id
        LIMIT 5
        """,
        params,
    )
    if not method_recs:
        return {"available": False, "note": f"Symbol '{symbol}' not found. Try find_symbol or search_hybrid."}

    # The query has no ORDER BY, so rank the returned candidates here.
    # sorted() is stable: records of equal rank keep the store's order.
    needle = symbol.lower()

    def _match_rank(rec):
        if rec["id"] == symbol:
            return 0
        if (rec["name"] or "").lower() == needle:
            return 1
        return 2

    ranked = sorted(method_recs, key=_match_rank)
    target = ranked[0]
    mid = target["id"]
    cid = target["class_id"]

    # 2. Callers (upstream)
    callers = store.query_records(
        """
        MATCH (caller:Method)-[r:CALLS]->(m:Method {id: $mid})
        RETURN caller.id as id, coalesce(r.confidence, 0.5) as confidence,
               coalesce(r.reason, 'unknown') as reason
        """,
        {"mid": mid},
    )

    # 3. Callees (downstream)
    callees = store.query_records(
        """
        MATCH (m:Method {id: $mid})-[r:CALLS]->(callee:Method)
        RETURN callee.id as id, coalesce(r.confidence, 0.5) as confidence,
               coalesce(r.reason, 'unknown') as reason
        """,
        {"mid": mid},
    )

    # 4. Siblings (same class, excluding self)
    siblings = store.query_records(
        """
        MATCH (m:Method)
        WHERE m.class_id = $cid AND m.id <> $mid
        RETURN m.id as id, m.name as name, m.signature as signature
        """,
        {"cid": cid, "mid": mid},
    )

    # 5. Override / implements relationships
    overrides_up = store.query_records(
        "MATCH (m:Method {id: $mid})-[:OVERRIDES]->(parent:Method) RETURN parent.id as id",
        {"mid": mid},
    )
    overrides_down = store.query_records(
        "MATCH (child:Method)-[:OVERRIDES]->(m:Method {id: $mid}) RETURN child.id as id",
        {"mid": mid},
    )

    # Bulk-resolve all referenced method IDs for human-readable output
    all_ids = (
        [c["id"] for c in callers]
        + [c["id"] for c in callees]
        + [o["id"] for o in overrides_up]
        + [o["id"] for o in overrides_down]
    )
    meta = _resolve_method_metadata(store, all_ids) if all_ids else {}

    def _enrich(items, extra_keys=None):
        """Merge resolved metadata into each record, copying any extra_keys present."""
        enriched = []
        for item in items:
            m = meta.get(item["id"], {})
            entry = {
                "id": item["id"],
                "name": m.get("name") or item.get("name"),
                "fqname": m.get("fqname") or item.get("signature"),
                "class_fqcn": m.get("class_fqcn"),
                "file_path": m.get("file_path"),
                "project_id": m.get("project_id"),
            }
            if extra_keys:
                for k in extra_keys:
                    if k in item:
                        entry[k] = item[k]
            enriched.append(entry)
        return enriched

    result = {
        "available": True,
        "target": {
            "id": mid,
            "name": target["name"],
            "signature": target["signature"],
            "class_fqcn": target["class_fqcn"],
            "file_path": target["file_path"],
            "project_id": target["project_id"],
        },
        "callers": _enrich(callers, extra_keys=["confidence", "reason"]),
        "callees": _enrich(callees, extra_keys=["confidence", "reason"]),
        "siblings": [
            {"name": s["name"], "signature": s["signature"]}
            for s in siblings
        ],
        "overrides": _enrich(overrides_up),
        "overridden_by": _enrich(overrides_down),
        "summary": {
            "callers": len(callers),
            "callees": len(callees),
            "siblings": len(siblings),
            "overrides": len(overrides_up),
            "overridden_by": len(overrides_down),
        },
    }
    if len(ranked) > 1:
        # Surface the candidates that were not chosen so the caller can
        # re-query with an exact id instead of silently getting one of them.
        result["other_matches"] = [
            {
                "id": rec["id"],
                "signature": rec["signature"],
                "class_fqcn": rec["class_fqcn"],
                "project_id": rec["project_id"],
            }
            for rec in ranked[1:]
        ]
    # Pass the project scope through, as the other project-scoped tools do.
    return _staleness_meta(store, result, project)
1194
+
1195
+ # ------------------------------------------------------------------
1196
+ # Single-file re-index
1197
+ # ------------------------------------------------------------------
1198
+
1199
@mcp.tool()
def reindex_file(file_path: str, project: str | None = None):
    """
    Refresh the graph after editing a Java file.

    Use this after editing a file to immediately refresh the graph without
    waiting for watch mode or running a full analysis. The refresh is done
    by running an incremental, embedding-free analysis of the project that
    owns the file, in a subprocess.

    Parameters:
      file_path – Absolute path to the .java file.
      project   – Optional project_id. If omitted, the tool infers the
                  project by matching the file path against indexed projects
                  (the most specific project root wins).

    Returns a dict with "available": True plus the file, project and elapsed
    seconds on success; otherwise "available": False with a "note" (and
    "error" when the subprocess produced output or could not be started).
    """
    import os as _os

    abs_fp = _os.path.abspath(file_path)
    if not _os.path.isfile(abs_fp) or not abs_fp.endswith(".java"):
        return {"available": False, "note": f"Not a valid .java file: {abs_fp}"}

    # Resolve project from indexed projects if not given
    if not project:
        projects = store.query_records(
            "MATCH (p:Project) RETURN p.id as id, p.path as path"
        )
        best_len = -1
        for p in projects:
            raw_root = p["path"]
            if not raw_root:
                continue  # a project without a recorded path cannot own the file
            root = _os.path.normpath(raw_root)
            prefix = root if root.endswith(_os.sep) else root + _os.sep
            # Prefer the longest matching root so nested projects resolve
            # to the innermost one instead of whichever is returned first.
            if abs_fp.startswith(prefix) and len(prefix) > best_len:
                project = p["id"]
                best_len = len(prefix)
        if not project:
            return {
                "available": False,
                "note": (
                    "Cannot determine project for this file. "
                    "Pass project=<project_id> explicitly."
                ),
            }

    # Find the project path to use as root for indexing
    proj_recs = store.query_records(
        "MATCH (p:Project) WHERE p.id = $pid RETURN p.path as path LIMIT 1",
        {"pid": project},
    )
    if not proj_recs:
        return {"available": False, "note": f"Project '{project}' not found in index."}

    proj_path = proj_recs[0]["path"]
    if not proj_path:
        # A null path would end up in the argv list and fail inside subprocess.
        return {"available": False, "note": f"Project '{project}' has no path recorded in the index."}

    # Run incremental index via subprocess to avoid read-only DB constraint
    cmd = [
        sys.executable, "-m", "codespine.cli",
        "analyse", proj_path,
        "--incremental", "--no-embed", "--allow-running",
    ]
    timeout_s = 60
    t0 = time.monotonic()  # monotonic clock: unaffected by wall-clock adjustments
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout_s)
    except subprocess.TimeoutExpired:
        return {
            "available": False,
            "note": f"Re-index timed out after {timeout_s}s",
        }
    except OSError as exc:
        return {
            "available": False,
            "note": "Re-index could not be started",
            "error": str(exc),
        }
    elapsed = round(time.monotonic() - t0, 2)

    if proc.returncode != 0:
        return {
            "available": False,
            "note": f"Re-index failed (code {proc.returncode})",
            "error": proc.stderr.strip() or proc.stdout.strip(),
        }

    return {
        "available": True,
        "file": abs_fp,
        "project": project,
        "elapsed_s": elapsed,
        "note": f"Re-indexed project {project} incrementally in {elapsed}s.",
    }
1270
+
1010
1271
  # ------------------------------------------------------------------
1011
1272
  # Advanced / raw access
1012
1273
  # ------------------------------------------------------------------
@@ -1,11 +1,31 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import os
4
+
3
5
  from codespine.search.bm25 import rank_bm25
4
6
  from codespine.search.fuzzy import rank_fuzzy
5
7
  from codespine.search.rrf import reciprocal_rank_fusion
6
8
  from codespine.search.vector import _load_model, rank_semantic
7
9
 
8
10
  _LOW_CONFIDENCE_THRESHOLD = 0.05
11
+ _SNIPPET_CONTEXT_LINES = 2 # lines above and below the symbol declaration
12
+
13
+
14
+ def _read_snippet(file_path: str, line: int, context: int = _SNIPPET_CONTEXT_LINES) -> str | None:
15
+ """Best-effort extraction of source lines around a symbol declaration."""
16
+ if not file_path or not line or line < 1:
17
+ return None
18
+ try:
19
+ if not os.path.isfile(file_path):
20
+ return None
21
+ with open(file_path, "r", encoding="utf-8", errors="replace") as fh:
22
+ all_lines = fh.readlines()
23
+ start = max(0, line - 1 - context)
24
+ end = min(len(all_lines), line + context)
25
+ snippet_lines = all_lines[start:end]
26
+ return "".join(snippet_lines).rstrip("\n")
27
+ except Exception:
28
+ return None
9
29
 
10
30
 
11
31
  def hybrid_search(store, query: str, k: int = 20, project: str | None = None) -> list[dict]:
@@ -26,6 +46,7 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
26
46
  s.name as name,
27
47
  s.fqname as fqname,
28
48
  s.embedding as embedding,
49
+ s.line as line,
29
50
  f.path as file_path,
30
51
  f.is_test as is_test
31
52
  """,
@@ -73,6 +94,7 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
73
94
  "name": rec.get("name"),
74
95
  "fqname": rec.get("fqname"),
75
96
  "file_path": rec.get("file_path"),
97
+ "line": rec.get("line"),
76
98
  "score": score * multiplier,
77
99
  }
78
100
  )
@@ -94,6 +116,14 @@ def hybrid_search(store, query: str, k: int = 20, project: str | None = None) ->
94
116
  )
95
117
  item["context"] = ctx
96
118
 
119
+ # Attach source code snippets (3–5 lines around the declaration) to the
120
+ # top results so agents have immediate context without reading the file.
121
+ for item in top_k:
122
+ if isinstance(item, dict) and item.get("file_path") and item.get("line"):
123
+ snippet = _read_snippet(item["file_path"], int(item["line"]))
124
+ if snippet:
125
+ item["snippet"] = snippet
126
+
97
127
  # Warn when all scores are near zero — the results are likely noise.
98
128
  # The threshold 0.05 is calibrated for embedding mode. Without sentence-
99
129
  # transformers the hash-fallback vector and BM25/fuzzy signals produce lower
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.4.3
3
+ Version: 0.5.0
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -15,6 +15,7 @@ codespine/analysis/__init__.py
15
15
  codespine/analysis/community.py
16
16
  codespine/analysis/context.py
17
17
  codespine/analysis/coupling.py
18
+ codespine/analysis/crossmodule.py
18
19
  codespine/analysis/deadcode.py
19
20
  codespine/analysis/flow.py
20
21
  codespine/analysis/impact.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.4.3"
7
+ version = "0.5.0"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes
File without changes