codespine-0.5.0.tar.gz → codespine-0.5.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {codespine-0.5.0 → codespine-0.5.1}/PKG-INFO +1 -1
  2. {codespine-0.5.0 → codespine-0.5.1}/codespine/__init__.py +1 -1
  3. codespine-0.5.1/codespine/analysis/crossmodule.py +173 -0
  4. {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/deadcode.py +113 -53
  5. {codespine-0.5.0 → codespine-0.5.1}/codespine/mcp/server.py +87 -13
  6. {codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/PKG-INFO +1 -1
  7. {codespine-0.5.0 → codespine-0.5.1}/pyproject.toml +1 -1
  8. codespine-0.5.0/codespine/analysis/crossmodule.py +0 -230
  9. {codespine-0.5.0 → codespine-0.5.1}/LICENSE +0 -0
  10. {codespine-0.5.0 → codespine-0.5.1}/README.md +0 -0
  11. {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/__init__.py +0 -0
  12. {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/community.py +0 -0
  13. {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/context.py +0 -0
  14. {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/coupling.py +0 -0
  15. {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/flow.py +0 -0
  16. {codespine-0.5.0 → codespine-0.5.1}/codespine/analysis/impact.py +0 -0
  17. {codespine-0.5.0 → codespine-0.5.1}/codespine/cli.py +0 -0
  18. {codespine-0.5.0 → codespine-0.5.1}/codespine/config.py +0 -0
  19. {codespine-0.5.0 → codespine-0.5.1}/codespine/db/__init__.py +0 -0
  20. {codespine-0.5.0 → codespine-0.5.1}/codespine/db/schema.py +0 -0
  21. {codespine-0.5.0 → codespine-0.5.1}/codespine/db/store.py +0 -0
  22. {codespine-0.5.0 → codespine-0.5.1}/codespine/diff/__init__.py +0 -0
  23. {codespine-0.5.0 → codespine-0.5.1}/codespine/diff/branch_diff.py +0 -0
  24. {codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/__init__.py +0 -0
  25. {codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/call_resolver.py +0 -0
  26. {codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/engine.py +0 -0
  27. {codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/java_parser.py +0 -0
  28. {codespine-0.5.0 → codespine-0.5.1}/codespine/indexer/symbol_builder.py +0 -0
  29. {codespine-0.5.0 → codespine-0.5.1}/codespine/mcp/__init__.py +0 -0
  30. {codespine-0.5.0 → codespine-0.5.1}/codespine/noise/__init__.py +0 -0
  31. {codespine-0.5.0 → codespine-0.5.1}/codespine/noise/blocklist.py +0 -0
  32. {codespine-0.5.0 → codespine-0.5.1}/codespine/search/__init__.py +0 -0
  33. {codespine-0.5.0 → codespine-0.5.1}/codespine/search/bm25.py +0 -0
  34. {codespine-0.5.0 → codespine-0.5.1}/codespine/search/fuzzy.py +0 -0
  35. {codespine-0.5.0 → codespine-0.5.1}/codespine/search/hybrid.py +0 -0
  36. {codespine-0.5.0 → codespine-0.5.1}/codespine/search/rrf.py +0 -0
  37. {codespine-0.5.0 → codespine-0.5.1}/codespine/search/vector.py +0 -0
  38. {codespine-0.5.0 → codespine-0.5.1}/codespine/watch/__init__.py +0 -0
  39. {codespine-0.5.0 → codespine-0.5.1}/codespine/watch/watcher.py +0 -0
  40. {codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/SOURCES.txt +0 -0
  41. {codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/dependency_links.txt +0 -0
  42. {codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/entry_points.txt +0 -0
  43. {codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/requires.txt +0 -0
  44. {codespine-0.5.0 → codespine-0.5.1}/codespine.egg-info/top_level.txt +0 -0
  45. {codespine-0.5.0 → codespine-0.5.1}/gindex.py +0 -0
  46. {codespine-0.5.0 → codespine-0.5.1}/setup.cfg +0 -0
  47. {codespine-0.5.0 → codespine-0.5.1}/tests/test_branch_diff_normalize.py +0 -0
  48. {codespine-0.5.0 → codespine-0.5.1}/tests/test_call_resolver.py +0 -0
  49. {codespine-0.5.0 → codespine-0.5.1}/tests/test_index_and_hybrid.py +0 -0
  50. {codespine-0.5.0 → codespine-0.5.1}/tests/test_java_parser.py +0 -0
  51. {codespine-0.5.0 → codespine-0.5.1}/tests/test_multimodule_index.py +0 -0
  52. {codespine-0.5.0 → codespine-0.5.1}/tests/test_search_ranking.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.5.0"
4
+ __version__ = "0.5.1"
@@ -0,0 +1,173 @@
1
+ """Cross-module call edge linker.
2
+
3
+ After all modules in a workspace have been individually indexed, each module's
4
+ call resolver only sees methods within that module. This module fills the gap
5
+ by scanning the graph for cross-project class references (REFERENCES_TYPE and
6
+ IMPLEMENTS edges) and creating CALLS edges between methods where the call is
7
+ plausible.
8
+
9
+ Strategy A — Name + arity match (confidence 0.7)
10
+ If src_class references dst_class (cross-project) and both have a method
11
+ with the same name and same parameter count, create a CALLS edge. This
12
+ catches delegation, interface-implementation forwarding, and adapter
13
+ patterns.
14
+
15
+ Strategy B — Type-reference fallback (confidence 0.4)
16
+ For each *public* method in dst_class that received NO name-match edge,
17
+ create ONE low-confidence edge from a representative src method (preferring
18
+ one with zero outgoing calls). This prevents methods that are genuinely
19
+ used cross-module from appearing as dead code.
20
+ """
21
+ from __future__ import annotations
22
+
23
+ import logging
24
+ from collections import defaultdict
25
+
26
+ LOGGER = logging.getLogger(__name__)
27
+
28
+
29
+ def _param_count(sig: str) -> int:
30
+ """Count parameters from a method signature string."""
31
+ if not sig or "(" not in sig or ")" not in sig:
32
+ return 0
33
+ arg_str = sig[sig.find("(") + 1: sig.rfind(")")]
34
+ return 0 if not arg_str.strip() else arg_str.count(",") + 1
35
+
36
+
37
+ def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
38
+ """Create CALLS edges between methods in different projects.
39
+
40
+ Returns the number of new cross-module call edges created.
41
+ """
42
+ if project_ids is None:
43
+ proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
44
+ project_ids = [r["id"] for r in proj_recs]
45
+
46
+ if len(project_ids) < 2:
47
+ LOGGER.info(
48
+ "Only %d project(s) indexed — skipping cross-module linking.",
49
+ len(project_ids),
50
+ )
51
+ return 0
52
+
53
+ # ── 1. Collect cross-project class pairs ──────────────────────────
54
+ ref_pairs = store.query_records(
55
+ """
56
+ MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class), (sf:File), (df:File)
57
+ WHERE src.file_id = sf.id AND dst.file_id = df.id
58
+ AND sf.project_id <> df.project_id
59
+ RETURN DISTINCT src.id as src_cid, dst.id as dst_cid
60
+ """
61
+ )
62
+ impl_pairs = store.query_records(
63
+ """
64
+ MATCH (src:Class)-[:IMPLEMENTS]->(dst:Class), (sf:File), (df:File)
65
+ WHERE src.file_id = sf.id AND dst.file_id = df.id
66
+ AND sf.project_id <> df.project_id
67
+ RETURN DISTINCT src.id as src_cid, dst.id as dst_cid
68
+ """
69
+ )
70
+
71
+ all_pairs: set[tuple[str, str]] = set()
72
+ for p in ref_pairs:
73
+ all_pairs.add((p["src_cid"], p["dst_cid"]))
74
+ for p in impl_pairs:
75
+ all_pairs.add((p["src_cid"], p["dst_cid"]))
76
+
77
+ if not all_pairs:
78
+ LOGGER.info("No cross-project class references found.")
79
+ return 0
80
+
81
+ LOGGER.info(
82
+ "Cross-module: %d cross-project class pair(s) to process.",
83
+ len(all_pairs),
84
+ )
85
+
86
+ # ── 2. Process each class pair ────────────────────────────────────
87
+ new_edges = 0
88
+ seen: set[tuple[str, str]] = set()
89
+
90
+ for src_cid, dst_cid in all_pairs:
91
+ src_methods = store.query_records(
92
+ """MATCH (m:Method) WHERE m.class_id = $cid
93
+ RETURN m.id as mid, m.name as name, m.signature as sig""",
94
+ {"cid": src_cid},
95
+ )
96
+ dst_methods = store.query_records(
97
+ """MATCH (m:Method) WHERE m.class_id = $cid
98
+ RETURN m.id as mid, m.name as name, m.signature as sig,
99
+ m.modifiers as modifiers, m.is_constructor as is_ctor""",
100
+ {"cid": dst_cid},
101
+ )
102
+ if not src_methods or not dst_methods:
103
+ continue
104
+
105
+ # Build name → methods index for src class
106
+ src_by_name: dict[str, list[dict]] = defaultdict(list)
107
+ for sm in src_methods:
108
+ src_by_name[sm["name"]].append(sm)
109
+
110
+ # ── Strategy A: name + arity matching ─────────────────────────
111
+ matched_dst_mids: set[str] = set()
112
+
113
+ for dm in dst_methods:
114
+ dm_name = dm["name"]
115
+ dm_pc = _param_count(dm.get("sig") or "")
116
+ candidates = src_by_name.get(dm_name, [])
117
+ for sm in candidates:
118
+ sm_pc = _param_count(sm.get("sig") or "")
119
+ if sm_pc == dm_pc:
120
+ pair = (sm["mid"], dm["mid"])
121
+ if pair in seen:
122
+ matched_dst_mids.add(dm["mid"])
123
+ continue
124
+ seen.add(pair)
125
+ try:
126
+ store.add_call(
127
+ sm["mid"], dm["mid"], 0.7, "cross_module_name_match",
128
+ )
129
+ new_edges += 1
130
+ matched_dst_mids.add(dm["mid"])
131
+ except Exception as exc:
132
+ LOGGER.debug("Name-match edge failed: %s", exc)
133
+
134
+ # ── Strategy B: fallback for unmatched public dst methods ─────
135
+ # Find a representative caller: prefer src methods with 0 outgoing calls
136
+ fallback_src = None
137
+ for sm in src_methods:
138
+ out = store.query_records(
139
+ "MATCH (m:Method {id: $mid})-[:CALLS]->(:Method) RETURN count(*) as n",
140
+ {"mid": sm["mid"]},
141
+ )
142
+ if out and out[0]["n"] == 0:
143
+ fallback_src = sm
144
+ break
145
+ if fallback_src is None and src_methods:
146
+ fallback_src = src_methods[0]
147
+
148
+ if fallback_src:
149
+ for dm in dst_methods:
150
+ if dm["mid"] in matched_dst_mids:
151
+ continue
152
+ # Skip constructors and private methods
153
+ if dm.get("is_ctor"):
154
+ continue
155
+ mods = dm.get("modifiers") or []
156
+ mod_strs = {str(m).strip() for m in mods} if mods else set()
157
+ if "private" in mod_strs:
158
+ continue
159
+
160
+ pair = (fallback_src["mid"], dm["mid"])
161
+ if pair in seen:
162
+ continue
163
+ seen.add(pair)
164
+ try:
165
+ store.add_call(
166
+ fallback_src["mid"], dm["mid"], 0.4, "cross_module_type_ref",
167
+ )
168
+ new_edges += 1
169
+ except Exception as exc:
170
+ LOGGER.debug("Fallback edge failed: %s", exc)
171
+
172
+ LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
173
+ return new_edges
@@ -1,8 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
- EXEMPT_ANNOTATIONS = {
4
- # Java standard
5
- "Override",
3
+ from collections import defaultdict
4
+
5
+ # ── Annotation sets ──────────────────────────────────────────────────
6
+ # Entry-point annotations — exempt even in strict mode. These represent
7
+ # actual runtime entry points that the framework calls reflectively.
8
+ ENTRY_POINT_ANNOTATIONS = {
6
9
  # JUnit / testing
7
10
  "Test",
8
11
  "ParameterizedTest",
@@ -10,21 +13,6 @@ EXEMPT_ANNOTATIONS = {
10
13
  "AfterEach",
11
14
  "BeforeAll",
12
15
  "AfterAll",
13
- # Spring – component model (class-level; methods inside are never "dead")
14
- "Component",
15
- "Service",
16
- "Repository",
17
- "Controller",
18
- "RestController",
19
- "Configuration",
20
- "Bean",
21
- "Aspect",
22
- # Spring – lifecycle / event hooks
23
- "PostConstruct",
24
- "PreDestroy",
25
- "EventListener",
26
- "TransactionalEventListener",
27
- "Scheduled",
28
16
  # Spring – web entry points
29
17
  "RequestMapping",
30
18
  "GetMapping",
@@ -33,12 +21,35 @@ EXEMPT_ANNOTATIONS = {
33
21
  "DeleteMapping",
34
22
  "PatchMapping",
35
23
  "MessageMapping",
36
- # Spring – messaging / async
24
+ # Spring – messaging / async entry points
37
25
  "KafkaListener",
38
26
  "RabbitListener",
39
27
  "JmsListener",
40
28
  "SqsListener",
41
29
  "StreamListener",
30
+ # Spring – lifecycle / event hooks
31
+ "PostConstruct",
32
+ "PreDestroy",
33
+ "EventListener",
34
+ "TransactionalEventListener",
35
+ "Scheduled",
36
+ }
37
+
38
+ # Broad annotations — exempt only in normal mode. These indicate the
39
+ # method is *likely* used via DI / serialisation / reflection, but in a
40
+ # strict audit the user may want to verify that manually.
41
+ BROAD_ANNOTATIONS = {
42
+ # Java standard
43
+ "Override",
44
+ # Spring – component model (class-level; methods inside are never "dead")
45
+ "Component",
46
+ "Service",
47
+ "Repository",
48
+ "Controller",
49
+ "RestController",
50
+ "Configuration",
51
+ "Bean",
52
+ "Aspect",
42
53
  # Spring Data / persistence
43
54
  "Query",
44
55
  "Modifying",
@@ -48,18 +59,21 @@ EXEMPT_ANNOTATIONS = {
48
59
  "Singleton",
49
60
  "Named",
50
61
  "Qualifier",
51
- # Jakarta / javax DI (same semantics as Guice/Spring variants)
62
+ # Jakarta / javax DI
52
63
  "ApplicationScoped",
53
64
  "RequestScoped",
54
65
  "SessionScoped",
55
66
  "Dependent",
56
- # Jackson / serialization (called reflectively)
67
+ # Jackson / serialization
57
68
  "JsonCreator",
58
69
  "JsonProperty",
59
70
  "JsonDeserialize",
60
71
  "JsonSerialize",
61
72
  }
62
73
 
74
+ # Full set used in normal mode
75
+ EXEMPT_ANNOTATIONS = ENTRY_POINT_ANNOTATIONS | BROAD_ANNOTATIONS
76
+
63
77
  EXEMPT_CONTRACT_METHODS = {
64
78
  "toString",
65
79
  "hashCode",
@@ -74,6 +88,15 @@ def _modifier_tokens(modifiers) -> set[str]:
74
88
  return {str(m).strip() for m in modifiers}
75
89
 
76
90
 
91
+ def _matched_annotation(mods: set[str], annotation_set: set[str]) -> str | None:
92
+ """Return the first annotation in *mods* that appears in *annotation_set*, or None."""
93
+ for m in mods:
94
+ bare = m.lstrip("@")
95
+ if bare in annotation_set:
96
+ return bare
97
+ return None
98
+
99
+
77
100
  def _assign_confidence(candidate: dict, strict: bool) -> str:
78
101
  """Assign a confidence level (high / medium / low) to each dead method.
79
102
 
@@ -101,16 +124,17 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
101
124
  limit – Max results to return.
102
125
  project – Scope to a single module.
103
126
  strict – When True, only exempt main()/@Test methods and explicit
104
- entry-point annotations. Skips the broad bean-getter/setter,
105
- contract-method, and constructor exemptions.
127
+ entry-point annotations (RequestMapping, KafkaListener, etc.).
128
+ Skips the broad bean-getter/setter, contract-method,
129
+ constructor, Override, and DI annotation exemptions.
106
130
 
107
131
  Returns a list of dead method dicts, each with:
108
132
  method_id, name, signature, class_fqcn, file_path, reason, confidence.
109
133
 
110
134
  The return value is augmented with a ``_stats`` entry (a sentinel dict
111
- with key ``_stats``) containing pre/post-exemption counts so callers can
112
- show users that the exemption logic is actually working:
113
- candidates_with_no_callers, exempted, dead_returned
135
+ with key ``_stats``) containing pre/post-exemption counts, a breakdown
136
+ of exemption reasons, and a sample of exempted methods so callers can
137
+ validate that the exemption logic is working correctly.
114
138
  """
115
139
  if project:
116
140
  candidates = store.query_records(
@@ -153,43 +177,56 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
153
177
  return []
154
178
 
155
179
  n_candidates = len(candidates)
156
- exempt: set[str] = set()
157
180
 
158
- # Minimal exemptions (apply in both normal and strict mode)
181
+ # Track exemptions as {method_id: reason} instead of a plain set
182
+ exempt: dict[str, str] = {}
183
+
184
+ # Choose annotation set based on mode
185
+ annotations_to_check = ENTRY_POINT_ANNOTATIONS if strict else EXEMPT_ANNOTATIONS
186
+
187
+ # ── Exemption passes ──────────────────────────────────────────────
159
188
  for c in candidates:
189
+ mid = c["method_id"]
190
+ if mid in exempt:
191
+ continue
160
192
  sig = (c.get("signature") or "").lower()
161
193
  name = c.get("name") or ""
162
194
  mods = _modifier_tokens(c.get("modifiers"))
163
195
 
164
196
  # Always exempt test methods and main()
165
197
  if c.get("is_test"):
166
- exempt.add(c["method_id"])
198
+ exempt[mid] = "test_method"
199
+ continue
167
200
  if name == "main" and "string[]" in sig:
168
- exempt.add(c["method_id"])
201
+ exempt[mid] = "main_method"
202
+ continue
169
203
 
170
- # Always exempt explicit entry-point annotations (@Test, @RequestMapping, etc.)
171
- if any(m.lstrip("@") in EXEMPT_ANNOTATIONS for m in mods):
172
- exempt.add(c["method_id"])
204
+ # Exempt methods with entry-point (strict) or all framework (normal) annotations
205
+ matched = _matched_annotation(mods, annotations_to_check)
206
+ if matched:
207
+ exempt[mid] = f"annotation:{matched}"
208
+ continue
173
209
 
174
- # Broad exemptions (only in normal mode, skipped in strict mode)
210
+ # ── Broad exemptions (only in normal mode) ────────────────────
175
211
  if not strict:
176
212
  if c.get("is_constructor"):
177
- exempt.add(c["method_id"])
213
+ exempt[mid] = "constructor"
214
+ continue
178
215
  if name in EXEMPT_CONTRACT_METHODS:
179
- exempt.add(c["method_id"])
216
+ exempt[mid] = f"contract_method:{name}"
217
+ continue
180
218
  # Java bean-ish APIs often rely on reflection/serialization.
181
- if "public" in mods and (name.startswith("get") or name.startswith("set") or name.startswith("is")):
182
- exempt.add(c["method_id"])
219
+ if "public" in mods and (
220
+ name.startswith("get") or name.startswith("set") or name.startswith("is")
221
+ ):
222
+ exempt[mid] = "bean_accessor"
223
+ continue
183
224
  # Reflection-style hooks
184
225
  if name in {"valueOf", "fromString", "builder"}:
185
- exempt.add(c["method_id"])
186
-
187
- # Exempt methods that DIRECTLY override another method (precise: only the
188
- # specific overriding method is exempted, not the entire implementing class).
189
- # NOTE: we intentionally do NOT use the class-level IMPLEMENTS relation here
190
- # because that would exempt ALL methods of every class that implements ANY
191
- # interface — in a typical Spring project that wipes out almost everything
192
- # and produces 0 dead code results.
226
+ exempt[mid] = f"reflection_hook:{name}"
227
+ continue
228
+
229
+ # Exempt methods that DIRECTLY override another method.
193
230
  # In strict mode, overrides are NOT exempted — if nobody calls the method,
194
231
  # it's flagged regardless of whether it overrides a parent.
195
232
  if not strict:
@@ -199,8 +236,12 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
199
236
  RETURN DISTINCT m.id as method_id
200
237
  """
201
238
  )
202
- exempt.update(r["method_id"] for r in override_methods)
239
+ for r in override_methods:
240
+ mid = r["method_id"]
241
+ if mid not in exempt:
242
+ exempt[mid] = "method_override"
203
243
 
244
+ # ── Build dead list ───────────────────────────────────────────────
204
245
  dead = []
205
246
  for c in candidates:
206
247
  if c["method_id"] in exempt:
@@ -219,14 +260,31 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
219
260
 
220
261
  result = dead[:limit]
221
262
 
222
- # Append stats as a sentinel entry so the MCP layer can surface them
223
- # without changing the return type. Callers should strip entries that
224
- # have a "_stats" key when iterating over method results.
263
+ # ── Stats with exemption breakdown ────────────────────────────────
264
+ reason_counts: dict[str, int] = defaultdict(int)
265
+ for reason in exempt.values():
266
+ # Group annotation reasons by prefix for readability
267
+ key = reason.split(":")[0] if ":" in reason else reason
268
+ reason_counts[key] += 1
269
+
270
+ # Sample of exempted methods (up to 10) for user inspection
271
+ exempted_sample = []
272
+ for mid, reason in list(exempt.items())[:10]:
273
+ candidate = next((c for c in candidates if c["method_id"] == mid), None)
274
+ if candidate:
275
+ exempted_sample.append({
276
+ "name": candidate.get("name"),
277
+ "signature": candidate.get("signature"),
278
+ "class_fqcn": candidate.get("class_fqcn"),
279
+ "exemption_reason": reason,
280
+ })
281
+
225
282
  if strict:
226
283
  exemption_note = (
227
- "STRICT MODE: Only test methods, main(), and explicit entry-point "
228
- "annotations are exempted. Constructors, getters/setters, "
229
- "contract methods, and overrides are NOT exempt."
284
+ "STRICT MODE: Only test methods, main(), and entry-point "
285
+ "annotations (RequestMapping, KafkaListener, Scheduled, etc.) "
286
+ "are exempted. Constructors, getters/setters, @Override, DI "
287
+ "annotations, and contract methods are NOT exempt."
230
288
  )
231
289
  else:
232
290
  exemption_note = (
@@ -242,6 +300,8 @@ def detect_dead_code(store, limit: int = 200, project: str | None = None, strict
242
300
  "dead_returned": len(result),
243
301
  "mode": "strict" if strict else "normal",
244
302
  "note": exemption_note,
303
+ "exemptions_breakdown": dict(reason_counts),
304
+ "exempted_sample": exempted_sample,
245
305
  }
246
306
  })
247
307
 
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import json as _json_mod
3
4
  import subprocess
4
5
  import sys
5
6
  import tempfile
@@ -18,6 +19,16 @@ from codespine.diff.branch_diff import compare_branches as compare_branches_anal
18
19
  from codespine.search.hybrid import hybrid_search
19
20
 
20
21
 
22
+ def _json(data: dict) -> str:
23
+ """Serialize response dict to a JSON string.
24
+
25
+ FastMCP double-serialises dict return values on many transports (SSE,
26
+ stdio) producing duplicate JSON payloads that waste ~50 K tokens/session.
27
+ Returning a pre-serialised string guarantees a single TextContent block.
28
+ """
29
+ return _json_mod.dumps(data, separators=(",", ":"))
30
+
31
+
21
32
  def _git_available(path: str) -> bool:
22
33
  """Return True if path is inside a git repository."""
23
34
  try:
@@ -44,14 +55,27 @@ def _resolve_repo_path(store, project: str | None, repo_path_provider) -> str:
44
55
  return repo_path_provider()
45
56
 
46
57
 
47
- def _no_symbols_response(note: str = "No symbols indexed. Run 'codespine analyse <path>' first.") -> dict:
48
- return {"available": False, "note": note}
58
+ def _no_symbols_response(note: str = "No symbols indexed. Run 'codespine analyse <path>' first.") -> str:
59
+ return _json({"available": False, "note": note})
49
60
 
50
61
 
51
- def _staleness_meta(store, response: dict, project: str | None = None) -> dict:
52
- """Inject index staleness metadata into every tool response.
62
+ def _parse_indexed_at(raw) -> int:
63
+ """Robustly parse an indexed_at value that may be str, int, float, or None."""
64
+ if raw is None:
65
+ return 0
66
+ try:
67
+ val = int(float(str(raw)))
68
+ # Sanity check: must look like a Unix timestamp (> year 2000)
69
+ return val if val > 946684800 else 0
70
+ except (ValueError, TypeError):
71
+ return 0
72
+
73
+
74
+ def _staleness_meta(store, response: dict, project: str | None = None) -> str:
75
+ """Inject index staleness metadata into every tool response and serialise.
53
76
 
54
77
  Adds ``index_age_seconds`` and ``stale_warning`` when the index is old.
78
+ Returns a JSON string (not a dict) to avoid FastMCP double-serialisation.
55
79
  """
56
80
  try:
57
81
  if project:
@@ -64,10 +88,11 @@ def _staleness_meta(store, response: dict, project: str | None = None) -> dict:
64
88
  "MATCH (p:Project) RETURN p.indexed_at as ts ORDER BY p.indexed_at ASC LIMIT 1"
65
89
  )
66
90
  if recs:
67
- ts = int(recs[0].get("ts") or 0)
91
+ ts = _parse_indexed_at(recs[0].get("ts"))
68
92
  if ts:
69
93
  age = int(time.time()) - ts
70
94
  response["index_age_seconds"] = age
95
+ response["indexed_at_epoch"] = ts
71
96
  if age > 3600:
72
97
  response["stale_warning"] = (
73
98
  f"Index is {age // 3600}h {(age % 3600) // 60}m old. "
@@ -75,11 +100,40 @@ def _staleness_meta(store, response: dict, project: str | None = None) -> dict:
75
100
  )
76
101
  except Exception:
77
102
  pass
78
- return response
103
+ return _json(response)
79
104
 
80
105
 
81
106
  def build_mcp_server(store, repo_path_provider):
82
- mcp = FastMCP("codespine")
107
+ _raw_mcp = FastMCP("codespine")
108
+
109
+ # ── Anti-duplicate-JSON wrapper ────────────────────────────────────
110
+ # FastMCP double-serialises dict return values on many transports,
111
+ # producing duplicate JSON payloads that waste ~50 K tokens/session.
112
+ # We intercept tool registration so every tool's dict return is
113
+ # pre-serialised to a JSON string (single TextContent block).
114
+ import functools as _functools
115
+
116
+ class _JsonMCP:
117
+ """Thin proxy that wraps tool functions to return JSON strings."""
118
+ def __getattr__(self, name):
119
+ return getattr(_raw_mcp, name)
120
+
121
+ def tool(self, *args, **kwargs):
122
+ original_decorator = _raw_mcp.tool(*args, **kwargs)
123
+ def wrapper(fn):
124
+ @_functools.wraps(fn)
125
+ def json_fn(*a, **kw):
126
+ result = fn(*a, **kw)
127
+ if isinstance(result, dict):
128
+ return _json(result)
129
+ return result
130
+ return original_decorator(json_fn)
131
+ return wrapper
132
+
133
+ def run(self):
134
+ return _raw_mcp.run()
135
+
136
+ mcp = _JsonMCP()
83
137
 
84
138
  # Background job state (per-server-instance, persists across tool calls)
85
139
  _watch: dict = {"proc": None, "path": None, "started_at": None, "interval": 30}
@@ -92,7 +146,7 @@ def build_mcp_server(store, repo_path_provider):
92
146
  @mcp.tool()
93
147
  def ping():
94
148
  """Verify the MCP server is alive. Call this first to confirm connectivity."""
95
- return {"status": "ok", "version": __version__}
149
+ return _json({"status": "ok", "version": __version__})
96
150
 
97
151
  @mcp.tool()
98
152
  def get_capabilities():
@@ -1243,21 +1297,41 @@ def build_mcp_server(store, repo_path_provider):
1243
1297
 
1244
1298
  proj_path = proj_recs[0]["path"]
1245
1299
 
1246
- # Run incremental index via subprocess to avoid read-only DB constraint
1300
+ # Run incremental index via subprocess to avoid read-only DB constraint.
1301
+ # Use Popen + communicate() with a timeout so that a hang never crashes
1302
+ # the MCP server process — the subprocess is killed gracefully instead.
1247
1303
  cmd = [
1248
1304
  sys.executable, "-m", "codespine.cli",
1249
1305
  "analyse", proj_path,
1250
1306
  "--incremental", "--no-embed", "--allow-running",
1251
1307
  ]
1252
1308
  t0 = time.time()
1253
- proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
1254
- elapsed = round(time.time() - t0, 2)
1309
+ try:
1310
+ proc = subprocess.Popen(
1311
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
1312
+ )
1313
+ stdout, stderr = proc.communicate(timeout=30)
1314
+ elapsed = round(time.time() - t0, 2)
1315
+ except subprocess.TimeoutExpired:
1316
+ proc.kill()
1317
+ proc.communicate() # reap zombie
1318
+ elapsed = round(time.time() - t0, 2)
1319
+ return {
1320
+ "available": False,
1321
+ "note": f"Re-index timed out after {elapsed}s. The project may be too large for single-file re-index. Use analyse_project() instead.",
1322
+ }
1323
+ except Exception as exc:
1324
+ elapsed = round(time.time() - t0, 2)
1325
+ return {
1326
+ "available": False,
1327
+ "note": f"Re-index error: {exc}",
1328
+ }
1255
1329
 
1256
1330
  if proc.returncode != 0:
1257
1331
  return {
1258
1332
  "available": False,
1259
1333
  "note": f"Re-index failed (code {proc.returncode})",
1260
- "error": proc.stderr.strip() or proc.stdout.strip(),
1334
+ "error": (stderr or stdout or "").strip()[:500],
1261
1335
  }
1262
1336
 
1263
1337
  return {
@@ -1278,4 +1352,4 @@ def build_mcp_server(store, repo_path_provider):
1278
1352
  records = store.query_records(query)
1279
1353
  return {"available": True, "records": records, "count": len(records)}
1280
1354
 
1281
- return mcp
1355
+ return _raw_mcp
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.5.0
3
+ Version: 0.5.1
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.5.0"
7
+ version = "0.5.1"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -1,230 +0,0 @@
1
- """Cross-module call edge linker.
2
-
3
- After all modules in a workspace have been individually indexed, each module's
4
- call resolver only sees methods within that module. This module fills the gap
5
- by scanning the graph for unresolved outgoing calls from one module that match
6
- method signatures in another module, then creating CALLS edges between them.
7
-
8
- The algorithm:
9
- 1. Build a global method catalog (method_id → name, param_count, class_fqcn)
10
- from the DB across ALL projects.
11
- 2. Build a per-project import map: for each file, record which FQCNs are
12
- imported (from the class nodes + extends/implements relations).
13
- 3. For each method M in project A, find its outgoing calls that did NOT
14
- resolve to any target. These are method invocations that tree-sitter
15
- parsed but call_resolver.py could not match (because the target was in a
16
- different module).
17
- 4. For each unresolved call, use the file's import list + the global class
18
- catalog to find candidate target methods in OTHER projects.
19
- 5. Create CALLS edges with confidence 0.6 and reason "cross_module_import".
20
-
21
- Because ParsedCall data is transient (not stored in the DB), we use a simpler
22
- heuristic: find methods in module A that have ZERO outgoing CALLS edges but
23
- are known to reference classes from other modules (via REFERENCES_TYPE or
24
- import analysis). Then attempt to link them by matching method names against
25
- the global catalog.
26
-
27
- A faster fallback strategy (implemented below):
28
- - Collect all class FQCNs per project.
29
- - For each project pair (A, B), find classes in A that IMPLEMENT/extend
30
- classes in B — these already have edges.
31
- - For method-level cross-module calls: scan for methods with 0 outgoing
32
- edges, match their name+arity against methods in other projects, and
33
- only link when the target class is imported (appears in the same file's
34
- import set via REFERENCES_TYPE edges).
35
- """
36
- from __future__ import annotations
37
-
38
- import logging
39
- from collections import defaultdict
40
-
41
- LOGGER = logging.getLogger(__name__)
42
-
43
-
44
- def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
45
- """Create CALLS edges between methods in different projects.
46
-
47
- Returns the number of new cross-module call edges created.
48
- """
49
- if project_ids is None:
50
- proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
51
- project_ids = [r["id"] for r in proj_recs]
52
-
53
- if len(project_ids) < 2:
54
- LOGGER.info("Only %d project(s) indexed — skipping cross-module linking.", len(project_ids))
55
- return 0
56
-
57
- # ── 1. Global method catalog ────────────────────────────────────────
58
- all_methods = store.query_records(
59
- """
60
- MATCH (m:Method), (c:Class), (f:File)
61
- WHERE m.class_id = c.id AND c.file_id = f.id
62
- RETURN m.id as mid, m.name as name, m.signature as sig,
63
- c.fqcn as class_fqcn, c.name as class_name,
64
- f.project_id as project_id
65
- """
66
- )
67
-
68
- # Index: (method_name, param_count) → list of (method_id, class_fqcn, project_id)
69
- name_arity_index: dict[tuple[str, int], list[dict]] = defaultdict(list)
70
- for m in all_methods:
71
- sig = m.get("sig") or ""
72
- arg_str = sig[sig.find("(") + 1: sig.rfind(")")] if "(" in sig and ")" in sig else ""
73
- pc = 0 if not arg_str.strip() else arg_str.count(",") + 1
74
- name_arity_index[(m["name"], pc)].append({
75
- "mid": m["mid"],
76
- "class_fqcn": m.get("class_fqcn", ""),
77
- "class_name": m.get("class_name", ""),
78
- "project_id": m.get("project_id", ""),
79
- })
80
-
81
- # ── 2. Class FQCN → project mapping ─────────────────────────────────
82
- all_classes = store.query_records(
83
- """
84
- MATCH (c:Class), (f:File)
85
- WHERE c.file_id = f.id
86
- RETURN c.fqcn as fqcn, c.name as name, f.project_id as project_id
87
- """
88
- )
89
- fqcn_to_project: dict[str, str] = {}
90
- class_name_to_fqcns: dict[str, list[str]] = defaultdict(list)
91
- for c in all_classes:
92
- fqcn_to_project[c["fqcn"]] = c["project_id"]
93
- class_name_to_fqcns[c["name"]].append(c["fqcn"])
94
-
95
- # ── 3. Find methods with 0 outgoing calls (potential unresolved) ────
96
- # We only look at methods that have NO outgoing CALLS edges — these are
97
- # the ones whose invocations could not be resolved within their own module.
98
- zero_out = store.query_records(
99
- """
100
- MATCH (m:Method), (c:Class), (f:File)
101
- WHERE m.class_id = c.id AND c.file_id = f.id
102
- AND NOT EXISTS { MATCH (m)-[:CALLS]->(:Method) }
103
- RETURN m.id as mid, m.name as name, m.signature as sig,
104
- c.fqcn as class_fqcn, c.id as class_id,
105
- f.project_id as project_id, f.id as file_id
106
- """
107
- )
108
-
109
- # ── 4. Build per-file import set from REFERENCES_TYPE edges ─────────
110
- # A class referencing another class implies the source file imports it.
111
- refs = store.query_records(
112
- """
113
- MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class)
114
- RETURN src.file_id as file_id, dst.fqcn as target_fqcn, dst.name as target_name
115
- """
116
- )
117
- file_imports: dict[str, set[str]] = defaultdict(set)
118
- for r in refs:
119
- file_imports[r["file_id"]].add(r.get("target_fqcn", ""))
120
- file_imports[r["file_id"]].add(r.get("target_name", ""))
121
-
122
- # Also gather IMPLEMENTS edges for broader coverage
123
- impl_refs = store.query_records(
124
- """
125
- MATCH (src:Class)-[:IMPLEMENTS]->(dst:Class)
126
- RETURN src.file_id as file_id, dst.fqcn as target_fqcn, dst.name as target_name
127
- """
128
- )
129
- for r in impl_refs:
130
- file_imports[r["file_id"]].add(r.get("target_fqcn", ""))
131
- file_imports[r["file_id"]].add(r.get("target_name", ""))
132
-
133
- # ── 5. Attempt cross-module resolution ──────────────────────────────
134
- new_edges = 0
135
- seen_pairs: set[tuple[str, str]] = set()
136
-
137
- for m in zero_out:
138
- sig = m.get("sig") or ""
139
- # We cannot know which methods THIS method calls without re-parsing.
140
- # Heuristic: skip this method if it has no imports from other projects.
141
- fid = m.get("file_id", "")
142
- src_pid = m.get("project_id", "")
143
- imported_fqcns = file_imports.get(fid, set())
144
-
145
- # Find classes from OTHER projects that this file references
146
- cross_project_classes = set()
147
- for fqcn in imported_fqcns:
148
- target_pid = fqcn_to_project.get(fqcn, "")
149
- if target_pid and target_pid != src_pid:
150
- cross_project_classes.add(fqcn)
151
-
152
- if not cross_project_classes:
153
- continue
154
-
155
- # For each cross-project class, find its methods and see if any
156
- # match common call patterns. We use name + arity matching.
157
- # Since we don't have the actual calls, we create edges from this
158
- # method to methods in the target classes that share a name.
159
- # This is conservative: we only link if there's exactly 1 candidate.
160
- for target_fqcn in cross_project_classes:
161
- target_pid = fqcn_to_project.get(target_fqcn, "")
162
- for (mname, pc), candidates in name_arity_index.items():
163
- matching = [
164
- c for c in candidates
165
- if c["class_fqcn"] == target_fqcn and c["project_id"] == target_pid
166
- ]
167
- if len(matching) == 1:
168
- src_mid = m["mid"]
169
- dst_mid = matching[0]["mid"]
170
- pair = (src_mid, dst_mid)
171
- if pair in seen_pairs:
172
- continue
173
- # Only link if the method has an outgoing reference that
174
- # plausibly invokes this target (name substring match in sig)
175
- # This avoids noise from linking random unrelated methods
176
- seen_pairs.add(pair)
177
-
178
- # For a more targeted approach: use REFERENCES_TYPE at CLASS level to
179
- # create cross-module CALLS at METHOD level where signatures match.
180
- xmod_class_pairs = store.query_records(
181
- """
182
- MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class), (sf:File), (df:File)
183
- WHERE src.file_id = sf.id AND dst.file_id = df.id
184
- AND sf.project_id <> df.project_id
185
- RETURN src.id as src_cid, dst.id as dst_cid,
186
- sf.project_id as src_pid, df.project_id as dst_pid
187
- """
188
- )
189
-
190
- for pair in xmod_class_pairs:
191
- src_methods = store.query_records(
192
- "MATCH (m:Method) WHERE m.class_id = $cid RETURN m.id as mid, m.name as name, m.signature as sig",
193
- {"cid": pair["src_cid"]},
194
- )
195
- dst_methods = store.query_records(
196
- "MATCH (m:Method) WHERE m.class_id = $cid RETURN m.id as mid, m.name as name, m.signature as sig",
197
- {"cid": pair["dst_cid"]},
198
- )
199
-
200
- # Build name+arity index for destination class
201
- dst_by_name_arity: dict[tuple[str, int], list[str]] = defaultdict(list)
202
- for dm in dst_methods:
203
- dsig = dm.get("sig") or ""
204
- darg = dsig[dsig.find("(") + 1: dsig.rfind(")")] if "(" in dsig and ")" in dsig else ""
205
- dpc = 0 if not darg.strip() else darg.count(",") + 1
206
- dst_by_name_arity[(dm["name"], dpc)].append(dm["mid"])
207
-
208
- for sm in src_methods:
209
- ssig = sm.get("sig") or ""
210
- sarg = ssig[ssig.find("(") + 1: ssig.rfind(")")] if "(" in ssig and ")" in ssig else ""
211
- spc = 0 if not sarg.strip() else sarg.count(",") + 1
212
-
213
- # Check if any destination method name appears as a substring
214
- # in the source method's signature (crude but low false-positive)
215
- for (dname, dpc), dst_ids in dst_by_name_arity.items():
216
- if len(dst_ids) != 1:
217
- continue
218
- dst_mid = dst_ids[0]
219
- edge_pair = (sm["mid"], dst_mid)
220
- if edge_pair in seen_pairs:
221
- continue
222
- seen_pairs.add(edge_pair)
223
- try:
224
- store.add_call(sm["mid"], dst_mid, 0.6, "cross_module_import")
225
- new_edges += 1
226
- except Exception as exc:
227
- LOGGER.debug("Cross-module edge failed: %s", exc)
228
-
229
- LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
230
- return new_edges
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes