codespine 0.5.1__tar.gz → 0.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {codespine-0.5.1 → codespine-0.5.3}/PKG-INFO +1 -1
  2. {codespine-0.5.1 → codespine-0.5.3}/codespine/__init__.py +1 -1
  3. {codespine-0.5.1 → codespine-0.5.3}/codespine/analysis/community.py +14 -1
  4. {codespine-0.5.1 → codespine-0.5.3}/codespine/analysis/coupling.py +8 -0
  5. codespine-0.5.3/codespine/analysis/crossmodule.py +194 -0
  6. {codespine-0.5.1 → codespine-0.5.3}/codespine/analysis/flow.py +11 -2
  7. {codespine-0.5.1 → codespine-0.5.3}/codespine/cli.py +37 -11
  8. {codespine-0.5.1 → codespine-0.5.3}/codespine/db/store.py +25 -10
  9. {codespine-0.5.1 → codespine-0.5.3}/codespine.egg-info/PKG-INFO +1 -1
  10. {codespine-0.5.1 → codespine-0.5.3}/pyproject.toml +1 -1
  11. codespine-0.5.1/codespine/analysis/crossmodule.py +0 -173
  12. {codespine-0.5.1 → codespine-0.5.3}/LICENSE +0 -0
  13. {codespine-0.5.1 → codespine-0.5.3}/README.md +0 -0
  14. {codespine-0.5.1 → codespine-0.5.3}/codespine/analysis/__init__.py +0 -0
  15. {codespine-0.5.1 → codespine-0.5.3}/codespine/analysis/context.py +0 -0
  16. {codespine-0.5.1 → codespine-0.5.3}/codespine/analysis/deadcode.py +0 -0
  17. {codespine-0.5.1 → codespine-0.5.3}/codespine/analysis/impact.py +0 -0
  18. {codespine-0.5.1 → codespine-0.5.3}/codespine/config.py +0 -0
  19. {codespine-0.5.1 → codespine-0.5.3}/codespine/db/__init__.py +0 -0
  20. {codespine-0.5.1 → codespine-0.5.3}/codespine/db/schema.py +0 -0
  21. {codespine-0.5.1 → codespine-0.5.3}/codespine/diff/__init__.py +0 -0
  22. {codespine-0.5.1 → codespine-0.5.3}/codespine/diff/branch_diff.py +0 -0
  23. {codespine-0.5.1 → codespine-0.5.3}/codespine/indexer/__init__.py +0 -0
  24. {codespine-0.5.1 → codespine-0.5.3}/codespine/indexer/call_resolver.py +0 -0
  25. {codespine-0.5.1 → codespine-0.5.3}/codespine/indexer/engine.py +0 -0
  26. {codespine-0.5.1 → codespine-0.5.3}/codespine/indexer/java_parser.py +0 -0
  27. {codespine-0.5.1 → codespine-0.5.3}/codespine/indexer/symbol_builder.py +0 -0
  28. {codespine-0.5.1 → codespine-0.5.3}/codespine/mcp/__init__.py +0 -0
  29. {codespine-0.5.1 → codespine-0.5.3}/codespine/mcp/server.py +0 -0
  30. {codespine-0.5.1 → codespine-0.5.3}/codespine/noise/__init__.py +0 -0
  31. {codespine-0.5.1 → codespine-0.5.3}/codespine/noise/blocklist.py +0 -0
  32. {codespine-0.5.1 → codespine-0.5.3}/codespine/search/__init__.py +0 -0
  33. {codespine-0.5.1 → codespine-0.5.3}/codespine/search/bm25.py +0 -0
  34. {codespine-0.5.1 → codespine-0.5.3}/codespine/search/fuzzy.py +0 -0
  35. {codespine-0.5.1 → codespine-0.5.3}/codespine/search/hybrid.py +0 -0
  36. {codespine-0.5.1 → codespine-0.5.3}/codespine/search/rrf.py +0 -0
  37. {codespine-0.5.1 → codespine-0.5.3}/codespine/search/vector.py +0 -0
  38. {codespine-0.5.1 → codespine-0.5.3}/codespine/watch/__init__.py +0 -0
  39. {codespine-0.5.1 → codespine-0.5.3}/codespine/watch/watcher.py +0 -0
  40. {codespine-0.5.1 → codespine-0.5.3}/codespine.egg-info/SOURCES.txt +0 -0
  41. {codespine-0.5.1 → codespine-0.5.3}/codespine.egg-info/dependency_links.txt +0 -0
  42. {codespine-0.5.1 → codespine-0.5.3}/codespine.egg-info/entry_points.txt +0 -0
  43. {codespine-0.5.1 → codespine-0.5.3}/codespine.egg-info/requires.txt +0 -0
  44. {codespine-0.5.1 → codespine-0.5.3}/codespine.egg-info/top_level.txt +0 -0
  45. {codespine-0.5.1 → codespine-0.5.3}/gindex.py +0 -0
  46. {codespine-0.5.1 → codespine-0.5.3}/setup.cfg +0 -0
  47. {codespine-0.5.1 → codespine-0.5.3}/tests/test_branch_diff_normalize.py +0 -0
  48. {codespine-0.5.1 → codespine-0.5.3}/tests/test_call_resolver.py +0 -0
  49. {codespine-0.5.1 → codespine-0.5.3}/tests/test_index_and_hybrid.py +0 -0
  50. {codespine-0.5.1 → codespine-0.5.3}/tests/test_java_parser.py +0 -0
  51. {codespine-0.5.1 → codespine-0.5.3}/tests/test_multimodule_index.py +0 -0
  52. {codespine-0.5.1 → codespine-0.5.3}/tests/test_search_ranking.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.5.1
3
+ Version: 0.5.3
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.5.1"
4
+ __version__ = "0.5.3"
@@ -3,8 +3,14 @@ from __future__ import annotations
3
3
  from collections import defaultdict
4
4
 
5
5
 
6
- def detect_communities(store) -> list[dict]:
6
+ def detect_communities(store, progress=None) -> list[dict]:
7
+ def _ping(msg: str) -> None:
8
+ if progress:
9
+ progress(msg)
10
+
11
+ _ping("loading symbols")
7
12
  symbols = store.query_records("MATCH (s:Symbol) RETURN s.id as id, s.fqname as fqname")
13
+ _ping(f"{len(symbols)} symbols, loading edges")
8
14
  edges = store.query_records(
9
15
  """
10
16
  MATCH (a:Method)-[:CALLS]->(b:Method)
@@ -17,6 +23,7 @@ def detect_communities(store) -> list[dict]:
17
23
  ids = [s["id"] for s in symbols]
18
24
  index_of = {sid: i for i, sid in enumerate(ids)}
19
25
 
26
+ _ping(f"{len(edges)} edges, clustering")
20
27
  membership: dict[str, int] = {}
21
28
  try:
22
29
  import igraph as ig
@@ -44,11 +51,17 @@ def detect_communities(store) -> list[dict]:
44
51
  for sid, cid in membership.items():
45
52
  grouped[cid].append(sid)
46
53
 
54
+ _ping(f"{len(grouped)} clusters, persisting")
47
55
  communities: list[dict] = []
56
+ done_clusters = 0
57
+ total_clusters = len(grouped)
48
58
  for cid, symbol_ids in grouped.items():
49
59
  cohesion = 1.0 / max(len(symbol_ids), 1)
50
60
  label = f"community_{cid}"
51
61
  store.set_community(str(cid), label, cohesion, symbol_ids)
62
+ done_clusters += 1
63
+ if done_clusters % 200 == 0 or done_clusters == total_clusters:
64
+ _ping(f"persisting {done_clusters}/{total_clusters} clusters")
52
65
  communities.append(
53
66
  {
54
67
  "community_id": str(cid),
@@ -46,11 +46,18 @@ def compute_coupling(
46
46
  months: int = SETTINGS.default_coupling_months,
47
47
  min_strength: float = SETTINGS.default_min_coupling_strength,
48
48
  min_cochanges: int = SETTINGS.default_min_cochanges,
49
+ progress=None,
49
50
  ) -> list[dict]:
51
+ def _ping(msg: str) -> None:
52
+ if progress:
53
+ progress(msg)
54
+
55
+ _ping("reading git history")
50
56
  changesets = _git_changed_file_sets(repo_path, months)
51
57
  if not changesets:
52
58
  return []
53
59
 
60
+ _ping(f"{len(changesets)} commits, computing co-changes")
54
61
  file_changes = Counter()
55
62
  co_changes: Counter[tuple[str, str]] = Counter()
56
63
 
@@ -60,6 +67,7 @@ def compute_coupling(
60
67
  for a, b in itertools.combinations(sorted(cs), 2):
61
68
  co_changes[(a, b)] += 1
62
69
 
70
+ _ping(f"{len(co_changes)} pairs, filtering and persisting")
63
71
  results = []
64
72
  for (a, b), pair_count in co_changes.items():
65
73
  denom = max(file_changes[a], file_changes[b])
@@ -0,0 +1,194 @@
1
+ """Cross-module call edge linker.
2
+
3
+ After all modules in a workspace have been individually indexed, each module's
4
+ call resolver only sees methods *within that module* (the class/method catalogs
5
+ are project-scoped). This module fills the gap by:
6
+
7
+ 1. Building a **global** class-name index across ALL projects.
8
+ 2. Scanning every method's signature and return type for class names that
9
+ belong to a DIFFERENT project.
10
+ 3. Creating CALLS edges between the referencing method and the methods of
11
+ the referenced class.
12
+
13
+ Two linking strategies are applied:
14
+
15
+ Strategy A — Name + arity match (confidence 0.7)
16
+ The referencing method M_src calls a method with the same name AND
17
+ parameter count as a method M_dst in the referenced class. This catches
18
+ delegation, interface-implementation forwarding, and adapter patterns.
19
+
20
+ Strategy B — Type-reference fallback (confidence 0.4)
21
+ For every *public, non-constructor* method in the referenced class that
22
+ received NO name-match edge, create ONE low-confidence edge from the
23
+ referencing method. This prevents methods that are genuinely used
24
+ cross-module from appearing as dead code.
25
+ """
26
+ from __future__ import annotations
27
+
28
+ import logging
29
+ import re
30
+ from collections import defaultdict
31
+
32
+ LOGGER = logging.getLogger(__name__)
33
+
34
+ # Very short class names produce too many false-positive matches when scanned
35
+ # as substrings of method signatures. Skip names ≤ this length.
36
+ _MIN_CLASS_NAME_LEN = 4
37
+
38
+ # Regex to split a Java signature into word tokens (class names, keywords, etc.)
39
+ _TOKEN_RE = re.compile(r"[A-Za-z_][A-Za-z0-9_]*")
40
+
41
+
42
+ def _param_count(sig: str) -> int:
43
+ """Count parameters from a method signature string."""
44
+ if not sig or "(" not in sig or ")" not in sig:
45
+ return 0
46
+ arg_str = sig[sig.find("(") + 1: sig.rfind(")")]
47
+ return 0 if not arg_str.strip() else arg_str.count(",") + 1
48
+
49
+
50
def link_cross_module_calls(store, project_ids: list[str] | None = None, progress=None) -> int:
    """Create CALLS edges between methods that live in different projects.

    For every method of every project in *project_ids*, the method's signature
    and return type are tokenized; any token equal to the name of a class in
    one of the *other* listed projects counts as a cross-project reference.
    For each referenced class:

    * Strategy A: a destination method with the same name and parameter count
      as the referencing method gets an edge with confidence 0.7
      (``cross_module_name_match``).
    * Strategy B: every remaining destination method that is neither a
      constructor nor ``private`` gets an edge with confidence 0.4
      (``cross_module_type_ref``).

    Args:
        store: Graph store exposing ``query_records(query, params=None)`` and
            ``add_call(src_id, dst_id, confidence, reason)``.
        project_ids: Projects to link. When ``None``, all ``Project`` nodes in
            the store are used. Fewer than two projects means nothing to link.
        progress: Optional ``(status_str) -> None`` callback for live updates.

    Returns:
        The number of new cross-module call edges created. Edges whose
        ``add_call`` raises are logged at debug level and not counted.
    """
    def _ping(msg: str) -> None:
        if progress:
            progress(msg)

    if project_ids is None:
        proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
        project_ids = [r["id"] for r in proj_recs]

    if len(project_ids) < 2:
        LOGGER.info(
            "Only %d project(s) indexed — skipping cross-module linking.",
            len(project_ids),
        )
        return 0

    # Destination classes must belong to a requested project too; the class
    # index below is global and may contain projects the caller did not list.
    in_scope = set(project_ids)

    # ── 1. Global class index ─────────────────────────────────────────
    all_classes = store.query_records(
        """
        MATCH (c:Class), (f:File)
        WHERE c.file_id = f.id
        RETURN c.id as cid, c.name as name, c.fqcn as fqcn, f.project_id as pid
        """
    )

    _ping(f"building class index ({len(all_classes)} classes)")

    # class_name → [(class_id, project_id)]
    name_to_classes: dict[str, list[tuple[str, str]]] = defaultdict(list)
    # project_id → class names long enough to be matched as tokens
    classes_per_project: dict[str, set[str]] = defaultdict(set)
    for c in all_classes:
        name_to_classes[c["name"]].append((c["cid"], c["pid"]))
        # Very short names produce too many false-positive token matches.
        if len(c["name"]) > _MIN_CLASS_NAME_LEN:
            classes_per_project[c["pid"]].add(c["name"])

    # Methods of a destination class do not change while edges are added, so
    # fetch them once per class instead of once per referencing method.
    dst_methods_cache: dict[str, list] = {}

    def _methods_of(class_id: str) -> list:
        if class_id not in dst_methods_cache:
            dst_methods_cache[class_id] = store.query_records(
                """MATCH (m:Method) WHERE m.class_id = $cid
                RETURN m.id as mid, m.name as name, m.signature as sig,
                m.modifiers as modifiers, m.is_constructor as is_ctor""",
                {"cid": class_id},
            )
        return dst_methods_cache[class_id]

    # ── 2. Scan methods for cross-project type references ─────────────
    new_edges = 0
    seen: set[tuple[str, str]] = set()

    for src_pid in project_ids:
        # Class names that belong to the OTHER requested projects.
        other_class_names: set[str] = set()
        for other_pid in project_ids:
            if other_pid != src_pid:
                other_class_names |= classes_per_project.get(other_pid, set())

        if not other_class_names:
            continue

        _ping(f"scanning {src_pid} methods")

        src_methods = store.query_records(
            """
            MATCH (m:Method), (c:Class), (f:File)
            WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
            RETURN m.id as mid, m.name as name, m.signature as sig,
                   m.return_type as rtype, c.id as cid
            """,
            {"pid": src_pid},
        )

        for sm in src_methods:
            sig = sm.get("sig") or ""
            rtype = sm.get("rtype") or ""
            tokens = set(_TOKEN_RE.findall(sig + " " + rtype))
            matched_class_names = tokens & other_class_names
            if not matched_class_names:
                continue

            sm_mid = sm["mid"]
            sm_name = sm["name"]
            sm_pc = _param_count(sig)

            for class_name in matched_class_names:
                for dst_cid, dst_pid in name_to_classes.get(class_name, []):
                    if dst_pid == src_pid or dst_pid not in in_scope:
                        continue  # same project, or a project not being linked

                    dst_methods = _methods_of(dst_cid)
                    if not dst_methods:
                        continue

                    # Strategy A: name + arity match
                    matched_dst_mids: set[str] = set()
                    for dm in dst_methods:
                        if dm["name"] != sm_name:
                            continue
                        if _param_count(dm.get("sig") or "") != sm_pc:
                            continue
                        pair = (sm_mid, dm["mid"])
                        if pair not in seen:
                            seen.add(pair)
                            try:
                                store.add_call(
                                    sm_mid, dm["mid"],
                                    0.7, "cross_module_name_match",
                                )
                                new_edges += 1
                            except Exception as exc:
                                LOGGER.debug("Name-match edge failed: %s", exc)
                        matched_dst_mids.add(dm["mid"])

                    # Strategy B: fallback for unmatched non-private dst methods
                    for dm in dst_methods:
                        if dm["mid"] in matched_dst_mids:
                            continue
                        if dm.get("is_ctor"):
                            continue
                        mods = dm.get("modifiers") or []
                        # NOTE(review): modifiers is presumably a list of
                        # strings; a single space-separated string is also
                        # accepted so "private" is still recognised.
                        if isinstance(mods, str):
                            mods = mods.split()
                        mod_strs = {str(m).strip() for m in mods}
                        if "private" in mod_strs:
                            continue

                        pair = (sm_mid, dm["mid"])
                        if pair in seen:
                            continue
                        seen.add(pair)
                        try:
                            store.add_call(
                                sm_mid, dm["mid"],
                                0.4, "cross_module_type_ref",
                            )
                            new_edges += 1
                        except Exception as exc:
                            LOGGER.debug("Fallback edge failed: %s", exc)

    _ping(f"{new_edges} edges created")
    LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
    return new_edges
@@ -48,7 +48,12 @@ def _entry_methods(store, project: str | None = None) -> list[str]:
48
48
  return [r["id"] for r in fallback]
49
49
 
50
50
 
51
- def trace_execution_flows(store, entry_symbol: str | None = None, max_depth: int = 6, project: str | None = None) -> list[dict]:
51
+ def trace_execution_flows(store, entry_symbol: str | None = None, max_depth: int = 6, project: str | None = None, progress=None) -> list[dict]:
52
+ def _ping(msg: str) -> None:
53
+ if progress:
54
+ progress(msg)
55
+
56
+ _ping("loading call graph")
52
57
  edges = store.query_records(
53
58
  """
54
59
  MATCH (a:Method)-[:CALLS]->(b:Method)
@@ -85,8 +90,11 @@ def trace_execution_flows(store, entry_symbol: str | None = None, max_depth: int
85
90
  else:
86
91
  entries = _entry_methods(store, project=project)
87
92
 
93
+ _ping(f"{len(entries)} entry points, tracing")
88
94
  flows = []
89
- for e in entries:
95
+ for idx, e in enumerate(entries):
96
+ if idx % 50 == 0 and idx > 0:
97
+ _ping(f"traced {idx}/{len(entries)} entry points")
90
98
  visited = {e}
91
99
  q = deque([(e, 0)])
92
100
  nodes_with_depth = [(e, 0)]
@@ -115,6 +123,7 @@ def trace_execution_flows(store, entry_symbol: str | None = None, max_depth: int
115
123
  # need a second round-trip to resolve raw method ID hashes.
116
124
  # Collect all unique IDs across all flows, resolve in one bulk query.
117
125
  # ------------------------------------------------------------------ #
126
+ _ping(f"{len(flows)} flows, enriching metadata")
118
127
  all_ids = list({node["symbol"] for flow in flows for node in flow["nodes"]})
119
128
  meta = _resolve_method_metadata(store, all_ids)
120
129
 
@@ -217,13 +217,25 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
217
217
  elif parse_state["indexed"] < parse_state["total"]:
218
218
  _phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
219
219
 
220
+ # ── Helper for in-place progress updates ────────────────────────────
221
+ def _live_phase(label: str, status: str) -> None:
222
+ """Overwrite the current line with a status update."""
223
+ click.echo(f"\r{label:<30} {status:<50}", nl=False)
224
+
225
+ def _finish_phase(label: str, result: str) -> None:
226
+ """Finalise an in-place phase line and move to the next line."""
227
+ click.echo(f"\r{label:<30} {result:<50}")
228
+
220
229
  # ── Cross-module call linking ──────────────────────────────────────
221
- # When multiple modules/projects are indexed, attempt to resolve call
222
- # edges that span module boundaries using import + REFERENCES_TYPE info.
223
230
  if is_multi and len(modules_with_ids) > 1:
231
+ xmod_label = "Cross-module linking..."
232
+ _live_phase(xmod_label, "running")
224
233
  xmod_pids = [pid for _, pid in modules_with_ids]
225
- xmod_edges = link_cross_module_calls(store, project_ids=xmod_pids)
226
- _phase("Cross-module linking...", f"{xmod_edges} cross-module call edges")
234
+ xmod_edges = link_cross_module_calls(
235
+ store, project_ids=xmod_pids,
236
+ progress=lambda s: _live_phase(xmod_label, s),
237
+ )
238
+ _finish_phase(xmod_label, f"{xmod_edges} cross-module call edges")
227
239
  else:
228
240
  _phase("Cross-module linking...", "skipped (single module)")
229
241
 
@@ -234,16 +246,29 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
234
246
 
235
247
  should_run_deep = deep or total_files_found <= 1200
236
248
  if should_run_deep:
237
- communities = detect_communities(store)
238
- _phase("Detecting communities...", f"{len(communities)} clusters found")
249
+ comm_label = "Detecting communities..."
250
+ _live_phase(comm_label, "running")
251
+ communities = detect_communities(
252
+ store,
253
+ progress=lambda s: _live_phase(comm_label, s),
254
+ )
255
+ _finish_phase(comm_label, f"{len(communities)} clusters found")
239
256
 
240
- flows = trace_execution_flows(store)
241
- _phase("Detecting execution flows...", f"{len(flows)} processes found")
257
+ flow_label = "Detecting execution flows..."
258
+ _live_phase(flow_label, "running")
259
+ flows = trace_execution_flows(
260
+ store,
261
+ progress=lambda s: _live_phase(flow_label, s),
262
+ )
263
+ _finish_phase(flow_label, f"{len(flows)} processes found")
242
264
 
265
+ dead_label = "Finding dead code..."
266
+ _live_phase(dead_label, "running")
243
267
  dead = detect_dead_code(store, limit=500)
244
- _phase("Finding dead code...", f"{len(dead)} unreachable symbols")
268
+ _finish_phase(dead_label, f"{len(dead)} unreachable symbols")
245
269
 
246
- # Use the root path for git coupling; fallback to the single module path
270
+ coup_label = "Analyzing git history..."
271
+ _live_phase(coup_label, "running")
247
272
  coupling_root = abs_path
248
273
  coupling_project = root_basename if is_multi else (last_result.project_id if last_result else root_basename)
249
274
  coupling_pairs = compute_coupling(
@@ -253,8 +278,9 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
253
278
  months=SETTINGS.default_coupling_months,
254
279
  min_strength=SETTINGS.default_min_coupling_strength,
255
280
  min_cochanges=SETTINGS.default_min_cochanges,
281
+ progress=lambda s: _live_phase(coup_label, s),
256
282
  )
257
- _phase("Analyzing git history...", f"{len(coupling_pairs)} coupled file pairs")
283
+ _finish_phase(coup_label, f"{len(coupling_pairs)} coupled file pairs")
258
284
  else:
259
285
  _phase("Detecting communities...", "skipped (large repo; rerun with --deep)")
260
286
  _phase("Detecting execution flows...", "skipped (large repo; rerun with --deep)")
@@ -17,7 +17,7 @@ from codespine.db.schema import ensure_schema
17
17
 
18
18
  LOGGER = logging.getLogger(__name__)
19
19
 
20
- _BUFFER_POOL_SIZE = 256 * 1024 * 1024 # 256 MB – small enough for page eviction to work
20
+ _BUFFER_POOL_SIZE = 512 * 1024 * 1024 # 512 MB – room for large community detection
21
21
 
22
22
 
23
23
  @dataclass
@@ -298,15 +298,23 @@ class GraphStore:
298
298
  )
299
299
  self.execute(query, {"src_id": src_id, "dst_id": dst_id, "confidence": confidence})
300
300
 
301
+ def _recycle_conn(self) -> None:
302
+ """Drop and recreate the per-thread connection to release buffer pages."""
303
+ try:
304
+ if hasattr(self._tls, "conn") and self._tls.conn is not None:
305
+ self._tls.conn = None
306
+ except Exception:
307
+ pass
308
+
301
309
  def set_community(self, community_id: str, label: str, cohesion: float, symbol_ids: list[str]) -> None:
302
310
  self.execute(
303
311
  "MERGE (c:Community {id: $id}) SET c.label = $label, c.cohesion = $cohesion",
304
312
  {"id": community_id, "label": label, "cohesion": cohesion},
305
313
  )
306
- # Commit in batches of 50 to keep Kuzu's buffer pool from OOMing on large
307
- # communities. A single transaction over thousands of MERGE statements exhausts
308
- # the 256 MB buffer pool before it can page out.
309
- _BATCH = 50
314
+ # Commit in batches of 500 to keep Kuzu's buffer pool from OOMing on
315
+ # large communities. After each batch, recycle the connection so Kuzu
316
+ # can release buffer pages accumulated during the transaction.
317
+ _BATCH = 500
310
318
  for i in range(0, len(symbol_ids), _BATCH):
311
319
  batch = symbol_ids[i : i + _BATCH]
312
320
  with self.transaction():
@@ -315,17 +323,24 @@ class GraphStore:
315
323
  "MATCH (s:Symbol {id: $sid}), (c:Community {id: $cid}) MERGE (s)-[:IN_COMMUNITY]->(c)",
316
324
  {"sid": sid, "cid": community_id},
317
325
  )
326
+ # Recycle connection after each batch to let Kuzu free buffer pages
327
+ self._recycle_conn()
318
328
 
319
329
  def set_flow(self, flow_id: str, entry_symbol_id: str, kind: str, symbols_at_depth: list[tuple[str, int]]) -> None:
320
330
  self.execute(
321
331
  "MERGE (f:Flow {id: $id}) SET f.entry_symbol_id = $entry, f.kind = $kind",
322
332
  {"id": flow_id, "entry": entry_symbol_id, "kind": kind},
323
333
  )
324
- for sid, depth in symbols_at_depth:
325
- self.execute(
326
- "MATCH (s:Symbol {id: $sid}), (f:Flow {id: $fid}) MERGE (s)-[:IN_FLOW {depth: $depth}]->(f)",
327
- {"sid": sid, "fid": flow_id, "depth": int(depth)},
328
- )
334
+ _BATCH = 500
335
+ for i in range(0, len(symbols_at_depth), _BATCH):
336
+ batch = symbols_at_depth[i : i + _BATCH]
337
+ with self.transaction():
338
+ for sid, depth in batch:
339
+ self.execute(
340
+ "MATCH (s:Symbol {id: $sid}), (f:Flow {id: $fid}) MERGE (s)-[:IN_FLOW {depth: $depth}]->(f)",
341
+ {"sid": sid, "fid": flow_id, "depth": int(depth)},
342
+ )
343
+ self._recycle_conn()
329
344
 
330
345
  def upsert_coupling(self, file_a: str, file_b: str, strength: float, cochanges: int, months: int) -> None:
331
346
  self.execute(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.5.1
3
+ Version: 0.5.3
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.5.1"
7
+ version = "0.5.3"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -1,173 +0,0 @@
1
- """Cross-module call edge linker.
2
-
3
- After all modules in a workspace have been individually indexed, each module's
4
- call resolver only sees methods within that module. This module fills the gap
5
- by scanning the graph for cross-project class references (REFERENCES_TYPE and
6
- IMPLEMENTS edges) and creating CALLS edges between methods where the call is
7
- plausible.
8
-
9
- Strategy A — Name + arity match (confidence 0.7)
10
- If src_class references dst_class (cross-project) and both have a method
11
- with the same name and same parameter count, create a CALLS edge. This
12
- catches delegation, interface-implementation forwarding, and adapter
13
- patterns.
14
-
15
- Strategy B — Type-reference fallback (confidence 0.4)
16
- For each *public* method in dst_class that received NO name-match edge,
17
- create ONE low-confidence edge from a representative src method (preferring
18
- one with zero outgoing calls). This prevents methods that are genuinely
19
- used cross-module from appearing as dead code.
20
- """
21
- from __future__ import annotations
22
-
23
- import logging
24
- from collections import defaultdict
25
-
26
- LOGGER = logging.getLogger(__name__)
27
-
28
-
29
- def _param_count(sig: str) -> int:
30
- """Count parameters from a method signature string."""
31
- if not sig or "(" not in sig or ")" not in sig:
32
- return 0
33
- arg_str = sig[sig.find("(") + 1: sig.rfind(")")]
34
- return 0 if not arg_str.strip() else arg_str.count(",") + 1
35
-
36
-
37
- def link_cross_module_calls(store, project_ids: list[str] | None = None) -> int:
38
- """Create CALLS edges between methods in different projects.
39
-
40
- Returns the number of new cross-module call edges created.
41
- """
42
- if project_ids is None:
43
- proj_recs = store.query_records("MATCH (p:Project) RETURN p.id as id")
44
- project_ids = [r["id"] for r in proj_recs]
45
-
46
- if len(project_ids) < 2:
47
- LOGGER.info(
48
- "Only %d project(s) indexed — skipping cross-module linking.",
49
- len(project_ids),
50
- )
51
- return 0
52
-
53
- # ── 1. Collect cross-project class pairs ──────────────────────────
54
- ref_pairs = store.query_records(
55
- """
56
- MATCH (src:Class)-[:REFERENCES_TYPE]->(dst:Class), (sf:File), (df:File)
57
- WHERE src.file_id = sf.id AND dst.file_id = df.id
58
- AND sf.project_id <> df.project_id
59
- RETURN DISTINCT src.id as src_cid, dst.id as dst_cid
60
- """
61
- )
62
- impl_pairs = store.query_records(
63
- """
64
- MATCH (src:Class)-[:IMPLEMENTS]->(dst:Class), (sf:File), (df:File)
65
- WHERE src.file_id = sf.id AND dst.file_id = df.id
66
- AND sf.project_id <> df.project_id
67
- RETURN DISTINCT src.id as src_cid, dst.id as dst_cid
68
- """
69
- )
70
-
71
- all_pairs: set[tuple[str, str]] = set()
72
- for p in ref_pairs:
73
- all_pairs.add((p["src_cid"], p["dst_cid"]))
74
- for p in impl_pairs:
75
- all_pairs.add((p["src_cid"], p["dst_cid"]))
76
-
77
- if not all_pairs:
78
- LOGGER.info("No cross-project class references found.")
79
- return 0
80
-
81
- LOGGER.info(
82
- "Cross-module: %d cross-project class pair(s) to process.",
83
- len(all_pairs),
84
- )
85
-
86
- # ── 2. Process each class pair ────────────────────────────────────
87
- new_edges = 0
88
- seen: set[tuple[str, str]] = set()
89
-
90
- for src_cid, dst_cid in all_pairs:
91
- src_methods = store.query_records(
92
- """MATCH (m:Method) WHERE m.class_id = $cid
93
- RETURN m.id as mid, m.name as name, m.signature as sig""",
94
- {"cid": src_cid},
95
- )
96
- dst_methods = store.query_records(
97
- """MATCH (m:Method) WHERE m.class_id = $cid
98
- RETURN m.id as mid, m.name as name, m.signature as sig,
99
- m.modifiers as modifiers, m.is_constructor as is_ctor""",
100
- {"cid": dst_cid},
101
- )
102
- if not src_methods or not dst_methods:
103
- continue
104
-
105
- # Build name → methods index for src class
106
- src_by_name: dict[str, list[dict]] = defaultdict(list)
107
- for sm in src_methods:
108
- src_by_name[sm["name"]].append(sm)
109
-
110
- # ── Strategy A: name + arity matching ─────────────────────────
111
- matched_dst_mids: set[str] = set()
112
-
113
- for dm in dst_methods:
114
- dm_name = dm["name"]
115
- dm_pc = _param_count(dm.get("sig") or "")
116
- candidates = src_by_name.get(dm_name, [])
117
- for sm in candidates:
118
- sm_pc = _param_count(sm.get("sig") or "")
119
- if sm_pc == dm_pc:
120
- pair = (sm["mid"], dm["mid"])
121
- if pair in seen:
122
- matched_dst_mids.add(dm["mid"])
123
- continue
124
- seen.add(pair)
125
- try:
126
- store.add_call(
127
- sm["mid"], dm["mid"], 0.7, "cross_module_name_match",
128
- )
129
- new_edges += 1
130
- matched_dst_mids.add(dm["mid"])
131
- except Exception as exc:
132
- LOGGER.debug("Name-match edge failed: %s", exc)
133
-
134
- # ── Strategy B: fallback for unmatched public dst methods ─────
135
- # Find a representative caller: prefer src methods with 0 outgoing calls
136
- fallback_src = None
137
- for sm in src_methods:
138
- out = store.query_records(
139
- "MATCH (m:Method {id: $mid})-[:CALLS]->(:Method) RETURN count(*) as n",
140
- {"mid": sm["mid"]},
141
- )
142
- if out and out[0]["n"] == 0:
143
- fallback_src = sm
144
- break
145
- if fallback_src is None and src_methods:
146
- fallback_src = src_methods[0]
147
-
148
- if fallback_src:
149
- for dm in dst_methods:
150
- if dm["mid"] in matched_dst_mids:
151
- continue
152
- # Skip constructors and private methods
153
- if dm.get("is_ctor"):
154
- continue
155
- mods = dm.get("modifiers") or []
156
- mod_strs = {str(m).strip() for m in mods} if mods else set()
157
- if "private" in mod_strs:
158
- continue
159
-
160
- pair = (fallback_src["mid"], dm["mid"])
161
- if pair in seen:
162
- continue
163
- seen.add(pair)
164
- try:
165
- store.add_call(
166
- fallback_src["mid"], dm["mid"], 0.4, "cross_module_type_ref",
167
- )
168
- new_edges += 1
169
- except Exception as exc:
170
- LOGGER.debug("Fallback edge failed: %s", exc)
171
-
172
- LOGGER.info("Cross-module linking: created %d new call edges.", new_edges)
173
- return new_edges
File without changes
File without changes
File without changes
File without changes
File without changes