codespine 0.1.5__tar.gz → 0.1.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {codespine-0.1.5 → codespine-0.1.8}/PKG-INFO +1 -1
  2. {codespine-0.1.5 → codespine-0.1.8}/codespine/__init__.py +1 -1
  3. {codespine-0.1.5 → codespine-0.1.8}/codespine/cli.py +56 -21
  4. {codespine-0.1.5 → codespine-0.1.8}/codespine/indexer/call_resolver.py +23 -13
  5. {codespine-0.1.5 → codespine-0.1.8}/codespine/indexer/engine.py +80 -30
  6. codespine-0.1.8/codespine/indexer/symbol_builder.py +35 -0
  7. {codespine-0.1.5 → codespine-0.1.8}/codespine.egg-info/PKG-INFO +1 -1
  8. {codespine-0.1.5 → codespine-0.1.8}/codespine.egg-info/SOURCES.txt +1 -0
  9. {codespine-0.1.5 → codespine-0.1.8}/pyproject.toml +1 -1
  10. codespine-0.1.8/tests/test_call_resolver.py +43 -0
  11. codespine-0.1.8/tests/test_multimodule_index.py +55 -0
  12. codespine-0.1.5/codespine/indexer/symbol_builder.py +0 -32
  13. codespine-0.1.5/tests/test_call_resolver.py +0 -30
  14. {codespine-0.1.5 → codespine-0.1.8}/LICENSE +0 -0
  15. {codespine-0.1.5 → codespine-0.1.8}/README.md +0 -0
  16. {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/__init__.py +0 -0
  17. {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/community.py +0 -0
  18. {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/context.py +0 -0
  19. {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/coupling.py +0 -0
  20. {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/deadcode.py +0 -0
  21. {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/flow.py +0 -0
  22. {codespine-0.1.5 → codespine-0.1.8}/codespine/analysis/impact.py +0 -0
  23. {codespine-0.1.5 → codespine-0.1.8}/codespine/config.py +0 -0
  24. {codespine-0.1.5 → codespine-0.1.8}/codespine/db/__init__.py +0 -0
  25. {codespine-0.1.5 → codespine-0.1.8}/codespine/db/schema.py +0 -0
  26. {codespine-0.1.5 → codespine-0.1.8}/codespine/db/store.py +0 -0
  27. {codespine-0.1.5 → codespine-0.1.8}/codespine/diff/__init__.py +0 -0
  28. {codespine-0.1.5 → codespine-0.1.8}/codespine/diff/branch_diff.py +0 -0
  29. {codespine-0.1.5 → codespine-0.1.8}/codespine/indexer/__init__.py +0 -0
  30. {codespine-0.1.5 → codespine-0.1.8}/codespine/indexer/java_parser.py +0 -0
  31. {codespine-0.1.5 → codespine-0.1.8}/codespine/mcp/__init__.py +0 -0
  32. {codespine-0.1.5 → codespine-0.1.8}/codespine/mcp/server.py +0 -0
  33. {codespine-0.1.5 → codespine-0.1.8}/codespine/noise/__init__.py +0 -0
  34. {codespine-0.1.5 → codespine-0.1.8}/codespine/noise/blocklist.py +0 -0
  35. {codespine-0.1.5 → codespine-0.1.8}/codespine/search/__init__.py +0 -0
  36. {codespine-0.1.5 → codespine-0.1.8}/codespine/search/bm25.py +0 -0
  37. {codespine-0.1.5 → codespine-0.1.8}/codespine/search/fuzzy.py +0 -0
  38. {codespine-0.1.5 → codespine-0.1.8}/codespine/search/hybrid.py +0 -0
  39. {codespine-0.1.5 → codespine-0.1.8}/codespine/search/rrf.py +0 -0
  40. {codespine-0.1.5 → codespine-0.1.8}/codespine/search/vector.py +0 -0
  41. {codespine-0.1.5 → codespine-0.1.8}/codespine/watch/__init__.py +0 -0
  42. {codespine-0.1.5 → codespine-0.1.8}/codespine/watch/watcher.py +0 -0
  43. {codespine-0.1.5 → codespine-0.1.8}/codespine.egg-info/dependency_links.txt +0 -0
  44. {codespine-0.1.5 → codespine-0.1.8}/codespine.egg-info/entry_points.txt +0 -0
  45. {codespine-0.1.5 → codespine-0.1.8}/codespine.egg-info/requires.txt +0 -0
  46. {codespine-0.1.5 → codespine-0.1.8}/codespine.egg-info/top_level.txt +0 -0
  47. {codespine-0.1.5 → codespine-0.1.8}/gindex.py +0 -0
  48. {codespine-0.1.5 → codespine-0.1.8}/setup.cfg +0 -0
  49. {codespine-0.1.5 → codespine-0.1.8}/tests/test_branch_diff_normalize.py +0 -0
  50. {codespine-0.1.5 → codespine-0.1.8}/tests/test_index_and_hybrid.py +0 -0
  51. {codespine-0.1.5 → codespine-0.1.8}/tests/test_java_parser.py +0 -0
  52. {codespine-0.1.5 → codespine-0.1.8}/tests/test_search_ranking.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.1.5
3
+ Version: 0.1.8
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.1.5"
4
+ __version__ = "0.1.8"
@@ -78,7 +78,8 @@ def main() -> None:
78
78
  @main.command()
79
79
  @click.argument("path", type=click.Path(exists=True))
80
80
  @click.option("--full/--incremental", default=True, show_default=True)
81
- def analyse(path: str, full: bool) -> None:
81
+ @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
82
+ def analyse(path: str, full: bool, deep: bool) -> None:
82
83
  """Index a local Java project."""
83
84
  if _is_running():
84
85
  click.secho("Stop MCP first ('codespine stop') to index.", fg="yellow")
@@ -89,6 +90,7 @@ def analyse(path: str, full: bool) -> None:
89
90
  store = GraphStore(read_only=False)
90
91
  indexer = JavaIndexer(store)
91
92
  parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0}
93
+ call_state = {"shown": False, "count": 0, "last_ts": 0.0}
92
94
 
93
95
  def _progress(event: str, payload: dict) -> None:
94
96
  now = time.perf_counter()
@@ -119,6 +121,29 @@ def analyse(path: str, full: bool) -> None:
119
121
  if event == "resolve_calls_start" and parse_state["shown"]:
120
122
  click.echo()
121
123
  parse_state["shown"] = False
124
+ _phase("Tracing calls...", "running")
125
+ return
126
+ if event == "resolve_calls_start":
127
+ _phase("Tracing calls...", "running")
128
+ return
129
+ if event == "resolve_calls_progress":
130
+ call_state["count"] = int(payload.get("calls_resolved", 0))
131
+ if (now - call_state["last_ts"]) >= 0.25:
132
+ click.echo(f"\rTracing calls... {call_state['count']} resolved", nl=False)
133
+ call_state["shown"] = True
134
+ call_state["last_ts"] = now
135
+ return
136
+ if event == "resolve_calls_done":
137
+ if call_state["shown"]:
138
+ click.echo()
139
+ call_state["shown"] = False
140
+ _phase("Tracing calls...", f"{int(payload.get('calls_resolved', 0))} calls resolved")
141
+ return
142
+ if event == "resolve_types_start":
143
+ _phase("Analyzing types...", "running")
144
+ return
145
+ if event == "resolve_types_done":
146
+ _phase("Analyzing types...", f"{int(payload.get('type_relationships', 0))} type relationships")
122
147
  return
123
148
 
124
149
  result = indexer.index_project(abs_path, full=full, progress=_progress)
@@ -128,27 +153,37 @@ def analyse(path: str, full: bool) -> None:
128
153
  _phase("Parsing code...", "0/0")
129
154
  elif parse_state["indexed"] < parse_state["total"]:
130
155
  _phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
131
- _phase("Tracing calls...", f"{result.calls_resolved} calls resolved")
132
- _phase("Analyzing types...", f"{result.type_relationships} type relationships")
133
-
134
- communities = detect_communities(store)
135
- _phase("Detecting communities...", f"{len(communities)} clusters found")
136
156
 
137
- flows = trace_execution_flows(store)
138
- _phase("Detecting execution flows...", f"{len(flows)} processes found")
139
-
140
- dead = detect_dead_code(store, limit=500)
141
- _phase("Finding dead code...", f"{len(dead)} unreachable symbols")
142
-
143
- coupling_pairs = compute_coupling(
144
- store,
145
- abs_path,
146
- result.project_id,
147
- months=SETTINGS.default_coupling_months,
148
- min_strength=SETTINGS.default_min_coupling_strength,
149
- min_cochanges=SETTINGS.default_min_cochanges,
150
- )
151
- _phase("Analyzing git history...", f"{len(coupling_pairs)} coupled file pairs")
157
+ communities: list[dict] = []
158
+ flows: list[dict] = []
159
+ dead: list[dict] = []
160
+ coupling_pairs: list[dict] = []
161
+
162
+ should_run_deep = deep or result.files_found <= 1200
163
+ if should_run_deep:
164
+ communities = detect_communities(store)
165
+ _phase("Detecting communities...", f"{len(communities)} clusters found")
166
+
167
+ flows = trace_execution_flows(store)
168
+ _phase("Detecting execution flows...", f"{len(flows)} processes found")
169
+
170
+ dead = detect_dead_code(store, limit=500)
171
+ _phase("Finding dead code...", f"{len(dead)} unreachable symbols")
172
+
173
+ coupling_pairs = compute_coupling(
174
+ store,
175
+ abs_path,
176
+ result.project_id,
177
+ months=SETTINGS.default_coupling_months,
178
+ min_strength=SETTINGS.default_min_coupling_strength,
179
+ min_cochanges=SETTINGS.default_min_cochanges,
180
+ )
181
+ _phase("Analyzing git history...", f"{len(coupling_pairs)} coupled file pairs")
182
+ else:
183
+ _phase("Detecting communities...", "skipped (large repo; rerun with --deep)")
184
+ _phase("Detecting execution flows...", "skipped (large repo; rerun with --deep)")
185
+ _phase("Finding dead code...", "skipped (large repo; rerun with --deep)")
186
+ _phase("Analyzing git history...", "skipped (large repo; rerun with --deep)")
152
187
 
153
188
  vector_count = store.query_records(
154
189
  """
@@ -1,9 +1,12 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from collections import defaultdict
4
+ from typing import Iterator
4
5
 
5
6
  from codespine.noise.blocklist import NOISE_METHOD_NAMES
6
7
 
8
+ MAX_FUZZY_TARGETS = 12
9
+
7
10
 
8
11
  def _simple_type_name(type_name: str | None) -> str:
9
12
  if not type_name:
@@ -53,23 +56,29 @@ def resolve_calls(
53
56
  calls: dict[str, list],
54
57
  method_context: dict[str, dict],
55
58
  class_catalog: dict[str, list[str]],
56
- ) -> list[tuple[str, str, float, str]]:
59
+ ) -> Iterator[tuple[str, str, float, str]]:
57
60
  """Resolve call names to known method ids.
58
61
 
59
- Returns tuples: (source_method_id, target_method_id, confidence, reason)
62
+ Yields tuples: (source_method_id, target_method_id, confidence, reason)
60
63
  """
61
64
  name_arity_to_method_ids: dict[tuple[str, int], list[str]] = defaultdict(list)
62
- class_method_index: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
65
+ class_method_index_by_id: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
66
+ class_method_index_by_fqcn: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
63
67
  for method_id, meta in method_catalog.items():
64
68
  key = (meta["name"], int(meta["param_count"]))
65
69
  name_arity_to_method_ids[key].append(method_id)
66
- class_method_index[meta["class_fqcn"]][key].append(method_id)
70
+ class_id = meta.get("class_id", "")
71
+ class_fqcn = meta.get("class_fqcn", "")
72
+ if class_id:
73
+ class_method_index_by_id[class_id][key].append(method_id)
74
+ if class_fqcn:
75
+ class_method_index_by_fqcn[class_fqcn][key].append(method_id)
67
76
 
68
- edges: list[tuple[str, str, float, str]] = []
69
77
  for source_id, call_sites in calls.items():
70
78
  src_meta = method_catalog.get(source_id, {})
71
79
  src_ctx = method_context.get(source_id, {})
72
- src_class = src_meta.get("class_fqcn", "")
80
+ src_class_id = src_meta.get("class_id", "") or src_ctx.get("class_id", "")
81
+ src_class_fqcn = src_meta.get("class_fqcn", "")
73
82
  local_types = src_ctx.get("local_types", {}) or {}
74
83
  field_types = src_ctx.get("field_types", {}) or {}
75
84
 
@@ -88,7 +97,7 @@ def resolve_calls(
88
97
  receiver_type = None
89
98
  receiver_is_this = False
90
99
  if receiver == "this":
91
- receiver_type = src_class
100
+ receiver_type = src_class_fqcn
92
101
  receiver_is_this = True
93
102
  elif receiver in local_types:
94
103
  receiver_type = local_types[receiver]
@@ -100,14 +109,14 @@ def resolve_calls(
100
109
  receiver_fqcn_candidates = _resolve_type_candidates(receiver_type, src_ctx, class_catalog)
101
110
 
102
111
  for fqcn in receiver_fqcn_candidates:
103
- targets.extend(class_method_index.get(fqcn, {}).get(key, []))
112
+ targets.extend(class_method_index_by_fqcn.get(fqcn, {}).get(key, []))
104
113
 
105
114
  if targets:
106
115
  confidence = 1.0 if receiver_is_this else 0.8
107
116
  reason = "receiver_this_exact" if receiver_is_this else "receiver_method_match"
108
117
 
109
118
  if not targets:
110
- in_class = class_method_index.get(src_class, {}).get(key, [])
119
+ in_class = class_method_index_by_id.get(src_class_id, {}).get(key, [])
111
120
  if in_class:
112
121
  targets = in_class
113
122
  confidence = 1.0
@@ -126,12 +135,13 @@ def resolve_calls(
126
135
  confidence = 1.0
127
136
  reason = "exact_name_arity_unique"
128
137
  elif len(targets) > 1:
138
+ if len(targets) > MAX_FUZZY_TARGETS:
139
+ # Avoid exploding low-confidence edges in large repos.
140
+ continue
129
141
  confidence = 0.5
130
142
  reason = "fuzzy_name_arity_ambiguous"
131
143
 
132
144
  if not targets:
133
145
  continue
134
- for target_id in targets:
135
- edges.append((source_id, target_id, confidence, reason))
136
-
137
- return edges
146
+ for target_id in set(targets):
147
+ yield source_id, target_id, confidence, reason
@@ -70,6 +70,7 @@ class JavaIndexer:
70
70
  method_calls: dict[str, list] = {}
71
71
  method_context: dict[str, dict] = {}
72
72
  class_catalog: dict[str, list[str]] = self._existing_class_catalog(project_id) if not full else {}
73
+ fqcn_to_class_ids: dict[str, list[str]] = self._existing_class_ids_by_fqcn(project_id) if not full else {}
73
74
  class_meta: dict[str, dict] = {}
74
75
  class_methods: dict[str, dict[str, str]] = self._existing_class_methods(project_id) if not full else {}
75
76
 
@@ -84,6 +85,7 @@ class JavaIndexer:
84
85
  for file_path in to_reindex:
85
86
  rel_path = os.path.relpath(file_path, root_path)
86
87
  is_test = "src/test/java" in file_path.replace("\\", "/")
88
+ scope = self._scope_from_rel_path(rel_path)
87
89
 
88
90
  with open(file_path, "rb") as f:
89
91
  source = f.read()
@@ -96,20 +98,25 @@ class JavaIndexer:
96
98
  self.store.upsert_file(f_id, file_path, project_id, is_test, digest_bytes(source))
97
99
 
98
100
  for cls in parsed.classes:
99
- c_id = class_id(cls.fqcn)
101
+ c_id = class_id(cls.fqcn, scope)
100
102
  self.store.upsert_class(c_id, cls.fqcn, cls.name, cls.package, f_id)
101
103
  class_catalog.setdefault(cls.name, [])
102
104
  if cls.fqcn not in class_catalog[cls.name]:
103
105
  class_catalog[cls.name].append(cls.fqcn)
104
- class_meta[cls.fqcn] = {
106
+ fqcn_to_class_ids.setdefault(cls.fqcn, [])
107
+ if c_id not in fqcn_to_class_ids[cls.fqcn]:
108
+ fqcn_to_class_ids[cls.fqcn].append(c_id)
109
+ class_meta[c_id] = {
110
+ "fqcn": cls.fqcn,
105
111
  "package": parsed.package,
106
112
  "imports": parsed.imports,
107
113
  "extends": cls.extends,
108
114
  "interfaces": cls.interfaces,
115
+ "scope": scope,
109
116
  }
110
- class_methods.setdefault(cls.fqcn, {})
117
+ class_methods.setdefault(c_id, {})
111
118
 
112
- cls_symbol_id = symbol_id("class", cls.fqcn)
119
+ cls_symbol_id = symbol_id("class", cls.fqcn, scope)
113
120
  self.store.upsert_symbol(
114
121
  symbol_id=cls_symbol_id,
115
122
  kind="class",
@@ -123,7 +130,7 @@ class JavaIndexer:
123
130
  classes_indexed += 1
124
131
 
125
132
  for method in cls.methods:
126
- m_id = method_id(cls.fqcn, method.signature)
133
+ m_id = method_id(cls.fqcn, method.signature, scope)
127
134
  self.store.upsert_method(
128
135
  method_id=m_id,
129
136
  class_id=c_id,
@@ -136,7 +143,7 @@ class JavaIndexer:
136
143
  )
137
144
 
138
145
  fqname = f"{cls.fqcn}#{method.signature}"
139
- m_symbol_id = symbol_id("method", fqname)
146
+ m_symbol_id = symbol_id("method", fqname, scope)
140
147
  self.store.upsert_symbol(
141
148
  symbol_id=m_symbol_id,
142
149
  kind="method",
@@ -154,16 +161,18 @@ class JavaIndexer:
154
161
  "name": method.name,
155
162
  "param_count": len(method.parameter_types),
156
163
  "class_fqcn": cls.fqcn,
164
+ "class_id": c_id,
157
165
  }
158
166
  method_calls[m_id] = method.calls
159
167
  method_context[m_id] = {
168
+ "class_id": c_id,
160
169
  "class_fqcn": cls.fqcn,
161
170
  "local_types": method.local_types,
162
171
  "field_types": cls.field_types,
163
172
  "imports": parsed.imports,
164
173
  "package": parsed.package,
165
174
  }
166
- class_methods[cls.fqcn][method.signature] = m_id
175
+ class_methods[c_id][method.signature] = m_id
167
176
  files_indexed += 1
168
177
  self._emit(
169
178
  progress,
@@ -177,10 +186,17 @@ class JavaIndexer:
177
186
  for src, dst, confidence, reason in resolve_calls(method_catalog, method_calls, method_context, class_catalog):
178
187
  self.store.add_call(src, dst, confidence, reason)
179
188
  calls_resolved += 1
189
+ if calls_resolved % 2000 == 0:
190
+ self._emit(progress, "resolve_calls_progress", calls_resolved=calls_resolved)
180
191
  self._emit(progress, "resolve_calls_done", calls_resolved=calls_resolved)
181
192
 
182
193
  self._emit(progress, "resolve_types_start")
183
- type_relationships += self._build_inheritance_edges(class_meta, class_catalog, class_methods)
194
+ type_relationships += self._build_inheritance_edges(
195
+ class_meta,
196
+ class_catalog,
197
+ class_methods,
198
+ fqcn_to_class_ids,
199
+ )
184
200
  self._emit(progress, "resolve_types_done", type_relationships=type_relationships)
185
201
 
186
202
  return IndexResult(
@@ -223,7 +239,7 @@ class JavaIndexer:
223
239
  """
224
240
  MATCH (m:Method), (c:Class), (f:File)
225
241
  WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
226
- RETURN m.id as method_id, m.name as name, m.signature as signature, c.fqcn as class_fqcn
242
+ RETURN m.id as method_id, m.name as name, m.signature as signature, c.fqcn as class_fqcn, c.id as class_id
227
243
  """,
228
244
  {"pid": project_id},
229
245
  )
@@ -237,9 +253,30 @@ class JavaIndexer:
237
253
  "name": r.get("name", ""),
238
254
  "param_count": param_count,
239
255
  "class_fqcn": r.get("class_fqcn", ""),
256
+ "class_id": r.get("class_id", ""),
240
257
  }
241
258
  return out
242
259
 
260
+ def _existing_class_ids_by_fqcn(self, project_id: str) -> dict[str, list[str]]:
261
+ recs = self.store.query_records(
262
+ """
263
+ MATCH (c:Class), (f:File)
264
+ WHERE c.file_id = f.id AND f.project_id = $pid
265
+ RETURN c.fqcn as fqcn, c.id as class_id
266
+ """,
267
+ {"pid": project_id},
268
+ )
269
+ out: dict[str, list[str]] = {}
270
+ for r in recs:
271
+ fqcn = r.get("fqcn", "")
272
+ cid = r.get("class_id", "")
273
+ if not fqcn or not cid:
274
+ continue
275
+ out.setdefault(fqcn, [])
276
+ if cid not in out[fqcn]:
277
+ out[fqcn].append(cid)
278
+ return out
279
+
243
280
  def _existing_class_catalog(self, project_id: str) -> dict[str, list[str]]:
244
281
  recs = self.store.query_records(
245
282
  """
@@ -261,14 +298,17 @@ class JavaIndexer:
261
298
  """
262
299
  MATCH (m:Method), (c:Class), (f:File)
263
300
  WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
264
- RETURN c.fqcn as fqcn, m.signature as signature, m.id as method_id
301
+ RETURN c.id as class_id, m.signature as signature, m.id as method_id
265
302
  """,
266
303
  {"pid": project_id},
267
304
  )
268
305
  out: dict[str, dict[str, str]] = {}
269
306
  for r in recs:
270
- out.setdefault(r["fqcn"], {})
271
- out[r["fqcn"]][r["signature"]] = r["method_id"]
307
+ class_key = r.get("class_id")
308
+ if not class_key:
309
+ continue
310
+ out.setdefault(class_key, {})
311
+ out[class_key][r["signature"]] = r["method_id"]
272
312
  return out
273
313
 
274
314
  @staticmethod
@@ -300,34 +340,34 @@ class JavaIndexer:
300
340
  class_meta: dict[str, dict],
301
341
  class_catalog: dict[str, list[str]],
302
342
  class_methods: dict[str, dict[str, str]],
343
+ fqcn_to_class_ids: dict[str, list[str]],
303
344
  ) -> int:
304
345
  rel_count = 0
305
- for fqcn, meta in class_meta.items():
306
- src_id = class_id(fqcn)
346
+ for src_id, meta in class_meta.items():
307
347
  ctx = {"package": meta.get("package", ""), "imports": meta.get("imports", [])}
308
348
 
309
349
  parent_candidates = self._resolve_type_candidates(meta.get("extends"), ctx, class_catalog)
310
350
  for parent_fqcn in parent_candidates:
311
- dst_id = class_id(parent_fqcn)
312
- self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 0.8)
313
- rel_count += 1
314
- for sig, method_id in class_methods.get(fqcn, {}).items():
315
- parent_method = class_methods.get(parent_fqcn, {}).get(sig)
316
- if parent_method:
317
- self.store.add_reference("OVERRIDES", "Method", method_id, "Method", parent_method, 1.0)
318
- rel_count += 1
351
+ for dst_id in fqcn_to_class_ids.get(parent_fqcn, []):
352
+ self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 0.8)
353
+ rel_count += 1
354
+ for sig, method_id in class_methods.get(src_id, {}).items():
355
+ parent_method = class_methods.get(dst_id, {}).get(sig)
356
+ if parent_method:
357
+ self.store.add_reference("OVERRIDES", "Method", method_id, "Method", parent_method, 1.0)
358
+ rel_count += 1
319
359
 
320
360
  for iface in meta.get("interfaces", []):
321
361
  iface_candidates = self._resolve_type_candidates(iface, ctx, class_catalog)
322
362
  for iface_fqcn in iface_candidates:
323
- dst_id = class_id(iface_fqcn)
324
- self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 1.0)
325
- rel_count += 1
326
- for sig, method_id in class_methods.get(fqcn, {}).items():
327
- iface_method = class_methods.get(iface_fqcn, {}).get(sig)
328
- if iface_method:
329
- self.store.add_reference("OVERRIDES", "Method", method_id, "Method", iface_method, 1.0)
330
- rel_count += 1
363
+ for dst_id in fqcn_to_class_ids.get(iface_fqcn, []):
364
+ self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 1.0)
365
+ rel_count += 1
366
+ for sig, method_id in class_methods.get(src_id, {}).items():
367
+ iface_method = class_methods.get(dst_id, {}).get(sig)
368
+ if iface_method:
369
+ self.store.add_reference("OVERRIDES", "Method", method_id, "Method", iface_method, 1.0)
370
+ rel_count += 1
331
371
  return rel_count
332
372
 
333
373
  @staticmethod
@@ -335,3 +375,13 @@ class JavaIndexer:
335
375
  if progress is None:
336
376
  return
337
377
  progress(event, payload)
378
+
379
+ @staticmethod
380
+ def _scope_from_rel_path(rel_path: str) -> str:
381
+ normalized = rel_path.replace("\\", "/")
382
+ if "/java/" in normalized:
383
+ return normalized.split("/java/", 1)[0]
384
+ if "/src/" in normalized:
385
+ return normalized.split("/src/", 1)[0]
386
+ scope = os.path.dirname(normalized).strip()
387
+ return scope or "."
@@ -0,0 +1,35 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from dataclasses import dataclass
5
+
6
+
7
@dataclass
class SymbolRef:
    """Bundle of the four string identifiers that locate one symbol."""

    # Identifier of the symbol itself.
    symbol_id: str
    # Identifier of the associated method.
    method_id: str
    # Identifier of the associated class.
    class_id: str
    # Identifier of the associated file.
    file_id: str
13
+
14
+
15
def digest_bytes(payload: bytes) -> str:
    """Return the hexadecimal SHA-1 digest of *payload*."""
    # The digest is a content fingerprint, not a security primitive; saying so
    # keeps the call usable on builds that restrict SHA-1 for security use.
    return hashlib.sha1(payload, usedforsecurity=False).hexdigest()
17
+
18
+
19
def file_id(project_id: str, rel_path: str) -> str:
    """Return the hex SHA-1 of ``"{project_id}:{rel_path}"`` (UTF-8 encoded)."""
    key = f"{project_id}:{rel_path}"
    # Identifier hash only; usedforsecurity=False keeps it working where
    # SHA-1 is disabled for security purposes.
    return hashlib.sha1(key.encode("utf-8"), usedforsecurity=False).hexdigest()
21
+
22
+
23
+ def class_id(fqcn: str, scope: str | None = None) -> str:
24
+ key = f"{scope}::{fqcn}" if scope else fqcn
25
+ return hashlib.sha1(key.encode("utf-8")).hexdigest()
26
+
27
+
28
+ def method_id(fqcn: str, signature: str, scope: str | None = None) -> str:
29
+ key = f"{scope}::{fqcn}#{signature}" if scope else f"{fqcn}#{signature}"
30
+ return hashlib.sha1(key.encode("utf-8")).hexdigest()
31
+
32
+
33
+ def symbol_id(kind: str, fqname: str, scope: str | None = None) -> str:
34
+ key = f"{kind}:{scope}:{fqname}" if scope else f"{kind}:{fqname}"
35
+ return hashlib.sha1(key.encode("utf-8")).hexdigest()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.1.5
3
+ Version: 0.1.8
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -44,4 +44,5 @@ tests/test_branch_diff_normalize.py
44
44
  tests/test_call_resolver.py
45
45
  tests/test_index_and_hybrid.py
46
46
  tests/test_java_parser.py
47
+ tests/test_multimodule_index.py
47
48
  tests/test_search_ranking.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.1.5"
7
+ version = "0.1.8"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -0,0 +1,43 @@
1
+ from types import SimpleNamespace
2
+
3
+ from codespine.indexer.call_resolver import resolve_calls
4
+
5
+
6
def test_resolver_prefers_receiver_type_and_arity():
    """A ``this`` call matches by name and arity; a typed local matches via its receiver type."""
    service = {"class_id": "c_service", "class_fqcn": "com.example.Service"}
    repo = {"class_id": "c_repo", "class_fqcn": "com.example.Repo"}

    method_catalog = {
        "src": {"name": "entry", "param_count": 0, **service, "signature": "entry()"},
        "m1": {"name": "run", "param_count": 0, **service, "signature": "run()"},
        # Same name as m1 but a different arity; must not be selected.
        "m2": {"name": "run", "param_count": 1, **service, "signature": "run(String)"},
        "m3": {"name": "save", "param_count": 0, **repo, "signature": "save()"},
    }
    call_sites = [
        SimpleNamespace(name="run", receiver="this", arg_count=0),
        SimpleNamespace(name="save", receiver="repo", arg_count=0),
    ]
    method_context = {
        "src": {**service, "local_types": {"repo": "Repo"}, "field_types": {}},
    }
    class_catalog = {"Service": ["com.example.Service"], "Repo": ["com.example.Repo"]}

    # resolve_calls is consumed through list() so membership can be checked repeatedly.
    edges = list(resolve_calls(method_catalog, {"src": call_sites}, method_context, class_catalog))

    assert ("src", "m1", 1.0, "receiver_this_exact") in edges
    assert ("src", "m3", 0.8, "receiver_method_match") in edges
@@ -0,0 +1,55 @@
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ pytest.importorskip("kuzu")
6
+ pytest.importorskip("tree_sitter_java")
7
+
8
+ from codespine.db.store import GraphStore
9
+ from codespine.indexer.engine import JavaIndexer
10
+
11
+
12
+ def _write_java(path: Path, content: str) -> None:
13
+ path.parent.mkdir(parents=True, exist_ok=True)
14
+ path.write_text(content, encoding="utf-8")
15
+
16
+
17
def test_multimodule_duplicate_fqcn_is_indexed_without_collision(tmp_path: Path):
    """Two modules declaring the same FQCN must produce two distinct Class rows."""
    package_dir = ("src", "main", "java", "com", "example")

    module_a_file = tmp_path.joinpath("module-a", *package_dir, "App.java")
    _write_java(
        module_a_file,
        """
        package com.example;
        public class App { public void fromA() {} }
        """,
    )
    module_b_file = tmp_path.joinpath("module-b", *package_dir, "App.java")
    _write_java(
        module_b_file,
        """
        package com.example;
        public class App { public void fromB() {} }
        """,
    )

    store = GraphStore(read_only=False)
    indexer = JavaIndexer(store)
    result = indexer.index_project(str(tmp_path), full=True)

    class_query = """
        MATCH (c:Class), (f:File)
        WHERE c.file_id = f.id AND f.project_id = $pid AND c.fqcn = $fqcn
        RETURN c.id as id, f.path as path
        """
    classes = store.query_records(class_query, {"pid": result.project_id, "fqcn": "com.example.App"})

    method_query = """
        MATCH (m:Method), (c:Class), (f:File)
        WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
        RETURN m.name as name
        """
    methods = store.query_records(method_query, {"pid": result.project_id})

    # Both copies of the class must be present and carry different ids.
    assert len(classes) == 2
    distinct_ids = {row["id"] for row in classes}
    assert len(distinct_ids) == 2
    # Methods from both modules must be indexed.
    method_names = {row["name"] for row in methods}
    assert {"fromA", "fromB"}.issubset(method_names)
@@ -1,32 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import hashlib
4
- from dataclasses import dataclass
5
-
6
-
7
- @dataclass
8
- class SymbolRef:
9
- symbol_id: str
10
- method_id: str
11
- class_id: str
12
- file_id: str
13
-
14
-
15
- def digest_bytes(payload: bytes) -> str:
16
- return hashlib.sha1(payload).hexdigest()
17
-
18
-
19
- def file_id(project_id: str, rel_path: str) -> str:
20
- return hashlib.sha1(f"{project_id}:{rel_path}".encode("utf-8")).hexdigest()
21
-
22
-
23
- def class_id(fqcn: str) -> str:
24
- return hashlib.sha1(fqcn.encode("utf-8")).hexdigest()
25
-
26
-
27
- def method_id(fqcn: str, signature: str) -> str:
28
- return hashlib.sha1(f"{fqcn}#{signature}".encode("utf-8")).hexdigest()
29
-
30
-
31
- def symbol_id(kind: str, fqname: str) -> str:
32
- return hashlib.sha1(f"{kind}:{fqname}".encode("utf-8")).hexdigest()
@@ -1,30 +0,0 @@
1
- from types import SimpleNamespace
2
-
3
- from codespine.indexer.call_resolver import resolve_calls
4
-
5
-
6
- def test_resolver_prefers_receiver_type_and_arity():
7
- method_catalog = {
8
- "src": {"name": "entry", "param_count": 0, "class_fqcn": "com.example.Service", "signature": "entry()"},
9
- "m1": {"name": "run", "param_count": 0, "class_fqcn": "com.example.Service", "signature": "run()"},
10
- "m2": {"name": "run", "param_count": 1, "class_fqcn": "com.example.Service", "signature": "run(String)"},
11
- "m3": {"name": "save", "param_count": 0, "class_fqcn": "com.example.Repo", "signature": "save()"},
12
- }
13
- calls = {
14
- "src": [
15
- SimpleNamespace(name="run", receiver="this", arg_count=0),
16
- SimpleNamespace(name="save", receiver="repo", arg_count=0),
17
- ]
18
- }
19
- method_context = {
20
- "src": {
21
- "class_fqcn": "com.example.Service",
22
- "local_types": {"repo": "Repo"},
23
- "field_types": {},
24
- }
25
- }
26
- class_catalog = {"Service": ["com.example.Service"], "Repo": ["com.example.Repo"]}
27
-
28
- out = resolve_calls(method_catalog, calls, method_context, class_catalog)
29
- assert ("src", "m1", 1.0, "receiver_this_exact") in out
30
- assert ("src", "m3", 0.8, "receiver_method_match") in out
File without changes
File without changes
File without changes
File without changes
File without changes