codespine 0.1.7__tar.gz → 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. {codespine-0.1.7 → codespine-0.1.9}/PKG-INFO +1 -1
  2. {codespine-0.1.7 → codespine-0.1.9}/codespine/__init__.py +1 -1
  3. {codespine-0.1.7 → codespine-0.1.9}/codespine/cli.py +4 -3
  4. {codespine-0.1.7 → codespine-0.1.9}/codespine/config.py +1 -0
  5. {codespine-0.1.7 → codespine-0.1.9}/codespine/indexer/call_resolver.py +13 -6
  6. {codespine-0.1.7 → codespine-0.1.9}/codespine/indexer/engine.py +210 -50
  7. codespine-0.1.9/codespine/indexer/symbol_builder.py +35 -0
  8. {codespine-0.1.7 → codespine-0.1.9}/codespine.egg-info/PKG-INFO +1 -1
  9. {codespine-0.1.7 → codespine-0.1.9}/codespine.egg-info/SOURCES.txt +1 -0
  10. {codespine-0.1.7 → codespine-0.1.9}/pyproject.toml +1 -1
  11. codespine-0.1.9/tests/test_call_resolver.py +43 -0
  12. {codespine-0.1.7 → codespine-0.1.9}/tests/test_index_and_hybrid.py +13 -0
  13. codespine-0.1.9/tests/test_multimodule_index.py +55 -0
  14. codespine-0.1.7/codespine/indexer/symbol_builder.py +0 -32
  15. codespine-0.1.7/tests/test_call_resolver.py +0 -30
  16. {codespine-0.1.7 → codespine-0.1.9}/LICENSE +0 -0
  17. {codespine-0.1.7 → codespine-0.1.9}/README.md +0 -0
  18. {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/__init__.py +0 -0
  19. {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/community.py +0 -0
  20. {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/context.py +0 -0
  21. {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/coupling.py +0 -0
  22. {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/deadcode.py +0 -0
  23. {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/flow.py +0 -0
  24. {codespine-0.1.7 → codespine-0.1.9}/codespine/analysis/impact.py +0 -0
  25. {codespine-0.1.7 → codespine-0.1.9}/codespine/db/__init__.py +0 -0
  26. {codespine-0.1.7 → codespine-0.1.9}/codespine/db/schema.py +0 -0
  27. {codespine-0.1.7 → codespine-0.1.9}/codespine/db/store.py +0 -0
  28. {codespine-0.1.7 → codespine-0.1.9}/codespine/diff/__init__.py +0 -0
  29. {codespine-0.1.7 → codespine-0.1.9}/codespine/diff/branch_diff.py +0 -0
  30. {codespine-0.1.7 → codespine-0.1.9}/codespine/indexer/__init__.py +0 -0
  31. {codespine-0.1.7 → codespine-0.1.9}/codespine/indexer/java_parser.py +0 -0
  32. {codespine-0.1.7 → codespine-0.1.9}/codespine/mcp/__init__.py +0 -0
  33. {codespine-0.1.7 → codespine-0.1.9}/codespine/mcp/server.py +0 -0
  34. {codespine-0.1.7 → codespine-0.1.9}/codespine/noise/__init__.py +0 -0
  35. {codespine-0.1.7 → codespine-0.1.9}/codespine/noise/blocklist.py +0 -0
  36. {codespine-0.1.7 → codespine-0.1.9}/codespine/search/__init__.py +0 -0
  37. {codespine-0.1.7 → codespine-0.1.9}/codespine/search/bm25.py +0 -0
  38. {codespine-0.1.7 → codespine-0.1.9}/codespine/search/fuzzy.py +0 -0
  39. {codespine-0.1.7 → codespine-0.1.9}/codespine/search/hybrid.py +0 -0
  40. {codespine-0.1.7 → codespine-0.1.9}/codespine/search/rrf.py +0 -0
  41. {codespine-0.1.7 → codespine-0.1.9}/codespine/search/vector.py +0 -0
  42. {codespine-0.1.7 → codespine-0.1.9}/codespine/watch/__init__.py +0 -0
  43. {codespine-0.1.7 → codespine-0.1.9}/codespine/watch/watcher.py +0 -0
  44. {codespine-0.1.7 → codespine-0.1.9}/codespine.egg-info/dependency_links.txt +0 -0
  45. {codespine-0.1.7 → codespine-0.1.9}/codespine.egg-info/entry_points.txt +0 -0
  46. {codespine-0.1.7 → codespine-0.1.9}/codespine.egg-info/requires.txt +0 -0
  47. {codespine-0.1.7 → codespine-0.1.9}/codespine.egg-info/top_level.txt +0 -0
  48. {codespine-0.1.7 → codespine-0.1.9}/gindex.py +0 -0
  49. {codespine-0.1.7 → codespine-0.1.9}/setup.cfg +0 -0
  50. {codespine-0.1.7 → codespine-0.1.9}/tests/test_branch_diff_normalize.py +0 -0
  51. {codespine-0.1.7 → codespine-0.1.9}/tests/test_java_parser.py +0 -0
  52. {codespine-0.1.7 → codespine-0.1.9}/tests/test_search_ranking.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.1.7"
4
+ __version__ = "0.1.9"
@@ -77,7 +77,7 @@ def main() -> None:
77
77
 
78
78
  @main.command()
79
79
  @click.argument("path", type=click.Path(exists=True))
80
- @click.option("--full/--incremental", default=True, show_default=True)
80
+ @click.option("--full/--incremental", default=False, show_default=True)
81
81
  @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
82
82
  def analyse(path: str, full: bool, deep: bool) -> None:
83
83
  """Index a local Java project."""
@@ -89,7 +89,7 @@ def analyse(path: str, full: bool, deep: bool) -> None:
89
89
  abs_path = os.path.abspath(path)
90
90
  store = GraphStore(read_only=False)
91
91
  indexer = JavaIndexer(store)
92
- parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0}
92
+ parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0, "printed_zero": False}
93
93
  call_state = {"shown": False, "count": 0, "last_ts": 0.0}
94
94
 
95
95
  def _progress(event: str, payload: dict) -> None:
@@ -105,6 +105,7 @@ def analyse(path: str, full: bool, deep: bool) -> None:
105
105
  _phase("Index mode...", f"{mode} ({to_index} files to index, {deleted} deleted)")
106
106
  if to_index == 0:
107
107
  _phase("Parsing code...", "0/0")
108
+ parse_state["printed_zero"] = True
108
109
  return
109
110
  if event == "parse_progress":
110
111
  indexed = int(payload.get("indexed", 0))
@@ -149,7 +150,7 @@ def analyse(path: str, full: bool, deep: bool) -> None:
149
150
  result = indexer.index_project(abs_path, full=full, progress=_progress)
150
151
  if parse_state["shown"]:
151
152
  click.echo()
152
- if parse_state["total"] == 0:
153
+ if parse_state["total"] == 0 and not parse_state["printed_zero"]:
153
154
  _phase("Parsing code...", "0/0")
154
155
  elif parse_state["indexed"] < parse_state["total"]:
155
156
  _phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
@@ -8,6 +8,7 @@ class Settings:
8
8
  pid_file: str = os.path.expanduser("~/.codespine.pid")
9
9
  log_file: str = os.path.expanduser("~/.codespine.log")
10
10
  embedding_cache_db: str = os.path.expanduser("~/.codespine_embedding_cache.sqlite3")
11
+ index_meta_dir: str = os.path.expanduser("~/.codespine_index_meta")
11
12
  embedding_model: str = "BAAI/bge-small-en-v1.5"
12
13
  vector_dim: int = 384
13
14
  rrf_k: int = 60
@@ -62,16 +62,23 @@ def resolve_calls(
62
62
  Yields tuples: (source_method_id, target_method_id, confidence, reason)
63
63
  """
64
64
  name_arity_to_method_ids: dict[tuple[str, int], list[str]] = defaultdict(list)
65
- class_method_index: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
65
+ class_method_index_by_id: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
66
+ class_method_index_by_fqcn: dict[str, dict[tuple[str, int], list[str]]] = defaultdict(lambda: defaultdict(list))
66
67
  for method_id, meta in method_catalog.items():
67
68
  key = (meta["name"], int(meta["param_count"]))
68
69
  name_arity_to_method_ids[key].append(method_id)
69
- class_method_index[meta["class_fqcn"]][key].append(method_id)
70
+ class_id = meta.get("class_id", "")
71
+ class_fqcn = meta.get("class_fqcn", "")
72
+ if class_id:
73
+ class_method_index_by_id[class_id][key].append(method_id)
74
+ if class_fqcn:
75
+ class_method_index_by_fqcn[class_fqcn][key].append(method_id)
70
76
 
71
77
  for source_id, call_sites in calls.items():
72
78
  src_meta = method_catalog.get(source_id, {})
73
79
  src_ctx = method_context.get(source_id, {})
74
- src_class = src_meta.get("class_fqcn", "")
80
+ src_class_id = src_meta.get("class_id", "") or src_ctx.get("class_id", "")
81
+ src_class_fqcn = src_meta.get("class_fqcn", "")
75
82
  local_types = src_ctx.get("local_types", {}) or {}
76
83
  field_types = src_ctx.get("field_types", {}) or {}
77
84
 
@@ -90,7 +97,7 @@ def resolve_calls(
90
97
  receiver_type = None
91
98
  receiver_is_this = False
92
99
  if receiver == "this":
93
- receiver_type = src_class
100
+ receiver_type = src_class_fqcn
94
101
  receiver_is_this = True
95
102
  elif receiver in local_types:
96
103
  receiver_type = local_types[receiver]
@@ -102,14 +109,14 @@ def resolve_calls(
102
109
  receiver_fqcn_candidates = _resolve_type_candidates(receiver_type, src_ctx, class_catalog)
103
110
 
104
111
  for fqcn in receiver_fqcn_candidates:
105
- targets.extend(class_method_index.get(fqcn, {}).get(key, []))
112
+ targets.extend(class_method_index_by_fqcn.get(fqcn, {}).get(key, []))
106
113
 
107
114
  if targets:
108
115
  confidence = 1.0 if receiver_is_this else 0.8
109
116
  reason = "receiver_this_exact" if receiver_is_this else "receiver_method_match"
110
117
 
111
118
  if not targets:
112
- in_class = class_method_index.get(src_class, {}).get(key, [])
119
+ in_class = class_method_index_by_id.get(src_class_id, {}).get(key, [])
113
120
  if in_class:
114
121
  targets = in_class
115
122
  confidence = 1.0
@@ -1,9 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import json
3
4
  import os
4
5
  from dataclasses import dataclass
5
6
  from typing import Callable
6
7
 
8
+ from codespine.config import SETTINGS
7
9
  from codespine.indexer.call_resolver import resolve_calls
8
10
  from codespine.indexer.java_parser import parse_java_source
9
11
  from codespine.indexer.symbol_builder import class_id, digest_bytes, file_id, method_id, symbol_id
@@ -37,21 +39,24 @@ class JavaIndexer:
37
39
  current_files = self._collect_java_files(root_path)
38
40
  self._emit(progress, "scan_done", files_found=len(current_files))
39
41
  db_files = self.store.project_file_hashes(project_id) if not full else {}
40
- current_hashes = self._hash_files(project_id, root_path, current_files) if not full else {}
42
+ meta_cache = self._load_file_meta_cache(project_id)
43
+ current_file_ids = {
44
+ file_id(project_id, os.path.relpath(fp, root_path))
45
+ for fp in current_files
46
+ }
41
47
 
42
48
  if full:
43
49
  to_reindex = current_files
44
50
  deleted_file_ids = []
51
+ meta_cache = {}
45
52
  else:
46
- to_reindex = []
47
- deleted_file_ids = [fid for fid in db_files if fid not in current_hashes]
48
- for file_path in current_files:
49
- rel_path = os.path.relpath(file_path, root_path)
50
- fid = file_id(project_id, rel_path)
51
- digest = current_hashes[fid]
52
- old = db_files.get(fid, {}).get("hash")
53
- if old != digest:
54
- to_reindex.append(file_path)
53
+ to_reindex, deleted_file_ids, meta_cache = self._plan_incremental(
54
+ project_id,
55
+ root_path,
56
+ current_files,
57
+ db_files,
58
+ meta_cache,
59
+ )
55
60
  self._emit(
56
61
  progress,
57
62
  "plan_done",
@@ -59,6 +64,21 @@ class JavaIndexer:
59
64
  deleted_files=len(deleted_file_ids),
60
65
  mode="full" if full else "incremental",
61
66
  )
67
+ if not full and not to_reindex and not deleted_file_ids:
68
+ self._prune_meta_cache(meta_cache, current_file_ids)
69
+ self._save_file_meta_cache(project_id, meta_cache)
70
+ self._emit(progress, "resolve_calls_done", calls_resolved=0)
71
+ self._emit(progress, "resolve_types_done", type_relationships=0)
72
+ return IndexResult(
73
+ project_id=project_id,
74
+ files_found=len(current_files),
75
+ files_indexed=0,
76
+ classes_indexed=0,
77
+ methods_indexed=0,
78
+ calls_resolved=0,
79
+ type_relationships=0,
80
+ embeddings_generated=0,
81
+ )
62
82
 
63
83
  files_indexed = 0
64
84
  classes_indexed = 0
@@ -70,6 +90,7 @@ class JavaIndexer:
70
90
  method_calls: dict[str, list] = {}
71
91
  method_context: dict[str, dict] = {}
72
92
  class_catalog: dict[str, list[str]] = self._existing_class_catalog(project_id) if not full else {}
93
+ fqcn_to_class_ids: dict[str, list[str]] = self._existing_class_ids_by_fqcn(project_id) if not full else {}
73
94
  class_meta: dict[str, dict] = {}
74
95
  class_methods: dict[str, dict[str, str]] = self._existing_class_methods(project_id) if not full else {}
75
96
 
@@ -84,32 +105,40 @@ class JavaIndexer:
84
105
  for file_path in to_reindex:
85
106
  rel_path = os.path.relpath(file_path, root_path)
86
107
  is_test = "src/test/java" in file_path.replace("\\", "/")
108
+ scope = self._scope_from_rel_path(rel_path)
87
109
 
88
110
  with open(file_path, "rb") as f:
89
111
  source = f.read()
90
112
 
91
113
  parsed = parse_java_source(source)
92
114
  f_id = file_id(project_id, rel_path)
115
+ file_digest = digest_bytes(source)
93
116
  if not full:
94
117
  # Drop old symbols/methods/classes for changed files before reinserting.
95
118
  self.store.clear_file(f_id)
96
- self.store.upsert_file(f_id, file_path, project_id, is_test, digest_bytes(source))
119
+ self.store.upsert_file(f_id, file_path, project_id, is_test, file_digest)
120
+ self._update_meta_cache_entry(meta_cache, f_id, file_path, file_digest, len(source))
97
121
 
98
122
  for cls in parsed.classes:
99
- c_id = class_id(cls.fqcn)
123
+ c_id = class_id(cls.fqcn, scope)
100
124
  self.store.upsert_class(c_id, cls.fqcn, cls.name, cls.package, f_id)
101
125
  class_catalog.setdefault(cls.name, [])
102
126
  if cls.fqcn not in class_catalog[cls.name]:
103
127
  class_catalog[cls.name].append(cls.fqcn)
104
- class_meta[cls.fqcn] = {
128
+ fqcn_to_class_ids.setdefault(cls.fqcn, [])
129
+ if c_id not in fqcn_to_class_ids[cls.fqcn]:
130
+ fqcn_to_class_ids[cls.fqcn].append(c_id)
131
+ class_meta[c_id] = {
132
+ "fqcn": cls.fqcn,
105
133
  "package": parsed.package,
106
134
  "imports": parsed.imports,
107
135
  "extends": cls.extends,
108
136
  "interfaces": cls.interfaces,
137
+ "scope": scope,
109
138
  }
110
- class_methods.setdefault(cls.fqcn, {})
139
+ class_methods.setdefault(c_id, {})
111
140
 
112
- cls_symbol_id = symbol_id("class", cls.fqcn)
141
+ cls_symbol_id = symbol_id("class", cls.fqcn, scope)
113
142
  self.store.upsert_symbol(
114
143
  symbol_id=cls_symbol_id,
115
144
  kind="class",
@@ -123,7 +152,7 @@ class JavaIndexer:
123
152
  classes_indexed += 1
124
153
 
125
154
  for method in cls.methods:
126
- m_id = method_id(cls.fqcn, method.signature)
155
+ m_id = method_id(cls.fqcn, method.signature, scope)
127
156
  self.store.upsert_method(
128
157
  method_id=m_id,
129
158
  class_id=c_id,
@@ -136,7 +165,7 @@ class JavaIndexer:
136
165
  )
137
166
 
138
167
  fqname = f"{cls.fqcn}#{method.signature}"
139
- m_symbol_id = symbol_id("method", fqname)
168
+ m_symbol_id = symbol_id("method", fqname, scope)
140
169
  self.store.upsert_symbol(
141
170
  symbol_id=m_symbol_id,
142
171
  kind="method",
@@ -154,16 +183,18 @@ class JavaIndexer:
154
183
  "name": method.name,
155
184
  "param_count": len(method.parameter_types),
156
185
  "class_fqcn": cls.fqcn,
186
+ "class_id": c_id,
157
187
  }
158
188
  method_calls[m_id] = method.calls
159
189
  method_context[m_id] = {
190
+ "class_id": c_id,
160
191
  "class_fqcn": cls.fqcn,
161
192
  "local_types": method.local_types,
162
193
  "field_types": cls.field_types,
163
194
  "imports": parsed.imports,
164
195
  "package": parsed.package,
165
196
  }
166
- class_methods[cls.fqcn][method.signature] = m_id
197
+ class_methods[c_id][method.signature] = m_id
167
198
  files_indexed += 1
168
199
  self._emit(
169
200
  progress,
@@ -182,9 +213,17 @@ class JavaIndexer:
182
213
  self._emit(progress, "resolve_calls_done", calls_resolved=calls_resolved)
183
214
 
184
215
  self._emit(progress, "resolve_types_start")
185
- type_relationships += self._build_inheritance_edges(class_meta, class_catalog, class_methods)
216
+ type_relationships += self._build_inheritance_edges(
217
+ class_meta,
218
+ class_catalog,
219
+ class_methods,
220
+ fqcn_to_class_ids,
221
+ )
186
222
  self._emit(progress, "resolve_types_done", type_relationships=type_relationships)
187
223
 
224
+ self._prune_meta_cache(meta_cache, current_file_ids)
225
+ self._save_file_meta_cache(project_id, meta_cache)
226
+
188
227
  return IndexResult(
189
228
  project_id=project_id,
190
229
  files_found=len(current_files),
@@ -210,22 +249,59 @@ class JavaIndexer:
210
249
  out.append(os.path.join(root, filename))
211
250
  return out
212
251
 
213
- @staticmethod
214
- def _hash_files(project_id: str, root_path: str, files: list[str]) -> dict[str, str]:
215
- hashes: dict[str, str] = {}
216
- for fp in files:
217
- rel = os.path.relpath(fp, root_path)
218
- fid = file_id(project_id, rel)
219
- with open(fp, "rb") as f:
220
- hashes[fid] = digest_bytes(f.read())
221
- return hashes
252
+ def _plan_incremental(
253
+ self,
254
+ project_id: str,
255
+ root_path: str,
256
+ files: list[str],
257
+ db_files: dict[str, dict[str, str]],
258
+ meta_cache: dict[str, dict],
259
+ ) -> tuple[list[str], list[str], dict[str, dict]]:
260
+ current_ids = {
261
+ file_id(project_id, os.path.relpath(fp, root_path))
262
+ for fp in files
263
+ }
264
+ deleted_file_ids = [fid for fid in db_files if fid not in current_ids]
265
+ to_reindex: list[str] = []
266
+
267
+ for file_path in files:
268
+ rel_path = os.path.relpath(file_path, root_path)
269
+ fid = file_id(project_id, rel_path)
270
+ old_hash = db_files.get(fid, {}).get("hash")
271
+ try:
272
+ st = os.stat(file_path)
273
+ except OSError:
274
+ continue
275
+ mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000)))
276
+ size = int(st.st_size)
277
+ cached = meta_cache.get(fid, {})
278
+
279
+ if (
280
+ cached
281
+ and int(cached.get("mtime_ns", -1)) == mtime_ns
282
+ and int(cached.get("size", -1)) == size
283
+ and cached.get("hash")
284
+ and cached.get("hash") == old_hash
285
+ ):
286
+ continue
287
+
288
+ with open(file_path, "rb") as f:
289
+ digest = digest_bytes(f.read())
290
+ meta_cache[fid] = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
291
+ if old_hash != digest:
292
+ to_reindex.append(file_path)
293
+
294
+ for fid in deleted_file_ids:
295
+ meta_cache.pop(fid, None)
296
+
297
+ return to_reindex, deleted_file_ids, meta_cache
222
298
 
223
299
  def _existing_method_catalog(self, project_id: str) -> dict[str, dict]:
224
300
  recs = self.store.query_records(
225
301
  """
226
302
  MATCH (m:Method), (c:Class), (f:File)
227
303
  WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
228
- RETURN m.id as method_id, m.name as name, m.signature as signature, c.fqcn as class_fqcn
304
+ RETURN m.id as method_id, m.name as name, m.signature as signature, c.fqcn as class_fqcn, c.id as class_id
229
305
  """,
230
306
  {"pid": project_id},
231
307
  )
@@ -239,9 +315,30 @@ class JavaIndexer:
239
315
  "name": r.get("name", ""),
240
316
  "param_count": param_count,
241
317
  "class_fqcn": r.get("class_fqcn", ""),
318
+ "class_id": r.get("class_id", ""),
242
319
  }
243
320
  return out
244
321
 
322
+ def _existing_class_ids_by_fqcn(self, project_id: str) -> dict[str, list[str]]:
323
+ recs = self.store.query_records(
324
+ """
325
+ MATCH (c:Class), (f:File)
326
+ WHERE c.file_id = f.id AND f.project_id = $pid
327
+ RETURN c.fqcn as fqcn, c.id as class_id
328
+ """,
329
+ {"pid": project_id},
330
+ )
331
+ out: dict[str, list[str]] = {}
332
+ for r in recs:
333
+ fqcn = r.get("fqcn", "")
334
+ cid = r.get("class_id", "")
335
+ if not fqcn or not cid:
336
+ continue
337
+ out.setdefault(fqcn, [])
338
+ if cid not in out[fqcn]:
339
+ out[fqcn].append(cid)
340
+ return out
341
+
245
342
  def _existing_class_catalog(self, project_id: str) -> dict[str, list[str]]:
246
343
  recs = self.store.query_records(
247
344
  """
@@ -263,14 +360,17 @@ class JavaIndexer:
263
360
  """
264
361
  MATCH (m:Method), (c:Class), (f:File)
265
362
  WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
266
- RETURN c.fqcn as fqcn, m.signature as signature, m.id as method_id
363
+ RETURN c.id as class_id, m.signature as signature, m.id as method_id
267
364
  """,
268
365
  {"pid": project_id},
269
366
  )
270
367
  out: dict[str, dict[str, str]] = {}
271
368
  for r in recs:
272
- out.setdefault(r["fqcn"], {})
273
- out[r["fqcn"]][r["signature"]] = r["method_id"]
369
+ class_key = r.get("class_id")
370
+ if not class_key:
371
+ continue
372
+ out.setdefault(class_key, {})
373
+ out[class_key][r["signature"]] = r["method_id"]
274
374
  return out
275
375
 
276
376
  @staticmethod
@@ -302,38 +402,98 @@ class JavaIndexer:
302
402
  class_meta: dict[str, dict],
303
403
  class_catalog: dict[str, list[str]],
304
404
  class_methods: dict[str, dict[str, str]],
405
+ fqcn_to_class_ids: dict[str, list[str]],
305
406
  ) -> int:
306
407
  rel_count = 0
307
- for fqcn, meta in class_meta.items():
308
- src_id = class_id(fqcn)
408
+ for src_id, meta in class_meta.items():
309
409
  ctx = {"package": meta.get("package", ""), "imports": meta.get("imports", [])}
310
410
 
311
411
  parent_candidates = self._resolve_type_candidates(meta.get("extends"), ctx, class_catalog)
312
412
  for parent_fqcn in parent_candidates:
313
- dst_id = class_id(parent_fqcn)
314
- self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 0.8)
315
- rel_count += 1
316
- for sig, method_id in class_methods.get(fqcn, {}).items():
317
- parent_method = class_methods.get(parent_fqcn, {}).get(sig)
318
- if parent_method:
319
- self.store.add_reference("OVERRIDES", "Method", method_id, "Method", parent_method, 1.0)
320
- rel_count += 1
413
+ for dst_id in fqcn_to_class_ids.get(parent_fqcn, []):
414
+ self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 0.8)
415
+ rel_count += 1
416
+ for sig, method_id in class_methods.get(src_id, {}).items():
417
+ parent_method = class_methods.get(dst_id, {}).get(sig)
418
+ if parent_method:
419
+ self.store.add_reference("OVERRIDES", "Method", method_id, "Method", parent_method, 1.0)
420
+ rel_count += 1
321
421
 
322
422
  for iface in meta.get("interfaces", []):
323
423
  iface_candidates = self._resolve_type_candidates(iface, ctx, class_catalog)
324
424
  for iface_fqcn in iface_candidates:
325
- dst_id = class_id(iface_fqcn)
326
- self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 1.0)
327
- rel_count += 1
328
- for sig, method_id in class_methods.get(fqcn, {}).items():
329
- iface_method = class_methods.get(iface_fqcn, {}).get(sig)
330
- if iface_method:
331
- self.store.add_reference("OVERRIDES", "Method", method_id, "Method", iface_method, 1.0)
332
- rel_count += 1
425
+ for dst_id in fqcn_to_class_ids.get(iface_fqcn, []):
426
+ self.store.add_reference("IMPLEMENTS", "Class", src_id, "Class", dst_id, 1.0)
427
+ rel_count += 1
428
+ for sig, method_id in class_methods.get(src_id, {}).items():
429
+ iface_method = class_methods.get(dst_id, {}).get(sig)
430
+ if iface_method:
431
+ self.store.add_reference("OVERRIDES", "Method", method_id, "Method", iface_method, 1.0)
432
+ rel_count += 1
333
433
  return rel_count
334
434
 
435
+ @staticmethod
436
+ def _meta_cache_path(project_id: str) -> str:
437
+ base = SETTINGS.index_meta_dir
438
+ try:
439
+ os.makedirs(base, exist_ok=True)
440
+ except OSError:
441
+ base = "/tmp/.codespine_index_meta"
442
+ os.makedirs(base, exist_ok=True)
443
+ return os.path.join(base, f"{project_id}.json")
444
+
445
+ def _load_file_meta_cache(self, project_id: str) -> dict[str, dict]:
446
+ path = self._meta_cache_path(project_id)
447
+ if not os.path.exists(path):
448
+ return {}
449
+ try:
450
+ with open(path, "r", encoding="utf-8") as f:
451
+ data = json.load(f)
452
+ if isinstance(data, dict):
453
+ return data
454
+ except (OSError, ValueError, TypeError):
455
+ return {}
456
+ return {}
457
+
458
+ def _save_file_meta_cache(self, project_id: str, data: dict[str, dict]) -> None:
459
+ path = self._meta_cache_path(project_id)
460
+ tmp_path = f"{path}.tmp"
461
+ try:
462
+ with open(tmp_path, "w", encoding="utf-8") as f:
463
+ json.dump(data, f, separators=(",", ":"))
464
+ os.replace(tmp_path, path)
465
+ except OSError:
466
+ return
467
+
468
+ @staticmethod
469
+ def _update_meta_cache_entry(meta_cache: dict[str, dict], fid: str, file_path: str, digest: str, size_hint: int) -> None:
470
+ try:
471
+ st = os.stat(file_path)
472
+ mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000)))
473
+ size = int(st.st_size)
474
+ except OSError:
475
+ mtime_ns = -1
476
+ size = size_hint
477
+ meta_cache[fid] = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
478
+
479
+ @staticmethod
480
+ def _prune_meta_cache(meta_cache: dict[str, dict], current_file_ids: set[str]) -> None:
481
+ for fid in list(meta_cache.keys()):
482
+ if fid not in current_file_ids:
483
+ del meta_cache[fid]
484
+
335
485
  @staticmethod
336
486
  def _emit(progress: Callable[[str, dict], None] | None, event: str, **payload: object) -> None:
337
487
  if progress is None:
338
488
  return
339
489
  progress(event, payload)
490
+
491
+ @staticmethod
492
+ def _scope_from_rel_path(rel_path: str) -> str:
493
+ normalized = rel_path.replace("\\", "/")
494
+ if "/java/" in normalized:
495
+ return normalized.split("/java/", 1)[0]
496
+ if "/src/" in normalized:
497
+ return normalized.split("/src/", 1)[0]
498
+ scope = os.path.dirname(normalized).strip()
499
+ return scope or "."
@@ -0,0 +1,35 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from dataclasses import dataclass
5
+
6
+
7
+ @dataclass
8
+ class SymbolRef:
9
+ symbol_id: str
10
+ method_id: str
11
+ class_id: str
12
+ file_id: str
13
+
14
+
15
+ def digest_bytes(payload: bytes) -> str:
16
+ return hashlib.sha1(payload).hexdigest()
17
+
18
+
19
+ def file_id(project_id: str, rel_path: str) -> str:
20
+ return hashlib.sha1(f"{project_id}:{rel_path}".encode("utf-8")).hexdigest()
21
+
22
+
23
+ def class_id(fqcn: str, scope: str | None = None) -> str:
24
+ key = f"{scope}::{fqcn}" if scope else fqcn
25
+ return hashlib.sha1(key.encode("utf-8")).hexdigest()
26
+
27
+
28
+ def method_id(fqcn: str, signature: str, scope: str | None = None) -> str:
29
+ key = f"{scope}::{fqcn}#{signature}" if scope else f"{fqcn}#{signature}"
30
+ return hashlib.sha1(key.encode("utf-8")).hexdigest()
31
+
32
+
33
+ def symbol_id(kind: str, fqname: str, scope: str | None = None) -> str:
34
+ key = f"{kind}:{scope}:{fqname}" if scope else f"{kind}:{fqname}"
35
+ return hashlib.sha1(key.encode("utf-8")).hexdigest()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.1.7
3
+ Version: 0.1.9
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -44,4 +44,5 @@ tests/test_branch_diff_normalize.py
44
44
  tests/test_call_resolver.py
45
45
  tests/test_index_and_hybrid.py
46
46
  tests/test_java_parser.py
47
+ tests/test_multimodule_index.py
47
48
  tests/test_search_ranking.py
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.1.7"
7
+ version = "0.1.9"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -0,0 +1,43 @@
1
+ from types import SimpleNamespace
2
+
3
+ from codespine.indexer.call_resolver import resolve_calls
4
+
5
+
6
+ def test_resolver_prefers_receiver_type_and_arity():
7
+ method_catalog = {
8
+ "src": {
9
+ "name": "entry",
10
+ "param_count": 0,
11
+ "class_id": "c_service",
12
+ "class_fqcn": "com.example.Service",
13
+ "signature": "entry()",
14
+ },
15
+ "m1": {"name": "run", "param_count": 0, "class_id": "c_service", "class_fqcn": "com.example.Service", "signature": "run()"},
16
+ "m2": {
17
+ "name": "run",
18
+ "param_count": 1,
19
+ "class_id": "c_service",
20
+ "class_fqcn": "com.example.Service",
21
+ "signature": "run(String)",
22
+ },
23
+ "m3": {"name": "save", "param_count": 0, "class_id": "c_repo", "class_fqcn": "com.example.Repo", "signature": "save()"},
24
+ }
25
+ calls = {
26
+ "src": [
27
+ SimpleNamespace(name="run", receiver="this", arg_count=0),
28
+ SimpleNamespace(name="save", receiver="repo", arg_count=0),
29
+ ]
30
+ }
31
+ method_context = {
32
+ "src": {
33
+ "class_id": "c_service",
34
+ "class_fqcn": "com.example.Service",
35
+ "local_types": {"repo": "Repo"},
36
+ "field_types": {},
37
+ }
38
+ }
39
+ class_catalog = {"Service": ["com.example.Service"], "Repo": ["com.example.Repo"]}
40
+
41
+ out = list(resolve_calls(method_catalog, calls, method_context, class_catalog))
42
+ assert ("src", "m1", 1.0, "receiver_this_exact") in out
43
+ assert ("src", "m3", 0.8, "receiver_method_match") in out
@@ -19,3 +19,16 @@ def test_index_and_hybrid_search():
19
19
  results = hybrid_search(store, "process payment", k=5)
20
20
  assert results
21
21
  assert any("processPayment" in (r.get("fqname") or "") for r in results)
22
+
23
+
24
+ def test_incremental_no_change_reindexes_zero_files():
25
+ fixture = Path(__file__).parent / "fixtures" / "java_simple"
26
+ store = GraphStore(read_only=False)
27
+ indexer = JavaIndexer(store)
28
+
29
+ first = indexer.index_project(str(fixture), full=True)
30
+ second = indexer.index_project(str(fixture), full=False)
31
+
32
+ assert first.files_found >= 2
33
+ assert second.files_found == first.files_found
34
+ assert second.files_indexed == 0
@@ -0,0 +1,55 @@
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+
5
+ pytest.importorskip("kuzu")
6
+ pytest.importorskip("tree_sitter_java")
7
+
8
+ from codespine.db.store import GraphStore
9
+ from codespine.indexer.engine import JavaIndexer
10
+
11
+
12
+ def _write_java(path: Path, content: str) -> None:
13
+ path.parent.mkdir(parents=True, exist_ok=True)
14
+ path.write_text(content, encoding="utf-8")
15
+
16
+
17
+ def test_multimodule_duplicate_fqcn_is_indexed_without_collision(tmp_path: Path):
18
+ _write_java(
19
+ tmp_path / "module-a" / "src" / "main" / "java" / "com" / "example" / "App.java",
20
+ """
21
+ package com.example;
22
+ public class App { public void fromA() {} }
23
+ """,
24
+ )
25
+ _write_java(
26
+ tmp_path / "module-b" / "src" / "main" / "java" / "com" / "example" / "App.java",
27
+ """
28
+ package com.example;
29
+ public class App { public void fromB() {} }
30
+ """,
31
+ )
32
+
33
+ store = GraphStore(read_only=False)
34
+ result = JavaIndexer(store).index_project(str(tmp_path), full=True)
35
+
36
+ classes = store.query_records(
37
+ """
38
+ MATCH (c:Class), (f:File)
39
+ WHERE c.file_id = f.id AND f.project_id = $pid AND c.fqcn = $fqcn
40
+ RETURN c.id as id, f.path as path
41
+ """,
42
+ {"pid": result.project_id, "fqcn": "com.example.App"},
43
+ )
44
+ methods = store.query_records(
45
+ """
46
+ MATCH (m:Method), (c:Class), (f:File)
47
+ WHERE m.class_id = c.id AND c.file_id = f.id AND f.project_id = $pid
48
+ RETURN m.name as name
49
+ """,
50
+ {"pid": result.project_id},
51
+ )
52
+
53
+ assert len(classes) == 2
54
+ assert len({c["id"] for c in classes}) == 2
55
+ assert {"fromA", "fromB"}.issubset({m["name"] for m in methods})
@@ -1,32 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import hashlib
4
- from dataclasses import dataclass
5
-
6
-
7
- @dataclass
8
- class SymbolRef:
9
- symbol_id: str
10
- method_id: str
11
- class_id: str
12
- file_id: str
13
-
14
-
15
- def digest_bytes(payload: bytes) -> str:
16
- return hashlib.sha1(payload).hexdigest()
17
-
18
-
19
- def file_id(project_id: str, rel_path: str) -> str:
20
- return hashlib.sha1(f"{project_id}:{rel_path}".encode("utf-8")).hexdigest()
21
-
22
-
23
- def class_id(fqcn: str) -> str:
24
- return hashlib.sha1(fqcn.encode("utf-8")).hexdigest()
25
-
26
-
27
- def method_id(fqcn: str, signature: str) -> str:
28
- return hashlib.sha1(f"{fqcn}#{signature}".encode("utf-8")).hexdigest()
29
-
30
-
31
- def symbol_id(kind: str, fqname: str) -> str:
32
- return hashlib.sha1(f"{kind}:{fqname}".encode("utf-8")).hexdigest()
@@ -1,30 +0,0 @@
1
- from types import SimpleNamespace
2
-
3
- from codespine.indexer.call_resolver import resolve_calls
4
-
5
-
6
- def test_resolver_prefers_receiver_type_and_arity():
7
- method_catalog = {
8
- "src": {"name": "entry", "param_count": 0, "class_fqcn": "com.example.Service", "signature": "entry()"},
9
- "m1": {"name": "run", "param_count": 0, "class_fqcn": "com.example.Service", "signature": "run()"},
10
- "m2": {"name": "run", "param_count": 1, "class_fqcn": "com.example.Service", "signature": "run(String)"},
11
- "m3": {"name": "save", "param_count": 0, "class_fqcn": "com.example.Repo", "signature": "save()"},
12
- }
13
- calls = {
14
- "src": [
15
- SimpleNamespace(name="run", receiver="this", arg_count=0),
16
- SimpleNamespace(name="save", receiver="repo", arg_count=0),
17
- ]
18
- }
19
- method_context = {
20
- "src": {
21
- "class_fqcn": "com.example.Service",
22
- "local_types": {"repo": "Repo"},
23
- "field_types": {},
24
- }
25
- }
26
- class_catalog = {"Service": ["com.example.Service"], "Repo": ["com.example.Repo"]}
27
-
28
- out = resolve_calls(method_catalog, calls, method_context, class_catalog)
29
- assert ("src", "m1", 1.0, "receiver_this_exact") in out
30
- assert ("src", "m3", 0.8, "receiver_method_match") in out
File without changes
File without changes
File without changes
File without changes