codespine 0.1.8__tar.gz → 0.1.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. {codespine-0.1.8 → codespine-0.1.9}/PKG-INFO +1 -1
  2. {codespine-0.1.8 → codespine-0.1.9}/codespine/__init__.py +1 -1
  3. {codespine-0.1.8 → codespine-0.1.9}/codespine/cli.py +4 -3
  4. {codespine-0.1.8 → codespine-0.1.9}/codespine/config.py +1 -0
  5. {codespine-0.1.8 → codespine-0.1.9}/codespine/indexer/engine.py +132 -20
  6. {codespine-0.1.8 → codespine-0.1.9}/codespine.egg-info/PKG-INFO +1 -1
  7. {codespine-0.1.8 → codespine-0.1.9}/pyproject.toml +1 -1
  8. {codespine-0.1.8 → codespine-0.1.9}/tests/test_index_and_hybrid.py +13 -0
  9. {codespine-0.1.8 → codespine-0.1.9}/LICENSE +0 -0
  10. {codespine-0.1.8 → codespine-0.1.9}/README.md +0 -0
  11. {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/__init__.py +0 -0
  12. {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/community.py +0 -0
  13. {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/context.py +0 -0
  14. {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/coupling.py +0 -0
  15. {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/deadcode.py +0 -0
  16. {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/flow.py +0 -0
  17. {codespine-0.1.8 → codespine-0.1.9}/codespine/analysis/impact.py +0 -0
  18. {codespine-0.1.8 → codespine-0.1.9}/codespine/db/__init__.py +0 -0
  19. {codespine-0.1.8 → codespine-0.1.9}/codespine/db/schema.py +0 -0
  20. {codespine-0.1.8 → codespine-0.1.9}/codespine/db/store.py +0 -0
  21. {codespine-0.1.8 → codespine-0.1.9}/codespine/diff/__init__.py +0 -0
  22. {codespine-0.1.8 → codespine-0.1.9}/codespine/diff/branch_diff.py +0 -0
  23. {codespine-0.1.8 → codespine-0.1.9}/codespine/indexer/__init__.py +0 -0
  24. {codespine-0.1.8 → codespine-0.1.9}/codespine/indexer/call_resolver.py +0 -0
  25. {codespine-0.1.8 → codespine-0.1.9}/codespine/indexer/java_parser.py +0 -0
  26. {codespine-0.1.8 → codespine-0.1.9}/codespine/indexer/symbol_builder.py +0 -0
  27. {codespine-0.1.8 → codespine-0.1.9}/codespine/mcp/__init__.py +0 -0
  28. {codespine-0.1.8 → codespine-0.1.9}/codespine/mcp/server.py +0 -0
  29. {codespine-0.1.8 → codespine-0.1.9}/codespine/noise/__init__.py +0 -0
  30. {codespine-0.1.8 → codespine-0.1.9}/codespine/noise/blocklist.py +0 -0
  31. {codespine-0.1.8 → codespine-0.1.9}/codespine/search/__init__.py +0 -0
  32. {codespine-0.1.8 → codespine-0.1.9}/codespine/search/bm25.py +0 -0
  33. {codespine-0.1.8 → codespine-0.1.9}/codespine/search/fuzzy.py +0 -0
  34. {codespine-0.1.8 → codespine-0.1.9}/codespine/search/hybrid.py +0 -0
  35. {codespine-0.1.8 → codespine-0.1.9}/codespine/search/rrf.py +0 -0
  36. {codespine-0.1.8 → codespine-0.1.9}/codespine/search/vector.py +0 -0
  37. {codespine-0.1.8 → codespine-0.1.9}/codespine/watch/__init__.py +0 -0
  38. {codespine-0.1.8 → codespine-0.1.9}/codespine/watch/watcher.py +0 -0
  39. {codespine-0.1.8 → codespine-0.1.9}/codespine.egg-info/SOURCES.txt +0 -0
  40. {codespine-0.1.8 → codespine-0.1.9}/codespine.egg-info/dependency_links.txt +0 -0
  41. {codespine-0.1.8 → codespine-0.1.9}/codespine.egg-info/entry_points.txt +0 -0
  42. {codespine-0.1.8 → codespine-0.1.9}/codespine.egg-info/requires.txt +0 -0
  43. {codespine-0.1.8 → codespine-0.1.9}/codespine.egg-info/top_level.txt +0 -0
  44. {codespine-0.1.8 → codespine-0.1.9}/gindex.py +0 -0
  45. {codespine-0.1.8 → codespine-0.1.9}/setup.cfg +0 -0
  46. {codespine-0.1.8 → codespine-0.1.9}/tests/test_branch_diff_normalize.py +0 -0
  47. {codespine-0.1.8 → codespine-0.1.9}/tests/test_call_resolver.py +0 -0
  48. {codespine-0.1.8 → codespine-0.1.9}/tests/test_java_parser.py +0 -0
  49. {codespine-0.1.8 → codespine-0.1.9}/tests/test_multimodule_index.py +0 -0
  50. {codespine-0.1.8 → codespine-0.1.9}/tests/test_search_ranking.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.1.8
3
+ Version: 0.1.9
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.1.8"
4
+ __version__ = "0.1.9"
@@ -77,7 +77,7 @@ def main() -> None:
77
77
 
78
78
  @main.command()
79
79
  @click.argument("path", type=click.Path(exists=True))
80
- @click.option("--full/--incremental", default=True, show_default=True)
80
+ @click.option("--full/--incremental", default=False, show_default=True)
81
81
  @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
82
82
  def analyse(path: str, full: bool, deep: bool) -> None:
83
83
  """Index a local Java project."""
@@ -89,7 +89,7 @@ def analyse(path: str, full: bool, deep: bool) -> None:
89
89
  abs_path = os.path.abspath(path)
90
90
  store = GraphStore(read_only=False)
91
91
  indexer = JavaIndexer(store)
92
- parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0}
92
+ parse_state = {"shown": False, "indexed": 0, "total": 0, "last_ts": 0.0, "printed_zero": False}
93
93
  call_state = {"shown": False, "count": 0, "last_ts": 0.0}
94
94
 
95
95
  def _progress(event: str, payload: dict) -> None:
@@ -105,6 +105,7 @@ def analyse(path: str, full: bool, deep: bool) -> None:
105
105
  _phase("Index mode...", f"{mode} ({to_index} files to index, {deleted} deleted)")
106
106
  if to_index == 0:
107
107
  _phase("Parsing code...", "0/0")
108
+ parse_state["printed_zero"] = True
108
109
  return
109
110
  if event == "parse_progress":
110
111
  indexed = int(payload.get("indexed", 0))
@@ -149,7 +150,7 @@ def analyse(path: str, full: bool, deep: bool) -> None:
149
150
  result = indexer.index_project(abs_path, full=full, progress=_progress)
150
151
  if parse_state["shown"]:
151
152
  click.echo()
152
- if parse_state["total"] == 0:
153
+ if parse_state["total"] == 0 and not parse_state["printed_zero"]:
153
154
  _phase("Parsing code...", "0/0")
154
155
  elif parse_state["indexed"] < parse_state["total"]:
155
156
  _phase("Parsing code...", f"{parse_state['indexed']}/{parse_state['total']}")
@@ -8,6 +8,7 @@ class Settings:
8
8
  pid_file: str = os.path.expanduser("~/.codespine.pid")
9
9
  log_file: str = os.path.expanduser("~/.codespine.log")
10
10
  embedding_cache_db: str = os.path.expanduser("~/.codespine_embedding_cache.sqlite3")
11
+ index_meta_dir: str = os.path.expanduser("~/.codespine_index_meta")
11
12
  embedding_model: str = "BAAI/bge-small-en-v1.5"
12
13
  vector_dim: int = 384
13
14
  rrf_k: int = 60
@@ -1,9 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import json
3
4
  import os
4
5
  from dataclasses import dataclass
5
6
  from typing import Callable
6
7
 
8
+ from codespine.config import SETTINGS
7
9
  from codespine.indexer.call_resolver import resolve_calls
8
10
  from codespine.indexer.java_parser import parse_java_source
9
11
  from codespine.indexer.symbol_builder import class_id, digest_bytes, file_id, method_id, symbol_id
@@ -37,21 +39,24 @@ class JavaIndexer:
37
39
  current_files = self._collect_java_files(root_path)
38
40
  self._emit(progress, "scan_done", files_found=len(current_files))
39
41
  db_files = self.store.project_file_hashes(project_id) if not full else {}
40
- current_hashes = self._hash_files(project_id, root_path, current_files) if not full else {}
42
+ meta_cache = self._load_file_meta_cache(project_id)
43
+ current_file_ids = {
44
+ file_id(project_id, os.path.relpath(fp, root_path))
45
+ for fp in current_files
46
+ }
41
47
 
42
48
  if full:
43
49
  to_reindex = current_files
44
50
  deleted_file_ids = []
51
+ meta_cache = {}
45
52
  else:
46
- to_reindex = []
47
- deleted_file_ids = [fid for fid in db_files if fid not in current_hashes]
48
- for file_path in current_files:
49
- rel_path = os.path.relpath(file_path, root_path)
50
- fid = file_id(project_id, rel_path)
51
- digest = current_hashes[fid]
52
- old = db_files.get(fid, {}).get("hash")
53
- if old != digest:
54
- to_reindex.append(file_path)
53
+ to_reindex, deleted_file_ids, meta_cache = self._plan_incremental(
54
+ project_id,
55
+ root_path,
56
+ current_files,
57
+ db_files,
58
+ meta_cache,
59
+ )
55
60
  self._emit(
56
61
  progress,
57
62
  "plan_done",
@@ -59,6 +64,21 @@ class JavaIndexer:
59
64
  deleted_files=len(deleted_file_ids),
60
65
  mode="full" if full else "incremental",
61
66
  )
67
+ if not full and not to_reindex and not deleted_file_ids:
68
+ self._prune_meta_cache(meta_cache, current_file_ids)
69
+ self._save_file_meta_cache(project_id, meta_cache)
70
+ self._emit(progress, "resolve_calls_done", calls_resolved=0)
71
+ self._emit(progress, "resolve_types_done", type_relationships=0)
72
+ return IndexResult(
73
+ project_id=project_id,
74
+ files_found=len(current_files),
75
+ files_indexed=0,
76
+ classes_indexed=0,
77
+ methods_indexed=0,
78
+ calls_resolved=0,
79
+ type_relationships=0,
80
+ embeddings_generated=0,
81
+ )
62
82
 
63
83
  files_indexed = 0
64
84
  classes_indexed = 0
@@ -92,10 +112,12 @@ class JavaIndexer:
92
112
 
93
113
  parsed = parse_java_source(source)
94
114
  f_id = file_id(project_id, rel_path)
115
+ file_digest = digest_bytes(source)
95
116
  if not full:
96
117
  # Drop old symbols/methods/classes for changed files before reinserting.
97
118
  self.store.clear_file(f_id)
98
- self.store.upsert_file(f_id, file_path, project_id, is_test, digest_bytes(source))
119
+ self.store.upsert_file(f_id, file_path, project_id, is_test, file_digest)
120
+ self._update_meta_cache_entry(meta_cache, f_id, file_path, file_digest, len(source))
99
121
 
100
122
  for cls in parsed.classes:
101
123
  c_id = class_id(cls.fqcn, scope)
@@ -199,6 +221,9 @@ class JavaIndexer:
199
221
  )
200
222
  self._emit(progress, "resolve_types_done", type_relationships=type_relationships)
201
223
 
224
+ self._prune_meta_cache(meta_cache, current_file_ids)
225
+ self._save_file_meta_cache(project_id, meta_cache)
226
+
202
227
  return IndexResult(
203
228
  project_id=project_id,
204
229
  files_found=len(current_files),
@@ -224,15 +249,52 @@ class JavaIndexer:
224
249
  out.append(os.path.join(root, filename))
225
250
  return out
226
251
 
227
- @staticmethod
228
- def _hash_files(project_id: str, root_path: str, files: list[str]) -> dict[str, str]:
229
- hashes: dict[str, str] = {}
230
- for fp in files:
231
- rel = os.path.relpath(fp, root_path)
232
- fid = file_id(project_id, rel)
233
- with open(fp, "rb") as f:
234
- hashes[fid] = digest_bytes(f.read())
235
- return hashes
252
+ def _plan_incremental(
253
+ self,
254
+ project_id: str,
255
+ root_path: str,
256
+ files: list[str],
257
+ db_files: dict[str, dict[str, str]],
258
+ meta_cache: dict[str, dict],
259
+ ) -> tuple[list[str], list[str], dict[str, dict]]:
260
+ current_ids = {
261
+ file_id(project_id, os.path.relpath(fp, root_path))
262
+ for fp in files
263
+ }
264
+ deleted_file_ids = [fid for fid in db_files if fid not in current_ids]
265
+ to_reindex: list[str] = []
266
+
267
+ for file_path in files:
268
+ rel_path = os.path.relpath(file_path, root_path)
269
+ fid = file_id(project_id, rel_path)
270
+ old_hash = db_files.get(fid, {}).get("hash")
271
+ try:
272
+ st = os.stat(file_path)
273
+ except OSError:
274
+ continue
275
+ mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000)))
276
+ size = int(st.st_size)
277
+ cached = meta_cache.get(fid, {})
278
+
279
+ if (
280
+ cached
281
+ and int(cached.get("mtime_ns", -1)) == mtime_ns
282
+ and int(cached.get("size", -1)) == size
283
+ and cached.get("hash")
284
+ and cached.get("hash") == old_hash
285
+ ):
286
+ continue
287
+
288
+ with open(file_path, "rb") as f:
289
+ digest = digest_bytes(f.read())
290
+ meta_cache[fid] = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
291
+ if old_hash != digest:
292
+ to_reindex.append(file_path)
293
+
294
+ for fid in deleted_file_ids:
295
+ meta_cache.pop(fid, None)
296
+
297
+ return to_reindex, deleted_file_ids, meta_cache
236
298
 
237
299
  def _existing_method_catalog(self, project_id: str) -> dict[str, dict]:
238
300
  recs = self.store.query_records(
@@ -370,6 +432,56 @@ class JavaIndexer:
370
432
  rel_count += 1
371
433
  return rel_count
372
434
 
435
+ @staticmethod
436
+ def _meta_cache_path(project_id: str) -> str:
437
+ base = SETTINGS.index_meta_dir
438
+ try:
439
+ os.makedirs(base, exist_ok=True)
440
+ except OSError:
441
+ base = "/tmp/.codespine_index_meta"
442
+ os.makedirs(base, exist_ok=True)
443
+ return os.path.join(base, f"{project_id}.json")
444
+
445
+ def _load_file_meta_cache(self, project_id: str) -> dict[str, dict]:
446
+ path = self._meta_cache_path(project_id)
447
+ if not os.path.exists(path):
448
+ return {}
449
+ try:
450
+ with open(path, "r", encoding="utf-8") as f:
451
+ data = json.load(f)
452
+ if isinstance(data, dict):
453
+ return data
454
+ except (OSError, ValueError, TypeError):
455
+ return {}
456
+ return {}
457
+
458
+ def _save_file_meta_cache(self, project_id: str, data: dict[str, dict]) -> None:
459
+ path = self._meta_cache_path(project_id)
460
+ tmp_path = f"{path}.tmp"
461
+ try:
462
+ with open(tmp_path, "w", encoding="utf-8") as f:
463
+ json.dump(data, f, separators=(",", ":"))
464
+ os.replace(tmp_path, path)
465
+ except OSError:
466
+ return
467
+
468
+ @staticmethod
469
+ def _update_meta_cache_entry(meta_cache: dict[str, dict], fid: str, file_path: str, digest: str, size_hint: int) -> None:
470
+ try:
471
+ st = os.stat(file_path)
472
+ mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000)))
473
+ size = int(st.st_size)
474
+ except OSError:
475
+ mtime_ns = -1
476
+ size = size_hint
477
+ meta_cache[fid] = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
478
+
479
+ @staticmethod
480
+ def _prune_meta_cache(meta_cache: dict[str, dict], current_file_ids: set[str]) -> None:
481
+ for fid in list(meta_cache.keys()):
482
+ if fid not in current_file_ids:
483
+ del meta_cache[fid]
484
+
373
485
  @staticmethod
374
486
  def _emit(progress: Callable[[str, dict], None] | None, event: str, **payload: object) -> None:
375
487
  if progress is None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.1.8
3
+ Version: 0.1.9
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.1.8"
7
+ version = "0.1.9"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -19,3 +19,16 @@ def test_index_and_hybrid_search():
19
19
  results = hybrid_search(store, "process payment", k=5)
20
20
  assert results
21
21
  assert any("processPayment" in (r.get("fqname") or "") for r in results)
22
+
23
+
24
+ def test_incremental_no_change_reindexes_zero_files():
25
+ fixture = Path(__file__).parent / "fixtures" / "java_simple"
26
+ store = GraphStore(read_only=False)
27
+ indexer = JavaIndexer(store)
28
+
29
+ first = indexer.index_project(str(fixture), full=True)
30
+ second = indexer.index_project(str(fixture), full=False)
31
+
32
+ assert first.files_found >= 2
33
+ assert second.files_found == first.files_found
34
+ assert second.files_indexed == 0
File without changes
File without changes
File without changes
File without changes