codespine 1.0.1__tar.gz → 1.0.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. {codespine-1.0.1 → codespine-1.0.3}/PKG-INFO +1 -1
  2. {codespine-1.0.1 → codespine-1.0.3}/codespine/__init__.py +1 -1
  3. {codespine-1.0.1 → codespine-1.0.3}/codespine/db/duckdb_store.py +69 -25
  4. {codespine-1.0.1 → codespine-1.0.3}/codespine.egg-info/PKG-INFO +1 -1
  5. {codespine-1.0.1 → codespine-1.0.3}/pyproject.toml +1 -1
  6. {codespine-1.0.1 → codespine-1.0.3}/tests/test_duckdb_store.py +47 -0
  7. {codespine-1.0.1 → codespine-1.0.3}/LICENSE +0 -0
  8. {codespine-1.0.1 → codespine-1.0.3}/README.md +0 -0
  9. {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/__init__.py +0 -0
  10. {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/community.py +0 -0
  11. {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/context.py +0 -0
  12. {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/coupling.py +0 -0
  13. {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/crossmodule.py +0 -0
  14. {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/deadcode.py +0 -0
  15. {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/flow.py +0 -0
  16. {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/impact.py +0 -0
  17. {codespine-1.0.1 → codespine-1.0.3}/codespine/cache/__init__.py +0 -0
  18. {codespine-1.0.1 → codespine-1.0.3}/codespine/cache/result_cache.py +0 -0
  19. {codespine-1.0.1 → codespine-1.0.3}/codespine/cli.py +0 -0
  20. {codespine-1.0.1 → codespine-1.0.3}/codespine/config.py +0 -0
  21. {codespine-1.0.1 → codespine-1.0.3}/codespine/db/__init__.py +0 -0
  22. {codespine-1.0.1 → codespine-1.0.3}/codespine/db/_cypher_compat.py +0 -0
  23. {codespine-1.0.1 → codespine-1.0.3}/codespine/db/schema.py +0 -0
  24. {codespine-1.0.1 → codespine-1.0.3}/codespine/db/store.py +0 -0
  25. {codespine-1.0.1 → codespine-1.0.3}/codespine/diff/__init__.py +0 -0
  26. {codespine-1.0.1 → codespine-1.0.3}/codespine/diff/branch_diff.py +0 -0
  27. {codespine-1.0.1 → codespine-1.0.3}/codespine/guide.py +0 -0
  28. {codespine-1.0.1 → codespine-1.0.3}/codespine/indexer/__init__.py +0 -0
  29. {codespine-1.0.1 → codespine-1.0.3}/codespine/indexer/call_resolver.py +0 -0
  30. {codespine-1.0.1 → codespine-1.0.3}/codespine/indexer/di_resolver.py +0 -0
  31. {codespine-1.0.1 → codespine-1.0.3}/codespine/indexer/engine.py +0 -0
  32. {codespine-1.0.1 → codespine-1.0.3}/codespine/indexer/java_parser.py +0 -0
  33. {codespine-1.0.1 → codespine-1.0.3}/codespine/indexer/symbol_builder.py +0 -0
  34. {codespine-1.0.1 → codespine-1.0.3}/codespine/mcp/__init__.py +0 -0
  35. {codespine-1.0.1 → codespine-1.0.3}/codespine/mcp/server.py +0 -0
  36. {codespine-1.0.1 → codespine-1.0.3}/codespine/noise/__init__.py +0 -0
  37. {codespine-1.0.1 → codespine-1.0.3}/codespine/noise/blocklist.py +0 -0
  38. {codespine-1.0.1 → codespine-1.0.3}/codespine/overlay/__init__.py +0 -0
  39. {codespine-1.0.1 → codespine-1.0.3}/codespine/overlay/git_state.py +0 -0
  40. {codespine-1.0.1 → codespine-1.0.3}/codespine/overlay/merge.py +0 -0
  41. {codespine-1.0.1 → codespine-1.0.3}/codespine/overlay/store.py +0 -0
  42. {codespine-1.0.1 → codespine-1.0.3}/codespine/search/__init__.py +0 -0
  43. {codespine-1.0.1 → codespine-1.0.3}/codespine/search/bm25.py +0 -0
  44. {codespine-1.0.1 → codespine-1.0.3}/codespine/search/fuzzy.py +0 -0
  45. {codespine-1.0.1 → codespine-1.0.3}/codespine/search/hybrid.py +0 -0
  46. {codespine-1.0.1 → codespine-1.0.3}/codespine/search/rrf.py +0 -0
  47. {codespine-1.0.1 → codespine-1.0.3}/codespine/search/vector.py +0 -0
  48. {codespine-1.0.1 → codespine-1.0.3}/codespine/sharding/__init__.py +0 -0
  49. {codespine-1.0.1 → codespine-1.0.3}/codespine/sharding/router.py +0 -0
  50. {codespine-1.0.1 → codespine-1.0.3}/codespine/sharding/store.py +0 -0
  51. {codespine-1.0.1 → codespine-1.0.3}/codespine/watch/__init__.py +0 -0
  52. {codespine-1.0.1 → codespine-1.0.3}/codespine/watch/git_hook.py +0 -0
  53. {codespine-1.0.1 → codespine-1.0.3}/codespine/watch/watcher.py +0 -0
  54. {codespine-1.0.1 → codespine-1.0.3}/codespine.egg-info/SOURCES.txt +0 -0
  55. {codespine-1.0.1 → codespine-1.0.3}/codespine.egg-info/dependency_links.txt +0 -0
  56. {codespine-1.0.1 → codespine-1.0.3}/codespine.egg-info/entry_points.txt +0 -0
  57. {codespine-1.0.1 → codespine-1.0.3}/codespine.egg-info/requires.txt +0 -0
  58. {codespine-1.0.1 → codespine-1.0.3}/codespine.egg-info/top_level.txt +0 -0
  59. {codespine-1.0.1 → codespine-1.0.3}/gindex.py +0 -0
  60. {codespine-1.0.1 → codespine-1.0.3}/setup.cfg +0 -0
  61. {codespine-1.0.1 → codespine-1.0.3}/tests/test_branch_diff_normalize.py +0 -0
  62. {codespine-1.0.1 → codespine-1.0.3}/tests/test_call_resolver.py +0 -0
  63. {codespine-1.0.1 → codespine-1.0.3}/tests/test_community_detection.py +0 -0
  64. {codespine-1.0.1 → codespine-1.0.3}/tests/test_cypher_compat.py +0 -0
  65. {codespine-1.0.1 → codespine-1.0.3}/tests/test_deadcode.py +0 -0
  66. {codespine-1.0.1 → codespine-1.0.3}/tests/test_index_and_hybrid.py +0 -0
  67. {codespine-1.0.1 → codespine-1.0.3}/tests/test_java_parser.py +0 -0
  68. {codespine-1.0.1 → codespine-1.0.3}/tests/test_multimodule_index.py +0 -0
  69. {codespine-1.0.1 → codespine-1.0.3}/tests/test_overlay.py +0 -0
  70. {codespine-1.0.1 → codespine-1.0.3}/tests/test_result_cache.py +0 -0
  71. {codespine-1.0.1 → codespine-1.0.3}/tests/test_search_ranking.py +0 -0
  72. {codespine-1.0.1 → codespine-1.0.3}/tests/test_sharding.py +0 -0
  73. {codespine-1.0.1 → codespine-1.0.3}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 1.0.1
3
+ Version: 1.0.3
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "1.0.1"
4
+ __version__ = "1.0.3"
@@ -41,6 +41,60 @@ LOGGER = logging.getLogger(__name__)
41
41
  _VECTOR_DIM = SETTINGS.vector_dim # 384
42
42
 
43
43
 
44
def _remove_path(path: str) -> None:
    """Best-effort delete of *path*: regular file, symlink, or directory tree.

    A real directory is removed recursively; anything else that exists
    (including a symlink, even a dangling one or one pointing at a
    directory) is unlinked.  A missing path is a no-op.  Failures are logged
    as warnings and never raised.
    """
    try:
        is_real_dir = os.path.isdir(path) and not os.path.islink(path)
        if is_real_dir:
            shutil.rmtree(path)
            return
        # Symlinks are unlinked rather than followed, so the link target
        # is never touched.
        if os.path.exists(path) or os.path.islink(path):
            os.remove(path)
    except OSError as err:
        LOGGER.warning("Could not remove %s: %s", path, err)
53
+
54
+
55
def _is_provably_not_duckdb(path: str) -> bool:
    """Return True only when *path* can be shown NOT to be a DuckDB file.

    DuckDB's documented on-disk header is an 8-byte checksum followed by the
    four magic bytes ``DUCK``.  A readable file without that magic (including
    an empty or truncated one) is definitely not a DuckDB database.  A path
    that cannot be opened as a file at all (dangling symlink, symlink to a
    directory) is also reported as "not DuckDB".  When the file exists but
    cannot be read (e.g. permissions), the answer is unknown and False is
    returned so the caller errs on the side of keeping the data.
    """
    try:
        with open(path, "rb") as handle:
            header = handle.read(12)
    except (FileNotFoundError, IsADirectoryError):
        return True
    except OSError:
        return False  # cannot inspect -> do not claim it is garbage
    return header[8:12] != b"DUCK"


def _sanitize_db_path(path: str) -> None:
    """Ensure *path* is either absent or a plausible DuckDB database file.

    KùzuDB leaves directory trees at the same paths DuckDB expects as files,
    and half-written snapshots can leave zero-byte or garbage files.  Real
    directories are removed outright.  Regular files are probed with a
    throw-away read-only DuckDB connect; if that raises ``IOException`` the
    file is removed **only** when its header provably lacks DuckDB's magic
    bytes.

    The header check matters because DuckDB also raises ``IOException`` when
    a valid database is locked by another process or is not accessible due
    to permissions; deleting the file in those cases would destroy live
    data.  Such files are left in place so the subsequent real open reports
    the actual problem.

    Never raises for the conditions it handles; removal failures are logged
    by ``_remove_path``.
    """
    if not os.path.exists(path) and not os.path.islink(path):
        return  # nothing there — nothing to do

    # Any directory is by definition not a DuckDB database file.
    if os.path.isdir(path) and not os.path.islink(path):
        LOGGER.info(
            "Removing non-DuckDB directory at %s (likely legacy KùzuDB layout) — "
            "re-index with 'codespine analyse' to rebuild.",
            path,
        )
        _remove_path(path)
        return

    # Regular file (or symlink) — try a throw-away read-only open.
    try:
        probe = duckdb.connect(path, read_only=True)
        probe.close()
    except duckdb.IOException as exc:
        if not _is_provably_not_duckdb(path):
            # Looks like a DuckDB file (or cannot be inspected): the failure
            # is most likely a cross-process lock or a permission problem,
            # not corruption.  Never delete in that case.
            LOGGER.warning(
                "Leaving %s in place: probe failed but the file is not "
                "provably invalid (%s)",
                path,
                exc,
            )
            return
        LOGGER.info(
            "Removing invalid DB file at %s (%s) — re-index with "
            "'codespine analyse' to rebuild.",
            path,
            exc,
        )
        _remove_path(path)
    except Exception as exc:
        # Not an I/O-level failure (e.g. the file is already open in this
        # process with a different configuration) — not ours to fix here;
        # let the real open surface it.
        LOGGER.debug("Probe of %s failed; deferring to real open: %s", path, exc)
96
+
97
+
44
98
  # ---------------------------------------------------------------------------
45
99
  # Schema DDL
46
100
  # ---------------------------------------------------------------------------
@@ -201,41 +255,31 @@ class DuckDBStore:
201
255
  from codespine.overlay.store import OverlayStore
202
256
  self.overlay_store = OverlayStore()
203
257
 
204
- # Prefer snapshot for read-only access; fall back to write path.
205
- snap_exists = os.path.exists(self._snapshot_path)
206
- db_file = self._snapshot_path if read_only and snap_exists else self._db_path
207
-
208
258
  # ----------------------------------------------------------------
209
- # Legacy KùzuDB migration guard
210
- # KùzuDB stores its database as a *directory*; DuckDB uses a single
211
- # file. When CODESPINE_BACKEND is changed to "duckdb" an existing
212
- # KùzuDB shard directory sits at the same path and DuckDB refuses to
213
- # open it. Detect this case, wipe the directory, and start fresh.
259
+ # Pre-flight sanitize: KùzuDB may have left directories or partial
260
+ # files at the paths DuckDB is about to use. Probe each path with a
261
+ # throw-away read-only connect; if it fails, whatever is there is
262
+ # not a valid DuckDB database, so remove it. This runs BEFORE the
263
+ # real open so we never hit a mid-fallback failure mode.
214
264
  # ----------------------------------------------------------------
215
- for legacy_path in (self._db_path, self._snapshot_path):
216
- if os.path.isdir(legacy_path):
217
- LOGGER.info(
218
- "Removing legacy KùzuDB directory at %s — "
219
- "re-index with 'codespine analyse' to rebuild.",
220
- legacy_path,
221
- )
222
- shutil.rmtree(legacy_path)
265
+ os.makedirs(os.path.dirname(self._db_path) or ".", exist_ok=True)
266
+ for p in (self._db_path, self._snapshot_path):
267
+ _sanitize_db_path(p)
223
268
 
224
- # Re-evaluate db_file after possible cleanup.
269
+ # After sanitize, pick the file we actually open.
225
270
  snap_exists = os.path.exists(self._snapshot_path)
226
271
  db_file = self._snapshot_path if read_only and snap_exists else self._db_path
227
272
 
228
- # When opening read-only and no snapshot exists yet, open the write
229
- # DB in read-only mode so callers get an empty-but-valid store rather
230
- # than an error.
231
- if read_only and not snap_exists and not os.path.exists(self._db_path):
232
- # No data at all — open an in-memory DB so queries return [] cleanly.
273
+ # Read-only open with nothing on disk → in-memory empty DB so queries
274
+ # return [] cleanly instead of "database does not exist".
275
+ if read_only and not os.path.exists(db_file):
233
276
  self._conn = duckdb.connect(":memory:")
234
277
  self._ensure_schema()
235
278
  return
236
279
 
237
- os.makedirs(os.path.dirname(db_file) or ".", exist_ok=True)
238
- self._conn: duckdb.DuckDBPyConnection = duckdb.connect(db_file, read_only=read_only)
280
+ self._conn: duckdb.DuckDBPyConnection = duckdb.connect(
281
+ db_file, read_only=read_only
282
+ )
239
283
  if not read_only:
240
284
  self._ensure_schema()
241
285
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 1.0.1
3
+ Version: 1.0.3
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "1.0.1"
7
+ version = "1.0.3"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -399,3 +399,50 @@ def test_sharded_duckdb_multi_project_isolation(tmp_path: Path):
399
399
  all_ids = {p["id"] for p in sg.list_project_metadata()}
400
400
  assert pid_a in all_ids
401
401
  assert pid_b in all_ids
402
+
403
+
404
+ # ---------------------------------------------------------------------------
405
+ # Legacy KùzuDB artifact recovery
406
+ # ---------------------------------------------------------------------------
407
+
408
+
409
def test_legacy_kuzu_directory_at_db_path_is_removed(tmp_path: Path):
    """DuckDBStore auto-removes a KùzuDB directory left at the DB path."""
    db_path = str(tmp_path / "db")
    snap_path = str(tmp_path / "db_read")

    # Simulate a KùzuDB directory at the write path
    os.makedirs(db_path)
    (Path(db_path) / "nodes.index").write_bytes(b"\x00" * 16)

    store = DuckDBStore(db_path_override=db_path, snapshot_path_override=snap_path)
    rows = store.query_records("SELECT * FROM projects")
    assert rows == []  # fresh empty DB, no crash
    # The behaviour the test is named for: the legacy directory is gone.
    assert not os.path.isdir(db_path)
421
+
422
+
423
def test_legacy_kuzu_file_at_snapshot_path_is_removed(tmp_path: Path):
    """DuckDBStore auto-removes a KùzuDB-style directory left at the snapshot path."""
    db_path = str(tmp_path / "db")
    snap_path = str(tmp_path / "db_read")

    # Write a KùzuDB-style snapshot *directory* at the snap path
    os.makedirs(snap_path)
    (Path(snap_path) / "catalog.json").write_bytes(b"{}")

    # Open read-only — should clear the bad snapshot and open fresh
    store = DuckDBStore(read_only=True, db_path_override=db_path, snapshot_path_override=snap_path)
    rows = store.query_records("SELECT * FROM projects")
    assert rows == []
    # The behaviour the test is named for: the bogus snapshot directory is gone.
    assert not os.path.isdir(snap_path)
436
+
437
+
438
def test_corrupt_file_at_db_path_is_replaced(tmp_path: Path):
    """A garbage regular file at the DB path must not stop the store from opening."""
    write_target = tmp_path / "db"
    snapshot_target = tmp_path / "db_read"

    # Plant bytes that DuckDB cannot interpret as a database.
    write_target.write_bytes(b"NOT A DUCKDB FILE\x00\x01\x02")

    store = DuckDBStore(
        db_path_override=str(write_target),
        snapshot_path_override=str(snapshot_target),
    )
    # A fresh, empty store is expected: the schema exists but holds no rows.
    assert store.query_records("SELECT * FROM projects") == []
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes