codespine 1.0.1__tar.gz → 1.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-1.0.1 → codespine-1.0.3}/PKG-INFO +1 -1
- {codespine-1.0.1 → codespine-1.0.3}/codespine/__init__.py +1 -1
- {codespine-1.0.1 → codespine-1.0.3}/codespine/db/duckdb_store.py +69 -25
- {codespine-1.0.1 → codespine-1.0.3}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-1.0.1 → codespine-1.0.3}/pyproject.toml +1 -1
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_duckdb_store.py +47 -0
- {codespine-1.0.1 → codespine-1.0.3}/LICENSE +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/README.md +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/community.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/context.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/coupling.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/crossmodule.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/deadcode.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/flow.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/analysis/impact.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/cache/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/cache/result_cache.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/cli.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/config.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/db/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/db/_cypher_compat.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/db/schema.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/db/store.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/diff/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/diff/branch_diff.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/guide.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/indexer/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/indexer/call_resolver.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/indexer/di_resolver.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/indexer/engine.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/indexer/java_parser.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/mcp/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/mcp/server.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/noise/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/noise/blocklist.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/overlay/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/overlay/git_state.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/overlay/merge.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/overlay/store.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/search/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/search/bm25.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/search/fuzzy.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/search/hybrid.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/search/rrf.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/search/vector.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/sharding/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/sharding/router.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/sharding/store.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/watch/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/watch/git_hook.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine/watch/watcher.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine.egg-info/requires.txt +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/codespine.egg-info/top_level.txt +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/gindex.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/setup.cfg +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_call_resolver.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_community_detection.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_cypher_compat.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_deadcode.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_index_and_hybrid.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_java_parser.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_multimodule_index.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_overlay.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_result_cache.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_search_ranking.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_sharding.py +0 -0
- {codespine-1.0.1 → codespine-1.0.3}/tests/test_store_recovery.py +0 -0
|
@@ -41,6 +41,60 @@ LOGGER = logging.getLogger(__name__)
|
|
|
41
41
|
_VECTOR_DIM = SETTINGS.vector_dim # 384
|
|
42
42
|
|
|
43
43
|
|
|
44
|
+
def _remove_path(path: str) -> None:
    """Delete whatever lives at *path*: a file, a symlink, or a directory tree.

    A symlink is unlinked itself rather than followed, so a link that points
    at a directory never triggers a recursive delete of its target.  A path
    with nothing at it is a no-op.  Any ``OSError`` raised while deleting is
    logged as a warning and swallowed; this function never raises for it.
    """
    try:
        is_link = os.path.islink(path)
        if not is_link and os.path.isdir(path):
            # Real directory (not a link to one): remove the whole tree.
            shutil.rmtree(path)
            return
        if is_link or os.path.exists(path):
            # Regular file, or a symlink (possibly dangling): unlink it.
            os.remove(path)
    except OSError as exc:
        LOGGER.warning("Could not remove %s: %s", path, exc)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _has_duckdb_magic(path: str) -> bool:
    """Return True when the file at *path* may be a genuine DuckDB database.

    DuckDB database files carry the ASCII magic ``DUCK`` at byte offset 8
    (right after an 8-byte checksum).  A file that cannot be read at all is
    reported as "may be valid": we cannot prove it is garbage, so the caller
    must not delete it.  Anything that is not a regular file once symlinks
    are followed (dangling link, link to a directory) is reported as invalid.
    """
    if not os.path.isfile(path):
        return False
    try:
        with open(path, "rb") as fh:
            header = fh.read(12)
    except OSError:
        # Unreadable (permissions, sharing violation, ...) — keep the file.
        return True
    return header[8:12] == b"DUCK"


def _sanitize_db_path(path: str) -> None:
    """Ensure *path* is either absent or plausibly a DuckDB database file.

    KùzuDB leaves directory trees at the same paths DuckDB expects as files,
    and half-written snapshots can leave zero-byte or garbage files.  Such
    leftovers are deleted so the subsequent real open starts from a clean
    slate.

    The check is a throw-away read-only ``duckdb.connect``.  A probe failure
    alone is NOT treated as proof of corruption: DuckDB also raises
    ``IOException`` when another process holds the file lock or the file is
    not readable, and deleting a live database in those cases would destroy
    valid data.  A file is therefore only removed when the probe raises
    ``IOException`` *and* the file lacks the DuckDB header magic.  A file
    that has the magic but still fails to open is left in place so the real
    open can surface the actual error.

    Args:
        path: Filesystem location DuckDB is about to open.

    Returns:
        None.  Failures while deleting are logged by ``_remove_path``.
    """
    if not os.path.exists(path) and not os.path.islink(path):
        return  # nothing there — nothing to do

    # Any directory is by definition not a DuckDB database file.
    if os.path.isdir(path) and not os.path.islink(path):
        LOGGER.info(
            "Removing non-DuckDB directory at %s (likely legacy KùzuDB layout) — "
            "re-index with 'codespine analyse' to rebuild.",
            path,
        )
        _remove_path(path)
        return

    try:
        probe = duckdb.connect(path, read_only=True)
    except duckdb.IOException as exc:
        if _has_duckdb_magic(path):
            # Looks like a real database that is merely locked, unreadable,
            # or otherwise unopenable right now — never delete user data on
            # that basis; let the real open report the problem.
            LOGGER.debug(
                "Probe of %s failed (%s) but the file has a DuckDB header; "
                "leaving it untouched.",
                path,
                exc,
            )
            return
        LOGGER.info(
            "Removing invalid DB file at %s (%s) — re-index with "
            "'codespine analyse' to rebuild.",
            path,
            exc,
        )
        _remove_path(path)
    except Exception as exc:
        # Non-I/O failure (e.g. the file is already open in this process
        # with a different configuration).  Not ours to fix here; record it
        # and let the real open surface it.
        LOGGER.debug("Probe of %s failed with non-I/O error: %s", path, exc)
    else:
        probe.close()
|
|
96
|
+
|
|
97
|
+
|
|
44
98
|
# ---------------------------------------------------------------------------
|
|
45
99
|
# Schema DDL
|
|
46
100
|
# ---------------------------------------------------------------------------
|
|
@@ -201,41 +255,31 @@ class DuckDBStore:
|
|
|
201
255
|
from codespine.overlay.store import OverlayStore
|
|
202
256
|
self.overlay_store = OverlayStore()
|
|
203
257
|
|
|
204
|
-
# Prefer snapshot for read-only access; fall back to write path.
|
|
205
|
-
snap_exists = os.path.exists(self._snapshot_path)
|
|
206
|
-
db_file = self._snapshot_path if read_only and snap_exists else self._db_path
|
|
207
|
-
|
|
208
258
|
# ----------------------------------------------------------------
|
|
209
|
-
#
|
|
210
|
-
#
|
|
211
|
-
#
|
|
212
|
-
#
|
|
213
|
-
# open
|
|
259
|
+
# Pre-flight sanitize: KùzuDB may have left directories or partial
|
|
260
|
+
# files at the paths DuckDB is about to use. Probe each path with a
|
|
261
|
+
# throw-away read-only connect — if it fails, whatever is there is
|
|
262
|
+
# not a valid DuckDB database, so remove it. This runs BEFORE the
|
|
263
|
+
# real open so we never hit a mid-fallback failure mode.
|
|
214
264
|
# ----------------------------------------------------------------
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
"Removing legacy KùzuDB directory at %s — "
|
|
219
|
-
"re-index with 'codespine analyse' to rebuild.",
|
|
220
|
-
legacy_path,
|
|
221
|
-
)
|
|
222
|
-
shutil.rmtree(legacy_path)
|
|
265
|
+
os.makedirs(os.path.dirname(self._db_path) or ".", exist_ok=True)
|
|
266
|
+
for p in (self._db_path, self._snapshot_path):
|
|
267
|
+
_sanitize_db_path(p)
|
|
223
268
|
|
|
224
|
-
#
|
|
269
|
+
# After sanitize, pick the file we actually open.
|
|
225
270
|
snap_exists = os.path.exists(self._snapshot_path)
|
|
226
271
|
db_file = self._snapshot_path if read_only and snap_exists else self._db_path
|
|
227
272
|
|
|
228
|
-
#
|
|
229
|
-
#
|
|
230
|
-
|
|
231
|
-
if read_only and not snap_exists and not os.path.exists(self._db_path):
|
|
232
|
-
# No data at all — open an in-memory DB so queries return [] cleanly.
|
|
273
|
+
# Read-only open with nothing on disk → in-memory empty DB so queries
|
|
274
|
+
# return [] cleanly instead of "database does not exist".
|
|
275
|
+
if read_only and not os.path.exists(db_file):
|
|
233
276
|
self._conn = duckdb.connect(":memory:")
|
|
234
277
|
self._ensure_schema()
|
|
235
278
|
return
|
|
236
279
|
|
|
237
|
-
|
|
238
|
-
|
|
280
|
+
self._conn: duckdb.DuckDBPyConnection = duckdb.connect(
|
|
281
|
+
db_file, read_only=read_only
|
|
282
|
+
)
|
|
239
283
|
if not read_only:
|
|
240
284
|
self._ensure_schema()
|
|
241
285
|
|
|
@@ -399,3 +399,50 @@ def test_sharded_duckdb_multi_project_isolation(tmp_path: Path):
|
|
|
399
399
|
all_ids = {p["id"] for p in sg.list_project_metadata()}
|
|
400
400
|
assert pid_a in all_ids
|
|
401
401
|
assert pid_b in all_ids
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
# ---------------------------------------------------------------------------
|
|
405
|
+
# Legacy KùzuDB artifact recovery
|
|
406
|
+
# ---------------------------------------------------------------------------
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def test_legacy_kuzu_directory_at_db_path_is_removed(tmp_path: Path):
    """DuckDBStore auto-removes a KùzuDB directory left at the DB path."""
    db_path = str(tmp_path / "db")
    snap_path = str(tmp_path / "db_read")

    # Simulate a KùzuDB directory at the write path
    os.makedirs(db_path)
    (Path(db_path) / "nodes.index").write_bytes(b"\x00" * 16)

    store = DuckDBStore(db_path_override=db_path, snapshot_path_override=snap_path)

    # The test name promises removal, so check it explicitly: the legacy
    # directory must no longer exist as a directory after construction.
    assert not os.path.isdir(db_path)

    rows = store.query_records("SELECT * FROM projects")
    assert rows == []  # fresh empty DB, no crash
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def test_legacy_kuzu_file_at_snapshot_path_is_removed(tmp_path: Path):
    """DuckDBStore auto-removes a KùzuDB-style directory left at the snapshot path."""
    db_path = str(tmp_path / "db")
    snap_path = str(tmp_path / "db_read")

    # Write a KùzuDB-style snapshot *directory* at the snap path
    os.makedirs(snap_path)
    (Path(snap_path) / "catalog.json").write_bytes(b"{}")

    # Open read-only — should clear the bad snapshot and open fresh
    store = DuckDBStore(read_only=True, db_path_override=db_path, snapshot_path_override=snap_path)

    # Verify the removal the test name promises, not just the absence of a crash.
    assert not os.path.isdir(snap_path)

    rows = store.query_records("SELECT * FROM projects")
    assert rows == []
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def test_corrupt_file_at_db_path_is_replaced(tmp_path: Path):
    """A regular file holding non-DuckDB bytes at the DB path must not break the store."""
    db_file = tmp_path / "db"
    snapshot_file = tmp_path / "db_read"

    # Plant bytes that DuckDB cannot interpret as a database.
    db_file.write_bytes(b"NOT A DUCKDB FILE\x00\x01\x02")

    store = DuckDBStore(
        db_path_override=str(db_file),
        snapshot_path_override=str(snapshot_file),
    )

    # The store must come up usable and empty.
    assert store.query_records("SELECT * FROM projects") == []
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|