codespine 1.0.1__tar.gz → 1.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-1.0.1 → codespine-1.0.2}/PKG-INFO +1 -1
- {codespine-1.0.1 → codespine-1.0.2}/codespine/__init__.py +1 -1
- {codespine-1.0.1 → codespine-1.0.2}/codespine/db/duckdb_store.py +42 -26
- {codespine-1.0.1 → codespine-1.0.2}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-1.0.1 → codespine-1.0.2}/pyproject.toml +1 -1
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_duckdb_store.py +47 -0
- {codespine-1.0.1 → codespine-1.0.2}/LICENSE +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/README.md +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/analysis/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/analysis/community.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/analysis/context.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/analysis/coupling.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/analysis/crossmodule.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/analysis/deadcode.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/analysis/flow.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/analysis/impact.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/cache/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/cache/result_cache.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/cli.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/config.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/db/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/db/_cypher_compat.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/db/schema.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/db/store.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/diff/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/diff/branch_diff.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/guide.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/indexer/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/indexer/call_resolver.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/indexer/di_resolver.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/indexer/engine.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/indexer/java_parser.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/mcp/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/mcp/server.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/noise/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/noise/blocklist.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/overlay/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/overlay/git_state.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/overlay/merge.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/overlay/store.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/search/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/search/bm25.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/search/fuzzy.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/search/hybrid.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/search/rrf.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/search/vector.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/sharding/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/sharding/router.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/sharding/store.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/watch/__init__.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/watch/git_hook.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine/watch/watcher.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine.egg-info/requires.txt +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/codespine.egg-info/top_level.txt +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/gindex.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/setup.cfg +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_call_resolver.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_community_detection.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_cypher_compat.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_deadcode.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_index_and_hybrid.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_java_parser.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_multimodule_index.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_overlay.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_result_cache.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_search_ranking.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_sharding.py +0 -0
- {codespine-1.0.1 → codespine-1.0.2}/tests/test_store_recovery.py +0 -0
|
@@ -41,6 +41,17 @@ LOGGER = logging.getLogger(__name__)
|
|
|
41
41
|
_VECTOR_DIM = SETTINGS.vector_dim # 384
|
|
42
42
|
|
|
43
43
|
|
|
44
|
+
def _remove_path(path: str) -> None:
|
|
45
|
+
"""Remove *path* whether it is a file, symlink, or directory tree."""
|
|
46
|
+
try:
|
|
47
|
+
if os.path.isdir(path) and not os.path.islink(path):
|
|
48
|
+
shutil.rmtree(path)
|
|
49
|
+
elif os.path.exists(path) or os.path.islink(path):
|
|
50
|
+
os.remove(path)
|
|
51
|
+
except OSError as exc:
|
|
52
|
+
LOGGER.warning("Could not remove %s: %s", path, exc)
|
|
53
|
+
|
|
54
|
+
|
|
44
55
|
# ---------------------------------------------------------------------------
|
|
45
56
|
# Schema DDL
|
|
46
57
|
# ---------------------------------------------------------------------------
|
|
@@ -206,36 +217,41 @@ class DuckDBStore:
|
|
|
206
217
|
db_file = self._snapshot_path if read_only and snap_exists else self._db_path
|
|
207
218
|
|
|
208
219
|
# ----------------------------------------------------------------
|
|
209
|
-
#
|
|
210
|
-
# KùzuDB
|
|
211
|
-
#
|
|
212
|
-
#
|
|
213
|
-
#
|
|
220
|
+
# Robust open: handle legacy KùzuDB artifacts at the target path.
|
|
221
|
+
# KùzuDB may leave a directory, a partial file, or a 0-byte sentinel
|
|
222
|
+
# at the same path DuckDB expects. Rather than guessing the type,
|
|
223
|
+
# we attempt to connect and on any IOException we wipe whatever is
|
|
224
|
+
# there and retry once with a clean slate.
|
|
214
225
|
# ----------------------------------------------------------------
|
|
215
|
-
|
|
216
|
-
|
|
226
|
+
os.makedirs(os.path.dirname(db_file) or ".", exist_ok=True)
|
|
227
|
+
for attempt in range(2):
|
|
228
|
+
# If read-only and the target file doesn't exist, we have nothing
|
|
229
|
+
# to read — use an in-memory DB so callers get [] instead of crash.
|
|
230
|
+
if read_only and not os.path.exists(db_file):
|
|
231
|
+
self._conn = duckdb.connect(":memory:")
|
|
232
|
+
self._ensure_schema()
|
|
233
|
+
return
|
|
234
|
+
|
|
235
|
+
try:
|
|
236
|
+
self._conn: duckdb.DuckDBPyConnection = duckdb.connect(
|
|
237
|
+
db_file, read_only=read_only
|
|
238
|
+
)
|
|
239
|
+
break
|
|
240
|
+
except duckdb.IOException as exc:
|
|
241
|
+
if attempt > 0:
|
|
242
|
+
raise # second attempt also failed — give up
|
|
217
243
|
LOGGER.info(
|
|
218
|
-
"
|
|
219
|
-
"
|
|
220
|
-
|
|
244
|
+
"Cannot open DB at %s (%s) — removing stale artifact "
|
|
245
|
+
"and starting fresh. Re-index with 'codespine analyse'.",
|
|
246
|
+
db_file,
|
|
247
|
+
exc,
|
|
221
248
|
)
|
|
222
|
-
|
|
249
|
+
_remove_path(db_file)
|
|
250
|
+
# If the bad path was the snapshot, fall back to the write DB.
|
|
251
|
+
if db_file == self._snapshot_path:
|
|
252
|
+
db_file = self._db_path
|
|
253
|
+
os.makedirs(os.path.dirname(db_file) or ".", exist_ok=True)
|
|
223
254
|
|
|
224
|
-
# Re-evaluate db_file after possible cleanup.
|
|
225
|
-
snap_exists = os.path.exists(self._snapshot_path)
|
|
226
|
-
db_file = self._snapshot_path if read_only and snap_exists else self._db_path
|
|
227
|
-
|
|
228
|
-
# When opening read-only and no snapshot exists yet, open the write
|
|
229
|
-
# DB in read-only mode so callers get an empty-but-valid store rather
|
|
230
|
-
# than an error.
|
|
231
|
-
if read_only and not snap_exists and not os.path.exists(self._db_path):
|
|
232
|
-
# No data at all — open an in-memory DB so queries return [] cleanly.
|
|
233
|
-
self._conn = duckdb.connect(":memory:")
|
|
234
|
-
self._ensure_schema()
|
|
235
|
-
return
|
|
236
|
-
|
|
237
|
-
os.makedirs(os.path.dirname(db_file) or ".", exist_ok=True)
|
|
238
|
-
self._conn: duckdb.DuckDBPyConnection = duckdb.connect(db_file, read_only=read_only)
|
|
239
255
|
if not read_only:
|
|
240
256
|
self._ensure_schema()
|
|
241
257
|
|
|
@@ -399,3 +399,50 @@ def test_sharded_duckdb_multi_project_isolation(tmp_path: Path):
|
|
|
399
399
|
all_ids = {p["id"] for p in sg.list_project_metadata()}
|
|
400
400
|
assert pid_a in all_ids
|
|
401
401
|
assert pid_b in all_ids
|
|
402
|
+
|
|
403
|
+
|
|
404
|
+
# ---------------------------------------------------------------------------
|
|
405
|
+
# Legacy KùzuDB artifact recovery
|
|
406
|
+
# ---------------------------------------------------------------------------
|
|
407
|
+
|
|
408
|
+
|
|
409
|
+
def test_legacy_kuzu_directory_at_db_path_is_removed(tmp_path: Path):
    """A leftover KùzuDB directory at the write path must not block opening the store."""
    legacy_dir = tmp_path / "db"
    snapshot = tmp_path / "db_read"

    # Plant a directory with one index-like file where the DB file should be.
    legacy_dir.mkdir()
    (legacy_dir / "nodes.index").write_bytes(bytes(16))

    store = DuckDBStore(
        db_path_override=str(legacy_dir),
        snapshot_path_override=str(snapshot),
    )

    # The store opens without raising and the projects table is empty.
    assert store.query_records("SELECT * FROM projects") == []
|
|
421
|
+
|
|
422
|
+
|
|
423
|
+
def test_legacy_kuzu_file_at_snapshot_path_is_removed(tmp_path: Path):
    """DuckDBStore auto-removes a non-DuckDB artifact left at the snapshot path.

    The artifact planted here is a KùzuDB-style *directory* containing a
    single ``catalog.json``.  Opening the store read-only must succeed,
    return no rows, and leave nothing behind at the snapshot path.
    """
    db_path = str(tmp_path / "db")
    snap_path = str(tmp_path / "db_read")

    # Plant a KùzuDB-style snapshot directory at the snapshot path.
    os.makedirs(snap_path)
    (Path(snap_path) / "catalog.json").write_bytes(b"{}")

    # Open read-only: the bad snapshot should be cleared and the store usable.
    store = DuckDBStore(read_only=True, db_path_override=db_path, snapshot_path_override=snap_path)
    rows = store.query_records("SELECT * FROM projects")
    assert rows == []

    # An empty result alone does not prove the cleanup happened, so verify
    # explicitly that the stale artifact is gone from disk.
    assert not os.path.exists(snap_path)
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
def test_corrupt_file_at_db_path_is_replaced(tmp_path: Path):
    """A regular file with non-DuckDB content at the write path must not block opening the store."""
    corrupt_file = tmp_path / "db"
    snapshot = tmp_path / "db_read"

    # Bytes that are not a valid DuckDB database.
    corrupt_file.write_bytes(b"NOT A DUCKDB FILE\x00\x01\x02")

    store = DuckDBStore(
        db_path_override=str(corrupt_file),
        snapshot_path_override=str(snapshot),
    )

    # The store opens without raising and the projects table is empty.
    assert store.query_records("SELECT * FROM projects") == []
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|