codespine 1.0.2__tar.gz → 1.0.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-1.0.2 → codespine-1.0.3}/PKG-INFO +1 -1
- {codespine-1.0.2 → codespine-1.0.3}/codespine/__init__.py +1 -1
- {codespine-1.0.2 → codespine-1.0.3}/codespine/db/duckdb_store.py +64 -36
- {codespine-1.0.2 → codespine-1.0.3}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-1.0.2 → codespine-1.0.3}/pyproject.toml +1 -1
- {codespine-1.0.2 → codespine-1.0.3}/LICENSE +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/README.md +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/analysis/__init__.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/analysis/community.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/analysis/context.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/analysis/coupling.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/analysis/crossmodule.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/analysis/deadcode.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/analysis/flow.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/analysis/impact.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/cache/__init__.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/cache/result_cache.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/cli.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/config.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/db/__init__.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/db/_cypher_compat.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/db/schema.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/db/store.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/diff/__init__.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/diff/branch_diff.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/guide.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/indexer/__init__.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/indexer/call_resolver.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/indexer/di_resolver.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/indexer/engine.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/indexer/java_parser.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/mcp/__init__.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/mcp/server.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/noise/__init__.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/noise/blocklist.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/overlay/__init__.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/overlay/git_state.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/overlay/merge.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/overlay/store.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/search/__init__.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/search/bm25.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/search/fuzzy.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/search/hybrid.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/search/rrf.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/search/vector.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/sharding/__init__.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/sharding/router.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/sharding/store.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/watch/__init__.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/watch/git_hook.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine/watch/watcher.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine.egg-info/requires.txt +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/codespine.egg-info/top_level.txt +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/gindex.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/setup.cfg +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_call_resolver.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_community_detection.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_cypher_compat.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_deadcode.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_duckdb_store.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_index_and_hybrid.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_java_parser.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_multimodule_index.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_overlay.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_result_cache.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_search_ranking.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_sharding.py +0 -0
- {codespine-1.0.2 → codespine-1.0.3}/tests/test_store_recovery.py +0 -0
|
@@ -52,6 +52,49 @@ def _remove_path(path: str) -> None:
|
|
|
52
52
|
LOGGER.warning("Could not remove %s: %s", path, exc)
|
|
53
53
|
|
|
54
54
|
|
|
55
|
+
def _sanitize_db_path(path: str) -> None:
|
|
56
|
+
"""Ensure *path* is either absent or a valid DuckDB database file.
|
|
57
|
+
|
|
58
|
+
KùzuDB leaves directory-trees at the same paths DuckDB expects as files,
|
|
59
|
+
and half-written snapshots can leave zero-byte or corrupt files. We
|
|
60
|
+
cheaply probe the given path with a read-only DuckDB connect; if that raises
|
|
61
|
+
any ``IOException``, whatever is there isn't a valid DuckDB database and
|
|
62
|
+
we delete it so the subsequent real open starts from a clean slate.
|
|
63
|
+
"""
|
|
64
|
+
if not os.path.exists(path) and not os.path.islink(path):
|
|
65
|
+
return # nothing there — nothing to do
|
|
66
|
+
|
|
67
|
+
# Any directory is by definition not a DuckDB database file.
|
|
68
|
+
if os.path.isdir(path) and not os.path.islink(path):
|
|
69
|
+
LOGGER.info(
|
|
70
|
+
"Removing non-DuckDB directory at %s (likely legacy KùzuDB layout) — "
|
|
71
|
+
"re-index with 'codespine analyse' to rebuild.",
|
|
72
|
+
path,
|
|
73
|
+
)
|
|
74
|
+
_remove_path(path)
|
|
75
|
+
return
|
|
76
|
+
|
|
77
|
+
# Regular file — try a throw-away read-only open to verify it's a DB.
|
|
78
|
+
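# IOException → file exists but could not be opened as a DuckDB database → remove. NOTE(review): DuckDB may also raise IOException for a valid file that is locked by another process or unreadable due to permissions — confirm before deleting.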
|
|
79
|
+
# Connection/Catalog/Other exceptions → file is valid DuckDB (possibly
|
|
80
|
+
# already open by another connection in this process); leave it alone.
|
|
81
|
+
try:
|
|
82
|
+
probe = duckdb.connect(path, read_only=True)
|
|
83
|
+
probe.close()
|
|
84
|
+
except duckdb.IOException as exc:
|
|
85
|
+
LOGGER.info(
|
|
86
|
+
"Removing invalid DB file at %s (%s) — re-index with "
|
|
87
|
+
"'codespine analyse' to rebuild.",
|
|
88
|
+
path,
|
|
89
|
+
exc,
|
|
90
|
+
)
|
|
91
|
+
_remove_path(path)
|
|
92
|
+
except Exception:
|
|
93
|
+
# File is presumably a valid DuckDB but we can't open it right now (in-use, perms,
|
|
94
|
+
# etc.) — not our problem to fix here; let the real open surface it.
|
|
95
|
+
pass
|
|
96
|
+
|
|
97
|
+
|
|
55
98
|
# ---------------------------------------------------------------------------
|
|
56
99
|
# Schema DDL
|
|
57
100
|
# ---------------------------------------------------------------------------
|
|
@@ -212,46 +255,31 @@ class DuckDBStore:
|
|
|
212
255
|
from codespine.overlay.store import OverlayStore
|
|
213
256
|
self.overlay_store = OverlayStore()
|
|
214
257
|
|
|
215
|
-
# Prefer snapshot for read-only access; fall back to write path.
|
|
216
|
-
snap_exists = os.path.exists(self._snapshot_path)
|
|
217
|
-
db_file = self._snapshot_path if read_only and snap_exists else self._db_path
|
|
218
|
-
|
|
219
258
|
# ----------------------------------------------------------------
|
|
220
|
-
#
|
|
221
|
-
#
|
|
222
|
-
#
|
|
223
|
-
#
|
|
224
|
-
#
|
|
259
|
+
# Pre-flight sanitize: KùzuDB may have left directories or partial
|
|
260
|
+
# files at the paths DuckDB is about to use. Probe each path with a
|
|
261
|
+
# throw-away read-only connect — if it fails, whatever is there is
|
|
262
|
+
# not a valid DuckDB database, so remove it. This runs BEFORE the
|
|
263
|
+
# real open so we never hit a mid-fallback failure mode.
|
|
225
264
|
# ----------------------------------------------------------------
|
|
226
|
-
os.makedirs(os.path.dirname(
|
|
227
|
-
for
|
|
228
|
-
|
|
229
|
-
# to read — use an in-memory DB so callers get [] instead of crash.
|
|
230
|
-
if read_only and not os.path.exists(db_file):
|
|
231
|
-
self._conn = duckdb.connect(":memory:")
|
|
232
|
-
self._ensure_schema()
|
|
233
|
-
return
|
|
265
|
+
os.makedirs(os.path.dirname(self._db_path) or ".", exist_ok=True)
|
|
266
|
+
for p in (self._db_path, self._snapshot_path):
|
|
267
|
+
_sanitize_db_path(p)
|
|
234
268
|
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
)
|
|
239
|
-
break
|
|
240
|
-
except duckdb.IOException as exc:
|
|
241
|
-
if attempt > 0:
|
|
242
|
-
raise # second attempt also failed — give up
|
|
243
|
-
LOGGER.info(
|
|
244
|
-
"Cannot open DB at %s (%s) — removing stale artifact "
|
|
245
|
-
"and starting fresh. Re-index with 'codespine analyse'.",
|
|
246
|
-
db_file,
|
|
247
|
-
exc,
|
|
248
|
-
)
|
|
249
|
-
_remove_path(db_file)
|
|
250
|
-
# If the bad path was the snapshot, fall back to the write DB.
|
|
251
|
-
if db_file == self._snapshot_path:
|
|
252
|
-
db_file = self._db_path
|
|
253
|
-
os.makedirs(os.path.dirname(db_file) or ".", exist_ok=True)
|
|
269
|
+
# After sanitize, pick the file we actually open.
|
|
270
|
+
snap_exists = os.path.exists(self._snapshot_path)
|
|
271
|
+
db_file = self._snapshot_path if read_only and snap_exists else self._db_path
|
|
254
272
|
|
|
273
|
+
# Read-only open with nothing on disk → in-memory empty DB so queries
|
|
274
|
+
# return [] cleanly instead of "database does not exist".
|
|
275
|
+
if read_only and not os.path.exists(db_file):
|
|
276
|
+
self._conn = duckdb.connect(":memory:")
|
|
277
|
+
self._ensure_schema()
|
|
278
|
+
return
|
|
279
|
+
|
|
280
|
+
self._conn: duckdb.DuckDBPyConnection = duckdb.connect(
|
|
281
|
+
db_file, read_only=read_only
|
|
282
|
+
)
|
|
255
283
|
if not read_only:
|
|
256
284
|
self._ensure_schema()
|
|
257
285
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|