codespine 1.0.2__tar.gz → 1.0.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73)
  1. {codespine-1.0.2 → codespine-1.0.4}/PKG-INFO +1 -1
  2. {codespine-1.0.2 → codespine-1.0.4}/codespine/__init__.py +1 -1
  3. {codespine-1.0.2 → codespine-1.0.4}/codespine/cli.py +35 -25
  4. {codespine-1.0.2 → codespine-1.0.4}/codespine/db/duckdb_store.py +64 -36
  5. {codespine-1.0.2 → codespine-1.0.4}/codespine/sharding/store.py +21 -0
  6. {codespine-1.0.2 → codespine-1.0.4}/codespine.egg-info/PKG-INFO +1 -1
  7. {codespine-1.0.2 → codespine-1.0.4}/pyproject.toml +1 -1
  8. {codespine-1.0.2 → codespine-1.0.4}/tests/test_duckdb_store.py +39 -0
  9. {codespine-1.0.2 → codespine-1.0.4}/LICENSE +0 -0
  10. {codespine-1.0.2 → codespine-1.0.4}/README.md +0 -0
  11. {codespine-1.0.2 → codespine-1.0.4}/codespine/analysis/__init__.py +0 -0
  12. {codespine-1.0.2 → codespine-1.0.4}/codespine/analysis/community.py +0 -0
  13. {codespine-1.0.2 → codespine-1.0.4}/codespine/analysis/context.py +0 -0
  14. {codespine-1.0.2 → codespine-1.0.4}/codespine/analysis/coupling.py +0 -0
  15. {codespine-1.0.2 → codespine-1.0.4}/codespine/analysis/crossmodule.py +0 -0
  16. {codespine-1.0.2 → codespine-1.0.4}/codespine/analysis/deadcode.py +0 -0
  17. {codespine-1.0.2 → codespine-1.0.4}/codespine/analysis/flow.py +0 -0
  18. {codespine-1.0.2 → codespine-1.0.4}/codespine/analysis/impact.py +0 -0
  19. {codespine-1.0.2 → codespine-1.0.4}/codespine/cache/__init__.py +0 -0
  20. {codespine-1.0.2 → codespine-1.0.4}/codespine/cache/result_cache.py +0 -0
  21. {codespine-1.0.2 → codespine-1.0.4}/codespine/config.py +0 -0
  22. {codespine-1.0.2 → codespine-1.0.4}/codespine/db/__init__.py +0 -0
  23. {codespine-1.0.2 → codespine-1.0.4}/codespine/db/_cypher_compat.py +0 -0
  24. {codespine-1.0.2 → codespine-1.0.4}/codespine/db/schema.py +0 -0
  25. {codespine-1.0.2 → codespine-1.0.4}/codespine/db/store.py +0 -0
  26. {codespine-1.0.2 → codespine-1.0.4}/codespine/diff/__init__.py +0 -0
  27. {codespine-1.0.2 → codespine-1.0.4}/codespine/diff/branch_diff.py +0 -0
  28. {codespine-1.0.2 → codespine-1.0.4}/codespine/guide.py +0 -0
  29. {codespine-1.0.2 → codespine-1.0.4}/codespine/indexer/__init__.py +0 -0
  30. {codespine-1.0.2 → codespine-1.0.4}/codespine/indexer/call_resolver.py +0 -0
  31. {codespine-1.0.2 → codespine-1.0.4}/codespine/indexer/di_resolver.py +0 -0
  32. {codespine-1.0.2 → codespine-1.0.4}/codespine/indexer/engine.py +0 -0
  33. {codespine-1.0.2 → codespine-1.0.4}/codespine/indexer/java_parser.py +0 -0
  34. {codespine-1.0.2 → codespine-1.0.4}/codespine/indexer/symbol_builder.py +0 -0
  35. {codespine-1.0.2 → codespine-1.0.4}/codespine/mcp/__init__.py +0 -0
  36. {codespine-1.0.2 → codespine-1.0.4}/codespine/mcp/server.py +0 -0
  37. {codespine-1.0.2 → codespine-1.0.4}/codespine/noise/__init__.py +0 -0
  38. {codespine-1.0.2 → codespine-1.0.4}/codespine/noise/blocklist.py +0 -0
  39. {codespine-1.0.2 → codespine-1.0.4}/codespine/overlay/__init__.py +0 -0
  40. {codespine-1.0.2 → codespine-1.0.4}/codespine/overlay/git_state.py +0 -0
  41. {codespine-1.0.2 → codespine-1.0.4}/codespine/overlay/merge.py +0 -0
  42. {codespine-1.0.2 → codespine-1.0.4}/codespine/overlay/store.py +0 -0
  43. {codespine-1.0.2 → codespine-1.0.4}/codespine/search/__init__.py +0 -0
  44. {codespine-1.0.2 → codespine-1.0.4}/codespine/search/bm25.py +0 -0
  45. {codespine-1.0.2 → codespine-1.0.4}/codespine/search/fuzzy.py +0 -0
  46. {codespine-1.0.2 → codespine-1.0.4}/codespine/search/hybrid.py +0 -0
  47. {codespine-1.0.2 → codespine-1.0.4}/codespine/search/rrf.py +0 -0
  48. {codespine-1.0.2 → codespine-1.0.4}/codespine/search/vector.py +0 -0
  49. {codespine-1.0.2 → codespine-1.0.4}/codespine/sharding/__init__.py +0 -0
  50. {codespine-1.0.2 → codespine-1.0.4}/codespine/sharding/router.py +0 -0
  51. {codespine-1.0.2 → codespine-1.0.4}/codespine/watch/__init__.py +0 -0
  52. {codespine-1.0.2 → codespine-1.0.4}/codespine/watch/git_hook.py +0 -0
  53. {codespine-1.0.2 → codespine-1.0.4}/codespine/watch/watcher.py +0 -0
  54. {codespine-1.0.2 → codespine-1.0.4}/codespine.egg-info/SOURCES.txt +0 -0
  55. {codespine-1.0.2 → codespine-1.0.4}/codespine.egg-info/dependency_links.txt +0 -0
  56. {codespine-1.0.2 → codespine-1.0.4}/codespine.egg-info/entry_points.txt +0 -0
  57. {codespine-1.0.2 → codespine-1.0.4}/codespine.egg-info/requires.txt +0 -0
  58. {codespine-1.0.2 → codespine-1.0.4}/codespine.egg-info/top_level.txt +0 -0
  59. {codespine-1.0.2 → codespine-1.0.4}/gindex.py +0 -0
  60. {codespine-1.0.2 → codespine-1.0.4}/setup.cfg +0 -0
  61. {codespine-1.0.2 → codespine-1.0.4}/tests/test_branch_diff_normalize.py +0 -0
  62. {codespine-1.0.2 → codespine-1.0.4}/tests/test_call_resolver.py +0 -0
  63. {codespine-1.0.2 → codespine-1.0.4}/tests/test_community_detection.py +0 -0
  64. {codespine-1.0.2 → codespine-1.0.4}/tests/test_cypher_compat.py +0 -0
  65. {codespine-1.0.2 → codespine-1.0.4}/tests/test_deadcode.py +0 -0
  66. {codespine-1.0.2 → codespine-1.0.4}/tests/test_index_and_hybrid.py +0 -0
  67. {codespine-1.0.2 → codespine-1.0.4}/tests/test_java_parser.py +0 -0
  68. {codespine-1.0.2 → codespine-1.0.4}/tests/test_multimodule_index.py +0 -0
  69. {codespine-1.0.2 → codespine-1.0.4}/tests/test_overlay.py +0 -0
  70. {codespine-1.0.2 → codespine-1.0.4}/tests/test_result_cache.py +0 -0
  71. {codespine-1.0.2 → codespine-1.0.4}/tests/test_search_ranking.py +0 -0
  72. {codespine-1.0.2 → codespine-1.0.4}/tests/test_sharding.py +0 -0
  73. {codespine-1.0.2 → codespine-1.0.4}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 1.0.2
3
+ Version: 1.0.4
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "1.0.2"
4
+ __version__ = "1.0.4"
@@ -22,7 +22,6 @@ from codespine.analysis.deadcode import detect_dead_code
22
22
  from codespine.analysis.flow import trace_execution_flows
23
23
  from codespine.analysis.impact import analyze_impact
24
24
  from codespine.config import SETTINGS
25
- from codespine.db.store import GraphStore
26
25
  from codespine.sharding import ShardedGraphStore, ShardRouter
27
26
  from codespine.diff.branch_diff import compare_branches
28
27
  from codespine.indexer.engine import JavaIndexer
@@ -56,6 +55,17 @@ def _current_repo_path() -> str:
56
55
  return os.getcwd()
57
56
 
58
57
 
58
+ def _open_store(read_only: bool = True) -> ShardedGraphStore:
59
+ """Open the sharded store with the backend configured in SETTINGS.
60
+
61
+ Every CLI command must go through this helper so the correct backend
62
+ (DuckDB or KùzuDB) is selected transparently. Direct ``GraphStore(...)``
63
+ calls were tied to the legacy single-DB KùzuDB layout and will fail on
64
+ any machine running the default DuckDB backend with sharded storage.
65
+ """
66
+ return ShardedGraphStore(read_only=read_only)
67
+
68
+
59
69
  def _db_size_bytes(path: str) -> int:
60
70
  if os.path.isfile(path):
61
71
  return os.path.getsize(path)
@@ -584,7 +594,7 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
584
594
  @click.option("--json", "as_json", is_flag=True)
585
595
  def search(query: str, k: int, as_json: bool) -> None:
586
596
  """Hybrid search (BM25 + vector + fuzzy + RRF)."""
587
- store = GraphStore(read_only=True)
597
+ store = _open_store(read_only=True)
588
598
  results = hybrid_search(store, query, k=k)
589
599
  _echo_json(results, as_json)
590
600
 
@@ -595,7 +605,7 @@ def search(query: str, k: int, as_json: bool) -> None:
595
605
  @click.option("--json", "as_json", is_flag=True)
596
606
  def context(query: str, max_depth: int, as_json: bool) -> None:
597
607
  """Get one-shot symbol context: search + impact + community + flows."""
598
- store = GraphStore(read_only=True)
608
+ store = _open_store(read_only=True)
599
609
  result = build_symbol_context(store, query, max_depth=max_depth)
600
610
  _echo_json(result, as_json)
601
611
 
@@ -606,7 +616,7 @@ def context(query: str, max_depth: int, as_json: bool) -> None:
606
616
  @click.option("--json", "as_json", is_flag=True)
607
617
  def impact(symbol: str, max_depth: int, as_json: bool) -> None:
608
618
  """Impact analysis grouped by depth with confidence scores."""
609
- store = GraphStore(read_only=True)
619
+ store = _open_store(read_only=True)
610
620
  result = analyze_impact(store, symbol, max_depth=max_depth)
611
621
  _echo_json(result, as_json)
612
622
 
@@ -616,7 +626,7 @@ def impact(symbol: str, max_depth: int, as_json: bool) -> None:
616
626
  @click.option("--json", "as_json", is_flag=True)
617
627
  def deadcode(limit: int, as_json: bool) -> None:
618
628
  """Detect dead code candidates with Java-aware exemptions."""
619
- store = GraphStore(read_only=True)
629
+ store = _open_store(read_only=True)
620
630
  result = detect_dead_code(store, limit=limit)
621
631
  _echo_json(result, as_json)
622
632
 
@@ -627,7 +637,7 @@ def deadcode(limit: int, as_json: bool) -> None:
627
637
  @click.option("--json", "as_json", is_flag=True)
628
638
  def flow(entry_symbol: str | None, max_depth: int, as_json: bool) -> None:
629
639
  """Trace execution flows from detected entry points."""
630
- store = GraphStore(read_only=True)
640
+ store = _open_store(read_only=True)
631
641
  result = trace_execution_flows(store, entry_symbol=entry_symbol, max_depth=max_depth)
632
642
  _echo_json(result, as_json)
633
643
 
@@ -637,7 +647,7 @@ def flow(entry_symbol: str | None, max_depth: int, as_json: bool) -> None:
637
647
  @click.option("--json", "as_json", is_flag=True)
638
648
  def community(symbol: str | None, as_json: bool) -> None:
639
649
  """Detect communities or lookup community for a symbol."""
640
- store = GraphStore(read_only=False)
650
+ store = _open_store(read_only=False)
641
651
  detect_communities(store)
642
652
  if symbol:
643
653
  _echo_json(symbol_community(store, symbol), as_json)
@@ -655,7 +665,7 @@ def community(symbol: str | None, as_json: bool) -> None:
655
665
  @click.option("--json", "as_json", is_flag=True)
656
666
  def coupling(days: int, min_strength: float, min_cochanges: int, as_json: bool) -> None:
657
667
  """Compute and query git change coupling."""
658
- store = GraphStore(read_only=False)
668
+ store = _open_store(read_only=False)
659
669
  project = store.query_records("MATCH (p:Project) RETURN p.id as id LIMIT 1")
660
670
  project_id = project[0]["id"] if project else os.path.basename(os.getcwd())
661
671
  compute_coupling(store, os.getcwd(), project_id, days=days, min_strength=min_strength, min_cochanges=min_cochanges)
@@ -681,7 +691,7 @@ def coupling(days: int, min_strength: float, min_cochanges: int, as_json: bool)
681
691
  @click.option("--promote-on-commit/--no-promote-on-commit", default=True, show_default=True)
682
692
  def watch(path: str, global_interval: int, overlay_debounce_ms: int, promote_on_commit: bool) -> None:
683
693
  """Live re-indexing and periodic global analysis refresh."""
684
- store = GraphStore(read_only=False)
694
+ store = _open_store(read_only=False)
685
695
  run_watch_mode(
686
696
  store,
687
697
  os.path.abspath(path),
@@ -720,12 +730,12 @@ def stats(as_json: bool, show_shards: bool) -> None:
720
730
  def _project_store(pid: str):
721
731
  return sg.shard(pid)
722
732
 
723
- if not projects:
733
+ if not all_projects_meta:
724
734
  click.secho("No projects indexed yet. Run 'codespine analyse <path>'.", fg="yellow")
725
735
  return
726
736
 
727
737
  rows = []
728
- for p in projects:
738
+ for p in all_projects_meta:
729
739
  pid = p["id"]
730
740
  # Route each query to the project's owning shard.
731
741
  ps = _project_store(pid)
@@ -813,7 +823,7 @@ def stats(as_json: bool, show_shards: bool) -> None:
813
823
  @click.option("--json", "as_json", is_flag=True)
814
824
  def list_projects(as_json: bool) -> None:
815
825
  """List indexed projects."""
816
- store = GraphStore(read_only=True)
826
+ store = _open_store(read_only=True)
817
827
  projects = store.query_records("MATCH (p:Project) RETURN p.id as id, p.path as path, p.language as language ORDER BY p.id")
818
828
  _echo_json(projects, as_json)
819
829
 
@@ -837,7 +847,7 @@ def status(as_json: bool) -> None:
837
847
  pid = int(f.read().strip())
838
848
  except Exception:
839
849
  pid = None
840
- store = GraphStore(read_only=True)
850
+ store = _open_store(read_only=True)
841
851
  overlay = get_overlay_status(store)
842
852
 
843
853
  # Check for stale PID file
@@ -875,7 +885,7 @@ def status(as_json: bool) -> None:
875
885
  @click.option("--json", "as_json", is_flag=True)
876
886
  def overlay_status_cmd(project: str | None, as_json: bool) -> None:
877
887
  """Show dirty overlay status by project/module."""
878
- store = GraphStore(read_only=True)
888
+ store = _open_store(read_only=True)
879
889
  _echo_json(get_overlay_status(store, project=project), as_json)
880
890
 
881
891
 
@@ -884,7 +894,7 @@ def overlay_status_cmd(project: str | None, as_json: bool) -> None:
884
894
  @click.option("--json", "as_json", is_flag=True)
885
895
  def overlay_clear_cmd(project: str | None, as_json: bool) -> None:
886
896
  """Clear dirty overlay data without touching the committed base index."""
887
- store = GraphStore(read_only=False)
897
+ store = _open_store(read_only=False)
888
898
  result = {"cleared": clear_overlay(store, project=project)}
889
899
  _echo_json(result, as_json)
890
900
 
@@ -894,7 +904,7 @@ def overlay_clear_cmd(project: str | None, as_json: bool) -> None:
894
904
  @click.option("--json", "as_json", is_flag=True)
895
905
  def overlay_promote_cmd(project: str | None, as_json: bool) -> None:
896
906
  """Promote dirty overlay changes into the committed base index now."""
897
- store = GraphStore(read_only=False)
907
+ store = _open_store(read_only=False)
898
908
  result = {"promoted": promote_overlay(store, project=project, require_head_change=False)}
899
909
  _echo_json(result, as_json)
900
910
 
@@ -904,7 +914,7 @@ def overlay_promote_cmd(project: str | None, as_json: bool) -> None:
904
914
  @click.option("--json", "as_json", is_flag=True)
905
915
  def cypher(query: str, as_json: bool) -> None:
906
916
  """Run a raw Cypher query against the graph DB."""
907
- store = GraphStore(read_only=True)
917
+ store = _open_store(read_only=True)
908
918
  try:
909
919
  result = store.query_records(query)
910
920
  except Exception as exc:
@@ -948,7 +958,7 @@ def clear_project_cmd(project_id: str, allow_running: bool) -> None:
948
958
  click.secho("Stop MCP first ('codespine stop') to modify index.", fg="yellow")
949
959
  return
950
960
  try:
951
- store = GraphStore(read_only=False)
961
+ store = _open_store(read_only=False)
952
962
  recs = store.query_records(
953
963
  "MATCH (p:Project) WHERE p.id = $pid RETURN p.id as id, p.path as path",
954
964
  {"pid": project_id},
@@ -974,7 +984,7 @@ def clear_project_cmd(project_id: str, allow_running: bool) -> None:
974
984
  except OSError:
975
985
  pass
976
986
  # Update the read replica so read-only callers (stats, MCP) see the change.
977
- GraphStore.snapshot_to_read_replica()
987
+ store.snapshot_to_read_replica()
978
988
  click.secho(f"Cleared project '{project_id}' (was at {project_path}).", fg="green")
979
989
 
980
990
 
@@ -991,12 +1001,12 @@ def clear_index_cmd(allow_running: bool) -> None:
991
1001
  click.secho("Stop MCP first ('codespine stop') to modify index.", fg="yellow")
992
1002
  return
993
1003
  try:
994
- store = GraphStore(read_only=False)
1004
+ store = _open_store(read_only=False)
995
1005
  projects = store.query_records("MATCH (p:Project) RETURN p.id as id")
996
1006
  except Exception:
997
1007
  # DB is corrupted — can't even open it. Force-delete everything.
998
1008
  click.secho("DB is corrupted. Running force-reset instead...", fg="yellow")
999
- removed = GraphStore.force_delete_all_data()
1009
+ removed = ShardedGraphStore(read_only=False).force_delete_all_data()
1000
1010
  click.secho(f"Force-reset complete. {len(removed)} path(s) removed. Index is now empty.", fg="green")
1001
1011
  return
1002
1012
  try:
@@ -1004,7 +1014,7 @@ def clear_index_cmd(allow_running: bool) -> None:
1004
1014
  except Exception as exc:
1005
1015
  # rebuild_empty_db failed even with fallbacks — force-delete.
1006
1016
  click.secho(f"rebuild failed ({exc}). Running force-reset...", fg="yellow")
1007
- GraphStore.force_delete_all_data()
1017
+ store.force_delete_all_data()
1008
1018
  click.secho("Force-reset complete. Index is now empty.", fg="green")
1009
1019
  return
1010
1020
  store.overlay_store.clear_all()
@@ -1017,7 +1027,7 @@ def clear_index_cmd(allow_running: bool) -> None:
1017
1027
  pass
1018
1028
  # Publish an empty read replica so that read-only callers (stats, MCP)
1019
1029
  # immediately see the cleared state and the MCP daemon hot-reloads.
1020
- GraphStore.snapshot_to_read_replica()
1030
+ store.snapshot_to_read_replica()
1021
1031
  click.secho(f"Cleared {len(projects)} project(s). Index is now empty.", fg="green")
1022
1032
 
1023
1033
 
@@ -1038,7 +1048,7 @@ def force_reset_cmd(force: bool) -> None:
1038
1048
  ):
1039
1049
  click.echo("Aborted.")
1040
1050
  return
1041
- removed = GraphStore.force_delete_all_data()
1051
+ removed = ShardedGraphStore(read_only=False).force_delete_all_data()
1042
1052
  if removed:
1043
1053
  for p in removed:
1044
1054
  click.echo(f" removed: {p}")
@@ -1177,7 +1187,7 @@ def install_model() -> None:
1177
1187
  @main.command("run-mcp", hidden=True)
1178
1188
  def run_mcp() -> None:
1179
1189
  """Run MCP server in stdio mode."""
1180
- store = GraphStore(read_only=True)
1190
+ store = _open_store(read_only=True)
1181
1191
  mcp = build_mcp_server(store, repo_path_provider=_current_repo_path)
1182
1192
  mcp.run()
1183
1193
 
@@ -52,6 +52,49 @@ def _remove_path(path: str) -> None:
52
52
  LOGGER.warning("Could not remove %s: %s", path, exc)
53
53
 
54
54
 
55
+ def _sanitize_db_path(path: str) -> None:
56
+ """Ensure *path* is either absent or a valid DuckDB database file.
57
+
58
+ KùzuDB leaves directory-trees at the same paths DuckDB expects as files,
59
+ and half-written snapshots can leave zero-byte or corrupt files. We
60
+ cheaply probe each path with a read-only DuckDB connect; if that raises
61
+ any ``IOException``, whatever is there isn't a valid DuckDB database and
62
+ we delete it so the subsequent real open starts from a clean slate.
63
+ """
64
+ if not os.path.exists(path) and not os.path.islink(path):
65
+ return # nothing there — nothing to do
66
+
67
+ # Any directory is by definition not a DuckDB database file.
68
+ if os.path.isdir(path) and not os.path.islink(path):
69
+ LOGGER.info(
70
+ "Removing non-DuckDB directory at %s (likely legacy KùzuDB layout) — "
71
+ "re-index with 'codespine analyse' to rebuild.",
72
+ path,
73
+ )
74
+ _remove_path(path)
75
+ return
76
+
77
+ # Regular file — try a throw-away read-only open to verify it's a DB.
78
+ # IOException → file exists but is not a valid DuckDB database → remove.
79
+ # Connection/Catalog/Other exceptions → file is valid DuckDB (possibly
80
+ # already open by another connection in this process); leave it alone.
81
+ try:
82
+ probe = duckdb.connect(path, read_only=True)
83
+ probe.close()
84
+ except duckdb.IOException as exc:
85
+ LOGGER.info(
86
+ "Removing invalid DB file at %s (%s) — re-index with "
87
+ "'codespine analyse' to rebuild.",
88
+ path,
89
+ exc,
90
+ )
91
+ _remove_path(path)
92
+ except Exception:
93
+ # File is a valid DuckDB but we can't open it right now (in-use, perms,
94
+ # etc.) — not our problem to fix here; let the real open surface it.
95
+ pass
96
+
97
+
55
98
  # ---------------------------------------------------------------------------
56
99
  # Schema DDL
57
100
  # ---------------------------------------------------------------------------
@@ -212,46 +255,31 @@ class DuckDBStore:
212
255
  from codespine.overlay.store import OverlayStore
213
256
  self.overlay_store = OverlayStore()
214
257
 
215
- # Prefer snapshot for read-only access; fall back to write path.
216
- snap_exists = os.path.exists(self._snapshot_path)
217
- db_file = self._snapshot_path if read_only and snap_exists else self._db_path
218
-
219
258
  # ----------------------------------------------------------------
220
- # Robust open: handle legacy KùzuDB artifacts at the target path.
221
- # KùzuDB may leave a directory, a partial file, or a 0-byte sentinel
222
- # at the same path DuckDB expects. Rather than guessing the type,
223
- # we attempt to connect and on any IOException we wipe whatever is
224
- # there and retry once with a clean slate.
259
+ # Pre-flight sanitize: KùzuDB may have left directories or partial
260
+ # files at the paths DuckDB is about to use. Probe each path with a
261
+ # throw-away read-only connect; if it fails, whatever is there is
262
+ # not a valid DuckDB database, so remove it. This runs BEFORE the
263
+ # real open so we never hit a mid-fallback failure mode.
225
264
  # ----------------------------------------------------------------
226
- os.makedirs(os.path.dirname(db_file) or ".", exist_ok=True)
227
- for attempt in range(2):
228
- # If read-only and the target file doesn't exist, we have nothing
229
- # to read — use an in-memory DB so callers get [] instead of crash.
230
- if read_only and not os.path.exists(db_file):
231
- self._conn = duckdb.connect(":memory:")
232
- self._ensure_schema()
233
- return
265
+ os.makedirs(os.path.dirname(self._db_path) or ".", exist_ok=True)
266
+ for p in (self._db_path, self._snapshot_path):
267
+ _sanitize_db_path(p)
234
268
 
235
- try:
236
- self._conn: duckdb.DuckDBPyConnection = duckdb.connect(
237
- db_file, read_only=read_only
238
- )
239
- break
240
- except duckdb.IOException as exc:
241
- if attempt > 0:
242
- raise # second attempt also failed — give up
243
- LOGGER.info(
244
- "Cannot open DB at %s (%s) — removing stale artifact "
245
- "and starting fresh. Re-index with 'codespine analyse'.",
246
- db_file,
247
- exc,
248
- )
249
- _remove_path(db_file)
250
- # If the bad path was the snapshot, fall back to the write DB.
251
- if db_file == self._snapshot_path:
252
- db_file = self._db_path
253
- os.makedirs(os.path.dirname(db_file) or ".", exist_ok=True)
269
+ # After sanitize, pick the file we actually open.
270
+ snap_exists = os.path.exists(self._snapshot_path)
271
+ db_file = self._snapshot_path if read_only and snap_exists else self._db_path
254
272
 
273
+ # Read-only open with nothing on disk → in-memory empty DB so queries
274
+ # return [] cleanly instead of "database does not exist".
275
+ if read_only and not os.path.exists(db_file):
276
+ self._conn = duckdb.connect(":memory:")
277
+ self._ensure_schema()
278
+ return
279
+
280
+ self._conn: duckdb.DuckDBPyConnection = duckdb.connect(
281
+ db_file, read_only=read_only
282
+ )
255
283
  if not read_only:
256
284
  self._ensure_schema()
257
285
 
@@ -299,6 +299,27 @@ class ShardedGraphStore:
299
299
  removed.extend(store.force_delete_all_data())
300
300
  return removed
301
301
 
302
+ def clear_analysis_artifacts(self) -> None:
303
+ """Fan-out: clear analysis artifacts (communities, flows, dead code) on every shard."""
304
+ for store in self.all_shards():
305
+ try:
306
+ store.clear_analysis_artifacts()
307
+ except Exception as exc:
308
+ LOGGER.warning("clear_analysis_artifacts failed on shard: %s", exc)
309
+
310
+ def rebuild_empty_db(self) -> None:
311
+ """Fan-out: rebuild each shard as an empty database."""
312
+ for store in self.all_shards():
313
+ try:
314
+ store.rebuild_empty_db()
315
+ except Exception as exc:
316
+ LOGGER.warning("rebuild_empty_db failed on shard: %s", exc)
317
+
318
+ def snapshot_to_read_replica(self, background: bool = False) -> bool:
319
+ """Alias for ``snapshot_all`` — matches GraphStore's API."""
320
+ self.snapshot_all(background=background)
321
+ return True
322
+
302
323
  def describe(self) -> dict:
303
324
  """Return a human-readable description of the shard topology."""
304
325
  shard_info = []
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 1.0.2
3
+ Version: 1.0.4
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "1.0.2"
7
+ version = "1.0.4"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -446,3 +446,42 @@ def test_corrupt_file_at_db_path_is_replaced(tmp_path: Path):
446
446
  store = DuckDBStore(db_path_override=db_path, snapshot_path_override=snap_path)
447
447
  rows = store.query_records("SELECT * FROM projects")
448
448
  assert rows == []
449
+
450
+
451
+ def test_legacy_kuzu_dirs_at_both_paths_are_removed(tmp_path: Path):
452
+ """Regression: both db and db_read as KùzuDB directories (the exact
453
+ scenario from the field bug in v1.0.2)."""
454
+ db_path = str(tmp_path / "db")
455
+ snap_path = str(tmp_path / "db_read")
456
+
457
+ # Simulate KùzuDB directories at BOTH paths
458
+ for p in (db_path, snap_path):
459
+ os.makedirs(p)
460
+ (Path(p) / "catalog.kz").write_bytes(b"\x00" * 64)
461
+ (Path(p) / "data.kz").write_bytes(b"\x00" * 1024)
462
+
463
+ # Read-only open: legacy code would pick snap, fail, fall back to db, fail, raise.
464
+ # New code pre-sanitizes both paths, then returns an in-memory empty DB.
465
+ store = DuckDBStore(read_only=True, db_path_override=db_path, snapshot_path_override=snap_path)
466
+ rows = store.query_records("SELECT * FROM projects")
467
+ assert rows == []
468
+ # Both paths should be gone
469
+ assert not os.path.exists(db_path)
470
+ assert not os.path.exists(snap_path)
471
+
472
+
473
+ def test_sharded_store_stats_flow_with_stale_kuzu_dirs(tmp_path: Path):
474
+ """Regression: ShardedGraphStore.list_project_metadata() must not crash
475
+ when every shard path is a stale KùzuDB directory (the failing
476
+ 'codespine stats' scenario)."""
477
+ shards_dir = tmp_path / "shards"
478
+ # Pre-create 4 shards with legacy KùzuDB-style directories at both paths
479
+ for i in range(4):
480
+ (shards_dir / str(i)).mkdir(parents=True)
481
+ (shards_dir / str(i) / "db").mkdir()
482
+ (shards_dir / str(i) / "db" / "catalog.kz").write_bytes(b"\x00" * 32)
483
+ (shards_dir / str(i) / "db_read").mkdir()
484
+
485
+ sg = ShardedGraphStore(read_only=True, shards_dir=str(shards_dir), backend="duckdb")
486
+ # This is what `codespine stats` does — it must not raise.
487
+ assert sg.list_project_metadata() == []
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes