codespine 0.5.10__tar.gz → 0.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. {codespine-0.5.10 → codespine-0.6.0}/PKG-INFO +1 -1
  2. {codespine-0.5.10 → codespine-0.6.0}/codespine/__init__.py +1 -1
  3. {codespine-0.5.10 → codespine-0.6.0}/codespine/cli.py +119 -12
  4. {codespine-0.5.10 → codespine-0.6.0}/codespine/config.py +2 -2
  5. {codespine-0.5.10 → codespine-0.6.0}/codespine/db/store.py +47 -2
  6. {codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/engine.py +117 -8
  7. {codespine-0.5.10 → codespine-0.6.0}/codespine/mcp/server.py +146 -51
  8. {codespine-0.5.10 → codespine-0.6.0}/codespine/watch/watcher.py +106 -59
  9. {codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/PKG-INFO +1 -1
  10. {codespine-0.5.10 → codespine-0.6.0}/pyproject.toml +1 -1
  11. {codespine-0.5.10 → codespine-0.6.0}/LICENSE +0 -0
  12. {codespine-0.5.10 → codespine-0.6.0}/README.md +0 -0
  13. {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/__init__.py +0 -0
  14. {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/community.py +0 -0
  15. {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/context.py +0 -0
  16. {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/coupling.py +0 -0
  17. {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/crossmodule.py +0 -0
  18. {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/deadcode.py +0 -0
  19. {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/flow.py +0 -0
  20. {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/impact.py +0 -0
  21. {codespine-0.5.10 → codespine-0.6.0}/codespine/db/__init__.py +0 -0
  22. {codespine-0.5.10 → codespine-0.6.0}/codespine/db/schema.py +0 -0
  23. {codespine-0.5.10 → codespine-0.6.0}/codespine/diff/__init__.py +0 -0
  24. {codespine-0.5.10 → codespine-0.6.0}/codespine/diff/branch_diff.py +0 -0
  25. {codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/__init__.py +0 -0
  26. {codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/call_resolver.py +0 -0
  27. {codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/java_parser.py +0 -0
  28. {codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/symbol_builder.py +0 -0
  29. {codespine-0.5.10 → codespine-0.6.0}/codespine/mcp/__init__.py +0 -0
  30. {codespine-0.5.10 → codespine-0.6.0}/codespine/noise/__init__.py +0 -0
  31. {codespine-0.5.10 → codespine-0.6.0}/codespine/noise/blocklist.py +0 -0
  32. {codespine-0.5.10 → codespine-0.6.0}/codespine/overlay/__init__.py +0 -0
  33. {codespine-0.5.10 → codespine-0.6.0}/codespine/overlay/git_state.py +0 -0
  34. {codespine-0.5.10 → codespine-0.6.0}/codespine/overlay/merge.py +0 -0
  35. {codespine-0.5.10 → codespine-0.6.0}/codespine/overlay/store.py +0 -0
  36. {codespine-0.5.10 → codespine-0.6.0}/codespine/search/__init__.py +0 -0
  37. {codespine-0.5.10 → codespine-0.6.0}/codespine/search/bm25.py +0 -0
  38. {codespine-0.5.10 → codespine-0.6.0}/codespine/search/fuzzy.py +0 -0
  39. {codespine-0.5.10 → codespine-0.6.0}/codespine/search/hybrid.py +0 -0
  40. {codespine-0.5.10 → codespine-0.6.0}/codespine/search/rrf.py +0 -0
  41. {codespine-0.5.10 → codespine-0.6.0}/codespine/search/vector.py +0 -0
  42. {codespine-0.5.10 → codespine-0.6.0}/codespine/watch/__init__.py +0 -0
  43. {codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/SOURCES.txt +0 -0
  44. {codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/dependency_links.txt +0 -0
  45. {codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/entry_points.txt +0 -0
  46. {codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/requires.txt +0 -0
  47. {codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/top_level.txt +0 -0
  48. {codespine-0.5.10 → codespine-0.6.0}/gindex.py +0 -0
  49. {codespine-0.5.10 → codespine-0.6.0}/setup.cfg +0 -0
  50. {codespine-0.5.10 → codespine-0.6.0}/tests/test_branch_diff_normalize.py +0 -0
  51. {codespine-0.5.10 → codespine-0.6.0}/tests/test_call_resolver.py +0 -0
  52. {codespine-0.5.10 → codespine-0.6.0}/tests/test_community_detection.py +0 -0
  53. {codespine-0.5.10 → codespine-0.6.0}/tests/test_deadcode.py +0 -0
  54. {codespine-0.5.10 → codespine-0.6.0}/tests/test_index_and_hybrid.py +0 -0
  55. {codespine-0.5.10 → codespine-0.6.0}/tests/test_java_parser.py +0 -0
  56. {codespine-0.5.10 → codespine-0.6.0}/tests/test_multimodule_index.py +0 -0
  57. {codespine-0.5.10 → codespine-0.6.0}/tests/test_overlay.py +0 -0
  58. {codespine-0.5.10 → codespine-0.6.0}/tests/test_search_ranking.py +0 -0
  59. {codespine-0.5.10 → codespine-0.6.0}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.5.10
3
+ Version: 0.6.0
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "0.5.10"
4
+ __version__ = "0.6.0"
@@ -101,16 +101,17 @@ def main() -> None:
101
101
  @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
102
102
  @click.option(
103
103
  "--embed/--no-embed",
104
- default=False,
104
+ default=True,
105
105
  show_default=True,
106
- help="Generate vector embeddings (slow if sentence-transformers installed; enables semantic search).",
106
+ help="Generate vector embeddings. Uses sentence-transformers if installed (pip install codespine[ml]), otherwise falls back to hash-based vectors.",
107
107
  )
108
108
  @click.option("--allow-running", is_flag=True, hidden=True, help="Skip MCP running check (used by MCP analyse_project tool).")
109
109
  def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool) -> None:
110
110
  """Index a local Java project (auto-detects workspace / Maven / Gradle layout).
111
111
 
112
- By default embeddings are skipped for speed. Pass --embed to generate
113
- vector embeddings for semantic search (requires sentence-transformers).
112
+ Embeddings are generated by default. If sentence-transformers is installed
113
+ (pip install codespine[ml]), high-quality semantic vectors are used; otherwise
114
+ a fast hash-based fallback provides basic vector search.
114
115
  """
115
116
  if not allow_running and _is_running():
116
117
  click.secho("Stop MCP first ('codespine stop') to index.", fg="yellow")
@@ -118,6 +119,17 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
118
119
 
119
120
  started = time.perf_counter()
120
121
  abs_path = os.path.abspath(path)
122
+
123
+ # Warn about hash fallback early so users know to install [ml]
124
+ if embed:
125
+ from codespine.search.vector import _load_model
126
+ if _load_model() is None:
127
+ click.secho(
128
+ "⚠ sentence-transformers not found — using hash-based embeddings.\n"
129
+ " For better semantic search: pip install codespine[ml]\n",
130
+ fg="yellow",
131
+ )
132
+
121
133
  store = GraphStore(read_only=False)
122
134
  indexer = JavaIndexer(store)
123
135
 
@@ -309,9 +321,27 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
309
321
  )
310
322
  _finish_phase(coup_label, f"{len(coupling_pairs)} coupled file pairs")
311
323
  else:
324
+ # Run lightweight versions of flow tracing and dead code from the call
325
+ # graph already built — no community detection or coupling (those are
326
+ # genuinely expensive). This gives partial results without --deep.
312
327
  _phase("Detecting communities...", "skipped (large repo; rerun with --deep)")
313
- _phase("Detecting execution flows...", "skipped (large repo; rerun with --deep)")
314
- _phase("Finding dead code...", "skipped (large repo; rerun with --deep)")
328
+
329
+ flow_label = "Detecting execution flows..."
330
+ _live_phase(flow_label, "running (lightweight)")
331
+ try:
332
+ flows = trace_execution_flows(store, max_depth=3)
333
+ except Exception:
334
+ flows = []
335
+ _finish_phase(flow_label, f"{len(flows)} flows (lightweight; rerun with --deep for full)")
336
+
337
+ dead_label = "Finding dead code..."
338
+ _live_phase(dead_label, "running (lightweight)")
339
+ try:
340
+ dead = detect_dead_code(store, limit=100)
341
+ except Exception:
342
+ dead = []
343
+ _finish_phase(dead_label, f"{_dead_result_count(dead)} candidates (lightweight; rerun with --deep for full)")
344
+
315
345
  _phase("Analyzing git history...", "skipped (large repo; rerun with --deep)")
316
346
 
317
347
  vector_count = store.query_records(
@@ -331,7 +361,12 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
331
361
  edges = int(edge_count[0]["count"]) if edge_count else 0
332
362
  elapsed = time.perf_counter() - started
333
363
 
334
- embed_note = "" if embed else " (no embeddings; rerun with --embed for semantic search)"
364
+ if not embed:
365
+ embed_note = " (no embeddings; rerun with --embed for semantic search)"
366
+ elif _load_model() is None:
367
+ embed_note = " (hash embeddings; pip install codespine[ml] for better search)"
368
+ else:
369
+ embed_note = ""
335
370
  module_info = f"{len(modules_with_ids)} modules/projects, " if is_multi else ""
336
371
  click.echo()
337
372
  click.secho(
@@ -339,6 +374,17 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
339
374
  fg="green",
340
375
  )
341
376
 
377
+ # Detect unresolved imports → hint about unindexed sibling projects
378
+ try:
379
+ unresolved = JavaIndexer.detect_unresolved_imports(store)
380
+ if unresolved:
381
+ click.echo()
382
+ click.secho("⚠ Unresolved imports — consider indexing these projects:", fg="yellow")
383
+ for pkg, samples in sorted(unresolved.items())[:8]:
384
+ click.echo(f" {pkg} (e.g. {samples[0]})")
385
+ except Exception:
386
+ pass # best-effort
387
+
342
388
  # Publish a read replica so MCP and read-only CLI commands (search, stats…)
343
389
  # run against an isolated snapshot rather than competing with the write
344
390
  # process's buffer pool. The MCP daemon detects the sentinel file and
@@ -579,7 +625,14 @@ def list_projects(as_json: bool) -> None:
579
625
  @main.command()
580
626
  @click.option("--json", "as_json", is_flag=True)
581
627
  def status(as_json: bool) -> None:
582
- """Show service and database status."""
628
+ """Show service and database status.
629
+
630
+ Quick reference for MCP server management:
631
+ codespine start – launch background MCP server
632
+ codespine stop – stop background MCP server
633
+ codespine status – this command
634
+ codespine mcp – run MCP in foreground (stdio, for IDE integration)
635
+ """
583
636
  running = _is_running()
584
637
  pid = None
585
638
  if os.path.exists(SETTINGS.pid_file):
@@ -590,17 +643,35 @@ def status(as_json: bool) -> None:
590
643
  pid = None
591
644
  store = GraphStore(read_only=True)
592
645
  overlay = get_overlay_status(store)
646
+
647
+ # Check for stale PID file
648
+ stale_pid = pid is not None and not running
649
+ has_snapshot = os.path.exists(SETTINGS.db_snapshot_path)
650
+
593
651
  payload = {
594
652
  "running": running,
595
653
  "pid": pid,
654
+ "stale_pid": stale_pid,
596
655
  "pid_file": SETTINGS.pid_file,
597
656
  "db_path": SETTINGS.db_path,
598
657
  "db_size_bytes": _db_size_bytes(SETTINGS.db_path),
658
+ "read_replica": SETTINGS.db_snapshot_path if has_snapshot else None,
659
+ "read_replica_size_bytes": _db_size_bytes(SETTINGS.db_snapshot_path) if has_snapshot else 0,
599
660
  "log_file": SETTINGS.log_file,
600
661
  "overlay_dir": SETTINGS.overlay_dir,
601
662
  "overlay_projects": overlay,
602
663
  }
603
- _echo_json(payload, as_json)
664
+ if as_json:
665
+ _echo_json(payload, True)
666
+ else:
667
+ _echo_json(payload, True)
668
+ if stale_pid:
669
+ click.secho(f"\n⚠ Stale PID file found (PID {pid} not running). Run 'codespine stop' to clean up.", fg="yellow")
670
+ if not running:
671
+ click.echo("\nTo start: codespine start")
672
+ click.echo("For IDE: codespine mcp (stdio mode)")
673
+ else:
674
+ click.echo(f"\nMCP server running (PID {pid}). Stop with: codespine stop")
604
675
 
605
676
 
606
677
  @main.command("overlay-status")
@@ -727,6 +798,33 @@ def clear_index_cmd(allow_running: bool) -> None:
727
798
  click.secho(f"Cleared {len(projects)} project(s). Index is now empty.", fg="green")
728
799
 
729
800
 
801
+ @main.command("force-reset")
802
+ @click.option("--force", is_flag=True, help="Skip confirmation prompt.")
803
+ def force_reset_cmd(force: bool) -> None:
804
+ """Emergency reset: delete ALL CodeSpine data files without touching the DB engine.
805
+
806
+ Use this when the buffer pool is exhausted and normal reset/clear commands
807
+ also fail with OOM. This bypasses Kuzu entirely by removing data files
808
+ from disk, including the DB, read replica, overlay, meta cache, and
809
+ embedding cache.
810
+
811
+ After running this, restart the MCP server and re-index your projects.
812
+ """
813
+ if not force and not click.confirm(
814
+ "This will DELETE all CodeSpine data (DB, overlay, caches). Continue?"
815
+ ):
816
+ click.echo("Aborted.")
817
+ return
818
+ removed = GraphStore.force_delete_all_data()
819
+ if removed:
820
+ for p in removed:
821
+ click.echo(f" removed: {p}")
822
+ click.secho(f"\nForce-reset complete. {len(removed)} path(s) removed.", fg="green")
823
+ click.echo("Next: restart MCP ('codespine stop && codespine start') and re-index.")
824
+ else:
825
+ click.secho("Nothing to remove — already clean.", fg="yellow")
826
+
827
+
730
828
  @main.command()
731
829
  def setup() -> None:
732
830
  """Print local setup checks and next steps."""
@@ -743,12 +841,21 @@ def setup() -> None:
743
841
  checks[mod] = True
744
842
  except Exception:
745
843
  checks[mod] = False
746
- click.echo("Dependency check:")
844
+ click.echo("Core dependencies:")
747
845
  for mod, ok in checks.items():
748
846
  click.echo(f" - {mod}: {'OK' if ok else 'MISSING'}")
749
- click.echo("\\nRecommended:")
750
- click.echo(" pip install -e .")
847
+ # Check optional ML dependencies
848
+ try:
849
+ from sentence_transformers import SentenceTransformer
850
+ click.echo(" - sentence-transformers: OK (semantic embeddings active)")
851
+ except ImportError:
852
+ click.secho(" - sentence-transformers: NOT INSTALLED (hash fallback; install for better search)", fg="yellow")
853
+ click.echo("\nRecommended setup:")
854
+ click.echo(" pip install -e '.[full]' # core + ML + community detection")
855
+ click.echo(" pip install -e '.[ml]' # just ML embeddings")
856
+ click.echo("\nQuick start:")
751
857
  click.echo(" codespine analyse /path/to/java-project --full")
858
+ click.echo(" codespine start # launch MCP server")
752
859
  click.echo(" codespine search payment --json")
753
860
 
754
861
 
@@ -16,8 +16,8 @@ class Settings:
16
16
  rrf_k: int = 60
17
17
  semantic_candidate_pool: int = 2000
18
18
  write_batch_size: int = 500
19
- index_file_batch_size: int = 64
20
- edge_write_batch_size: int = 2000
19
+ index_file_batch_size: int = 20
20
+ edge_write_batch_size: int = 500
21
21
  default_coupling_months: int = 6
22
22
  default_min_coupling_strength: float = 0.3
23
23
  default_min_cochanges: int = 3
@@ -147,10 +147,13 @@ class GraphStore:
147
147
 
148
148
  def clear_project(self, project_id: str) -> None:
149
149
  file_recs = self.query_records("MATCH (f:File) WHERE f.project_id = $pid RETURN f.id as id", {"pid": project_id})
150
+ # Small batches (10 files per tx) prevent buffer pool OOM on large projects.
150
151
  for idx, rec in enumerate(file_recs, start=1):
151
- self.clear_file(rec["id"])
152
- if idx % 50 == 0:
152
+ with self.transaction():
153
+ self.clear_file(rec["id"])
154
+ if idx % 10 == 0:
153
155
  self._recycle_conn()
156
+ self._recycle_conn()
154
157
  self.execute("MATCH (p:Project) WHERE p.id = $pid DETACH DELETE p", {"pid": project_id})
155
158
  self._recycle_conn()
156
159
 
@@ -502,6 +505,48 @@ class GraphStore:
502
505
  self.clear_flows()
503
506
  self.clear_coupling()
504
507
 
508
+ @staticmethod
509
+ def force_delete_all_data() -> list[str]:
510
+ """Delete all CodeSpine data files without touching the Kuzu engine.
511
+
512
+ This is the nuclear option for OOM recovery: when the buffer pool is
513
+ exhausted, normal DB writes (including reset_project / clear_project)
514
+ also fail. This bypasses Kuzu entirely by removing the data files
515
+ from disk, allowing a fresh start.
516
+
517
+ Returns the list of paths that were removed.
518
+ """
519
+ removed: list[str] = []
520
+ for path in [
521
+ SETTINGS.db_path,
522
+ SETTINGS.db_snapshot_path,
523
+ SETTINGS.db_snapshot_path + ".updated",
524
+ SETTINGS.db_snapshot_path + ".tmp",
525
+ SETTINGS.embedding_cache_path,
526
+ SETTINGS.overlay_dir,
527
+ SETTINGS.index_meta_dir,
528
+ ]:
529
+ if not os.path.exists(path):
530
+ continue
531
+ try:
532
+ if os.path.isdir(path):
533
+ shutil.rmtree(path, ignore_errors=True)
534
+ else:
535
+ os.remove(path)
536
+ removed.append(path)
537
+ except OSError:
538
+ pass
539
+ # Also remove any stale WAL files next to the DB
540
+ for suffix in (".wal", ".lock"):
541
+ wal_path = SETTINGS.db_path + suffix
542
+ if os.path.exists(wal_path):
543
+ try:
544
+ os.remove(wal_path)
545
+ removed.append(wal_path)
546
+ except OSError:
547
+ pass
548
+ return removed
549
+
505
550
  def rebuild_empty_db(self) -> None:
506
551
  self._recycle_conn()
507
552
  path = SETTINGS.db_path
@@ -253,6 +253,20 @@ class JavaIndexer:
253
253
  for fid in delete_chunk:
254
254
  self.store.clear_file(fid)
255
255
  self.store._recycle_conn()
256
+
257
+ # Clean up stale project entries that point to the same path under a
258
+ # different ID (e.g. re-indexing "vision-server" directly after it was
259
+ # previously indexed as "vision::vision-server" from a workspace root).
260
+ try:
261
+ stale = self.store.query_records(
262
+ "MATCH (p:Project) WHERE p.path = $path AND p.id <> $pid RETURN p.id as id",
263
+ {"path": root_path, "pid": project_id},
264
+ )
265
+ for old in stale:
266
+ self.store.clear_project(old["id"])
267
+ except Exception:
268
+ pass # best-effort cleanup
269
+
256
270
  self.store.upsert_project(project_id, root_path)
257
271
 
258
272
  for parse_chunk in self._chunked(parse_results, file_batch_size):
@@ -279,7 +293,7 @@ class JavaIndexer:
279
293
  "hash": file_digest,
280
294
  }
281
295
  )
282
- self._update_meta_cache_entry(meta_cache, f_id, file_path, file_digest, len(source))
296
+ self._update_meta_cache_entry(meta_cache, f_id, file_path, file_digest, len(source), imports=parsed.imports)
283
297
 
284
298
  for cls in parsed.classes:
285
299
  c_id = class_id(cls.fqcn, scope)
@@ -372,15 +386,31 @@ class JavaIndexer:
372
386
  class_methods[c_id][method.signature] = m_id
373
387
  files_indexed += 1
374
388
 
389
+ # Split writes into smaller transactions and recycle between each
390
+ # to prevent Kuzu WAL from exhausting the buffer pool on large
391
+ # incremental re-indexes (GH feedback: 1,604-file OOM).
392
+ if not full:
393
+ for clear_sub in self._chunked(file_rows, 10):
394
+ with self.store.transaction():
395
+ for row in clear_sub:
396
+ self.store.clear_file(row["id"])
397
+ self.store._recycle_conn()
375
398
  with self.store.transaction():
376
- for row in file_rows:
377
- if not full:
378
- self.store.clear_file(row["id"])
379
399
  self.store.upsert_files_batch(file_rows)
400
+ self.store._recycle_conn()
401
+ with self.store.transaction():
380
402
  self.store.upsert_classes_batch(class_rows)
381
- self.store.upsert_methods_batch(method_rows)
382
- self.store.upsert_symbols_batch(symbol_rows)
383
403
  self.store._recycle_conn()
404
+ _METHOD_SUB_BATCH = 200
405
+ for method_sub in self._chunked(method_rows, _METHOD_SUB_BATCH):
406
+ with self.store.transaction():
407
+ self.store.upsert_methods_batch(method_sub)
408
+ self.store._recycle_conn()
409
+ _SYMBOL_SUB_BATCH = 200
410
+ for symbol_sub in self._chunked(symbol_rows, _SYMBOL_SUB_BATCH):
411
+ with self.store.transaction():
412
+ self.store.upsert_symbols_batch(symbol_sub)
413
+ self.store._recycle_conn()
384
414
 
385
415
  self._emit(progress, "resolve_calls_start")
386
416
  call_rows: list[dict] = []
@@ -697,7 +727,10 @@ class JavaIndexer:
697
727
  return
698
728
 
699
729
  @staticmethod
700
- def _update_meta_cache_entry(meta_cache: dict[str, dict], fid: str, file_path: str, digest: str, size_hint: int) -> None:
730
+ def _update_meta_cache_entry(
731
+ meta_cache: dict[str, dict], fid: str, file_path: str, digest: str, size_hint: int,
732
+ imports: list[str] | None = None,
733
+ ) -> None:
701
734
  try:
702
735
  st = os.stat(file_path)
703
736
  mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000)))
@@ -705,7 +738,10 @@ class JavaIndexer:
705
738
  except OSError:
706
739
  mtime_ns = -1
707
740
  size = size_hint
708
- meta_cache[fid] = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
741
+ entry: dict = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
742
+ if imports is not None:
743
+ entry["imports"] = imports
744
+ meta_cache[fid] = entry
709
745
 
710
746
  @staticmethod
711
747
  def _prune_meta_cache(meta_cache: dict[str, dict], current_file_ids: set[str]) -> None:
@@ -728,3 +764,76 @@ class JavaIndexer:
728
764
  return normalized.split("/src/", 1)[0]
729
765
  scope = os.path.dirname(normalized).strip()
730
766
  return scope or "."
767
+
768
+ @staticmethod
769
+ def detect_unresolved_imports(store) -> dict[str, list[str]]:
770
+ """Detect imports that reference packages not covered by any indexed project.
771
+
772
+ Returns a dict mapping unresolved base packages (e.g. "com.foo.bar")
773
+ to a list of sample import FQCNs. Useful for suggesting which sibling
774
+ projects to index.
775
+
776
+ Only reports project-internal packages (not java.*, javax.*, org.apache.*
777
+ etc.).
778
+ """
779
+ # 1. Collect all indexed class FQCNs
780
+ try:
781
+ recs = store.query_records("MATCH (c:Class) RETURN c.fqcn as fqcn")
782
+ except Exception:
783
+ return {}
784
+ indexed_fqcns = {r["fqcn"] for r in recs if r.get("fqcn")}
785
+ indexed_packages = set()
786
+ for fqcn in indexed_fqcns:
787
+ parts = fqcn.rsplit(".", 1)
788
+ if len(parts) == 2:
789
+ indexed_packages.add(parts[0])
790
+
791
+ # 2. Collect all imports from overlay + any stored file data
792
+ # Parse imports from the parsed file metadata if available
793
+ meta_dir = SETTINGS.index_meta_dir
794
+ all_imports: set[str] = set()
795
+ if os.path.isdir(meta_dir):
796
+ for fname in os.listdir(meta_dir):
797
+ if not fname.endswith(".json"):
798
+ continue
799
+ try:
800
+ with open(os.path.join(meta_dir, fname), "r") as f:
801
+ data = json.load(f)
802
+ for fid, fmeta in data.items():
803
+ for imp in fmeta.get("imports", []):
804
+ all_imports.add(imp)
805
+ except Exception:
806
+ pass
807
+
808
+ # 3. Also scan the DB for CALLS edges that reference unknown targets
809
+ # (lightweight — just check which classes were resolved vs not)
810
+
811
+ # 4. Filter: skip standard library / well-known third-party packages
812
+ _SKIP_PREFIXES = (
813
+ "java.", "javax.", "jakarta.",
814
+ "org.apache.", "org.springframework.", "org.hibernate.",
815
+ "org.slf4j.", "org.junit.", "org.mockito.",
816
+ "com.google.", "com.fasterxml.", "com.sun.",
817
+ "io.micrometer.", "io.netty.", "io.lettuce.",
818
+ "lombok.", "reactor.", "rx.",
819
+ )
820
+
821
+ unresolved: dict[str, list[str]] = {}
822
+ for imp in all_imports:
823
+ if any(imp.startswith(prefix) for prefix in _SKIP_PREFIXES):
824
+ continue
825
+ # Check if this import's class exists in the index
826
+ simple_name = imp.rsplit(".", 1)[-1]
827
+ pkg = imp.rsplit(".", 1)[0] if "." in imp else ""
828
+ if imp in indexed_fqcns:
829
+ continue
830
+ if pkg in indexed_packages:
831
+ continue # same package, just not this specific class
832
+ # Group by top 3 package segments
833
+ parts = imp.split(".")
834
+ base_pkg = ".".join(parts[:min(3, len(parts))])
835
+ if base_pkg not in unresolved:
836
+ unresolved[base_pkg] = []
837
+ if len(unresolved[base_pkg]) < 5:
838
+ unresolved[base_pkg].append(imp)
839
+ return unresolved
@@ -59,12 +59,15 @@ def _git_available(path: str) -> bool:
59
59
  def _resolve_repo_path(store, project: str | None, repo_path_provider) -> str:
60
60
  """Resolve the filesystem path for a given project_id, falling back to cwd."""
61
61
  if project:
62
- recs = store.query_records(
63
- "MATCH (p:Project) WHERE p.id = $pid RETURN p.path as path LIMIT 1",
64
- {"pid": project},
65
- )
66
- if recs and recs[0].get("path"):
67
- return recs[0]["path"]
62
+ try:
63
+ recs = store.query_records(
64
+ "MATCH (p:Project) WHERE p.id = $pid RETURN p.path as path LIMIT 1",
65
+ {"pid": project},
66
+ )
67
+ if recs and recs[0].get("path"):
68
+ return recs[0]["path"]
69
+ except Exception:
70
+ pass
68
71
  return repo_path_provider()
69
72
 
70
73
 
@@ -305,6 +308,20 @@ def build_mcp_server(store, repo_path_provider):
305
308
  "RECOMMENDED: start watch mode during active development."
306
309
  )
307
310
 
311
+ # Detect unresolved imports → hint about unindexed sibling projects
312
+ unresolved_imports: dict[str, list[str]] = {}
313
+ try:
314
+ from codespine.indexer.engine import JavaIndexer as _JI
315
+ unresolved_imports = _JI.detect_unresolved_imports(store)
316
+ if unresolved_imports:
317
+ pkgs = list(unresolved_imports.keys())[:5]
318
+ notes["unresolved_imports"] = (
319
+ f"Imports from unindexed packages detected: {', '.join(pkgs)}. "
320
+ "Consider indexing these projects for complete cross-project tracing."
321
+ )
322
+ except Exception:
323
+ pass
324
+
308
325
  return {
309
326
  "available": True,
310
327
  "indexed_projects": projects,
@@ -333,6 +350,7 @@ def build_mcp_server(store, repo_path_provider):
333
350
  "get_overlay_status": True,
334
351
  "promote_overlay": True,
335
352
  "clear_overlay": True,
353
+ "force_reset_index": True,
336
354
  },
337
355
  "background_jobs": {
338
356
  "watch_running": watch_running,
@@ -787,20 +805,36 @@ def build_mcp_server(store, repo_path_provider):
787
805
  Recent git commits for the project (or a specific file).
788
806
  Returns available=false if the directory is not a git repository.
789
807
  Use project=<project_id> to target a specific indexed module's repo.
808
+ TIP: Always pass project= to ensure the correct repo is used.
790
809
  """
791
810
  repo = _resolve_repo_path(store, project, repo_path_provider)
811
+ if not os.path.isdir(repo):
812
+ return {
813
+ "available": False,
814
+ "note": f"Path does not exist: {repo}. Pass project=<project_id> to resolve the repo from the index.",
815
+ }
792
816
  if not _git_available(repo):
793
- return {"available": False, "note": "Not a git repository (or git not installed)."}
817
+ return {
818
+ "available": False,
819
+ "note": (
820
+ f"Not a git repository at {repo}. "
821
+ "Pass project=<project_id> so the tool resolves the correct repo root. "
822
+ "Use list_projects() to see available IDs."
823
+ ),
824
+ }
794
825
  cmd = ["git", "log", f"--max-count={limit}", "--oneline", "--no-decorate"]
795
826
  if file_path:
796
827
  cmd += ["--", file_path]
797
828
  r = subprocess.run(cmd, cwd=repo, capture_output=True, text=True, timeout=30)
798
829
  if r.returncode != 0:
799
- return {"available": False, "error": r.stderr.strip()}
830
+ return {"available": False, "error": r.stderr.strip(), "repo_path": repo}
831
+ log_lines = r.stdout.strip().splitlines()
800
832
  return {
801
833
  "available": True,
802
834
  "project": project or repo,
803
- "log": r.stdout.strip().splitlines(),
835
+ "repo_path": repo,
836
+ "log": log_lines,
837
+ "note": f"{len(log_lines)} commit(s)" + (" (no commits yet)" if not log_lines else ""),
804
838
  }
805
839
 
806
840
  @mcp.tool()
@@ -809,26 +843,42 @@ def build_mcp_server(store, repo_path_provider):
809
843
  Show git diff (working tree vs ref, or between two refs separated by '...').
810
844
  Output is truncated to 200 lines.
811
845
  Returns available=false if the directory is not a git repository.
846
+ TIP: Always pass project= to ensure the correct repo is used.
812
847
  """
813
848
  repo = _resolve_repo_path(store, project, repo_path_provider)
849
+ if not os.path.isdir(repo):
850
+ return {
851
+ "available": False,
852
+ "note": f"Path does not exist: {repo}. Pass project=<project_id> to resolve the repo from the index.",
853
+ }
814
854
  if not _git_available(repo):
815
- return {"available": False, "note": "Not a git repository (or git not installed)."}
855
+ return {
856
+ "available": False,
857
+ "note": (
858
+ f"Not a git repository at {repo}. "
859
+ "Pass project=<project_id> so the tool resolves the correct repo root. "
860
+ "Use list_projects() to see available IDs."
861
+ ),
862
+ }
816
863
  cmd = ["git", "diff", ref]
817
864
  if file_path:
818
865
  cmd += ["--", file_path]
819
866
  r = subprocess.run(cmd, cwd=repo, capture_output=True, text=True, timeout=30)
820
867
  if r.returncode != 0:
821
- return {"available": False, "error": r.stderr.strip()}
868
+ return {"available": False, "error": r.stderr.strip(), "repo_path": repo}
822
869
  lines = r.stdout.splitlines()
823
870
  truncated = False
824
871
  if len(lines) > 200:
825
872
  lines = lines[:200]
826
873
  truncated = True
874
+ diff_text = "\n".join(lines)
827
875
  return {
828
876
  "available": True,
829
877
  "project": project or repo,
830
- "diff": "\n".join(lines),
878
+ "repo_path": repo,
879
+ "diff": diff_text,
831
880
  "truncated": truncated,
881
+ "note": f"{len(lines)} line(s)" + (" — no changes" if not diff_text.strip() else ""),
832
882
  }
833
883
 
834
884
  @mcp.tool()
@@ -1240,6 +1290,38 @@ def build_mcp_server(store, repo_path_provider):
1240
1290
  ),
1241
1291
  }
1242
1292
 
1293
+ @mcp.tool()
1294
+ def force_reset_index():
1295
+ """
1296
+ Emergency reset: delete ALL CodeSpine data files without touching the
1297
+ DB engine.
1298
+
1299
+ Use this when the buffer pool is exhausted and normal reset/clear
1300
+ commands also fail with OOM errors. This bypasses Kuzu entirely by
1301
+ removing all data files from disk.
1302
+
1303
+ After calling this, restart the MCP server and re-index all projects
1304
+ with analyse_project().
1305
+
1306
+ This is the nuclear option — only use when reset_project() and
1307
+ reset_index() fail with buffer pool errors.
1308
+ """
1309
+ from codespine.db.store import GraphStore as _GS
1310
+
1311
+ removed = _GS.force_delete_all_data()
1312
+ return {
1313
+ "available": True,
1314
+ "removed_paths": removed,
1315
+ "removed_count": len(removed),
1316
+ "note": (
1317
+ f"Force-reset complete. {len(removed)} path(s) removed. "
1318
+ "Restart the MCP server (codespine stop && codespine start) "
1319
+ "and re-index projects with analyse_project()."
1320
+ if removed else
1321
+ "Nothing to remove — already clean."
1322
+ ),
1323
+ }
1324
+
1243
1325
  # ------------------------------------------------------------------
1244
1326
  # Neighborhood exploration
1245
1327
  # ------------------------------------------------------------------
@@ -1395,6 +1477,10 @@ def build_mcp_server(store, repo_path_provider):
1395
1477
  Use this after editing a file to immediately refresh the graph without
1396
1478
  waiting for watch mode or running a full analysis.
1397
1479
 
1480
+ The file is parsed and its symbols are stored in the overlay (just like
1481
+ watch mode), so the updated data is immediately visible in search and
1482
+ find_symbol results.
1483
+
1398
1484
  Parameters:
1399
1485
  file_path – Absolute path to the .java file.
1400
1486
  project – Optional project_id. If omitted, the tool infers the
@@ -1408,9 +1494,12 @@ def build_mcp_server(store, repo_path_provider):
1408
1494
 
1409
1495
  # Resolve project from indexed projects if not given
1410
1496
  if not project:
1411
- projects = store.query_records(
1412
- "MATCH (p:Project) RETURN p.id as id, p.path as path"
1413
- )
1497
+ try:
1498
+ projects = store.query_records(
1499
+ "MATCH (p:Project) RETURN p.id as id, p.path as path"
1500
+ )
1501
+ except Exception as exc:
1502
+ return {"available": False, "note": f"DB read failed: {exc}"}
1414
1503
  for p in projects:
1415
1504
  if abs_fp.startswith(p["path"] + _os.sep):
1416
1505
  project = p["id"]
@@ -1425,58 +1514,64 @@ def build_mcp_server(store, repo_path_provider):
1425
1514
  }
1426
1515
 
1427
1516
  # Find the project path to use as root for indexing
1428
- proj_recs = store.query_records(
1429
- "MATCH (p:Project) WHERE p.id = $pid RETURN p.path as path LIMIT 1",
1430
- {"pid": project},
1431
- )
1517
+ try:
1518
+ proj_recs = store.query_records(
1519
+ "MATCH (p:Project) WHERE p.id = $pid RETURN p.path as path LIMIT 1",
1520
+ {"pid": project},
1521
+ )
1522
+ except Exception as exc:
1523
+ return {"available": False, "note": f"DB read failed: {exc}"}
1432
1524
  if not proj_recs:
1433
1525
  return {"available": False, "note": f"Project '{project}' not found in index."}
1434
1526
 
1435
1527
  proj_path = proj_recs[0]["path"]
1436
1528
 
1437
- # Run incremental index via subprocess to avoid read-only DB constraint.
1438
- # Use Popen + communicate() with a timeout so that a hang never crashes
1439
- # the MCP server process — the subprocess is killed gracefully instead.
1440
- cmd = [
1441
- sys.executable, "-m", "codespine.cli",
1442
- "analyse", proj_path,
1443
- "--incremental", "--no-embed", "--allow-running",
1444
- ]
1529
+ # Use overlay-based single-file update (same mechanism as watch mode).
1530
+ # This avoids spawning a subprocess and contending with the write DB.
1531
+ from codespine.watch.watcher import _update_overlay_for_files
1532
+
1445
1533
  t0 = time.time()
1446
1534
  try:
1447
- proc = subprocess.Popen(
1448
- cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
1449
- )
1450
- stdout, stderr = proc.communicate(timeout=30)
1535
+ result = _update_overlay_for_files(store, proj_path, project, [abs_fp])
1451
1536
  elapsed = round(time.time() - t0, 2)
1452
- except subprocess.TimeoutExpired:
1453
- proc.kill()
1454
- proc.communicate() # reap zombie
1455
- elapsed = round(time.time() - t0, 2)
1456
- return {
1457
- "available": False,
1458
- "note": f"Re-index timed out after {elapsed}s. The project may be too large for single-file re-index. Use analyse_project() instead.",
1459
- }
1460
1537
  except Exception as exc:
1461
1538
  elapsed = round(time.time() - t0, 2)
1462
- return {
1463
- "available": False,
1464
- "note": f"Re-index error: {exc}",
1465
- }
1466
-
1467
- if proc.returncode != 0:
1468
- return {
1469
- "available": False,
1470
- "note": f"Re-index failed (code {proc.returncode})",
1471
- "error": (stderr or stdout or "").strip()[:500],
1472
- }
1539
+ _LOGGER.warning("reindex_file failed: %s", exc)
1540
+ # Fall back to subprocess approach
1541
+ cmd = [
1542
+ sys.executable, "-m", "codespine.cli",
1543
+ "analyse", proj_path,
1544
+ "--incremental", "--no-embed", "--allow-running",
1545
+ ]
1546
+ try:
1547
+ proc = subprocess.Popen(
1548
+ cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
1549
+ )
1550
+ stdout, stderr = proc.communicate(timeout=60)
1551
+ elapsed = round(time.time() - t0, 2)
1552
+ if proc.returncode != 0:
1553
+ return {
1554
+ "available": False,
1555
+ "note": f"Re-index failed (code {proc.returncode})",
1556
+ "error": (stderr or stdout or "").strip()[:500],
1557
+ }
1558
+ return {
1559
+ "available": True,
1560
+ "file": abs_fp,
1561
+ "project": project,
1562
+ "elapsed_s": elapsed,
1563
+ "note": f"Overlay update failed; fell back to full incremental re-index in {elapsed}s.",
1564
+ }
1565
+ except Exception as fallback_exc:
1566
+ return {"available": False, "note": f"Re-index error: overlay={exc}, subprocess={fallback_exc}"}
1473
1567
 
1474
1568
  return {
1475
1569
  "available": True,
1476
1570
  "file": abs_fp,
1477
1571
  "project": project,
1478
1572
  "elapsed_s": elapsed,
1479
- "note": f"Re-indexed project {project} incrementally in {elapsed}s.",
1573
+ "changed": result.get("changed", 0),
1574
+ "note": f"Re-indexed {abs_fp} via overlay in {elapsed}s.",
1480
1575
  }
1481
1576
 
1482
1577
  # ------------------------------------------------------------------
@@ -1,8 +1,10 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import logging
3
4
  import os
4
5
  import threading
5
6
  import time
7
+ import traceback
6
8
 
7
9
  from codespine.analysis.community import detect_communities
8
10
  from codespine.analysis.coupling import compute_coupling
@@ -13,6 +15,8 @@ from codespine.indexer.engine import JavaIndexer
13
15
  from codespine.overlay.git_state import current_head, git_repo_root
14
16
  from codespine.overlay.store import OverlayStore, build_overlay_file_entry
15
17
 
18
+ LOGGER = logging.getLogger(__name__)
19
+
16
20
 
17
21
  def _project_modules(root_path: str) -> tuple[dict[str, str], list[str], bool]:
18
22
  abs_path = os.path.abspath(root_path)
@@ -39,15 +43,22 @@ def get_overlay_status(store, project: str | None = None) -> list[dict]:
39
43
  statuses = overlay_store.status(project)
40
44
  out: list[dict] = []
41
45
  for item in statuses:
42
- metadata = store.get_project_metadata(item["project_id"]) or {}
46
+ try:
47
+ metadata = store.get_project_metadata(item["project_id"]) or {}
48
+ except Exception:
49
+ metadata = {}
50
+ # The overlay JSON on disk is the source of truth; the DB flag
51
+ # may be stale if the watch process couldn't write to the DB.
52
+ overlay_present = bool(item.get("overlay_present"))
53
+ db_dirty = bool(metadata.get("overlay_dirty", False))
43
54
  out.append(
44
55
  {
45
56
  **item,
46
57
  "indexed_commit": metadata.get("indexed_commit", ""),
47
- "overlay_dirty": bool(metadata.get("overlay_dirty", False)),
58
+ "overlay_dirty": overlay_present or db_dirty,
48
59
  "indexed_at": metadata.get("indexed_at", ""),
49
60
  "promotion_pending": bool(
50
- item.get("overlay_present")
61
+ overlay_present
51
62
  and item.get("current_head")
52
63
  and metadata.get("indexed_commit")
53
64
  and item.get("current_head") != metadata.get("indexed_commit")
@@ -107,68 +118,98 @@ def promote_overlay(store, project: str | None = None, require_head_change: bool
107
118
  def _update_overlay_for_files(store, project_path: str, project_id: str, file_paths: list[str]) -> dict:
108
119
  overlay_store: OverlayStore = store.overlay_store
109
120
  indexer = JavaIndexer(store)
110
- metadata = store.get_project_metadata(project_id) or {}
111
121
  repo_root = git_repo_root(project_path)
112
- indexed_commit = str(metadata.get("indexed_commit") or "")
113
122
  head = current_head(project_path)
114
- existing_doc = overlay_store.load_project(project_id)
115
123
 
116
- base_method_catalog = indexer._existing_method_catalog(project_id)
117
- base_class_catalog = indexer._existing_class_catalog(project_id)
118
- base_class_ids = indexer._existing_class_ids_by_fqcn(project_id)
119
- base_class_methods = indexer._existing_class_methods(project_id)
120
- embed = store.project_has_embeddings(project_id)
124
+ # DB reads can fail if the write DB is busy; fall back to empty catalogs
125
+ # so the overlay still captures the file changes from tree-sitter alone.
126
+ try:
127
+ metadata = store.get_project_metadata(project_id) or {}
128
+ except Exception as exc:
129
+ LOGGER.warning("watch: DB read failed for project metadata (%s), using empty", exc)
130
+ metadata = {}
131
+ indexed_commit = str(metadata.get("indexed_commit") or "")
132
+
133
+ try:
134
+ base_method_catalog = indexer._existing_method_catalog(project_id)
135
+ base_class_catalog = indexer._existing_class_catalog(project_id)
136
+ base_class_ids = indexer._existing_class_ids_by_fqcn(project_id)
137
+ base_class_methods = indexer._existing_class_methods(project_id)
138
+ except Exception as exc:
139
+ LOGGER.warning("watch: DB read failed for catalogs (%s), using empty", exc)
140
+ base_method_catalog = {}
141
+ base_class_catalog = {}
142
+ base_class_ids = {}
143
+ base_class_methods = {}
121
144
 
122
- changed = deleted = 0
145
+ try:
146
+ embed = store.project_has_embeddings(project_id)
147
+ except Exception:
148
+ embed = False
149
+
150
+ existing_doc = overlay_store.load_project(project_id)
151
+
152
+ changed = deleted = errors = 0
123
153
  for file_path in sorted(set(os.path.abspath(p) for p in file_paths)):
124
154
  if not file_path.endswith(".java"):
125
155
  continue
126
- if os.path.exists(file_path):
127
- with open(file_path, "rb") as fh:
128
- source = fh.read()
129
- entry = build_overlay_file_entry(
130
- store=store,
131
- project_id=project_id,
132
- project_path=project_path,
133
- file_path=file_path,
134
- source=source,
135
- embed=embed,
136
- base_method_catalog=base_method_catalog,
137
- base_class_catalog=base_class_catalog,
138
- base_class_ids_by_fqcn=base_class_ids,
139
- base_class_methods=base_class_methods,
140
- existing_overlay_doc=existing_doc,
141
- )
142
- overlay_store.upsert_file(
143
- project_id=project_id,
144
- project_path=project_path,
145
- repo_root=repo_root,
146
- base_commit=indexed_commit,
147
- current_head=head,
148
- file_path=file_path,
149
- entry=entry,
150
- )
151
- existing_doc = overlay_store.load_project(project_id)
152
- changed += 1
153
- else:
154
- overlay_store.mark_deleted(
155
- project_id=project_id,
156
- project_path=project_path,
157
- repo_root=repo_root,
158
- base_commit=indexed_commit,
159
- current_head=head,
160
- file_path=file_path,
161
- )
162
- existing_doc = overlay_store.load_project(project_id)
163
- deleted += 1
156
+ try:
157
+ if os.path.exists(file_path):
158
+ with open(file_path, "rb") as fh:
159
+ source = fh.read()
160
+ entry = build_overlay_file_entry(
161
+ store=store,
162
+ project_id=project_id,
163
+ project_path=project_path,
164
+ file_path=file_path,
165
+ source=source,
166
+ embed=embed,
167
+ base_method_catalog=base_method_catalog,
168
+ base_class_catalog=base_class_catalog,
169
+ base_class_ids_by_fqcn=base_class_ids,
170
+ base_class_methods=base_class_methods,
171
+ existing_overlay_doc=existing_doc,
172
+ )
173
+ overlay_store.upsert_file(
174
+ project_id=project_id,
175
+ project_path=project_path,
176
+ repo_root=repo_root,
177
+ base_commit=indexed_commit,
178
+ current_head=head,
179
+ file_path=file_path,
180
+ entry=entry,
181
+ )
182
+ existing_doc = overlay_store.load_project(project_id)
183
+ changed += 1
184
+ else:
185
+ overlay_store.mark_deleted(
186
+ project_id=project_id,
187
+ project_path=project_path,
188
+ repo_root=repo_root,
189
+ base_commit=indexed_commit,
190
+ current_head=head,
191
+ file_path=file_path,
192
+ )
193
+ existing_doc = overlay_store.load_project(project_id)
194
+ deleted += 1
195
+ except Exception as exc:
196
+ LOGGER.warning("watch: failed to process %s: %s", file_path, exc)
197
+ errors += 1
198
+
164
199
  if changed or deleted:
165
- if metadata:
166
- store.set_project_overlay_dirty(project_id, True)
167
- else:
168
- store.upsert_project(project_id, project_path)
169
- store.set_project_indexed_commit(project_id, indexed_commit)
170
- store.set_project_overlay_dirty(project_id, True)
171
- return {"project_id": project_id, "changed": changed, "deleted": deleted}
200
+ # Try to mark dirty in the DB; if the DB is busy (write contention),
201
+ # the overlay JSON on disk is still correct and will be picked up on
202
+ # next read. Don't let a DB write failure discard overlay work.
203
+ try:
204
+ if metadata:
205
+ store.set_project_overlay_dirty(project_id, True)
206
+ else:
207
+ store.upsert_project(project_id, project_path)
208
+ store.set_project_indexed_commit(project_id, indexed_commit)
209
+ store.set_project_overlay_dirty(project_id, True)
210
+ except Exception as exc:
211
+ LOGGER.warning("watch: DB write failed for overlay_dirty flag (%s); overlay is still on disk", exc)
212
+ return {"project_id": project_id, "changed": changed, "deleted": deleted, "errors": errors}
172
213
 
173
214
 
174
215
  def run_watch_mode(
@@ -227,11 +268,17 @@ def run_watch_mode(
227
268
  for module_path, files in sorted(grouped.items()):
228
269
  project_id = module_map.get(module_path, os.path.basename(module_path))
229
270
  start = time.time()
230
- result = _update_overlay_for_files(store, module_path, project_id, files)
271
+ try:
272
+ result = _update_overlay_for_files(store, module_path, project_id, files)
273
+ except Exception as exc:
274
+ LOGGER.error("watch: overlay update failed for %s: %s\n%s", project_id, exc, traceback.format_exc())
275
+ print(f"[{time.strftime('%H:%M:%S')}] {project_id}: ERROR updating overlay — {exc}")
276
+ continue
231
277
  elapsed = time.time() - start
278
+ err_note = f", {result.get('errors', 0)} errors" if result.get("errors") else ""
232
279
  print(
233
280
  f"[{time.strftime('%H:%M:%S')}] {project_id}: overlay updated "
234
- f"({result['changed']} changed, {result['deleted']} deleted) in {elapsed:.1f}s"
281
+ f"({result['changed']} changed, {result['deleted']} deleted{err_note}) in {elapsed:.1f}s"
235
282
  )
236
283
 
237
284
  if promote_on_commit:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 0.5.10
3
+ Version: 0.6.0
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "0.5.10"
7
+ version = "0.6.0"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
File without changes
File without changes
File without changes
File without changes