codespine 0.5.10__tar.gz → 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-0.5.10 → codespine-0.6.0}/PKG-INFO +1 -1
- {codespine-0.5.10 → codespine-0.6.0}/codespine/__init__.py +1 -1
- {codespine-0.5.10 → codespine-0.6.0}/codespine/cli.py +119 -12
- {codespine-0.5.10 → codespine-0.6.0}/codespine/config.py +2 -2
- {codespine-0.5.10 → codespine-0.6.0}/codespine/db/store.py +47 -2
- {codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/engine.py +117 -8
- {codespine-0.5.10 → codespine-0.6.0}/codespine/mcp/server.py +146 -51
- {codespine-0.5.10 → codespine-0.6.0}/codespine/watch/watcher.py +106 -59
- {codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-0.5.10 → codespine-0.6.0}/pyproject.toml +1 -1
- {codespine-0.5.10 → codespine-0.6.0}/LICENSE +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/README.md +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/__init__.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/community.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/context.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/coupling.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/crossmodule.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/deadcode.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/flow.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/analysis/impact.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/db/__init__.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/db/schema.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/diff/__init__.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/diff/branch_diff.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/__init__.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/call_resolver.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/java_parser.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/mcp/__init__.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/noise/__init__.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/noise/blocklist.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/overlay/__init__.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/overlay/git_state.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/overlay/merge.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/overlay/store.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/search/__init__.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/search/bm25.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/search/fuzzy.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/search/hybrid.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/search/rrf.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/search/vector.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine/watch/__init__.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/SOURCES.txt +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/requires.txt +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/codespine.egg-info/top_level.txt +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/gindex.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/setup.cfg +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/tests/test_call_resolver.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/tests/test_community_detection.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/tests/test_deadcode.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/tests/test_index_and_hybrid.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/tests/test_java_parser.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/tests/test_multimodule_index.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/tests/test_overlay.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/tests/test_search_ranking.py +0 -0
- {codespine-0.5.10 → codespine-0.6.0}/tests/test_store_recovery.py +0 -0
|
@@ -101,16 +101,17 @@ def main() -> None:
|
|
|
101
101
|
@click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses.")
|
|
102
102
|
@click.option(
|
|
103
103
|
"--embed/--no-embed",
|
|
104
|
-
default=
|
|
104
|
+
default=True,
|
|
105
105
|
show_default=True,
|
|
106
|
-
help="Generate vector embeddings
|
|
106
|
+
help="Generate vector embeddings. Uses sentence-transformers if installed (pip install codespine[ml]), otherwise falls back to hash-based vectors.",
|
|
107
107
|
)
|
|
108
108
|
@click.option("--allow-running", is_flag=True, hidden=True, help="Skip MCP running check (used by MCP analyse_project tool).")
|
|
109
109
|
def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool) -> None:
|
|
110
110
|
"""Index a local Java project (auto-detects workspace / Maven / Gradle layout).
|
|
111
111
|
|
|
112
|
-
|
|
113
|
-
|
|
112
|
+
Embeddings are generated by default. If sentence-transformers is installed
|
|
113
|
+
(pip install codespine[ml]), high-quality semantic vectors are used; otherwise
|
|
114
|
+
a fast hash-based fallback provides basic vector search.
|
|
114
115
|
"""
|
|
115
116
|
if not allow_running and _is_running():
|
|
116
117
|
click.secho("Stop MCP first ('codespine stop') to index.", fg="yellow")
|
|
@@ -118,6 +119,17 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
|
|
|
118
119
|
|
|
119
120
|
started = time.perf_counter()
|
|
120
121
|
abs_path = os.path.abspath(path)
|
|
122
|
+
|
|
123
|
+
# Warn about hash fallback early so users know to install [ml]
|
|
124
|
+
if embed:
|
|
125
|
+
from codespine.search.vector import _load_model
|
|
126
|
+
if _load_model() is None:
|
|
127
|
+
click.secho(
|
|
128
|
+
"⚠ sentence-transformers not found — using hash-based embeddings.\n"
|
|
129
|
+
" For better semantic search: pip install codespine[ml]\n",
|
|
130
|
+
fg="yellow",
|
|
131
|
+
)
|
|
132
|
+
|
|
121
133
|
store = GraphStore(read_only=False)
|
|
122
134
|
indexer = JavaIndexer(store)
|
|
123
135
|
|
|
@@ -309,9 +321,27 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
|
|
|
309
321
|
)
|
|
310
322
|
_finish_phase(coup_label, f"{len(coupling_pairs)} coupled file pairs")
|
|
311
323
|
else:
|
|
324
|
+
# Run lightweight versions of flow tracing and dead code from the call
|
|
325
|
+
# graph already built — no community detection or coupling (those are
|
|
326
|
+
# genuinely expensive). This gives partial results without --deep.
|
|
312
327
|
_phase("Detecting communities...", "skipped (large repo; rerun with --deep)")
|
|
313
|
-
|
|
314
|
-
|
|
328
|
+
|
|
329
|
+
flow_label = "Detecting execution flows..."
|
|
330
|
+
_live_phase(flow_label, "running (lightweight)")
|
|
331
|
+
try:
|
|
332
|
+
flows = trace_execution_flows(store, max_depth=3)
|
|
333
|
+
except Exception:
|
|
334
|
+
flows = []
|
|
335
|
+
_finish_phase(flow_label, f"{len(flows)} flows (lightweight; rerun with --deep for full)")
|
|
336
|
+
|
|
337
|
+
dead_label = "Finding dead code..."
|
|
338
|
+
_live_phase(dead_label, "running (lightweight)")
|
|
339
|
+
try:
|
|
340
|
+
dead = detect_dead_code(store, limit=100)
|
|
341
|
+
except Exception:
|
|
342
|
+
dead = []
|
|
343
|
+
_finish_phase(dead_label, f"{_dead_result_count(dead)} candidates (lightweight; rerun with --deep for full)")
|
|
344
|
+
|
|
315
345
|
_phase("Analyzing git history...", "skipped (large repo; rerun with --deep)")
|
|
316
346
|
|
|
317
347
|
vector_count = store.query_records(
|
|
@@ -331,7 +361,12 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
|
|
|
331
361
|
edges = int(edge_count[0]["count"]) if edge_count else 0
|
|
332
362
|
elapsed = time.perf_counter() - started
|
|
333
363
|
|
|
334
|
-
|
|
364
|
+
if not embed:
|
|
365
|
+
embed_note = " (no embeddings; rerun with --embed for semantic search)"
|
|
366
|
+
elif _load_model() is None:
|
|
367
|
+
embed_note = " (hash embeddings; pip install codespine[ml] for better search)"
|
|
368
|
+
else:
|
|
369
|
+
embed_note = ""
|
|
335
370
|
module_info = f"{len(modules_with_ids)} modules/projects, " if is_multi else ""
|
|
336
371
|
click.echo()
|
|
337
372
|
click.secho(
|
|
@@ -339,6 +374,17 @@ def analyse(path: str, full: bool, deep: bool, embed: bool, allow_running: bool)
|
|
|
339
374
|
fg="green",
|
|
340
375
|
)
|
|
341
376
|
|
|
377
|
+
# Detect unresolved imports → hint about unindexed sibling projects
|
|
378
|
+
try:
|
|
379
|
+
unresolved = JavaIndexer.detect_unresolved_imports(store)
|
|
380
|
+
if unresolved:
|
|
381
|
+
click.echo()
|
|
382
|
+
click.secho("⚠ Unresolved imports — consider indexing these projects:", fg="yellow")
|
|
383
|
+
for pkg, samples in sorted(unresolved.items())[:8]:
|
|
384
|
+
click.echo(f" {pkg} (e.g. {samples[0]})")
|
|
385
|
+
except Exception:
|
|
386
|
+
pass # best-effort
|
|
387
|
+
|
|
342
388
|
# Publish a read replica so MCP and read-only CLI commands (search, stats…)
|
|
343
389
|
# run against an isolated snapshot rather than competing with the write
|
|
344
390
|
# process's buffer pool. The MCP daemon detects the sentinel file and
|
|
@@ -579,7 +625,14 @@ def list_projects(as_json: bool) -> None:
|
|
|
579
625
|
@main.command()
|
|
580
626
|
@click.option("--json", "as_json", is_flag=True)
|
|
581
627
|
def status(as_json: bool) -> None:
|
|
582
|
-
"""Show service and database status.
|
|
628
|
+
"""Show service and database status.
|
|
629
|
+
|
|
630
|
+
Quick reference for MCP server management:
|
|
631
|
+
codespine start – launch background MCP server
|
|
632
|
+
codespine stop – stop background MCP server
|
|
633
|
+
codespine status – this command
|
|
634
|
+
codespine mcp – run MCP in foreground (stdio, for IDE integration)
|
|
635
|
+
"""
|
|
583
636
|
running = _is_running()
|
|
584
637
|
pid = None
|
|
585
638
|
if os.path.exists(SETTINGS.pid_file):
|
|
@@ -590,17 +643,35 @@ def status(as_json: bool) -> None:
|
|
|
590
643
|
pid = None
|
|
591
644
|
store = GraphStore(read_only=True)
|
|
592
645
|
overlay = get_overlay_status(store)
|
|
646
|
+
|
|
647
|
+
# Check for stale PID file
|
|
648
|
+
stale_pid = pid is not None and not running
|
|
649
|
+
has_snapshot = os.path.exists(SETTINGS.db_snapshot_path)
|
|
650
|
+
|
|
593
651
|
payload = {
|
|
594
652
|
"running": running,
|
|
595
653
|
"pid": pid,
|
|
654
|
+
"stale_pid": stale_pid,
|
|
596
655
|
"pid_file": SETTINGS.pid_file,
|
|
597
656
|
"db_path": SETTINGS.db_path,
|
|
598
657
|
"db_size_bytes": _db_size_bytes(SETTINGS.db_path),
|
|
658
|
+
"read_replica": SETTINGS.db_snapshot_path if has_snapshot else None,
|
|
659
|
+
"read_replica_size_bytes": _db_size_bytes(SETTINGS.db_snapshot_path) if has_snapshot else 0,
|
|
599
660
|
"log_file": SETTINGS.log_file,
|
|
600
661
|
"overlay_dir": SETTINGS.overlay_dir,
|
|
601
662
|
"overlay_projects": overlay,
|
|
602
663
|
}
|
|
603
|
-
|
|
664
|
+
if as_json:
|
|
665
|
+
_echo_json(payload, True)
|
|
666
|
+
else:
|
|
667
|
+
_echo_json(payload, True)
|
|
668
|
+
if stale_pid:
|
|
669
|
+
click.secho(f"\n⚠ Stale PID file found (PID {pid} not running). Run 'codespine stop' to clean up.", fg="yellow")
|
|
670
|
+
if not running:
|
|
671
|
+
click.echo("\nTo start: codespine start")
|
|
672
|
+
click.echo("For IDE: codespine mcp (stdio mode)")
|
|
673
|
+
else:
|
|
674
|
+
click.echo(f"\nMCP server running (PID {pid}). Stop with: codespine stop")
|
|
604
675
|
|
|
605
676
|
|
|
606
677
|
@main.command("overlay-status")
|
|
@@ -727,6 +798,33 @@ def clear_index_cmd(allow_running: bool) -> None:
|
|
|
727
798
|
click.secho(f"Cleared {len(projects)} project(s). Index is now empty.", fg="green")
|
|
728
799
|
|
|
729
800
|
|
|
801
|
+
@main.command("force-reset")
@click.option("--force", is_flag=True, help="Skip confirmation prompt.")
def force_reset_cmd(force: bool) -> None:
    """Emergency reset: delete ALL CodeSpine data files without touching the DB engine.

    Use this when the buffer pool is exhausted and normal reset/clear commands
    also fail with OOM. This bypasses Kuzu entirely by removing data files
    from disk, including the DB, read replica, overlay, meta cache, and
    embedding cache.

    After running this, restart the MCP server and re-index your projects.
    """
    # NOTE: the docstring above is what click prints for `--help`, so it is
    # user-facing text and is kept as-is.

    # `--force` short-circuits the prompt; otherwise the user must confirm.
    confirmed = force or click.confirm(
        "This will DELETE all CodeSpine data (DB, overlay, caches). Continue?"
    )
    if not confirmed:
        click.echo("Aborted.")
        return

    removed = GraphStore.force_delete_all_data()

    # Nothing on disk: report and stop before printing the per-path listing.
    if not removed:
        click.secho("Nothing to remove — already clean.", fg="yellow")
        return

    for removed_path in removed:
        click.echo(f" removed: {removed_path}")
    click.secho(f"\nForce-reset complete. {len(removed)} path(s) removed.", fg="green")
    click.echo("Next: restart MCP ('codespine stop && codespine start') and re-index.")
|
|
826
|
+
|
|
827
|
+
|
|
730
828
|
@main.command()
|
|
731
829
|
def setup() -> None:
|
|
732
830
|
"""Print local setup checks and next steps."""
|
|
@@ -743,12 +841,21 @@ def setup() -> None:
|
|
|
743
841
|
checks[mod] = True
|
|
744
842
|
except Exception:
|
|
745
843
|
checks[mod] = False
|
|
746
|
-
click.echo("
|
|
844
|
+
click.echo("Core dependencies:")
|
|
747
845
|
for mod, ok in checks.items():
|
|
748
846
|
click.echo(f" - {mod}: {'OK' if ok else 'MISSING'}")
|
|
749
|
-
|
|
750
|
-
|
|
847
|
+
# Check optional ML dependencies
|
|
848
|
+
try:
|
|
849
|
+
from sentence_transformers import SentenceTransformer
|
|
850
|
+
click.echo(" - sentence-transformers: OK (semantic embeddings active)")
|
|
851
|
+
except ImportError:
|
|
852
|
+
click.secho(" - sentence-transformers: NOT INSTALLED (hash fallback; install for better search)", fg="yellow")
|
|
853
|
+
click.echo("\nRecommended setup:")
|
|
854
|
+
click.echo(" pip install -e '.[full]' # core + ML + community detection")
|
|
855
|
+
click.echo(" pip install -e '.[ml]' # just ML embeddings")
|
|
856
|
+
click.echo("\nQuick start:")
|
|
751
857
|
click.echo(" codespine analyse /path/to/java-project --full")
|
|
858
|
+
click.echo(" codespine start # launch MCP server")
|
|
752
859
|
click.echo(" codespine search payment --json")
|
|
753
860
|
|
|
754
861
|
|
|
@@ -16,8 +16,8 @@ class Settings:
|
|
|
16
16
|
rrf_k: int = 60
|
|
17
17
|
semantic_candidate_pool: int = 2000
|
|
18
18
|
write_batch_size: int = 500
|
|
19
|
-
index_file_batch_size: int =
|
|
20
|
-
edge_write_batch_size: int =
|
|
19
|
+
index_file_batch_size: int = 20
|
|
20
|
+
edge_write_batch_size: int = 500
|
|
21
21
|
default_coupling_months: int = 6
|
|
22
22
|
default_min_coupling_strength: float = 0.3
|
|
23
23
|
default_min_cochanges: int = 3
|
|
@@ -147,10 +147,13 @@ class GraphStore:
|
|
|
147
147
|
|
|
148
148
|
def clear_project(self, project_id: str) -> None:
    """Clear every File record of ``project_id`` and then delete its Project node.

    Each file is cleared via ``clear_file`` inside its own transaction, and
    the connection is recycled after every 10th file, once more after the
    loop, and again after the Project node has been detach-deleted.
    """
    file_recs = self.query_records("MATCH (f:File) WHERE f.project_id = $pid RETURN f.id as id", {"pid": project_id})
    # Small units of work are meant to prevent buffer pool OOM on large projects.
    # NOTE(review): as written, every file gets its own transaction and only
    # the connection recycle happens every 10 files; an earlier comment here
    # described "10 files per tx" — confirm which was intended.
    for idx, rec in enumerate(file_recs, start=1):
        with self.transaction():
            self.clear_file(rec["id"])
        if idx % 10 == 0:
            self._recycle_conn()
    # Recycle once more so the final (possibly partial) group is flushed too.
    self._recycle_conn()
    self.execute("MATCH (p:Project) WHERE p.id = $pid DETACH DELETE p", {"pid": project_id})
    self._recycle_conn()
|
|
156
159
|
|
|
@@ -502,6 +505,48 @@ class GraphStore:
|
|
|
502
505
|
self.clear_flows()
|
|
503
506
|
self.clear_coupling()
|
|
504
507
|
|
|
508
|
+
@staticmethod
def force_delete_all_data() -> list[str]:
    """Delete all CodeSpine data files without touching the Kuzu engine.

    This is the nuclear option for OOM recovery: when the buffer pool is
    exhausted, normal DB writes (including reset_project / clear_project)
    also fail. This bypasses Kuzu entirely by removing the data files
    from disk, allowing a fresh start.

    Deletion is best-effort: a path that cannot be removed is skipped and
    the remaining paths are still processed.

    Returns:
        The paths that are verifiably gone after this call, in the order
        they were processed. A path whose deletion failed (fully or
        partially) is NOT included, so callers never report data as
        removed while it is still on disk.
    """
    candidates = [
        SETTINGS.db_path,
        SETTINGS.db_snapshot_path,
        SETTINGS.db_snapshot_path + ".updated",
        SETTINGS.db_snapshot_path + ".tmp",
        SETTINGS.embedding_cache_path,
        SETTINGS.overlay_dir,
        SETTINGS.index_meta_dir,
        # Stale WAL / lock files that sit next to the DB.
        SETTINGS.db_path + ".wal",
        SETTINGS.db_path + ".lock",
    ]

    removed: list[str] = []
    for path in candidates:
        # lexists (not exists) so dangling symlinks are cleaned up as well.
        if not os.path.lexists(path):
            continue
        try:
            if os.path.isdir(path) and not os.path.islink(path):
                # ignore_errors keeps the "delete as much as possible"
                # behaviour; success is verified below instead of assumed.
                shutil.rmtree(path, ignore_errors=True)
            else:
                # Plain files and symlinks (rmtree refuses symlinks).
                os.remove(path)
        except OSError:
            continue
        # Only report the path when nothing is left at that location.
        if not os.path.lexists(path):
            removed.append(path)
    return removed
|
|
549
|
+
|
|
505
550
|
def rebuild_empty_db(self) -> None:
|
|
506
551
|
self._recycle_conn()
|
|
507
552
|
path = SETTINGS.db_path
|
|
@@ -253,6 +253,20 @@ class JavaIndexer:
|
|
|
253
253
|
for fid in delete_chunk:
|
|
254
254
|
self.store.clear_file(fid)
|
|
255
255
|
self.store._recycle_conn()
|
|
256
|
+
|
|
257
|
+
# Clean up stale project entries that point to the same path under a
|
|
258
|
+
# different ID (e.g. re-indexing "vision-server" directly after it was
|
|
259
|
+
# previously indexed as "vision::vision-server" from a workspace root).
|
|
260
|
+
try:
|
|
261
|
+
stale = self.store.query_records(
|
|
262
|
+
"MATCH (p:Project) WHERE p.path = $path AND p.id <> $pid RETURN p.id as id",
|
|
263
|
+
{"path": root_path, "pid": project_id},
|
|
264
|
+
)
|
|
265
|
+
for old in stale:
|
|
266
|
+
self.store.clear_project(old["id"])
|
|
267
|
+
except Exception:
|
|
268
|
+
pass # best-effort cleanup
|
|
269
|
+
|
|
256
270
|
self.store.upsert_project(project_id, root_path)
|
|
257
271
|
|
|
258
272
|
for parse_chunk in self._chunked(parse_results, file_batch_size):
|
|
@@ -279,7 +293,7 @@ class JavaIndexer:
|
|
|
279
293
|
"hash": file_digest,
|
|
280
294
|
}
|
|
281
295
|
)
|
|
282
|
-
self._update_meta_cache_entry(meta_cache, f_id, file_path, file_digest, len(source))
|
|
296
|
+
self._update_meta_cache_entry(meta_cache, f_id, file_path, file_digest, len(source), imports=parsed.imports)
|
|
283
297
|
|
|
284
298
|
for cls in parsed.classes:
|
|
285
299
|
c_id = class_id(cls.fqcn, scope)
|
|
@@ -372,15 +386,31 @@ class JavaIndexer:
|
|
|
372
386
|
class_methods[c_id][method.signature] = m_id
|
|
373
387
|
files_indexed += 1
|
|
374
388
|
|
|
389
|
+
# Split writes into smaller transactions and recycle between each
|
|
390
|
+
# to prevent Kuzu WAL from exhausting the buffer pool on large
|
|
391
|
+
# incremental re-indexes (GH feedback: 1,604-file OOM).
|
|
392
|
+
if not full:
|
|
393
|
+
for clear_sub in self._chunked(file_rows, 10):
|
|
394
|
+
with self.store.transaction():
|
|
395
|
+
for row in clear_sub:
|
|
396
|
+
self.store.clear_file(row["id"])
|
|
397
|
+
self.store._recycle_conn()
|
|
375
398
|
with self.store.transaction():
|
|
376
|
-
for row in file_rows:
|
|
377
|
-
if not full:
|
|
378
|
-
self.store.clear_file(row["id"])
|
|
379
399
|
self.store.upsert_files_batch(file_rows)
|
|
400
|
+
self.store._recycle_conn()
|
|
401
|
+
with self.store.transaction():
|
|
380
402
|
self.store.upsert_classes_batch(class_rows)
|
|
381
|
-
self.store.upsert_methods_batch(method_rows)
|
|
382
|
-
self.store.upsert_symbols_batch(symbol_rows)
|
|
383
403
|
self.store._recycle_conn()
|
|
404
|
+
_METHOD_SUB_BATCH = 200
|
|
405
|
+
for method_sub in self._chunked(method_rows, _METHOD_SUB_BATCH):
|
|
406
|
+
with self.store.transaction():
|
|
407
|
+
self.store.upsert_methods_batch(method_sub)
|
|
408
|
+
self.store._recycle_conn()
|
|
409
|
+
_SYMBOL_SUB_BATCH = 200
|
|
410
|
+
for symbol_sub in self._chunked(symbol_rows, _SYMBOL_SUB_BATCH):
|
|
411
|
+
with self.store.transaction():
|
|
412
|
+
self.store.upsert_symbols_batch(symbol_sub)
|
|
413
|
+
self.store._recycle_conn()
|
|
384
414
|
|
|
385
415
|
self._emit(progress, "resolve_calls_start")
|
|
386
416
|
call_rows: list[dict] = []
|
|
@@ -697,7 +727,10 @@ class JavaIndexer:
|
|
|
697
727
|
return
|
|
698
728
|
|
|
699
729
|
@staticmethod
|
|
700
|
-
def _update_meta_cache_entry(
|
|
730
|
+
def _update_meta_cache_entry(
|
|
731
|
+
meta_cache: dict[str, dict], fid: str, file_path: str, digest: str, size_hint: int,
|
|
732
|
+
imports: list[str] | None = None,
|
|
733
|
+
) -> None:
|
|
701
734
|
try:
|
|
702
735
|
st = os.stat(file_path)
|
|
703
736
|
mtime_ns = int(getattr(st, "st_mtime_ns", int(st.st_mtime * 1_000_000_000)))
|
|
@@ -705,7 +738,10 @@ class JavaIndexer:
|
|
|
705
738
|
except OSError:
|
|
706
739
|
mtime_ns = -1
|
|
707
740
|
size = size_hint
|
|
708
|
-
|
|
741
|
+
entry: dict = {"mtime_ns": mtime_ns, "size": size, "hash": digest}
|
|
742
|
+
if imports is not None:
|
|
743
|
+
entry["imports"] = imports
|
|
744
|
+
meta_cache[fid] = entry
|
|
709
745
|
|
|
710
746
|
@staticmethod
|
|
711
747
|
def _prune_meta_cache(meta_cache: dict[str, dict], current_file_ids: set[str]) -> None:
|
|
@@ -728,3 +764,76 @@ class JavaIndexer:
|
|
|
728
764
|
return normalized.split("/src/", 1)[0]
|
|
729
765
|
scope = os.path.dirname(normalized).strip()
|
|
730
766
|
return scope or "."
|
|
767
|
+
|
|
768
|
+
@staticmethod
def detect_unresolved_imports(store) -> dict[str, list[str]]:
    """Detect imports that reference packages not covered by any indexed project.

    Indexed classes are read from ``store`` (``MATCH (c:Class)``); imports
    are read from the ``"imports"`` list of every entry in every ``*.json``
    file under ``SETTINGS.index_meta_dir``. An import is considered
    resolved when its FQCN is indexed or when its package contains at
    least one indexed class.

    Only reports project-internal packages (not java.*, javax.*,
    org.apache.* etc.).

    Args:
        store: Object exposing ``query_records(query)`` that returns a
            list of dict-like records.

    Returns:
        A dict mapping each unresolved base package (the first three
        dotted segments, e.g. "com.foo.bar") to at most five sample import
        FQCNs. Imports are processed in sorted order, so both the keys and
        the samples are deterministic. Returns ``{}`` when the class query
        fails.
    """
    # Standard library / well-known third-party packages are never reported.
    _SKIP_PREFIXES = (
        "java.", "javax.", "jakarta.",
        "org.apache.", "org.springframework.", "org.hibernate.",
        "org.slf4j.", "org.junit.", "org.mockito.",
        "com.google.", "com.fasterxml.", "com.sun.",
        "io.micrometer.", "io.netty.", "io.lettuce.",
        "lombok.", "reactor.", "rx.",
    )
    _MAX_SAMPLES = 5

    # 1. Collect all indexed class FQCNs and the packages they live in.
    try:
        recs = store.query_records("MATCH (c:Class) RETURN c.fqcn as fqcn")
    except Exception:
        # This is only a hint for the user; a failing DB read must not
        # break the caller.
        return {}
    indexed_fqcns = {r["fqcn"] for r in recs if r.get("fqcn")}
    indexed_packages = {fqcn.rpartition(".")[0] for fqcn in indexed_fqcns if "." in fqcn}

    # 2. Collect all imports recorded in the index meta cache files.
    all_imports: set[str] = set()
    meta_dir = SETTINGS.index_meta_dir
    if os.path.isdir(meta_dir):
        for fname in os.listdir(meta_dir):
            if not fname.endswith(".json"):
                continue
            try:
                with open(os.path.join(meta_dir, fname), "r", encoding="utf-8") as f:
                    data = json.load(f)
            except (OSError, ValueError):
                # Unreadable or corrupt cache file (JSON and decoding
                # errors are ValueError subclasses): skip just this file.
                continue
            if not isinstance(data, dict):
                continue
            for fmeta in data.values():
                # Skip malformed entries individually so one bad entry
                # does not hide the imports of the rest of the file.
                if not isinstance(fmeta, dict):
                    continue
                imports = fmeta.get("imports")
                if isinstance(imports, list):
                    all_imports.update(imp for imp in imports if isinstance(imp, str))

    # 3. Filter out resolved imports and group the rest by base package.
    unresolved: dict[str, list[str]] = {}
    for imp in sorted(all_imports):
        if imp.startswith(_SKIP_PREFIXES):
            continue
        if imp in indexed_fqcns:
            continue
        if imp.rpartition(".")[0] in indexed_packages:
            continue  # same package, just not this specific class
        # Group by top 3 package segments
        base_pkg = ".".join(imp.split(".")[:3])
        samples = unresolved.setdefault(base_pkg, [])
        if len(samples) < _MAX_SAMPLES:
            samples.append(imp)
    return unresolved
|
|
@@ -59,12 +59,15 @@ def _git_available(path: str) -> bool:
|
|
|
59
59
|
def _resolve_repo_path(store, project: str | None, repo_path_provider) -> str:
    """Return the indexed path of ``project``, else ``repo_path_provider()``.

    The Project node with id ``project`` is looked up through
    ``store.query_records``. The provider is used when ``project`` is
    falsy, when no record (or an empty path) comes back, or when the
    lookup raises.
    """
    if not project:
        return repo_path_provider()

    try:
        rows = store.query_records(
            "MATCH (p:Project) WHERE p.id = $pid RETURN p.path as path LIMIT 1",
            {"pid": project},
        )
        indexed_path = rows[0].get("path") if rows else None
    except Exception:
        # Best-effort lookup: any store failure falls back to the provider.
        indexed_path = None

    if indexed_path:
        return indexed_path
    return repo_path_provider()
|
|
69
72
|
|
|
70
73
|
|
|
@@ -305,6 +308,20 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
305
308
|
"RECOMMENDED: start watch mode during active development."
|
|
306
309
|
)
|
|
307
310
|
|
|
311
|
+
# Detect unresolved imports → hint about unindexed sibling projects
|
|
312
|
+
unresolved_imports: dict[str, list[str]] = {}
|
|
313
|
+
try:
|
|
314
|
+
from codespine.indexer.engine import JavaIndexer as _JI
|
|
315
|
+
unresolved_imports = _JI.detect_unresolved_imports(store)
|
|
316
|
+
if unresolved_imports:
|
|
317
|
+
pkgs = list(unresolved_imports.keys())[:5]
|
|
318
|
+
notes["unresolved_imports"] = (
|
|
319
|
+
f"Imports from unindexed packages detected: {', '.join(pkgs)}. "
|
|
320
|
+
"Consider indexing these projects for complete cross-project tracing."
|
|
321
|
+
)
|
|
322
|
+
except Exception:
|
|
323
|
+
pass
|
|
324
|
+
|
|
308
325
|
return {
|
|
309
326
|
"available": True,
|
|
310
327
|
"indexed_projects": projects,
|
|
@@ -333,6 +350,7 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
333
350
|
"get_overlay_status": True,
|
|
334
351
|
"promote_overlay": True,
|
|
335
352
|
"clear_overlay": True,
|
|
353
|
+
"force_reset_index": True,
|
|
336
354
|
},
|
|
337
355
|
"background_jobs": {
|
|
338
356
|
"watch_running": watch_running,
|
|
@@ -787,20 +805,36 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
787
805
|
Recent git commits for the project (or a specific file).
|
|
788
806
|
Returns available=false if the directory is not a git repository.
|
|
789
807
|
Use project=<project_id> to target a specific indexed module's repo.
|
|
808
|
+
TIP: Always pass project= to ensure the correct repo is used.
|
|
790
809
|
"""
|
|
791
810
|
repo = _resolve_repo_path(store, project, repo_path_provider)
|
|
811
|
+
if not os.path.isdir(repo):
|
|
812
|
+
return {
|
|
813
|
+
"available": False,
|
|
814
|
+
"note": f"Path does not exist: {repo}. Pass project=<project_id> to resolve the repo from the index.",
|
|
815
|
+
}
|
|
792
816
|
if not _git_available(repo):
|
|
793
|
-
return {
|
|
817
|
+
return {
|
|
818
|
+
"available": False,
|
|
819
|
+
"note": (
|
|
820
|
+
f"Not a git repository at {repo}. "
|
|
821
|
+
"Pass project=<project_id> so the tool resolves the correct repo root. "
|
|
822
|
+
"Use list_projects() to see available IDs."
|
|
823
|
+
),
|
|
824
|
+
}
|
|
794
825
|
cmd = ["git", "log", f"--max-count={limit}", "--oneline", "--no-decorate"]
|
|
795
826
|
if file_path:
|
|
796
827
|
cmd += ["--", file_path]
|
|
797
828
|
r = subprocess.run(cmd, cwd=repo, capture_output=True, text=True, timeout=30)
|
|
798
829
|
if r.returncode != 0:
|
|
799
|
-
return {"available": False, "error": r.stderr.strip()}
|
|
830
|
+
return {"available": False, "error": r.stderr.strip(), "repo_path": repo}
|
|
831
|
+
log_lines = r.stdout.strip().splitlines()
|
|
800
832
|
return {
|
|
801
833
|
"available": True,
|
|
802
834
|
"project": project or repo,
|
|
803
|
-
"
|
|
835
|
+
"repo_path": repo,
|
|
836
|
+
"log": log_lines,
|
|
837
|
+
"note": f"{len(log_lines)} commit(s)" + (" (no commits yet)" if not log_lines else ""),
|
|
804
838
|
}
|
|
805
839
|
|
|
806
840
|
@mcp.tool()
|
|
@@ -809,26 +843,42 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
809
843
|
Show git diff (working tree vs ref, or between two refs separated by '...').
|
|
810
844
|
Output is truncated to 200 lines.
|
|
811
845
|
Returns available=false if the directory is not a git repository.
|
|
846
|
+
TIP: Always pass project= to ensure the correct repo is used.
|
|
812
847
|
"""
|
|
813
848
|
repo = _resolve_repo_path(store, project, repo_path_provider)
|
|
849
|
+
if not os.path.isdir(repo):
|
|
850
|
+
return {
|
|
851
|
+
"available": False,
|
|
852
|
+
"note": f"Path does not exist: {repo}. Pass project=<project_id> to resolve the repo from the index.",
|
|
853
|
+
}
|
|
814
854
|
if not _git_available(repo):
|
|
815
|
-
return {
|
|
855
|
+
return {
|
|
856
|
+
"available": False,
|
|
857
|
+
"note": (
|
|
858
|
+
f"Not a git repository at {repo}. "
|
|
859
|
+
"Pass project=<project_id> so the tool resolves the correct repo root. "
|
|
860
|
+
"Use list_projects() to see available IDs."
|
|
861
|
+
),
|
|
862
|
+
}
|
|
816
863
|
cmd = ["git", "diff", ref]
|
|
817
864
|
if file_path:
|
|
818
865
|
cmd += ["--", file_path]
|
|
819
866
|
r = subprocess.run(cmd, cwd=repo, capture_output=True, text=True, timeout=30)
|
|
820
867
|
if r.returncode != 0:
|
|
821
|
-
return {"available": False, "error": r.stderr.strip()}
|
|
868
|
+
return {"available": False, "error": r.stderr.strip(), "repo_path": repo}
|
|
822
869
|
lines = r.stdout.splitlines()
|
|
823
870
|
truncated = False
|
|
824
871
|
if len(lines) > 200:
|
|
825
872
|
lines = lines[:200]
|
|
826
873
|
truncated = True
|
|
874
|
+
diff_text = "\n".join(lines)
|
|
827
875
|
return {
|
|
828
876
|
"available": True,
|
|
829
877
|
"project": project or repo,
|
|
830
|
-
"
|
|
878
|
+
"repo_path": repo,
|
|
879
|
+
"diff": diff_text,
|
|
831
880
|
"truncated": truncated,
|
|
881
|
+
"note": f"{len(lines)} line(s)" + (" — no changes" if not diff_text.strip() else ""),
|
|
832
882
|
}
|
|
833
883
|
|
|
834
884
|
@mcp.tool()
|
|
@@ -1240,6 +1290,38 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
1240
1290
|
),
|
|
1241
1291
|
}
|
|
1242
1292
|
|
|
1293
|
+
@mcp.tool()
def force_reset_index():
    """
    Emergency reset: delete ALL CodeSpine data files without touching the
    DB engine.

    Use this when the buffer pool is exhausted and normal reset/clear
    commands also fail with OOM errors. This bypasses Kuzu entirely by
    removing all data files from disk.

    After calling this, restart the MCP server and re-index all projects
    with analyse_project().

    This is the nuclear option — only use when reset_project() and
    reset_index() fail with buffer pool errors.
    """
    # NOTE(review): the docstring is presumably surfaced as the tool
    # description to MCP clients, so its wording is left untouched.
    from codespine.db.store import GraphStore as _GS

    removed = _GS.force_delete_all_data()

    # Pick the human-readable note up front instead of inline in the dict.
    if removed:
        note = (
            f"Force-reset complete. {len(removed)} path(s) removed. "
            "Restart the MCP server (codespine stop && codespine start) "
            "and re-index projects with analyse_project()."
        )
    else:
        note = "Nothing to remove — already clean."

    return {
        "available": True,
        "removed_paths": removed,
        "removed_count": len(removed),
        "note": note,
    }
|
|
1324
|
+
|
|
1243
1325
|
# ------------------------------------------------------------------
|
|
1244
1326
|
# Neighborhood exploration
|
|
1245
1327
|
# ------------------------------------------------------------------
|
|
@@ -1395,6 +1477,10 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
1395
1477
|
Use this after editing a file to immediately refresh the graph without
|
|
1396
1478
|
waiting for watch mode or running a full analysis.
|
|
1397
1479
|
|
|
1480
|
+
The file is parsed and its symbols are stored in the overlay (just like
|
|
1481
|
+
watch mode), so the updated data is immediately visible in search and
|
|
1482
|
+
find_symbol results.
|
|
1483
|
+
|
|
1398
1484
|
Parameters:
|
|
1399
1485
|
file_path – Absolute path to the .java file.
|
|
1400
1486
|
project – Optional project_id. If omitted, the tool infers the
|
|
@@ -1408,9 +1494,12 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
1408
1494
|
|
|
1409
1495
|
# Resolve project from indexed projects if not given
|
|
1410
1496
|
if not project:
|
|
1411
|
-
|
|
1412
|
-
|
|
1413
|
-
|
|
1497
|
+
try:
|
|
1498
|
+
projects = store.query_records(
|
|
1499
|
+
"MATCH (p:Project) RETURN p.id as id, p.path as path"
|
|
1500
|
+
)
|
|
1501
|
+
except Exception as exc:
|
|
1502
|
+
return {"available": False, "note": f"DB read failed: {exc}"}
|
|
1414
1503
|
for p in projects:
|
|
1415
1504
|
if abs_fp.startswith(p["path"] + _os.sep):
|
|
1416
1505
|
project = p["id"]
|
|
@@ -1425,58 +1514,64 @@ def build_mcp_server(store, repo_path_provider):
|
|
|
1425
1514
|
}
|
|
1426
1515
|
|
|
1427
1516
|
# Find the project path to use as root for indexing
|
|
1428
|
-
|
|
1429
|
-
|
|
1430
|
-
|
|
1431
|
-
|
|
1517
|
+
try:
|
|
1518
|
+
proj_recs = store.query_records(
|
|
1519
|
+
"MATCH (p:Project) WHERE p.id = $pid RETURN p.path as path LIMIT 1",
|
|
1520
|
+
{"pid": project},
|
|
1521
|
+
)
|
|
1522
|
+
except Exception as exc:
|
|
1523
|
+
return {"available": False, "note": f"DB read failed: {exc}"}
|
|
1432
1524
|
if not proj_recs:
|
|
1433
1525
|
return {"available": False, "note": f"Project '{project}' not found in index."}
|
|
1434
1526
|
|
|
1435
1527
|
proj_path = proj_recs[0]["path"]
|
|
1436
1528
|
|
|
1437
|
-
#
|
|
1438
|
-
#
|
|
1439
|
-
|
|
1440
|
-
|
|
1441
|
-
sys.executable, "-m", "codespine.cli",
|
|
1442
|
-
"analyse", proj_path,
|
|
1443
|
-
"--incremental", "--no-embed", "--allow-running",
|
|
1444
|
-
]
|
|
1529
|
+
# Use overlay-based single-file update (same mechanism as watch mode).
|
|
1530
|
+
# This avoids spawning a subprocess and contending with the write DB.
|
|
1531
|
+
from codespine.watch.watcher import _update_overlay_for_files
|
|
1532
|
+
|
|
1445
1533
|
t0 = time.time()
|
|
1446
1534
|
try:
|
|
1447
|
-
|
|
1448
|
-
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
|
|
1449
|
-
)
|
|
1450
|
-
stdout, stderr = proc.communicate(timeout=30)
|
|
1535
|
+
result = _update_overlay_for_files(store, proj_path, project, [abs_fp])
|
|
1451
1536
|
elapsed = round(time.time() - t0, 2)
|
|
1452
|
-
except subprocess.TimeoutExpired:
|
|
1453
|
-
proc.kill()
|
|
1454
|
-
proc.communicate() # reap zombie
|
|
1455
|
-
elapsed = round(time.time() - t0, 2)
|
|
1456
|
-
return {
|
|
1457
|
-
"available": False,
|
|
1458
|
-
"note": f"Re-index timed out after {elapsed}s. The project may be too large for single-file re-index. Use analyse_project() instead.",
|
|
1459
|
-
}
|
|
1460
1537
|
except Exception as exc:
|
|
1461
1538
|
elapsed = round(time.time() - t0, 2)
|
|
1462
|
-
|
|
1463
|
-
|
|
1464
|
-
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
|
|
1468
|
-
|
|
1469
|
-
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
|
|
1539
|
+
_LOGGER.warning("reindex_file failed: %s", exc)
|
|
1540
|
+
# Fall back to subprocess approach
|
|
1541
|
+
cmd = [
|
|
1542
|
+
sys.executable, "-m", "codespine.cli",
|
|
1543
|
+
"analyse", proj_path,
|
|
1544
|
+
"--incremental", "--no-embed", "--allow-running",
|
|
1545
|
+
]
|
|
1546
|
+
try:
|
|
1547
|
+
proc = subprocess.Popen(
|
|
1548
|
+
cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True,
|
|
1549
|
+
)
|
|
1550
|
+
stdout, stderr = proc.communicate(timeout=60)
|
|
1551
|
+
elapsed = round(time.time() - t0, 2)
|
|
1552
|
+
if proc.returncode != 0:
|
|
1553
|
+
return {
|
|
1554
|
+
"available": False,
|
|
1555
|
+
"note": f"Re-index failed (code {proc.returncode})",
|
|
1556
|
+
"error": (stderr or stdout or "").strip()[:500],
|
|
1557
|
+
}
|
|
1558
|
+
return {
|
|
1559
|
+
"available": True,
|
|
1560
|
+
"file": abs_fp,
|
|
1561
|
+
"project": project,
|
|
1562
|
+
"elapsed_s": elapsed,
|
|
1563
|
+
"note": f"Overlay update failed; fell back to full incremental re-index in {elapsed}s.",
|
|
1564
|
+
}
|
|
1565
|
+
except Exception as fallback_exc:
|
|
1566
|
+
return {"available": False, "note": f"Re-index error: overlay={exc}, subprocess={fallback_exc}"}
|
|
1473
1567
|
|
|
1474
1568
|
return {
|
|
1475
1569
|
"available": True,
|
|
1476
1570
|
"file": abs_fp,
|
|
1477
1571
|
"project": project,
|
|
1478
1572
|
"elapsed_s": elapsed,
|
|
1479
|
-
"
|
|
1573
|
+
"changed": result.get("changed", 0),
|
|
1574
|
+
"note": f"Re-indexed {abs_fp} via overlay in {elapsed}s.",
|
|
1480
1575
|
}
|
|
1481
1576
|
|
|
1482
1577
|
# ------------------------------------------------------------------
|
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import logging
|
|
3
4
|
import os
|
|
4
5
|
import threading
|
|
5
6
|
import time
|
|
7
|
+
import traceback
|
|
6
8
|
|
|
7
9
|
from codespine.analysis.community import detect_communities
|
|
8
10
|
from codespine.analysis.coupling import compute_coupling
|
|
@@ -13,6 +15,8 @@ from codespine.indexer.engine import JavaIndexer
|
|
|
13
15
|
from codespine.overlay.git_state import current_head, git_repo_root
|
|
14
16
|
from codespine.overlay.store import OverlayStore, build_overlay_file_entry
|
|
15
17
|
|
|
18
|
+
LOGGER = logging.getLogger(__name__)
|
|
19
|
+
|
|
16
20
|
|
|
17
21
|
def _project_modules(root_path: str) -> tuple[dict[str, str], list[str], bool]:
|
|
18
22
|
abs_path = os.path.abspath(root_path)
|
|
@@ -39,15 +43,22 @@ def get_overlay_status(store, project: str | None = None) -> list[dict]:
|
|
|
39
43
|
statuses = overlay_store.status(project)
|
|
40
44
|
out: list[dict] = []
|
|
41
45
|
for item in statuses:
|
|
42
|
-
|
|
46
|
+
try:
|
|
47
|
+
metadata = store.get_project_metadata(item["project_id"]) or {}
|
|
48
|
+
except Exception:
|
|
49
|
+
metadata = {}
|
|
50
|
+
# The overlay JSON on disk is the source of truth; the DB flag
|
|
51
|
+
# may be stale if the watch process couldn't write to the DB.
|
|
52
|
+
overlay_present = bool(item.get("overlay_present"))
|
|
53
|
+
db_dirty = bool(metadata.get("overlay_dirty", False))
|
|
43
54
|
out.append(
|
|
44
55
|
{
|
|
45
56
|
**item,
|
|
46
57
|
"indexed_commit": metadata.get("indexed_commit", ""),
|
|
47
|
-
"overlay_dirty":
|
|
58
|
+
"overlay_dirty": overlay_present or db_dirty,
|
|
48
59
|
"indexed_at": metadata.get("indexed_at", ""),
|
|
49
60
|
"promotion_pending": bool(
|
|
50
|
-
|
|
61
|
+
overlay_present
|
|
51
62
|
and item.get("current_head")
|
|
52
63
|
and metadata.get("indexed_commit")
|
|
53
64
|
and item.get("current_head") != metadata.get("indexed_commit")
|
|
@@ -107,68 +118,98 @@ def promote_overlay(store, project: str | None = None, require_head_change: bool
|
|
|
107
118
|
def _update_overlay_for_files(store, project_path: str, project_id: str, file_paths: list[str]) -> dict:
    """Refresh the overlay entries for a batch of changed ``.java`` files.

    For every distinct absolute path ending in ``.java``:

    * if the file exists, its bytes are read, turned into an overlay entry by
      ``build_overlay_file_entry`` and stored with ``overlay_store.upsert_file``;
    * otherwise the path is recorded with ``overlay_store.mark_deleted``.

    Paths with any other suffix are ignored.  A failure while processing one
    file is logged and counted in ``errors``; the remaining files are still
    processed.

    All DB reads are best-effort.  When one fails, empty metadata / catalogs
    (or ``embed=False``) are used so the overlay can still be written.

    Args:
        store: Graph store; must expose ``overlay_store`` and the project
            metadata accessors used below.
        project_path: Root path of the project (module) being updated.
        project_id: Identifier of the project in the index.
        file_paths: Paths of files that changed or were removed.

    Returns:
        ``{"project_id": ..., "changed": int, "deleted": int, "errors": int}``.
    """
    overlay_store: OverlayStore = store.overlay_store
    indexer = JavaIndexer(store)
    repo_root = git_repo_root(project_path)
    head = current_head(project_path)

    # DB reads can fail if the write DB is busy; fall back to empty catalogs
    # so the overlay still captures the file changes from tree-sitter alone.
    metadata_read_ok = True
    try:
        metadata = store.get_project_metadata(project_id) or {}
    except Exception as exc:
        LOGGER.warning("watch: DB read failed for project metadata (%s), using empty", exc)
        metadata = {}
        # Remember that "empty" here means "unknown", not "project missing";
        # the write-back below must not treat the project as new.
        metadata_read_ok = False
    indexed_commit = str(metadata.get("indexed_commit") or "")

    try:
        base_method_catalog = indexer._existing_method_catalog(project_id)
        base_class_catalog = indexer._existing_class_catalog(project_id)
        base_class_ids = indexer._existing_class_ids_by_fqcn(project_id)
        base_class_methods = indexer._existing_class_methods(project_id)
    except Exception as exc:
        LOGGER.warning("watch: DB read failed for catalogs (%s), using empty", exc)
        base_method_catalog = {}
        base_class_catalog = {}
        base_class_ids = {}
        base_class_methods = {}

    try:
        embed = store.project_has_embeddings(project_id)
    except Exception as exc:
        # Logged like the other DB read fallbacks instead of being swallowed.
        LOGGER.warning("watch: DB read failed for embeddings flag (%s), assuming none", exc)
        embed = False

    existing_doc = overlay_store.load_project(project_id)

    changed = deleted = errors = 0
    for file_path in sorted({os.path.abspath(p) for p in file_paths}):
        if not file_path.endswith(".java"):
            continue
        try:
            if os.path.exists(file_path):
                with open(file_path, "rb") as fh:
                    source = fh.read()
                entry = build_overlay_file_entry(
                    store=store,
                    project_id=project_id,
                    project_path=project_path,
                    file_path=file_path,
                    source=source,
                    embed=embed,
                    base_method_catalog=base_method_catalog,
                    base_class_catalog=base_class_catalog,
                    base_class_ids_by_fqcn=base_class_ids,
                    base_class_methods=base_class_methods,
                    existing_overlay_doc=existing_doc,
                )
                overlay_store.upsert_file(
                    project_id=project_id,
                    project_path=project_path,
                    repo_root=repo_root,
                    base_commit=indexed_commit,
                    current_head=head,
                    file_path=file_path,
                    entry=entry,
                )
                # Reload so the next entry is built against the overlay
                # document that now includes this file.
                existing_doc = overlay_store.load_project(project_id)
                changed += 1
            else:
                overlay_store.mark_deleted(
                    project_id=project_id,
                    project_path=project_path,
                    repo_root=repo_root,
                    base_commit=indexed_commit,
                    current_head=head,
                    file_path=file_path,
                )
                existing_doc = overlay_store.load_project(project_id)
                deleted += 1
        except Exception as exc:
            LOGGER.warning("watch: failed to process %s: %s", file_path, exc)
            errors += 1

    if changed or deleted:
        # Try to mark dirty in the DB; if the DB is busy (write contention),
        # the overlay JSON on disk is still correct and will be picked up on
        # next read. Don't let a DB write failure discard overlay work.
        try:
            if metadata_read_ok and not metadata:
                # The read succeeded and returned nothing, so the project row
                # is created here.  This is skipped when the read itself
                # failed: writing the empty fallback commit would overwrite
                # the indexed commit already stored for the project.
                store.upsert_project(project_id, project_path)
                store.set_project_indexed_commit(project_id, indexed_commit)
            store.set_project_overlay_dirty(project_id, True)
        except Exception as exc:
            LOGGER.warning("watch: DB write failed for overlay_dirty flag (%s); overlay is still on disk", exc)
    return {"project_id": project_id, "changed": changed, "deleted": deleted, "errors": errors}
|
|
172
213
|
|
|
173
214
|
|
|
174
215
|
def run_watch_mode(
|
|
@@ -227,11 +268,17 @@ def run_watch_mode(
|
|
|
227
268
|
for module_path, files in sorted(grouped.items()):
|
|
228
269
|
project_id = module_map.get(module_path, os.path.basename(module_path))
|
|
229
270
|
start = time.time()
|
|
230
|
-
|
|
271
|
+
try:
|
|
272
|
+
result = _update_overlay_for_files(store, module_path, project_id, files)
|
|
273
|
+
except Exception as exc:
|
|
274
|
+
LOGGER.error("watch: overlay update failed for %s: %s\n%s", project_id, exc, traceback.format_exc())
|
|
275
|
+
print(f"[{time.strftime('%H:%M:%S')}] {project_id}: ERROR updating overlay — {exc}")
|
|
276
|
+
continue
|
|
231
277
|
elapsed = time.time() - start
|
|
278
|
+
err_note = f", {result.get('errors', 0)} errors" if result.get("errors") else ""
|
|
232
279
|
print(
|
|
233
280
|
f"[{time.strftime('%H:%M:%S')}] {project_id}: overlay updated "
|
|
234
|
-
f"({result['changed']} changed, {result['deleted']} deleted) in {elapsed:.1f}s"
|
|
281
|
+
f"({result['changed']} changed, {result['deleted']} deleted{err_note}) in {elapsed:.1f}s"
|
|
235
282
|
)
|
|
236
283
|
|
|
237
284
|
if promote_on_commit:
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|