codespine 1.0.4__tar.gz → 1.0.7__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {codespine-1.0.4 → codespine-1.0.7}/PKG-INFO +1 -1
  2. {codespine-1.0.4 → codespine-1.0.7}/codespine/__init__.py +1 -1
  3. {codespine-1.0.4 → codespine-1.0.7}/codespine/cli.py +146 -21
  4. codespine-1.0.7/codespine/db/_cypher_compat.py +523 -0
  5. {codespine-1.0.4 → codespine-1.0.7}/codespine/indexer/call_resolver.py +11 -0
  6. {codespine-1.0.4 → codespine-1.0.7}/codespine/indexer/engine.py +195 -26
  7. {codespine-1.0.4 → codespine-1.0.7}/codespine/sharding/store.py +9 -0
  8. {codespine-1.0.4 → codespine-1.0.7}/codespine.egg-info/PKG-INFO +1 -1
  9. {codespine-1.0.4 → codespine-1.0.7}/codespine.egg-info/SOURCES.txt +1 -0
  10. {codespine-1.0.4 → codespine-1.0.7}/pyproject.toml +1 -1
  11. {codespine-1.0.4 → codespine-1.0.7}/tests/test_cypher_compat.py +213 -0
  12. codespine-1.0.7/tests/test_parse_resilience.py +194 -0
  13. codespine-1.0.4/codespine/db/_cypher_compat.py +0 -309
  14. {codespine-1.0.4 → codespine-1.0.7}/LICENSE +0 -0
  15. {codespine-1.0.4 → codespine-1.0.7}/README.md +0 -0
  16. {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/__init__.py +0 -0
  17. {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/community.py +0 -0
  18. {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/context.py +0 -0
  19. {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/coupling.py +0 -0
  20. {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/crossmodule.py +0 -0
  21. {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/deadcode.py +0 -0
  22. {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/flow.py +0 -0
  23. {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/impact.py +0 -0
  24. {codespine-1.0.4 → codespine-1.0.7}/codespine/cache/__init__.py +0 -0
  25. {codespine-1.0.4 → codespine-1.0.7}/codespine/cache/result_cache.py +0 -0
  26. {codespine-1.0.4 → codespine-1.0.7}/codespine/config.py +0 -0
  27. {codespine-1.0.4 → codespine-1.0.7}/codespine/db/__init__.py +0 -0
  28. {codespine-1.0.4 → codespine-1.0.7}/codespine/db/duckdb_store.py +0 -0
  29. {codespine-1.0.4 → codespine-1.0.7}/codespine/db/schema.py +0 -0
  30. {codespine-1.0.4 → codespine-1.0.7}/codespine/db/store.py +0 -0
  31. {codespine-1.0.4 → codespine-1.0.7}/codespine/diff/__init__.py +0 -0
  32. {codespine-1.0.4 → codespine-1.0.7}/codespine/diff/branch_diff.py +0 -0
  33. {codespine-1.0.4 → codespine-1.0.7}/codespine/guide.py +0 -0
  34. {codespine-1.0.4 → codespine-1.0.7}/codespine/indexer/__init__.py +0 -0
  35. {codespine-1.0.4 → codespine-1.0.7}/codespine/indexer/di_resolver.py +0 -0
  36. {codespine-1.0.4 → codespine-1.0.7}/codespine/indexer/java_parser.py +0 -0
  37. {codespine-1.0.4 → codespine-1.0.7}/codespine/indexer/symbol_builder.py +0 -0
  38. {codespine-1.0.4 → codespine-1.0.7}/codespine/mcp/__init__.py +0 -0
  39. {codespine-1.0.4 → codespine-1.0.7}/codespine/mcp/server.py +0 -0
  40. {codespine-1.0.4 → codespine-1.0.7}/codespine/noise/__init__.py +0 -0
  41. {codespine-1.0.4 → codespine-1.0.7}/codespine/noise/blocklist.py +0 -0
  42. {codespine-1.0.4 → codespine-1.0.7}/codespine/overlay/__init__.py +0 -0
  43. {codespine-1.0.4 → codespine-1.0.7}/codespine/overlay/git_state.py +0 -0
  44. {codespine-1.0.4 → codespine-1.0.7}/codespine/overlay/merge.py +0 -0
  45. {codespine-1.0.4 → codespine-1.0.7}/codespine/overlay/store.py +0 -0
  46. {codespine-1.0.4 → codespine-1.0.7}/codespine/search/__init__.py +0 -0
  47. {codespine-1.0.4 → codespine-1.0.7}/codespine/search/bm25.py +0 -0
  48. {codespine-1.0.4 → codespine-1.0.7}/codespine/search/fuzzy.py +0 -0
  49. {codespine-1.0.4 → codespine-1.0.7}/codespine/search/hybrid.py +0 -0
  50. {codespine-1.0.4 → codespine-1.0.7}/codespine/search/rrf.py +0 -0
  51. {codespine-1.0.4 → codespine-1.0.7}/codespine/search/vector.py +0 -0
  52. {codespine-1.0.4 → codespine-1.0.7}/codespine/sharding/__init__.py +0 -0
  53. {codespine-1.0.4 → codespine-1.0.7}/codespine/sharding/router.py +0 -0
  54. {codespine-1.0.4 → codespine-1.0.7}/codespine/watch/__init__.py +0 -0
  55. {codespine-1.0.4 → codespine-1.0.7}/codespine/watch/git_hook.py +0 -0
  56. {codespine-1.0.4 → codespine-1.0.7}/codespine/watch/watcher.py +0 -0
  57. {codespine-1.0.4 → codespine-1.0.7}/codespine.egg-info/dependency_links.txt +0 -0
  58. {codespine-1.0.4 → codespine-1.0.7}/codespine.egg-info/entry_points.txt +0 -0
  59. {codespine-1.0.4 → codespine-1.0.7}/codespine.egg-info/requires.txt +0 -0
  60. {codespine-1.0.4 → codespine-1.0.7}/codespine.egg-info/top_level.txt +0 -0
  61. {codespine-1.0.4 → codespine-1.0.7}/gindex.py +0 -0
  62. {codespine-1.0.4 → codespine-1.0.7}/setup.cfg +0 -0
  63. {codespine-1.0.4 → codespine-1.0.7}/tests/test_branch_diff_normalize.py +0 -0
  64. {codespine-1.0.4 → codespine-1.0.7}/tests/test_call_resolver.py +0 -0
  65. {codespine-1.0.4 → codespine-1.0.7}/tests/test_community_detection.py +0 -0
  66. {codespine-1.0.4 → codespine-1.0.7}/tests/test_deadcode.py +0 -0
  67. {codespine-1.0.4 → codespine-1.0.7}/tests/test_duckdb_store.py +0 -0
  68. {codespine-1.0.4 → codespine-1.0.7}/tests/test_index_and_hybrid.py +0 -0
  69. {codespine-1.0.4 → codespine-1.0.7}/tests/test_java_parser.py +0 -0
  70. {codespine-1.0.4 → codespine-1.0.7}/tests/test_multimodule_index.py +0 -0
  71. {codespine-1.0.4 → codespine-1.0.7}/tests/test_overlay.py +0 -0
  72. {codespine-1.0.4 → codespine-1.0.7}/tests/test_result_cache.py +0 -0
  73. {codespine-1.0.4 → codespine-1.0.7}/tests/test_search_ranking.py +0 -0
  74. {codespine-1.0.4 → codespine-1.0.7}/tests/test_sharding.py +0 -0
  75. {codespine-1.0.4 → codespine-1.0.7}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 1.0.4
3
+ Version: 1.0.7
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "1.0.4"
4
+ __version__ = "1.0.7"
@@ -137,8 +137,12 @@ def _index_shard_group(
137
137
 
138
138
  for mod_path, project_id in modules:
139
139
  # Per-module progress state (local — no shared mutation).
140
- parse_state: dict = {"shown": False, "indexed": 0, "total": 0,
141
- "last_ts": 0.0, "printed_zero": False}
140
+ parse_state: dict = {
141
+ "shown": False, "indexed": 0, "total": 0,
142
+ "last_ts": 0.0, "printed_zero": False,
143
+ "current_file": "", "elapsed": 0.0,
144
+ "last_done": -1, "frozen_since": 0.0, "stall_warned": False,
145
+ }
142
146
  call_state: dict = {"shown": False, "count": 0, "last_ts": 0.0,
143
147
  "started_at": 0.0}
144
148
 
@@ -160,11 +164,61 @@ def _index_shard_group(
160
164
  _phase(f"{prefix}Parsing code...", "0/0")
161
165
  parse_state["printed_zero"] = True
162
166
  return
167
+ if event == "parse_heartbeat":
168
+ # Fires every 2s from a daemon thread — keeps spinner alive
169
+ # even when all worker threads are busy or one is hanging.
170
+ done = int(payload.get("done", 0))
171
+ total = int(payload.get("total", 0))
172
+ current = str(payload.get("current_file", ""))
173
+ elapsed_s = float(payload.get("elapsed", 0.0))
174
+ parse_state["indexed"] = done
175
+ parse_state["total"] = total
176
+ parse_state["current_file"] = current
177
+ parse_state["elapsed"] = elapsed_s
178
+ if total > 0 and not parallel:
179
+ basename = os.path.basename(current) if current else ""
180
+ click.echo(
181
+ f"\r{_spinner_char()} {prefix}Parsing code... "
182
+ f"{_bar(done, total)} {done}/{total} "
183
+ f"{basename[:38]:<38} {elapsed_s:.0f}s ",
184
+ nl=False,
185
+ )
186
+ parse_state["shown"] = True
187
+ parse_state["last_ts"] = now
188
+
189
+ # ── Stall detection ──────────────────────────────────────
190
+ if done == parse_state["last_done"]:
191
+ if parse_state["frozen_since"] == 0.0:
192
+ parse_state["frozen_since"] = now
193
+ stalled_for = now - parse_state["frozen_since"]
194
+ if stalled_for >= 15.0 and not parse_state["stall_warned"]:
195
+ parse_state["stall_warned"] = True
196
+ basename = os.path.basename(current) if current else "unknown"
197
+ with output_lock:
198
+ click.echo() # break out of \r line
199
+ click.secho(
200
+ f" ⚠ Parsing stalled on {basename} for "
201
+ f"{stalled_for:.0f}s — file may be pathological.\n"
202
+ f" Timeout at {os.environ.get('CODESPINE_PARSE_TIMEOUT_SECS', '60')}s. "
203
+ f"To skip large files: "
204
+ f"export CODESPINE_MAX_FILE_BYTES=2097152",
205
+ fg="yellow",
206
+ )
207
+ else:
208
+ parse_state["last_done"] = done
209
+ parse_state["frozen_since"] = 0.0
210
+ parse_state["stall_warned"] = False
211
+ return
163
212
  if event == "parse_progress":
164
213
  indexed = int(payload.get("indexed", 0))
165
214
  total = int(payload.get("total", 0))
166
215
  parse_state["indexed"] = indexed
167
216
  parse_state["total"] = total
217
+ # Reset stall tracker on actual progress
218
+ if indexed != parse_state["last_done"]:
219
+ parse_state["last_done"] = indexed
220
+ parse_state["frozen_since"] = 0.0
221
+ parse_state["stall_warned"] = False
168
222
  if total == 0:
169
223
  return
170
224
  if indexed == total or (now - parse_state["last_ts"]) >= 0.2:
@@ -192,6 +246,21 @@ def _index_shard_group(
192
246
  with output_lock:
193
247
  _phase(f"{prefix}Tracing calls...", "starting...")
194
248
  return
249
+ if event == "resolve_calls_heartbeat":
250
+ # Fires every 2 s from a daemon thread so the spinner stays
251
+ # alive even when the resolver produces no new edges.
252
+ scanned = int(payload.get("scanned", 0))
253
+ edges = int(payload.get("edges", 0))
254
+ elapsed_s = float(payload.get("elapsed", 0.0))
255
+ if not parallel:
256
+ click.echo(
257
+ f"\r{_spinner_char()} {prefix}Tracing calls... "
258
+ f"{edges:>6} resolved / {scanned} scanned {elapsed_s:.1f}s ",
259
+ nl=False,
260
+ )
261
+ call_state["shown"] = True
262
+ call_state["last_ts"] = now
263
+ return
195
264
  if event == "resolve_calls_progress":
196
265
  call_state["count"] = int(payload.get("calls_resolved", 0))
197
266
  if (now - call_state["last_ts"]) >= 0.25:
@@ -345,6 +414,37 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
345
414
  # For single-project analysis this is transparent — shard() always
346
415
  # returns a GraphStore pointing to the correct shard path.
347
416
  sg = ShardedGraphStore(read_only=False)
417
+
418
+ # ── SIGINT handler: flush partial index on Ctrl+C ────────────────────
419
+ # The handler captures `sg` by closure. On interrupt it snapshots all
420
+ # open shards so `codespine stats` and MCP see the partial result, then
421
+ # calls os._exit(130) to bypass Python cleanup (safe for CLI process).
422
+ # A second Ctrl+C hard-exits immediately.
423
+ _sigint_pressed: list[bool] = [False]
424
+ _old_sigint_handler = signal.getsignal(signal.SIGINT)
425
+
426
+ def _sigint_flush(signum: int, frame: object) -> None: # noqa: ARG001
427
+ if _sigint_pressed[0]:
428
+ os._exit(130)
429
+ _sigint_pressed[0] = True
430
+ # Restore default handler so a second Ctrl+C exits immediately.
431
+ signal.signal(signal.SIGINT, signal.default_int_handler)
432
+ click.secho(
433
+ "\n\n⚠ Interrupted — flushing partial index to read replica…",
434
+ fg="yellow",
435
+ )
436
+ try:
437
+ sg.snapshot_all(background=False)
438
+ click.secho(
439
+ "✓ Partial index saved. Run 'codespine stats' to see what was indexed.",
440
+ fg="yellow",
441
+ )
442
+ except Exception: # noqa: BLE001
443
+ pass
444
+ os._exit(130)
445
+
446
+ signal.signal(signal.SIGINT, _sigint_flush)
447
+
348
448
  # The indexer is initialised per-module below with the right shard store.
349
449
  # We keep a single ShardedGraphStore to fan-out cross-module linking later.
350
450
 
@@ -537,21 +637,28 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
537
637
 
538
638
  _phase("Analyzing git history...", "skipped (large repo; rerun with --deep)")
539
639
 
540
- vector_count = root_shard_store.query_records(
640
+ # Summary queries are best-effort: a translator miss or a transient
641
+ # DB error must never throw away a successful index.
642
+ def _safe_count(query: str) -> int:
643
+ try:
644
+ rows = root_shard_store.query_records(query)
645
+ return int(rows[0]["count"]) if rows else 0
646
+ except Exception as exc: # noqa: BLE001 - summary stats are non-critical
647
+ click.secho(f" (summary stat unavailable: {exc})", fg="yellow")
648
+ return 0
649
+
650
+ embeddings_generated = last_result.embeddings_generated if last_result else 0
651
+ vectors_stored = _safe_count(
541
652
  """
542
653
  MATCH (s:Symbol)
543
654
  WHERE s.embedding IS NOT NULL
544
655
  RETURN count(s) as count
545
656
  """
546
- )
547
- embeddings_generated = last_result.embeddings_generated if last_result else 0
548
- vectors_stored = int(vector_count[0]["count"]) if vector_count else embeddings_generated
657
+ ) or embeddings_generated
549
658
  _phase("Generating embeddings...", f"{vectors_stored} vectors stored")
550
659
 
551
- symbol_count = root_shard_store.query_records("MATCH (s:Symbol) RETURN count(s) as count")
552
- edge_count = root_shard_store.query_records("MATCH ()-[r]->() RETURN count(r) as count")
553
- symbols = int(symbol_count[0]["count"]) if symbol_count else 0
554
- edges = int(edge_count[0]["count"]) if edge_count else 0
660
+ symbols = _safe_count("MATCH (s:Symbol) RETURN count(s) as count")
661
+ edges = _safe_count("MATCH ()-[r]->() RETURN count(r) as count")
555
662
  elapsed = time.perf_counter() - started
556
663
 
557
664
  if not embed:
@@ -587,6 +694,9 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
587
694
  sg.snapshot_all(background=False)
588
695
  _finish_phase(snap_label, "MCP will reload automatically")
589
696
 
697
+ # Restore original SIGINT handler now that we've finished cleanly.
698
+ signal.signal(signal.SIGINT, _old_sigint_handler)
699
+
590
700
 
591
701
  @main.command()
592
702
  @click.argument("query")
@@ -734,15 +844,27 @@ def stats(as_json: bool, show_shards: bool) -> None:
734
844
  click.secho("No projects indexed yet. Run 'codespine analyse <path>'.", fg="yellow")
735
845
  return
736
846
 
847
+ def _stat_count(store, query: str, params: dict) -> int:
848
+ """Run a stats count query — returns 0 on any failure."""
849
+ try:
850
+ rows = store.query_records(query, params)
851
+ return int(rows[0]["n"]) if rows else 0
852
+ except Exception as exc: # noqa: BLE001
853
+ click.secho(f" (stat unavailable: {exc})", fg="yellow")
854
+ return 0
855
+
737
856
  rows = []
738
857
  for p in all_projects_meta:
739
858
  pid = p["id"]
740
859
  # Route each query to the project's owning shard.
741
860
  ps = _project_store(pid)
742
- files = ps.query_records(
743
- "MATCH (f:File) WHERE f.project_id = $pid RETURN count(f) as n", {"pid": pid}
861
+ n_files = _stat_count(
862
+ ps,
863
+ "MATCH (f:File) WHERE f.project_id = $pid RETURN count(f) as n",
864
+ {"pid": pid},
744
865
  )
745
- classes = ps.query_records(
866
+ n_classes = _stat_count(
867
+ ps,
746
868
  """
747
869
  MATCH (f:File) WHERE f.project_id = $pid
748
870
  WITH f
@@ -751,7 +873,8 @@ def stats(as_json: bool, show_shards: bool) -> None:
751
873
  """,
752
874
  {"pid": pid},
753
875
  )
754
- methods = ps.query_records(
876
+ n_methods = _stat_count(
877
+ ps,
755
878
  """
756
879
  MATCH (f:File) WHERE f.project_id = $pid
757
880
  WITH f
@@ -762,7 +885,8 @@ def stats(as_json: bool, show_shards: bool) -> None:
762
885
  """,
763
886
  {"pid": pid},
764
887
  )
765
- calls = ps.query_records(
888
+ n_calls = _stat_count(
889
+ ps,
766
890
  """
767
891
  MATCH (f:File) WHERE f.project_id = $pid
768
892
  WITH f
@@ -773,7 +897,8 @@ def stats(as_json: bool, show_shards: bool) -> None:
773
897
  """,
774
898
  {"pid": pid},
775
899
  )
776
- emb = ps.query_records(
900
+ n_emb = _stat_count(
901
+ ps,
777
902
  """
778
903
  MATCH (f:File) WHERE f.project_id = $pid
779
904
  WITH f
@@ -786,11 +911,11 @@ def stats(as_json: bool, show_shards: bool) -> None:
786
911
  "project": pid,
787
912
  "path": p["path"],
788
913
  "shard": sg.router.shard_for(pid),
789
- "files": files[0]["n"] if files else 0,
790
- "classes": classes[0]["n"] if classes else 0,
791
- "methods": methods[0]["n"] if methods else 0,
792
- "calls_out": calls[0]["n"] if calls else 0,
793
- "embeddings": emb[0]["n"] if emb else 0,
914
+ "files": n_files,
915
+ "classes": n_classes,
916
+ "methods": n_methods,
917
+ "calls_out": n_calls,
918
+ "embeddings": n_emb,
794
919
  })
795
920
 
796
921
  if as_json: