codespine 1.0.4__tar.gz → 1.0.7__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {codespine-1.0.4 → codespine-1.0.7}/PKG-INFO +1 -1
- {codespine-1.0.4 → codespine-1.0.7}/codespine/__init__.py +1 -1
- {codespine-1.0.4 → codespine-1.0.7}/codespine/cli.py +146 -21
- codespine-1.0.7/codespine/db/_cypher_compat.py +523 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/indexer/call_resolver.py +11 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/indexer/engine.py +195 -26
- {codespine-1.0.4 → codespine-1.0.7}/codespine/sharding/store.py +9 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine.egg-info/PKG-INFO +1 -1
- {codespine-1.0.4 → codespine-1.0.7}/codespine.egg-info/SOURCES.txt +1 -0
- {codespine-1.0.4 → codespine-1.0.7}/pyproject.toml +1 -1
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_cypher_compat.py +213 -0
- codespine-1.0.7/tests/test_parse_resilience.py +194 -0
- codespine-1.0.4/codespine/db/_cypher_compat.py +0 -309
- {codespine-1.0.4 → codespine-1.0.7}/LICENSE +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/README.md +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/__init__.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/community.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/context.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/coupling.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/crossmodule.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/deadcode.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/flow.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/analysis/impact.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/cache/__init__.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/cache/result_cache.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/config.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/db/__init__.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/db/duckdb_store.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/db/schema.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/db/store.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/diff/__init__.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/diff/branch_diff.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/guide.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/indexer/__init__.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/indexer/di_resolver.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/indexer/java_parser.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/indexer/symbol_builder.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/mcp/__init__.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/mcp/server.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/noise/__init__.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/noise/blocklist.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/overlay/__init__.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/overlay/git_state.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/overlay/merge.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/overlay/store.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/search/__init__.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/search/bm25.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/search/fuzzy.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/search/hybrid.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/search/rrf.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/search/vector.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/sharding/__init__.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/sharding/router.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/watch/__init__.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/watch/git_hook.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine/watch/watcher.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine.egg-info/dependency_links.txt +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine.egg-info/entry_points.txt +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine.egg-info/requires.txt +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/codespine.egg-info/top_level.txt +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/gindex.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/setup.cfg +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_branch_diff_normalize.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_call_resolver.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_community_detection.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_deadcode.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_duckdb_store.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_index_and_hybrid.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_java_parser.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_multimodule_index.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_overlay.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_result_cache.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_search_ranking.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_sharding.py +0 -0
- {codespine-1.0.4 → codespine-1.0.7}/tests/test_store_recovery.py +0 -0
|
@@ -137,8 +137,12 @@ def _index_shard_group(
|
|
|
137
137
|
|
|
138
138
|
for mod_path, project_id in modules:
|
|
139
139
|
# Per-module progress state (local — no shared mutation).
|
|
140
|
-
parse_state: dict = {
|
|
141
|
-
|
|
140
|
+
parse_state: dict = {
|
|
141
|
+
"shown": False, "indexed": 0, "total": 0,
|
|
142
|
+
"last_ts": 0.0, "printed_zero": False,
|
|
143
|
+
"current_file": "", "elapsed": 0.0,
|
|
144
|
+
"last_done": -1, "frozen_since": 0.0, "stall_warned": False,
|
|
145
|
+
}
|
|
142
146
|
call_state: dict = {"shown": False, "count": 0, "last_ts": 0.0,
|
|
143
147
|
"started_at": 0.0}
|
|
144
148
|
|
|
@@ -160,11 +164,61 @@ def _index_shard_group(
|
|
|
160
164
|
_phase(f"{prefix}Parsing code...", "0/0")
|
|
161
165
|
parse_state["printed_zero"] = True
|
|
162
166
|
return
|
|
167
|
+
if event == "parse_heartbeat":
|
|
168
|
+
# Fires every 2s from a daemon thread — keeps spinner alive
|
|
169
|
+
# even when all worker threads are busy or one is hanging.
|
|
170
|
+
done = int(payload.get("done", 0))
|
|
171
|
+
total = int(payload.get("total", 0))
|
|
172
|
+
current = str(payload.get("current_file", ""))
|
|
173
|
+
elapsed_s = float(payload.get("elapsed", 0.0))
|
|
174
|
+
parse_state["indexed"] = done
|
|
175
|
+
parse_state["total"] = total
|
|
176
|
+
parse_state["current_file"] = current
|
|
177
|
+
parse_state["elapsed"] = elapsed_s
|
|
178
|
+
if total > 0 and not parallel:
|
|
179
|
+
basename = os.path.basename(current) if current else ""
|
|
180
|
+
click.echo(
|
|
181
|
+
f"\r{_spinner_char()} {prefix}Parsing code... "
|
|
182
|
+
f"{_bar(done, total)} {done}/{total} "
|
|
183
|
+
f"{basename[:38]:<38} {elapsed_s:.0f}s ",
|
|
184
|
+
nl=False,
|
|
185
|
+
)
|
|
186
|
+
parse_state["shown"] = True
|
|
187
|
+
parse_state["last_ts"] = now
|
|
188
|
+
|
|
189
|
+
# ── Stall detection ──────────────────────────────────────
|
|
190
|
+
if done == parse_state["last_done"]:
|
|
191
|
+
if parse_state["frozen_since"] == 0.0:
|
|
192
|
+
parse_state["frozen_since"] = now
|
|
193
|
+
stalled_for = now - parse_state["frozen_since"]
|
|
194
|
+
if stalled_for >= 15.0 and not parse_state["stall_warned"]:
|
|
195
|
+
parse_state["stall_warned"] = True
|
|
196
|
+
basename = os.path.basename(current) if current else "unknown"
|
|
197
|
+
with output_lock:
|
|
198
|
+
click.echo() # break out of \r line
|
|
199
|
+
click.secho(
|
|
200
|
+
f" ⚠ Parsing stalled on {basename} for "
|
|
201
|
+
f"{stalled_for:.0f}s — file may be pathological.\n"
|
|
202
|
+
f" Timeout at {os.environ.get('CODESPINE_PARSE_TIMEOUT_SECS', '60')}s. "
|
|
203
|
+
f"To skip large files: "
|
|
204
|
+
f"export CODESPINE_MAX_FILE_BYTES=2097152",
|
|
205
|
+
fg="yellow",
|
|
206
|
+
)
|
|
207
|
+
else:
|
|
208
|
+
parse_state["last_done"] = done
|
|
209
|
+
parse_state["frozen_since"] = 0.0
|
|
210
|
+
parse_state["stall_warned"] = False
|
|
211
|
+
return
|
|
163
212
|
if event == "parse_progress":
|
|
164
213
|
indexed = int(payload.get("indexed", 0))
|
|
165
214
|
total = int(payload.get("total", 0))
|
|
166
215
|
parse_state["indexed"] = indexed
|
|
167
216
|
parse_state["total"] = total
|
|
217
|
+
# Reset stall tracker on actual progress
|
|
218
|
+
if indexed != parse_state["last_done"]:
|
|
219
|
+
parse_state["last_done"] = indexed
|
|
220
|
+
parse_state["frozen_since"] = 0.0
|
|
221
|
+
parse_state["stall_warned"] = False
|
|
168
222
|
if total == 0:
|
|
169
223
|
return
|
|
170
224
|
if indexed == total or (now - parse_state["last_ts"]) >= 0.2:
|
|
@@ -192,6 +246,21 @@ def _index_shard_group(
|
|
|
192
246
|
with output_lock:
|
|
193
247
|
_phase(f"{prefix}Tracing calls...", "starting...")
|
|
194
248
|
return
|
|
249
|
+
if event == "resolve_calls_heartbeat":
|
|
250
|
+
# Fires every 2 s from a daemon thread so the spinner stays
|
|
251
|
+
# alive even when the resolver produces no new edges.
|
|
252
|
+
scanned = int(payload.get("scanned", 0))
|
|
253
|
+
edges = int(payload.get("edges", 0))
|
|
254
|
+
elapsed_s = float(payload.get("elapsed", 0.0))
|
|
255
|
+
if not parallel:
|
|
256
|
+
click.echo(
|
|
257
|
+
f"\r{_spinner_char()} {prefix}Tracing calls... "
|
|
258
|
+
f"{edges:>6} resolved / {scanned} scanned {elapsed_s:.1f}s ",
|
|
259
|
+
nl=False,
|
|
260
|
+
)
|
|
261
|
+
call_state["shown"] = True
|
|
262
|
+
call_state["last_ts"] = now
|
|
263
|
+
return
|
|
195
264
|
if event == "resolve_calls_progress":
|
|
196
265
|
call_state["count"] = int(payload.get("calls_resolved", 0))
|
|
197
266
|
if (now - call_state["last_ts"]) >= 0.25:
|
|
@@ -345,6 +414,37 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
|
|
|
345
414
|
# For single-project analysis this is transparent — shard() always
|
|
346
415
|
# returns a GraphStore pointing to the correct shard path.
|
|
347
416
|
sg = ShardedGraphStore(read_only=False)
|
|
417
|
+
|
|
418
|
+
# ── SIGINT handler: flush partial index on Ctrl+C ────────────────────
|
|
419
|
+
# The handler captures `sg` by closure. On interrupt it snapshots all
|
|
420
|
+
# open shards so `codespine stats` and MCP see the partial result, then
|
|
421
|
+
# calls os._exit(130) to bypass Python cleanup (safe for CLI process).
|
|
422
|
+
# A second Ctrl+C hard-exits immediately.
|
|
423
|
+
_sigint_pressed: list[bool] = [False]
|
|
424
|
+
_old_sigint_handler = signal.getsignal(signal.SIGINT)
|
|
425
|
+
|
|
426
|
+
def _sigint_flush(signum: int, frame: object) -> None: # noqa: ARG001
|
|
427
|
+
if _sigint_pressed[0]:
|
|
428
|
+
os._exit(130)
|
|
429
|
+
_sigint_pressed[0] = True
|
|
430
|
+
# Restore default handler so a second Ctrl+C exits immediately.
|
|
431
|
+
signal.signal(signal.SIGINT, signal.default_int_handler)
|
|
432
|
+
click.secho(
|
|
433
|
+
"\n\n⚠ Interrupted — flushing partial index to read replica…",
|
|
434
|
+
fg="yellow",
|
|
435
|
+
)
|
|
436
|
+
try:
|
|
437
|
+
sg.snapshot_all(background=False)
|
|
438
|
+
click.secho(
|
|
439
|
+
"✓ Partial index saved. Run 'codespine stats' to see what was indexed.",
|
|
440
|
+
fg="yellow",
|
|
441
|
+
)
|
|
442
|
+
except Exception: # noqa: BLE001
|
|
443
|
+
pass
|
|
444
|
+
os._exit(130)
|
|
445
|
+
|
|
446
|
+
signal.signal(signal.SIGINT, _sigint_flush)
|
|
447
|
+
|
|
348
448
|
# The indexer is initialised per-module below with the right shard store.
|
|
349
449
|
# We keep a single ShardedGraphStore to fan-out cross-module linking later.
|
|
350
450
|
|
|
@@ -537,21 +637,28 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
|
|
|
537
637
|
|
|
538
638
|
_phase("Analyzing git history...", "skipped (large repo; rerun with --deep)")
|
|
539
639
|
|
|
540
|
-
|
|
640
|
+
# Summary queries are best-effort: a translator miss or a transient
|
|
641
|
+
# DB error must never throw away a successful index.
|
|
642
|
+
def _safe_count(query: str) -> int:
|
|
643
|
+
try:
|
|
644
|
+
rows = root_shard_store.query_records(query)
|
|
645
|
+
return int(rows[0]["count"]) if rows else 0
|
|
646
|
+
except Exception as exc: # noqa: BLE001 - summary stats are non-critical
|
|
647
|
+
click.secho(f" (summary stat unavailable: {exc})", fg="yellow")
|
|
648
|
+
return 0
|
|
649
|
+
|
|
650
|
+
embeddings_generated = last_result.embeddings_generated if last_result else 0
|
|
651
|
+
vectors_stored = _safe_count(
|
|
541
652
|
"""
|
|
542
653
|
MATCH (s:Symbol)
|
|
543
654
|
WHERE s.embedding IS NOT NULL
|
|
544
655
|
RETURN count(s) as count
|
|
545
656
|
"""
|
|
546
|
-
)
|
|
547
|
-
embeddings_generated = last_result.embeddings_generated if last_result else 0
|
|
548
|
-
vectors_stored = int(vector_count[0]["count"]) if vector_count else embeddings_generated
|
|
657
|
+
) or embeddings_generated
|
|
549
658
|
_phase("Generating embeddings...", f"{vectors_stored} vectors stored")
|
|
550
659
|
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
symbols = int(symbol_count[0]["count"]) if symbol_count else 0
|
|
554
|
-
edges = int(edge_count[0]["count"]) if edge_count else 0
|
|
660
|
+
symbols = _safe_count("MATCH (s:Symbol) RETURN count(s) as count")
|
|
661
|
+
edges = _safe_count("MATCH ()-[r]->() RETURN count(r) as count")
|
|
555
662
|
elapsed = time.perf_counter() - started
|
|
556
663
|
|
|
557
664
|
if not embed:
|
|
@@ -587,6 +694,9 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
|
|
|
587
694
|
sg.snapshot_all(background=False)
|
|
588
695
|
_finish_phase(snap_label, "MCP will reload automatically")
|
|
589
696
|
|
|
697
|
+
# Restore original SIGINT handler now that we've finished cleanly.
|
|
698
|
+
signal.signal(signal.SIGINT, _old_sigint_handler)
|
|
699
|
+
|
|
590
700
|
|
|
591
701
|
@main.command()
|
|
592
702
|
@click.argument("query")
|
|
@@ -734,15 +844,27 @@ def stats(as_json: bool, show_shards: bool) -> None:
|
|
|
734
844
|
click.secho("No projects indexed yet. Run 'codespine analyse <path>'.", fg="yellow")
|
|
735
845
|
return
|
|
736
846
|
|
|
847
|
+
def _stat_count(store, query: str, params: dict) -> int:
|
|
848
|
+
"""Run a stats count query — returns 0 on any failure."""
|
|
849
|
+
try:
|
|
850
|
+
rows = store.query_records(query, params)
|
|
851
|
+
return int(rows[0]["n"]) if rows else 0
|
|
852
|
+
except Exception as exc: # noqa: BLE001
|
|
853
|
+
click.secho(f" (stat unavailable: {exc})", fg="yellow")
|
|
854
|
+
return 0
|
|
855
|
+
|
|
737
856
|
rows = []
|
|
738
857
|
for p in all_projects_meta:
|
|
739
858
|
pid = p["id"]
|
|
740
859
|
# Route each query to the project's owning shard.
|
|
741
860
|
ps = _project_store(pid)
|
|
742
|
-
|
|
743
|
-
|
|
861
|
+
n_files = _stat_count(
|
|
862
|
+
ps,
|
|
863
|
+
"MATCH (f:File) WHERE f.project_id = $pid RETURN count(f) as n",
|
|
864
|
+
{"pid": pid},
|
|
744
865
|
)
|
|
745
|
-
|
|
866
|
+
n_classes = _stat_count(
|
|
867
|
+
ps,
|
|
746
868
|
"""
|
|
747
869
|
MATCH (f:File) WHERE f.project_id = $pid
|
|
748
870
|
WITH f
|
|
@@ -751,7 +873,8 @@ def stats(as_json: bool, show_shards: bool) -> None:
|
|
|
751
873
|
""",
|
|
752
874
|
{"pid": pid},
|
|
753
875
|
)
|
|
754
|
-
|
|
876
|
+
n_methods = _stat_count(
|
|
877
|
+
ps,
|
|
755
878
|
"""
|
|
756
879
|
MATCH (f:File) WHERE f.project_id = $pid
|
|
757
880
|
WITH f
|
|
@@ -762,7 +885,8 @@ def stats(as_json: bool, show_shards: bool) -> None:
|
|
|
762
885
|
""",
|
|
763
886
|
{"pid": pid},
|
|
764
887
|
)
|
|
765
|
-
|
|
888
|
+
n_calls = _stat_count(
|
|
889
|
+
ps,
|
|
766
890
|
"""
|
|
767
891
|
MATCH (f:File) WHERE f.project_id = $pid
|
|
768
892
|
WITH f
|
|
@@ -773,7 +897,8 @@ def stats(as_json: bool, show_shards: bool) -> None:
|
|
|
773
897
|
""",
|
|
774
898
|
{"pid": pid},
|
|
775
899
|
)
|
|
776
|
-
|
|
900
|
+
n_emb = _stat_count(
|
|
901
|
+
ps,
|
|
777
902
|
"""
|
|
778
903
|
MATCH (f:File) WHERE f.project_id = $pid
|
|
779
904
|
WITH f
|
|
@@ -786,11 +911,11 @@ def stats(as_json: bool, show_shards: bool) -> None:
|
|
|
786
911
|
"project": pid,
|
|
787
912
|
"path": p["path"],
|
|
788
913
|
"shard": sg.router.shard_for(pid),
|
|
789
|
-
"files":
|
|
790
|
-
"classes":
|
|
791
|
-
"methods":
|
|
792
|
-
"calls_out":
|
|
793
|
-
"embeddings":
|
|
914
|
+
"files": n_files,
|
|
915
|
+
"classes": n_classes,
|
|
916
|
+
"methods": n_methods,
|
|
917
|
+
"calls_out": n_calls,
|
|
918
|
+
"embeddings": n_emb,
|
|
794
919
|
})
|
|
795
920
|
|
|
796
921
|
if as_json:
|