codespine 1.0.8__tar.gz → 1.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. {codespine-1.0.8 → codespine-1.0.9}/PKG-INFO +9 -9
  2. {codespine-1.0.8 → codespine-1.0.9}/README.md +8 -8
  3. {codespine-1.0.8 → codespine-1.0.9}/codespine/__init__.py +1 -1
  4. {codespine-1.0.8 → codespine-1.0.9}/codespine/cli.py +186 -28
  5. {codespine-1.0.8 → codespine-1.0.9}/codespine/config.py +4 -2
  6. {codespine-1.0.8 → codespine-1.0.9}/codespine/indexer/call_resolver.py +6 -0
  7. {codespine-1.0.8 → codespine-1.0.9}/codespine/indexer/engine.py +66 -38
  8. {codespine-1.0.8 → codespine-1.0.9}/codespine/mcp/server.py +3 -2
  9. {codespine-1.0.8 → codespine-1.0.9}/codespine.egg-info/PKG-INFO +9 -9
  10. {codespine-1.0.8 → codespine-1.0.9}/pyproject.toml +1 -1
  11. {codespine-1.0.8 → codespine-1.0.9}/tests/test_call_resolver.py +34 -0
  12. {codespine-1.0.8 → codespine-1.0.9}/LICENSE +0 -0
  13. {codespine-1.0.8 → codespine-1.0.9}/codespine/analysis/__init__.py +0 -0
  14. {codespine-1.0.8 → codespine-1.0.9}/codespine/analysis/community.py +0 -0
  15. {codespine-1.0.8 → codespine-1.0.9}/codespine/analysis/context.py +0 -0
  16. {codespine-1.0.8 → codespine-1.0.9}/codespine/analysis/coupling.py +0 -0
  17. {codespine-1.0.8 → codespine-1.0.9}/codespine/analysis/crossmodule.py +0 -0
  18. {codespine-1.0.8 → codespine-1.0.9}/codespine/analysis/deadcode.py +0 -0
  19. {codespine-1.0.8 → codespine-1.0.9}/codespine/analysis/flow.py +0 -0
  20. {codespine-1.0.8 → codespine-1.0.9}/codespine/analysis/impact.py +0 -0
  21. {codespine-1.0.8 → codespine-1.0.9}/codespine/cache/__init__.py +0 -0
  22. {codespine-1.0.8 → codespine-1.0.9}/codespine/cache/result_cache.py +0 -0
  23. {codespine-1.0.8 → codespine-1.0.9}/codespine/db/__init__.py +0 -0
  24. {codespine-1.0.8 → codespine-1.0.9}/codespine/db/_cypher_compat.py +0 -0
  25. {codespine-1.0.8 → codespine-1.0.9}/codespine/db/duckdb_store.py +0 -0
  26. {codespine-1.0.8 → codespine-1.0.9}/codespine/db/schema.py +0 -0
  27. {codespine-1.0.8 → codespine-1.0.9}/codespine/db/store.py +0 -0
  28. {codespine-1.0.8 → codespine-1.0.9}/codespine/diff/__init__.py +0 -0
  29. {codespine-1.0.8 → codespine-1.0.9}/codespine/diff/branch_diff.py +0 -0
  30. {codespine-1.0.8 → codespine-1.0.9}/codespine/guide.py +0 -0
  31. {codespine-1.0.8 → codespine-1.0.9}/codespine/indexer/__init__.py +0 -0
  32. {codespine-1.0.8 → codespine-1.0.9}/codespine/indexer/di_resolver.py +0 -0
  33. {codespine-1.0.8 → codespine-1.0.9}/codespine/indexer/java_parser.py +0 -0
  34. {codespine-1.0.8 → codespine-1.0.9}/codespine/indexer/symbol_builder.py +0 -0
  35. {codespine-1.0.8 → codespine-1.0.9}/codespine/mcp/__init__.py +0 -0
  36. {codespine-1.0.8 → codespine-1.0.9}/codespine/noise/__init__.py +0 -0
  37. {codespine-1.0.8 → codespine-1.0.9}/codespine/noise/blocklist.py +0 -0
  38. {codespine-1.0.8 → codespine-1.0.9}/codespine/overlay/__init__.py +0 -0
  39. {codespine-1.0.8 → codespine-1.0.9}/codespine/overlay/git_state.py +0 -0
  40. {codespine-1.0.8 → codespine-1.0.9}/codespine/overlay/merge.py +0 -0
  41. {codespine-1.0.8 → codespine-1.0.9}/codespine/overlay/store.py +0 -0
  42. {codespine-1.0.8 → codespine-1.0.9}/codespine/search/__init__.py +0 -0
  43. {codespine-1.0.8 → codespine-1.0.9}/codespine/search/bm25.py +0 -0
  44. {codespine-1.0.8 → codespine-1.0.9}/codespine/search/fuzzy.py +0 -0
  45. {codespine-1.0.8 → codespine-1.0.9}/codespine/search/hybrid.py +0 -0
  46. {codespine-1.0.8 → codespine-1.0.9}/codespine/search/rrf.py +0 -0
  47. {codespine-1.0.8 → codespine-1.0.9}/codespine/search/vector.py +0 -0
  48. {codespine-1.0.8 → codespine-1.0.9}/codespine/sharding/__init__.py +0 -0
  49. {codespine-1.0.8 → codespine-1.0.9}/codespine/sharding/router.py +0 -0
  50. {codespine-1.0.8 → codespine-1.0.9}/codespine/sharding/store.py +0 -0
  51. {codespine-1.0.8 → codespine-1.0.9}/codespine/watch/__init__.py +0 -0
  52. {codespine-1.0.8 → codespine-1.0.9}/codespine/watch/git_hook.py +0 -0
  53. {codespine-1.0.8 → codespine-1.0.9}/codespine/watch/watcher.py +0 -0
  54. {codespine-1.0.8 → codespine-1.0.9}/codespine.egg-info/SOURCES.txt +0 -0
  55. {codespine-1.0.8 → codespine-1.0.9}/codespine.egg-info/dependency_links.txt +0 -0
  56. {codespine-1.0.8 → codespine-1.0.9}/codespine.egg-info/entry_points.txt +0 -0
  57. {codespine-1.0.8 → codespine-1.0.9}/codespine.egg-info/requires.txt +0 -0
  58. {codespine-1.0.8 → codespine-1.0.9}/codespine.egg-info/top_level.txt +0 -0
  59. {codespine-1.0.8 → codespine-1.0.9}/gindex.py +0 -0
  60. {codespine-1.0.8 → codespine-1.0.9}/setup.cfg +0 -0
  61. {codespine-1.0.8 → codespine-1.0.9}/tests/test_branch_diff_normalize.py +0 -0
  62. {codespine-1.0.8 → codespine-1.0.9}/tests/test_community_detection.py +0 -0
  63. {codespine-1.0.8 → codespine-1.0.9}/tests/test_cypher_compat.py +0 -0
  64. {codespine-1.0.8 → codespine-1.0.9}/tests/test_deadcode.py +0 -0
  65. {codespine-1.0.8 → codespine-1.0.9}/tests/test_duckdb_store.py +0 -0
  66. {codespine-1.0.8 → codespine-1.0.9}/tests/test_index_and_hybrid.py +0 -0
  67. {codespine-1.0.8 → codespine-1.0.9}/tests/test_java_parser.py +0 -0
  68. {codespine-1.0.8 → codespine-1.0.9}/tests/test_multimodule_index.py +0 -0
  69. {codespine-1.0.8 → codespine-1.0.9}/tests/test_overlay.py +0 -0
  70. {codespine-1.0.8 → codespine-1.0.9}/tests/test_parse_resilience.py +0 -0
  71. {codespine-1.0.8 → codespine-1.0.9}/tests/test_result_cache.py +0 -0
  72. {codespine-1.0.8 → codespine-1.0.9}/tests/test_search_ranking.py +0 -0
  73. {codespine-1.0.8 → codespine-1.0.9}/tests/test_sharding.py +0 -0
  74. {codespine-1.0.8 → codespine-1.0.9}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 1.0.8
3
+ Version: 1.0.9
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -124,8 +124,7 @@ Downloads and caches the embedding model. Only needed once. After this, `--embed
124
124
  codespine analyse /path/to/java-project
125
125
 
126
126
  # 2. (Optional) Run the expensive deep passes: communities, flows, dead code, coupling
127
- # Auto-enabled for repos with ≤ 3,000 files; use --deep to force on larger repos.
128
- codespine analyse /path/to/java-project --deep
127
+ codespine analyse /path/to/java-project --complete --deep
129
128
 
130
129
  # 3. (Optional) Add semantic embeddings for concept-level search
131
130
  codespine analyse /path/to/java-project --embed
@@ -313,8 +312,9 @@ Higher-level tools designed to answer full agent questions in a single call, wit
313
312
  # Indexing
314
313
  codespine analyse <path> # incremental index (default)
315
314
  codespine analyse <path> --full # full re-index from scratch
316
- codespine analyse <path> --deep # + communities, flows, dead code, coupling
317
- codespine analyse <path> --incremental-deep # incremental index + force deep passes
315
+ codespine analyse <path> --budget 90 # fast index with a resolver deadline
316
+ codespine analyse <path> --complete --deep # + communities, flows, dead code, coupling
317
+ codespine analyse <path> --complete --incremental-deep
318
318
  codespine analyse <path> --embed # + vector embeddings
319
319
 
320
320
  # Live watch
@@ -360,7 +360,7 @@ codespine force-reset # emergency: delete all data files
360
360
 
361
361
  `analyse` defaults to incremental mode. Repeat runs only process changed files and are fast.
362
362
 
363
- Deep analysis (`--deep`) now runs automatically for repos with ≤ 3,000 files. For larger repos, pass `--deep` explicitly. Use `--incremental-deep` when you want a fast file-only update but still want communities, flows, dead code, and coupling refreshed.
363
+ `analyse` runs in fast mode by default: it indexes the core graph, publishes that read replica from a detached process, then continues communities, flows, dead code, coupling, and cross-module enrichment in the background. Use `--complete --deep` when you want those passes refreshed before the command returns.
364
364
 
365
365
  ---
366
366
 
@@ -546,12 +546,12 @@ The deep analysis phase covers four passes that are expensive but optional:
546
546
  | Dead code | Finds methods with no callers (Java-aware exemptions) | Cleanup audits |
547
547
  | Change coupling | Analyses git history for co-changed file pairs | `get_change_coupling`, `related` |
548
548
 
549
- **Auto-threshold:** deep analysis runs automatically when the project has ≤ 3,000 Java files. Larger repos get lightweight flow/dead-code passes; full deep analysis requires `--deep`.
549
+ **Fast default:** `codespine analyse` prioritizes a queryable core index. Communities, flows, dead-code, git coupling, and cross-module links are queued in a detached background enrichment job unless you use `--complete`.
550
550
 
551
- **Incremental deep:** `--incremental-deep` combines incremental file indexing with a forced full deep pass — useful after large refactors where you want the call graph refreshed quickly but also want updated communities and coupling.
551
+ **Complete deep:** `--complete --deep` runs the expensive enrichment passes before returning. `--complete --incremental-deep` combines incremental file indexing with a forced full deep pass.
552
552
 
553
553
  ```bash
554
- codespine analyse . --incremental-deep
554
+ codespine analyse . --complete --incremental-deep
555
555
  ```
556
556
 
557
557
  **Embeddings** (`--embed`) are independent of deep analysis. Without them, BM25 + fuzzy search still works. Add embeddings when you need concept-level retrieval ("find retry logic", "find payment processing").
@@ -59,8 +59,7 @@ Downloads and caches the embedding model. Only needed once. After this, `--embed
59
59
  codespine analyse /path/to/java-project
60
60
 
61
61
  # 2. (Optional) Run the expensive deep passes: communities, flows, dead code, coupling
62
- # Auto-enabled for repos with ≤ 3,000 files; use --deep to force on larger repos.
63
- codespine analyse /path/to/java-project --deep
62
+ codespine analyse /path/to/java-project --complete --deep
64
63
 
65
64
  # 3. (Optional) Add semantic embeddings for concept-level search
66
65
  codespine analyse /path/to/java-project --embed
@@ -248,8 +247,9 @@ Higher-level tools designed to answer full agent questions in a single call, wit
248
247
  # Indexing
249
248
  codespine analyse <path> # incremental index (default)
250
249
  codespine analyse <path> --full # full re-index from scratch
251
- codespine analyse <path> --deep # + communities, flows, dead code, coupling
252
- codespine analyse <path> --incremental-deep # incremental index + force deep passes
250
+ codespine analyse <path> --budget 90 # fast index with a resolver deadline
251
+ codespine analyse <path> --complete --deep # + communities, flows, dead code, coupling
252
+ codespine analyse <path> --complete --incremental-deep
253
253
  codespine analyse <path> --embed # + vector embeddings
254
254
 
255
255
  # Live watch
@@ -295,7 +295,7 @@ codespine force-reset # emergency: delete all data files
295
295
 
296
296
  `analyse` defaults to incremental mode. Repeat runs only process changed files and are fast.
297
297
 
298
- Deep analysis (`--deep`) now runs automatically for repos with ≤ 3,000 files. For larger repos, pass `--deep` explicitly. Use `--incremental-deep` when you want a fast file-only update but still want communities, flows, dead code, and coupling refreshed.
298
+ `analyse` runs in fast mode by default: it indexes the core graph, publishes that read replica from a detached process, then continues communities, flows, dead code, coupling, and cross-module enrichment in the background. Use `--complete --deep` when you want those passes refreshed before the command returns.
299
299
 
300
300
  ---
301
301
 
@@ -481,12 +481,12 @@ The deep analysis phase covers four passes that are expensive but optional:
481
481
  | Dead code | Finds methods with no callers (Java-aware exemptions) | Cleanup audits |
482
482
  | Change coupling | Analyses git history for co-changed file pairs | `get_change_coupling`, `related` |
483
483
 
484
- **Auto-threshold:** deep analysis runs automatically when the project has ≤ 3,000 Java files. Larger repos get lightweight flow/dead-code passes; full deep analysis requires `--deep`.
484
+ **Fast default:** `codespine analyse` prioritizes a queryable core index. Communities, flows, dead-code, git coupling, and cross-module links are queued in a detached background enrichment job unless you use `--complete`.
485
485
 
486
- **Incremental deep:** `--incremental-deep` combines incremental file indexing with a forced full deep pass — useful after large refactors where you want the call graph refreshed quickly but also want updated communities and coupling.
486
+ **Complete deep:** `--complete --deep` runs the expensive enrichment passes before returning. `--complete --incremental-deep` combines incremental file indexing with a forced full deep pass.
487
487
 
488
488
  ```bash
489
- codespine analyse . --incremental-deep
489
+ codespine analyse . --complete --incremental-deep
490
490
  ```
491
491
 
492
492
  **Embeddings** (`--embed`) are independent of deep analysis. Without them, BM25 + fuzzy search still works. Add embeddings when you need concept-level retrieval ("find retry logic", "find payment processing").
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "1.0.8"
4
+ __version__ = "1.0.9"
@@ -66,6 +66,24 @@ def _open_store(read_only: bool = True) -> ShardedGraphStore:
66
66
  return ShardedGraphStore(read_only=read_only)
67
67
 
68
68
 
69
+ def _spawn_background_enrichment(path: str) -> bool:
70
+ """Publish the fast index, then enrich it in a detached process."""
71
+ try:
72
+ subprocess.Popen(
73
+ [sys.executable, "-m", "codespine.cli", "enrich-background", path],
74
+ stdin=subprocess.DEVNULL,
75
+ stdout=subprocess.DEVNULL,
76
+ stderr=subprocess.DEVNULL,
77
+ start_new_session=True,
78
+ cwd=os.getcwd(),
79
+ env=os.environ.copy(),
80
+ )
81
+ return True
82
+ except Exception as exc: # noqa: BLE001
83
+ LOGGER.warning("Unable to spawn background enrichment: %s", exc)
84
+ return False
85
+
86
+
69
87
  def _db_size_bytes(path: str) -> int:
70
88
  if os.path.isfile(path):
71
89
  return os.path.getsize(path)
@@ -110,6 +128,7 @@ def _index_shard_group(
110
128
  sg,
111
129
  full: bool,
112
130
  embed: bool,
131
+ deadline: float | None,
113
132
  output_lock: threading.Lock,
114
133
  parallel: bool,
115
134
  ) -> tuple[int, list, int]:
@@ -381,8 +400,9 @@ def _index_shard_group(
381
400
  call_state["shown"] = False
382
401
  elapsed_s = (now - call_state["started_at"]) if call_state["started_at"] else 0.0
383
402
  n = int(payload.get("calls_resolved", 0))
403
+ suffix = " partial" if payload.get("partial") else ""
384
404
  with output_lock:
385
- _phase(f"{prefix}Tracing calls...", f"{n} calls resolved ({elapsed_s:.1f}s)")
405
+ _phase(f"{prefix}Tracing calls...", f"{n} calls resolved{suffix} ({elapsed_s:.1f}s)")
386
406
  return
387
407
  if event == "resolve_types_start":
388
408
  with output_lock:
@@ -390,14 +410,20 @@ def _index_shard_group(
390
410
  return
391
411
  if event == "resolve_types_done":
392
412
  n = int(payload.get("type_relationships", 0))
413
+ suffix = " partial" if payload.get("partial") else ""
393
414
  with output_lock:
394
- _phase(f"{prefix}Analyzing types...", f"{n} type relationships")
415
+ _phase(f"{prefix}Analyzing types...", f"{n} type relationships{suffix}")
395
416
  return
396
417
 
397
418
  shard_store = sg.shard(project_id)
398
419
  indexer = JavaIndexer(shard_store)
399
420
  result = indexer.index_project(
400
- mod_path, full=full, progress=_progress, project_id=project_id, embed=embed
421
+ mod_path,
422
+ full=full,
423
+ progress=_progress,
424
+ project_id=project_id,
425
+ embed=embed,
426
+ deadline=deadline,
401
427
  )
402
428
  results.append(result)
403
429
  total_files += result.files_found
@@ -466,7 +492,21 @@ def main() -> None:
466
492
  @main.command()
467
493
  @click.argument("path", type=click.Path(exists=True))
468
494
  @click.option("--full/--incremental", default=False, show_default=True)
469
- @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses (auto-on for repos ≤3 k files).")
495
+ @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses when used with --complete.")
496
+ @click.option(
497
+ "--fast/--complete",
498
+ default=True,
499
+ show_default=True,
500
+ help="Fast mode returns after the core index is queryable; complete mode runs enrichment in the foreground.",
501
+ )
502
+ @click.option(
503
+ "--budget",
504
+ "budget_seconds",
505
+ default=90.0,
506
+ show_default=True,
507
+ type=float,
508
+ help="Foreground time budget in seconds for fast mode; use 0 to disable the resolver deadline.",
509
+ )
470
510
  @click.option(
471
511
  "--incremental-deep",
472
512
  is_flag=True,
@@ -475,17 +515,25 @@ def main() -> None:
475
515
  )
476
516
  @click.option(
477
517
  "--embed/--no-embed",
478
- default=True,
518
+ default=False,
479
519
  show_default=True,
480
- help="Generate vector embeddings. Uses sentence-transformers if installed (pip install codespine[ml]), otherwise falls back to hash-based vectors.",
520
+ help="Generate vector embeddings. Off by default so analyse stays fast; rerun with --embed when semantic vectors are needed.",
481
521
  )
482
522
  @click.option("--allow-running", is_flag=True, hidden=True, help="Skip MCP running check (used by MCP analyse_project tool).")
483
- def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bool, allow_running: bool) -> None:
523
+ def analyse(
524
+ path: str,
525
+ full: bool,
526
+ deep: bool,
527
+ fast: bool,
528
+ budget_seconds: float,
529
+ incremental_deep: bool,
530
+ embed: bool,
531
+ allow_running: bool,
532
+ ) -> None:
484
533
  """Index a local Java project (auto-detects workspace / Maven / Gradle layout).
485
534
 
486
- Embeddings are generated by default. If sentence-transformers is installed
487
- (pip install codespine[ml]), high-quality semantic vectors are used; otherwise
488
- a fast hash-based fallback provides basic vector search.
535
+ Fast mode indexes the core Java graph and returns quickly. Use --complete
536
+ for foreground communities, flows, dead-code, and git-coupling enrichment.
489
537
  """
490
538
  if not allow_running and _is_running():
491
539
  click.secho("Stop MCP first ('codespine stop') to index.", fg="yellow")
@@ -494,6 +542,18 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
494
542
  started = time.perf_counter()
495
543
  abs_path = os.path.abspath(path)
496
544
 
545
+ if fast and (deep or incremental_deep):
546
+ click.secho(
547
+ "Fast mode runs deep analysis in the background. Use --complete --deep to wait for it.",
548
+ fg="yellow",
549
+ )
550
+
551
+ budget_deadline = (
552
+ started + budget_seconds
553
+ if fast and budget_seconds and budget_seconds > 0
554
+ else None
555
+ )
556
+
497
557
  # Warn about hash fallback early so users know to install [ml]
498
558
  if embed:
499
559
  from codespine.search.vector import _load_model
@@ -610,7 +670,7 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
610
670
  for s_idx, group in shard_groups.items():
611
671
  f = ex.submit(
612
672
  _index_shard_group,
613
- s_idx, group, sg, full, embed, output_lock, True,
673
+ s_idx, group, sg, full, embed, budget_deadline, output_lock, True,
614
674
  )
615
675
  futures_map[f] = s_idx
616
676
 
@@ -632,7 +692,7 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
632
692
  only_shard_idx = next(iter(shard_groups))
633
693
  only_group = shard_groups[only_shard_idx]
634
694
  _, all_results, total_files_found = _index_shard_group(
635
- only_shard_idx, only_group, sg, full, embed, output_lock, False,
695
+ only_shard_idx, only_group, sg, full, embed, budget_deadline, output_lock, False,
636
696
  )
637
697
  if all_results:
638
698
  last_result = all_results[-1]
@@ -652,7 +712,9 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
652
712
  root_shard_store = sg.shard(root_project_id)
653
713
 
654
714
  # ── Cross-module call linking ──────────────────────────────────────
655
- if is_multi and len(modules_with_ids) > 1:
715
+ if fast and is_multi and len(modules_with_ids) > 1:
716
+ _phase("Cross-module linking...", "skipped (fast mode; use --complete)")
717
+ elif is_multi and len(modules_with_ids) > 1:
656
718
  xmod_label = "Cross-module linking..."
657
719
  _live_phase(xmod_label, "running")
658
720
  xmod_pids = [pid for _, pid in modules_with_ids]
@@ -669,7 +731,7 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
669
731
  dead: list[dict] = []
670
732
  coupling_pairs: list[dict] = []
671
733
 
672
- should_run_deep = deep or incremental_deep or total_files_found <= 3000
734
+ should_run_deep = (not fast) and (deep or incremental_deep or total_files_found <= 3000)
673
735
  if should_run_deep:
674
736
  comm_label = "Detecting communities..."
675
737
  _live_phase(comm_label, "running")
@@ -707,6 +769,11 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
707
769
  progress=lambda s: _live_phase(coup_label, s),
708
770
  )
709
771
  _finish_phase(coup_label, f"{len(coupling_pairs)} coupled file pairs")
772
+ elif fast:
773
+ _phase("Detecting communities...", "queued in background")
774
+ _phase("Detecting execution flows...", "queued in background")
775
+ _phase("Finding dead code...", "queued in background")
776
+ _phase("Analyzing git history...", "queued in background")
710
777
  else:
711
778
  # Run lightweight versions of flow tracing and dead code from the call
712
779
  # graph already built — no community detection or coupling (those are
@@ -768,30 +835,121 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
768
835
  fg="green",
769
836
  )
770
837
 
771
- # Detect unresolved imports → hint about unindexed sibling projects
772
- try:
773
- unresolved = JavaIndexer.detect_unresolved_imports(root_shard_store)
774
- if unresolved:
775
- click.echo()
776
- click.secho("⚠ Unresolved imports — consider indexing these projects:", fg="yellow")
777
- for pkg, samples in sorted(unresolved.items())[:8]:
778
- click.echo(f" {pkg} (e.g. {samples[0]})")
779
- except Exception:
780
- pass # best-effort
838
+ # Detect unresolved imports → hint about unindexed sibling projects.
839
+ # This is useful, but it is still another global query, so fast mode leaves
840
+ # it out of the foreground path.
841
+ if not fast:
842
+ try:
843
+ unresolved = JavaIndexer.detect_unresolved_imports(root_shard_store)
844
+ if unresolved:
845
+ click.echo()
846
+ click.secho("⚠ Unresolved imports — consider indexing these projects:", fg="yellow")
847
+ for pkg, samples in sorted(unresolved.items())[:8]:
848
+ click.echo(f" {pkg} (e.g. {samples[0]})")
849
+ except Exception:
850
+ pass # best-effort
781
851
 
782
852
  # Publish a read replica so MCP and read-only CLI commands (search, stats…)
783
853
  # run against an isolated snapshot rather than competing with the write
784
854
  # process's buffer pool. Snapshot all open shards concurrently.
785
855
  snap_label = "Publishing read replica..."
786
- _live_phase(snap_label, "copying")
787
- root_shard_store._recycle_conn()
788
- sg.snapshot_all(background=False)
789
- _finish_phase(snap_label, "MCP will reload automatically")
856
+ for store in sg.open_shards():
857
+ recycle = getattr(store, "_recycle_conn", None)
858
+ if callable(recycle):
859
+ recycle()
860
+ if fast and _spawn_background_enrichment(abs_path):
861
+ _phase(snap_label, "core snapshot now; enrichment continues in background")
862
+ else:
863
+ _live_phase(snap_label, "copying")
864
+ sg.snapshot_all(background=False)
865
+ _finish_phase(snap_label, "MCP will reload automatically")
790
866
 
791
867
  # Restore original SIGINT handler now that we've finished cleanly.
792
868
  signal.signal(signal.SIGINT, _old_sigint_handler)
793
869
 
794
870
 
871
+ @main.command("publish-snapshot", hidden=True)
872
+ def publish_snapshot() -> None:
873
+ """Publish sharded read replicas for a recently completed analyse run."""
874
+ sg = ShardedGraphStore(read_only=False)
875
+ sg.snapshot_all(background=False)
876
+
877
+
878
+ @main.command("enrich-background", hidden=True)
879
+ @click.argument("path", type=click.Path(exists=True))
880
+ def enrich_background(path: str) -> None:
881
+ """Run expensive post-index graph enrichment outside the analyse foreground."""
882
+ abs_path = os.path.abspath(path)
883
+ LOGGER.info("Background enrichment starting for %s", abs_path)
884
+
885
+ project_roots = JavaIndexer.detect_projects_in_workspace(abs_path)
886
+ modules_with_ids: list[tuple[str, str]] = []
887
+ for proj_root in project_roots:
888
+ proj_name = os.path.basename(proj_root)
889
+ module_dirs = JavaIndexer.detect_modules(proj_root)
890
+ is_multi_module = not (len(module_dirs) == 1 and module_dirs[0] == proj_root)
891
+ if is_multi_module:
892
+ for m in module_dirs:
893
+ modules_with_ids.append((m, f"{proj_name}::{os.path.basename(m)}"))
894
+ else:
895
+ modules_with_ids.append((proj_root, proj_name))
896
+
897
+ root_basename = os.path.basename(abs_path)
898
+ root_project_id = modules_with_ids[-1][1] if modules_with_ids else root_basename
899
+ is_multi = len(modules_with_ids) > 1
900
+ xmod_pids = [pid for _, pid in modules_with_ids]
901
+
902
+ sg = ShardedGraphStore(read_only=False)
903
+ root_shard_store = sg.shard(root_project_id)
904
+
905
+ try:
906
+ # Publish the fast core graph first so MCP/search can use it while the
907
+ # more expensive enrichment keeps working.
908
+ sg.snapshot_all(background=False)
909
+
910
+ if is_multi and len(xmod_pids) > 1:
911
+ xmod_edges = link_cross_module_calls(
912
+ root_shard_store,
913
+ project_ids=xmod_pids,
914
+ progress=lambda s: LOGGER.info("Cross-module linking: %s", s),
915
+ )
916
+ LOGGER.info("Background cross-module linking wrote %d edges", xmod_edges)
917
+
918
+ communities = detect_communities(
919
+ root_shard_store,
920
+ progress=lambda s: LOGGER.info("Community detection: %s", s),
921
+ )
922
+ LOGGER.info("Background community detection found %d clusters", len(communities))
923
+
924
+ flows = trace_execution_flows(
925
+ root_shard_store,
926
+ progress=lambda s: LOGGER.info("Execution flow tracing: %s", s),
927
+ )
928
+ LOGGER.info("Background flow tracing found %d flows", len(flows))
929
+
930
+ dead = detect_dead_code(root_shard_store, limit=500)
931
+ LOGGER.info("Background dead-code scan found %d candidates", _dead_result_count(dead))
932
+
933
+ root_shard_store.clear_coupling()
934
+ coupling_project = root_basename if is_multi else root_project_id
935
+ coupling_pairs = compute_coupling(
936
+ root_shard_store,
937
+ abs_path,
938
+ coupling_project,
939
+ days=SETTINGS.default_coupling_days,
940
+ min_strength=SETTINGS.default_min_coupling_strength,
941
+ min_cochanges=SETTINGS.default_min_cochanges,
942
+ progress=lambda s: LOGGER.info("Git coupling: %s", s),
943
+ )
944
+ LOGGER.info("Background coupling analysis found %d pairs", len(coupling_pairs))
945
+
946
+ sg.snapshot_all(background=False)
947
+ LOGGER.info("Background enrichment finished for %s", abs_path)
948
+ except Exception as exc: # noqa: BLE001
949
+ LOGGER.exception("Background enrichment failed for %s: %s", abs_path, exc)
950
+ raise
951
+
952
+
795
953
  @main.command()
796
954
  @click.argument("query")
797
955
  @click.option("--k", default=20, show_default=True, type=int)
@@ -29,8 +29,10 @@ class Settings:
29
29
  rrf_k: int = 60
30
30
  semantic_candidate_pool: int = 2000
31
31
  write_batch_size: int = 500
32
- index_file_batch_size: int = 20
33
- edge_write_batch_size: int = 500
32
+ index_file_batch_size: int = 200
33
+ index_method_batch_size: int = 2000
34
+ index_symbol_batch_size: int = 2000
35
+ edge_write_batch_size: int = 5000
34
36
  default_coupling_days: int = 5
35
37
  default_min_coupling_strength: float = 0.3
36
38
  default_min_cochanges: int = 3
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import time
3
4
  from collections import defaultdict
4
5
  from typing import Iterator
5
6
 
@@ -58,6 +59,7 @@ def resolve_calls(
58
59
  class_catalog: dict[str, list[str]],
59
60
  *,
60
61
  scan_counter: list[int] | None = None,
62
+ deadline: float | None = None,
61
63
  ) -> Iterator[tuple[str, str, float, str]]:
62
64
  """Resolve call names to known method ids.
63
65
 
@@ -84,6 +86,8 @@ def resolve_calls(
84
86
  class_method_index_by_fqcn[class_fqcn][key].append(method_id)
85
87
 
86
88
  for source_id, call_sites in calls.items():
89
+ if deadline is not None and time.perf_counter() >= deadline:
90
+ return
87
91
  if scan_counter is not None:
88
92
  scan_counter[0] += 1
89
93
  src_meta = method_catalog.get(source_id, {})
@@ -94,6 +98,8 @@ def resolve_calls(
94
98
  field_types = src_ctx.get("field_types", {}) or {}
95
99
 
96
100
  for call in call_sites:
101
+ if deadline is not None and time.perf_counter() >= deadline:
102
+ return
97
103
  call_name = call.name
98
104
 
99
105
  key = (call_name, int(call.arg_count))
@@ -190,6 +190,7 @@ class JavaIndexer:
190
190
  progress: Callable[[str, dict], None] | None = None,
191
191
  project_id: str | None = None,
192
192
  embed: bool = True,
193
+ deadline: float | None = None,
193
194
  ) -> IndexResult:
194
195
  root_path = os.path.abspath(root_path)
195
196
  if project_id is None:
@@ -651,13 +652,13 @@ class JavaIndexer:
651
652
  with self.store.transaction():
652
653
  self.store.upsert_classes_batch(class_rows)
653
654
  self.store._recycle_conn()
654
- _METHOD_SUB_BATCH = 200
655
+ _METHOD_SUB_BATCH = max(1, int(getattr(SETTINGS, "index_method_batch_size", 2000)))
655
656
  _db_phase_holder[0] = "writing methods"
656
657
  for method_sub in self._chunked(method_rows, _METHOD_SUB_BATCH):
657
658
  with self.store.transaction():
658
659
  self.store.upsert_methods_batch(method_sub)
659
660
  self.store._recycle_conn()
660
- _SYMBOL_SUB_BATCH = 200
661
+ _SYMBOL_SUB_BATCH = max(1, int(getattr(SETTINGS, "index_symbol_batch_size", 2000)))
661
662
  _db_phase_holder[0] = "writing symbols"
662
663
  for symbol_sub in self._chunked(symbol_rows, _SYMBOL_SUB_BATCH):
663
664
  with self.store.transaction():
@@ -686,6 +687,9 @@ class JavaIndexer:
686
687
  elapsed=time.perf_counter() - _db_start,
687
688
  )
688
689
 
690
+ def _deadline_expired() -> bool:
691
+ return deadline is not None and time.perf_counter() >= deadline
692
+
689
693
  self._emit(progress, "resolve_calls_start")
690
694
 
691
695
  # ── Heartbeat thread ──────────────────────────────────────────────
@@ -693,43 +697,47 @@ class JavaIndexer:
693
697
  # many seconds on large repos with common method names. A daemon
694
698
  # heartbeat thread fires every 2 s so the CLI progress spinner keeps
695
699
  # ticking even during those silent stretches.
696
- _scan_counter: list[int] = [0]
697
- _edges_counter: list[int] = [0]
698
- _hb_stop = threading.Event()
699
- _resolve_start = time.perf_counter()
700
-
701
- def _heartbeat_worker() -> None:
702
- while not _hb_stop.wait(2.0):
703
- self._emit(
704
- progress,
705
- "resolve_calls_heartbeat",
706
- scanned=_scan_counter[0],
707
- edges=_edges_counter[0],
708
- elapsed=time.perf_counter() - _resolve_start,
709
- )
700
+ best_calls: dict[tuple[str, str], tuple[float, str]] = {}
701
+ partial_calls = _deadline_expired()
702
+ if not partial_calls:
703
+ _scan_counter: list[int] = [0]
704
+ _edges_counter: list[int] = [0]
705
+ _hb_stop = threading.Event()
706
+ _resolve_start = time.perf_counter()
707
+
708
+ def _heartbeat_worker() -> None:
709
+ while not _hb_stop.wait(2.0):
710
+ self._emit(
711
+ progress,
712
+ "resolve_calls_heartbeat",
713
+ scanned=_scan_counter[0],
714
+ edges=_edges_counter[0],
715
+ elapsed=time.perf_counter() - _resolve_start,
716
+ )
710
717
 
711
- _hb_thread = threading.Thread(
712
- target=_heartbeat_worker,
713
- daemon=True,
714
- name="codespine-resolver-heartbeat",
715
- )
716
- _hb_thread.start()
718
+ _hb_thread = threading.Thread(
719
+ target=_heartbeat_worker,
720
+ daemon=True,
721
+ name="codespine-resolver-heartbeat",
722
+ )
723
+ _hb_thread.start()
717
724
 
718
- # Deduplicate (src, dst) pairs — the same pair can appear many times
719
- # when a method calls another method at multiple call sites.
720
- # Keep the highest-confidence resolution to avoid N writes per pair.
721
- best_calls: dict[tuple[str, str], tuple[float, str]] = {}
722
- try:
723
- for src, dst, confidence, reason in resolve_calls(
724
- method_catalog, method_calls, method_context, class_catalog,
725
- scan_counter=_scan_counter,
726
- ):
727
- key = (src, dst)
728
- if key not in best_calls or confidence > best_calls[key][0]:
729
- best_calls[key] = (confidence, reason)
730
- finally:
731
- _hb_stop.set()
732
- _hb_thread.join(timeout=3.0)
725
+ # Deduplicate (src, dst) pairs — the same pair can appear many times
726
+ # when a method calls another method at multiple call sites.
727
+ # Keep the highest-confidence resolution to avoid N writes per pair.
728
+ try:
729
+ for src, dst, confidence, reason in resolve_calls(
730
+ method_catalog, method_calls, method_context, class_catalog,
731
+ scan_counter=_scan_counter,
732
+ deadline=deadline,
733
+ ):
734
+ key = (src, dst)
735
+ if key not in best_calls or confidence > best_calls[key][0]:
736
+ best_calls[key] = (confidence, reason)
737
+ partial_calls = _deadline_expired()
738
+ finally:
739
+ _hb_stop.set()
740
+ _hb_thread.join(timeout=3.0)
733
741
 
734
742
  # Stream writes in batches — never hold the full set in RAM.
735
743
  call_buf: list[dict] = []
@@ -751,9 +759,29 @@ class JavaIndexer:
751
759
  self.store.add_calls_batch(call_buf)
752
760
  calls_resolved += len(call_buf)
753
761
  self.store._recycle_conn()
754
- self._emit(progress, "resolve_calls_done", calls_resolved=calls_resolved)
762
+ self._emit(
763
+ progress,
764
+ "resolve_calls_done",
765
+ calls_resolved=calls_resolved,
766
+ partial=partial_calls,
767
+ )
755
768
 
756
769
  self._emit(progress, "resolve_types_start")
770
+ if _deadline_expired():
771
+ self._emit(progress, "resolve_types_done", type_relationships=0, partial=True)
772
+ self._emit(progress, "di_done", injections=0, interface_bindings=0, partial=True)
773
+ self._prune_meta_cache(meta_cache, current_file_ids)
774
+ self._save_file_meta_cache(project_id, meta_cache)
775
+ return IndexResult(
776
+ project_id=project_id,
777
+ files_found=len(current_files),
778
+ files_indexed=files_indexed,
779
+ classes_indexed=classes_indexed,
780
+ methods_indexed=methods_indexed,
781
+ calls_resolved=calls_resolved,
782
+ type_relationships=0,
783
+ embeddings_generated=classes_indexed + methods_indexed if embed else 0,
784
+ )
757
785
  type_rows = self._build_inheritance_edges(
758
786
  class_meta,
759
787
  class_catalog,
@@ -1394,7 +1394,8 @@ def build_mcp_server(store, repo_path_provider):
1394
1394
  Parameters:
1395
1395
  path – Absolute or relative path to the project/workspace to index.
1396
1396
  full – If True, re-index every file even if unchanged (default: incremental).
1397
- deep – If True, also run community detection, flows, and coupling (slower).
1397
+ deep – If True, run complete foreground community, flow, dead-code,
1398
+ and coupling enrichment (slower).
1398
1399
  embed – If True, generate vector embeddings for semantic search (slow when
1399
1400
  sentence-transformers is installed; BM25/fuzzy search works without them).
1400
1401
 
@@ -1422,7 +1423,7 @@ def build_mcp_server(store, repo_path_provider):
1422
1423
  else:
1423
1424
  cmd.append("--incremental")
1424
1425
  if deep:
1425
- cmd.append("--deep")
1426
+ cmd.extend(["--complete", "--deep"])
1426
1427
  if embed:
1427
1428
  cmd.append("--embed")
1428
1429
  else:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 1.0.8
3
+ Version: 1.0.9
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -124,8 +124,7 @@ Downloads and caches the embedding model. Only needed once. After this, `--embed
124
124
  codespine analyse /path/to/java-project
125
125
 
126
126
  # 2. (Optional) Run the expensive deep passes: communities, flows, dead code, coupling
127
- # Auto-enabled for repos with ≤ 3,000 files; use --deep to force on larger repos.
128
- codespine analyse /path/to/java-project --deep
127
+ codespine analyse /path/to/java-project --complete --deep
129
128
 
130
129
  # 3. (Optional) Add semantic embeddings for concept-level search
131
130
  codespine analyse /path/to/java-project --embed
@@ -313,8 +312,9 @@ Higher-level tools designed to answer full agent questions in a single call, wit
313
312
  # Indexing
314
313
  codespine analyse <path> # incremental index (default)
315
314
  codespine analyse <path> --full # full re-index from scratch
316
- codespine analyse <path> --deep # + communities, flows, dead code, coupling
317
- codespine analyse <path> --incremental-deep # incremental index + force deep passes
315
+ codespine analyse <path> --budget 90 # fast index with a resolver deadline
316
+ codespine analyse <path> --complete --deep # + communities, flows, dead code, coupling
317
+ codespine analyse <path> --complete --incremental-deep
318
318
  codespine analyse <path> --embed # + vector embeddings
319
319
 
320
320
  # Live watch
@@ -360,7 +360,7 @@ codespine force-reset # emergency: delete all data files
360
360
 
361
361
  `analyse` defaults to incremental mode. Repeat runs only process changed files and are fast.
362
362
 
363
- Deep analysis (`--deep`) now runs automatically for repos with 3,000 files. For larger repos, pass `--deep` explicitly. Use `--incremental-deep` when you want a fast file-only update but still want communities, flows, dead code, and coupling refreshed.
363
+ `analyse` runs in fast mode by default: it indexes the core graph, publishes that read replica from a detached process, then continues communities, flows, dead code, coupling, and cross-module enrichment in the background. Use `--complete --deep` when you want those passes refreshed before the command returns.
364
364
 
365
365
  ---
366
366
 
@@ -546,12 +546,12 @@ The deep analysis phase covers four passes that are expensive but optional:
546
546
  | Dead code | Finds methods with no callers (Java-aware exemptions) | Cleanup audits |
547
547
  | Change coupling | Analyses git history for co-changed file pairs | `get_change_coupling`, `related` |
548
548
 
549
- **Auto-threshold:** deep analysis runs automatically when the project has 3,000 Java files. Larger repos get lightweight flow/dead-code passes; full deep analysis requires `--deep`.
549
+ **Fast default:** `codespine analyse` prioritizes a queryable core index. Communities, flows, dead-code, git coupling, and cross-module links are queued in a detached background enrichment job unless you use `--complete`.
550
550
 
551
- **Incremental deep:** `--incremental-deep` combines incremental file indexing with a forced full deep pass — useful after large refactors where you want the call graph refreshed quickly but also want updated communities and coupling.
551
+ **Complete deep:** `--complete --deep` runs the expensive enrichment passes before returning. `--complete --incremental-deep` combines incremental file indexing with a forced full deep pass.
552
552
 
553
553
  ```bash
554
- codespine analyse . --incremental-deep
554
+ codespine analyse . --complete --incremental-deep
555
555
  ```
556
556
 
557
557
  **Embeddings** (`--embed`) are independent of deep analysis. Without them, BM25 + fuzzy search still works. Add embeddings when you need concept-level retrieval ("find retry logic", "find payment processing").
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "codespine"
7
- version = "1.0.8"
7
+ version = "1.0.9"
8
8
  description = "Local Java code intelligence indexer backed by a graph database"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -1,3 +1,4 @@
1
+ import time
1
2
  from types import SimpleNamespace
2
3
 
3
4
  from codespine.indexer.call_resolver import resolve_calls
@@ -41,3 +42,36 @@ def test_resolver_prefers_receiver_type_and_arity():
41
42
  out = list(resolve_calls(method_catalog, calls, method_context, class_catalog))
42
43
  assert ("src", "m1", 1.0, "receiver_this_exact") in out
43
44
  assert ("src", "m3", 0.8, "receiver_method_match") in out
45
+
46
+
47
+ def test_resolver_stops_at_deadline():
48
+ method_catalog = {
49
+ "src": {
50
+ "name": "entry",
51
+ "param_count": 0,
52
+ "class_id": "c_service",
53
+ "class_fqcn": "com.example.Service",
54
+ "signature": "entry()",
55
+ },
56
+ "m1": {
57
+ "name": "run",
58
+ "param_count": 0,
59
+ "class_id": "c_service",
60
+ "class_fqcn": "com.example.Service",
61
+ "signature": "run()",
62
+ },
63
+ }
64
+ calls = {"src": [SimpleNamespace(name="run", receiver="this", arg_count=0)]}
65
+ method_context = {"src": {"class_id": "c_service", "class_fqcn": "com.example.Service"}}
66
+
67
+ out = list(
68
+ resolve_calls(
69
+ method_catalog,
70
+ calls,
71
+ method_context,
72
+ {"Service": ["com.example.Service"]},
73
+ deadline=time.perf_counter() - 1,
74
+ )
75
+ )
76
+
77
+ assert out == []
File without changes
File without changes
File without changes
File without changes