codespine 1.0.7__tar.gz → 1.0.9__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. {codespine-1.0.7 → codespine-1.0.9}/PKG-INFO +9 -9
  2. {codespine-1.0.7 → codespine-1.0.9}/README.md +8 -8
  3. {codespine-1.0.7 → codespine-1.0.9}/codespine/__init__.py +1 -1
  4. {codespine-1.0.7 → codespine-1.0.9}/codespine/cli.py +282 -30
  5. {codespine-1.0.7 → codespine-1.0.9}/codespine/config.py +4 -2
  6. {codespine-1.0.7 → codespine-1.0.9}/codespine/indexer/call_resolver.py +6 -0
  7. {codespine-1.0.7 → codespine-1.0.9}/codespine/indexer/engine.py +335 -235
  8. {codespine-1.0.7 → codespine-1.0.9}/codespine/mcp/server.py +3 -2
  9. {codespine-1.0.7 → codespine-1.0.9}/codespine.egg-info/PKG-INFO +9 -9
  10. {codespine-1.0.7 → codespine-1.0.9}/pyproject.toml +1 -1
  11. {codespine-1.0.7 → codespine-1.0.9}/tests/test_call_resolver.py +34 -0
  12. {codespine-1.0.7 → codespine-1.0.9}/tests/test_parse_resilience.py +96 -0
  13. {codespine-1.0.7 → codespine-1.0.9}/LICENSE +0 -0
  14. {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/__init__.py +0 -0
  15. {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/community.py +0 -0
  16. {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/context.py +0 -0
  17. {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/coupling.py +0 -0
  18. {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/crossmodule.py +0 -0
  19. {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/deadcode.py +0 -0
  20. {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/flow.py +0 -0
  21. {codespine-1.0.7 → codespine-1.0.9}/codespine/analysis/impact.py +0 -0
  22. {codespine-1.0.7 → codespine-1.0.9}/codespine/cache/__init__.py +0 -0
  23. {codespine-1.0.7 → codespine-1.0.9}/codespine/cache/result_cache.py +0 -0
  24. {codespine-1.0.7 → codespine-1.0.9}/codespine/db/__init__.py +0 -0
  25. {codespine-1.0.7 → codespine-1.0.9}/codespine/db/_cypher_compat.py +0 -0
  26. {codespine-1.0.7 → codespine-1.0.9}/codespine/db/duckdb_store.py +0 -0
  27. {codespine-1.0.7 → codespine-1.0.9}/codespine/db/schema.py +0 -0
  28. {codespine-1.0.7 → codespine-1.0.9}/codespine/db/store.py +0 -0
  29. {codespine-1.0.7 → codespine-1.0.9}/codespine/diff/__init__.py +0 -0
  30. {codespine-1.0.7 → codespine-1.0.9}/codespine/diff/branch_diff.py +0 -0
  31. {codespine-1.0.7 → codespine-1.0.9}/codespine/guide.py +0 -0
  32. {codespine-1.0.7 → codespine-1.0.9}/codespine/indexer/__init__.py +0 -0
  33. {codespine-1.0.7 → codespine-1.0.9}/codespine/indexer/di_resolver.py +0 -0
  34. {codespine-1.0.7 → codespine-1.0.9}/codespine/indexer/java_parser.py +0 -0
  35. {codespine-1.0.7 → codespine-1.0.9}/codespine/indexer/symbol_builder.py +0 -0
  36. {codespine-1.0.7 → codespine-1.0.9}/codespine/mcp/__init__.py +0 -0
  37. {codespine-1.0.7 → codespine-1.0.9}/codespine/noise/__init__.py +0 -0
  38. {codespine-1.0.7 → codespine-1.0.9}/codespine/noise/blocklist.py +0 -0
  39. {codespine-1.0.7 → codespine-1.0.9}/codespine/overlay/__init__.py +0 -0
  40. {codespine-1.0.7 → codespine-1.0.9}/codespine/overlay/git_state.py +0 -0
  41. {codespine-1.0.7 → codespine-1.0.9}/codespine/overlay/merge.py +0 -0
  42. {codespine-1.0.7 → codespine-1.0.9}/codespine/overlay/store.py +0 -0
  43. {codespine-1.0.7 → codespine-1.0.9}/codespine/search/__init__.py +0 -0
  44. {codespine-1.0.7 → codespine-1.0.9}/codespine/search/bm25.py +0 -0
  45. {codespine-1.0.7 → codespine-1.0.9}/codespine/search/fuzzy.py +0 -0
  46. {codespine-1.0.7 → codespine-1.0.9}/codespine/search/hybrid.py +0 -0
  47. {codespine-1.0.7 → codespine-1.0.9}/codespine/search/rrf.py +0 -0
  48. {codespine-1.0.7 → codespine-1.0.9}/codespine/search/vector.py +0 -0
  49. {codespine-1.0.7 → codespine-1.0.9}/codespine/sharding/__init__.py +0 -0
  50. {codespine-1.0.7 → codespine-1.0.9}/codespine/sharding/router.py +0 -0
  51. {codespine-1.0.7 → codespine-1.0.9}/codespine/sharding/store.py +0 -0
  52. {codespine-1.0.7 → codespine-1.0.9}/codespine/watch/__init__.py +0 -0
  53. {codespine-1.0.7 → codespine-1.0.9}/codespine/watch/git_hook.py +0 -0
  54. {codespine-1.0.7 → codespine-1.0.9}/codespine/watch/watcher.py +0 -0
  55. {codespine-1.0.7 → codespine-1.0.9}/codespine.egg-info/SOURCES.txt +0 -0
  56. {codespine-1.0.7 → codespine-1.0.9}/codespine.egg-info/dependency_links.txt +0 -0
  57. {codespine-1.0.7 → codespine-1.0.9}/codespine.egg-info/entry_points.txt +0 -0
  58. {codespine-1.0.7 → codespine-1.0.9}/codespine.egg-info/requires.txt +0 -0
  59. {codespine-1.0.7 → codespine-1.0.9}/codespine.egg-info/top_level.txt +0 -0
  60. {codespine-1.0.7 → codespine-1.0.9}/gindex.py +0 -0
  61. {codespine-1.0.7 → codespine-1.0.9}/setup.cfg +0 -0
  62. {codespine-1.0.7 → codespine-1.0.9}/tests/test_branch_diff_normalize.py +0 -0
  63. {codespine-1.0.7 → codespine-1.0.9}/tests/test_community_detection.py +0 -0
  64. {codespine-1.0.7 → codespine-1.0.9}/tests/test_cypher_compat.py +0 -0
  65. {codespine-1.0.7 → codespine-1.0.9}/tests/test_deadcode.py +0 -0
  66. {codespine-1.0.7 → codespine-1.0.9}/tests/test_duckdb_store.py +0 -0
  67. {codespine-1.0.7 → codespine-1.0.9}/tests/test_index_and_hybrid.py +0 -0
  68. {codespine-1.0.7 → codespine-1.0.9}/tests/test_java_parser.py +0 -0
  69. {codespine-1.0.7 → codespine-1.0.9}/tests/test_multimodule_index.py +0 -0
  70. {codespine-1.0.7 → codespine-1.0.9}/tests/test_overlay.py +0 -0
  71. {codespine-1.0.7 → codespine-1.0.9}/tests/test_result_cache.py +0 -0
  72. {codespine-1.0.7 → codespine-1.0.9}/tests/test_search_ranking.py +0 -0
  73. {codespine-1.0.7 → codespine-1.0.9}/tests/test_sharding.py +0 -0
  74. {codespine-1.0.7 → codespine-1.0.9}/tests/test_store_recovery.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: codespine
3
- Version: 1.0.7
3
+ Version: 1.0.9
4
4
  Summary: Local Java code intelligence indexer backed by a graph database
5
5
  Author: CodeSpine contributors
6
6
  License: MIT License
@@ -124,8 +124,7 @@ Downloads and caches the embedding model. Only needed once. After this, `--embed
124
124
  codespine analyse /path/to/java-project
125
125
 
126
126
  # 2. (Optional) Run the expensive deep passes: communities, flows, dead code, coupling
127
- # Auto-enabled for repos with ≤ 3,000 files; use --deep to force on larger repos.
128
- codespine analyse /path/to/java-project --deep
127
+ codespine analyse /path/to/java-project --complete --deep
129
128
 
130
129
  # 3. (Optional) Add semantic embeddings for concept-level search
131
130
  codespine analyse /path/to/java-project --embed
@@ -313,8 +312,9 @@ Higher-level tools designed to answer full agent questions in a single call, wit
313
312
  # Indexing
314
313
  codespine analyse <path> # incremental index (default)
315
314
  codespine analyse <path> --full # full re-index from scratch
316
- codespine analyse <path> --deep # + communities, flows, dead code, coupling
317
- codespine analyse <path> --incremental-deep # incremental index + force deep passes
315
+ codespine analyse <path> --budget 90 # fast index with a resolver deadline
316
+ codespine analyse <path> --complete --deep # + communities, flows, dead code, coupling
317
+ codespine analyse <path> --complete --incremental-deep
318
318
  codespine analyse <path> --embed # + vector embeddings
319
319
 
320
320
  # Live watch
@@ -360,7 +360,7 @@ codespine force-reset # emergency: delete all data files
360
360
 
361
361
  `analyse` defaults to incremental mode. Repeat runs only process changed files and are fast.
362
362
 
363
- Deep analysis (`--deep`) now runs automatically for repos with 3,000 files. For larger repos, pass `--deep` explicitly. Use `--incremental-deep` when you want a fast file-only update but still want communities, flows, dead code, and coupling refreshed.
363
+ `analyse` runs in fast mode by default: it indexes the core graph, publishes that read replica from a detached process, then continues communities, flows, dead code, coupling, and cross-module enrichment in the background. Use `--complete --deep` when you want those passes refreshed before the command returns.
364
364
 
365
365
  ---
366
366
 
@@ -546,12 +546,12 @@ The deep analysis phase covers four passes that are expensive but optional:
546
546
  | Dead code | Finds methods with no callers (Java-aware exemptions) | Cleanup audits |
547
547
  | Change coupling | Analyses git history for co-changed file pairs | `get_change_coupling`, `related` |
548
548
 
549
- **Auto-threshold:** deep analysis runs automatically when the project has 3,000 Java files. Larger repos get lightweight flow/dead-code passes; full deep analysis requires `--deep`.
549
+ **Fast default:** `codespine analyse` prioritizes a queryable core index. Communities, flows, dead-code, git coupling, and cross-module links are queued in a detached background enrichment job unless you use `--complete`.
550
550
 
551
- **Incremental deep:** `--incremental-deep` combines incremental file indexing with a forced full deep pass — useful after large refactors where you want the call graph refreshed quickly but also want updated communities and coupling.
551
+ **Complete deep:** `--complete --deep` runs the expensive enrichment passes before returning. `--complete --incremental-deep` combines incremental file indexing with a forced full deep pass.
552
552
 
553
553
  ```bash
554
- codespine analyse . --incremental-deep
554
+ codespine analyse . --complete --incremental-deep
555
555
  ```
556
556
 
557
557
  **Embeddings** (`--embed`) are independent of deep analysis. Without them, BM25 + fuzzy search still works. Add embeddings when you need concept-level retrieval ("find retry logic", "find payment processing").
@@ -59,8 +59,7 @@ Downloads and caches the embedding model. Only needed once. After this, `--embed
59
59
  codespine analyse /path/to/java-project
60
60
 
61
61
  # 2. (Optional) Run the expensive deep passes: communities, flows, dead code, coupling
62
- # Auto-enabled for repos with ≤ 3,000 files; use --deep to force on larger repos.
63
- codespine analyse /path/to/java-project --deep
62
+ codespine analyse /path/to/java-project --complete --deep
64
63
 
65
64
  # 3. (Optional) Add semantic embeddings for concept-level search
66
65
  codespine analyse /path/to/java-project --embed
@@ -248,8 +247,9 @@ Higher-level tools designed to answer full agent questions in a single call, wit
248
247
  # Indexing
249
248
  codespine analyse <path> # incremental index (default)
250
249
  codespine analyse <path> --full # full re-index from scratch
251
- codespine analyse <path> --deep # + communities, flows, dead code, coupling
252
- codespine analyse <path> --incremental-deep # incremental index + force deep passes
250
+ codespine analyse <path> --budget 90 # fast index with a resolver deadline
251
+ codespine analyse <path> --complete --deep # + communities, flows, dead code, coupling
252
+ codespine analyse <path> --complete --incremental-deep
253
253
  codespine analyse <path> --embed # + vector embeddings
254
254
 
255
255
  # Live watch
@@ -295,7 +295,7 @@ codespine force-reset # emergency: delete all data files
295
295
 
296
296
  `analyse` defaults to incremental mode. Repeat runs only process changed files and are fast.
297
297
 
298
- Deep analysis (`--deep`) now runs automatically for repos with 3,000 files. For larger repos, pass `--deep` explicitly. Use `--incremental-deep` when you want a fast file-only update but still want communities, flows, dead code, and coupling refreshed.
298
+ `analyse` runs in fast mode by default: it indexes the core graph, publishes that read replica from a detached process, then continues communities, flows, dead code, coupling, and cross-module enrichment in the background. Use `--complete --deep` when you want those passes refreshed before the command returns.
299
299
 
300
300
  ---
301
301
 
@@ -481,12 +481,12 @@ The deep analysis phase covers four passes that are expensive but optional:
481
481
  | Dead code | Finds methods with no callers (Java-aware exemptions) | Cleanup audits |
482
482
  | Change coupling | Analyses git history for co-changed file pairs | `get_change_coupling`, `related` |
483
483
 
484
- **Auto-threshold:** deep analysis runs automatically when the project has 3,000 Java files. Larger repos get lightweight flow/dead-code passes; full deep analysis requires `--deep`.
484
+ **Fast default:** `codespine analyse` prioritizes a queryable core index. Communities, flows, dead-code, git coupling, and cross-module links are queued in a detached background enrichment job unless you use `--complete`.
485
485
 
486
- **Incremental deep:** `--incremental-deep` combines incremental file indexing with a forced full deep pass — useful after large refactors where you want the call graph refreshed quickly but also want updated communities and coupling.
486
+ **Complete deep:** `--complete --deep` runs the expensive enrichment passes before returning. `--complete --incremental-deep` combines incremental file indexing with a forced full deep pass.
487
487
 
488
488
  ```bash
489
- codespine analyse . --incremental-deep
489
+ codespine analyse . --complete --incremental-deep
490
490
  ```
491
491
 
492
492
  **Embeddings** (`--embed`) are independent of deep analysis. Without them, BM25 + fuzzy search still works. Add embeddings when you need concept-level retrieval ("find retry logic", "find payment processing").
@@ -1,4 +1,4 @@
1
1
  """CodeSpine package."""
2
2
 
3
3
  __all__ = ["__version__"]
4
- __version__ = "1.0.7"
4
+ __version__ = "1.0.9"
@@ -66,6 +66,24 @@ def _open_store(read_only: bool = True) -> ShardedGraphStore:
66
66
  return ShardedGraphStore(read_only=read_only)
67
67
 
68
68
 
69
+ def _spawn_background_enrichment(path: str) -> bool:
70
+ """Publish the fast index, then enrich it in a detached process."""
71
+ try:
72
+ subprocess.Popen(
73
+ [sys.executable, "-m", "codespine.cli", "enrich-background", path],
74
+ stdin=subprocess.DEVNULL,
75
+ stdout=subprocess.DEVNULL,
76
+ stderr=subprocess.DEVNULL,
77
+ start_new_session=True,
78
+ cwd=os.getcwd(),
79
+ env=os.environ.copy(),
80
+ )
81
+ return True
82
+ except Exception as exc: # noqa: BLE001
83
+ LOGGER.warning("Unable to spawn background enrichment: %s", exc)
84
+ return False
85
+
86
+
69
87
  def _db_size_bytes(path: str) -> int:
70
88
  if os.path.isfile(path):
71
89
  return os.path.getsize(path)
@@ -110,6 +128,7 @@ def _index_shard_group(
110
128
  sg,
111
129
  full: bool,
112
130
  embed: bool,
131
+ deadline: float | None,
113
132
  output_lock: threading.Lock,
114
133
  parallel: bool,
115
134
  ) -> tuple[int, list, int]:
@@ -145,6 +164,10 @@ def _index_shard_group(
145
164
  }
146
165
  call_state: dict = {"shown": False, "count": 0, "last_ts": 0.0,
147
166
  "started_at": 0.0}
167
+ db_state: dict = {
168
+ "shown": False, "done": 0, "total": 0, "last_ts": 0.0,
169
+ "started_at": 0.0,
170
+ }
148
171
 
149
172
  def _progress(event: str, payload: dict) -> None:
150
173
  now = time.perf_counter()
@@ -237,11 +260,101 @@ def _index_shard_group(
237
260
  parse_state["shown"] = True
238
261
  parse_state["last_ts"] = now
239
262
  return
240
- if event in ("resolve_calls_start",):
263
+ if event == "db_write_start":
241
264
  if parse_state["shown"]:
242
265
  with output_lock:
243
266
  click.echo()
244
267
  parse_state["shown"] = False
268
+ total = int(payload.get("total", 0))
269
+ deleted = int(payload.get("deleted_files", 0))
270
+ db_state["done"] = 0
271
+ db_state["total"] = total
272
+ db_state["started_at"] = now
273
+ status = f"starting ({total} files"
274
+ if deleted:
275
+ status += f", {deleted} deleted"
276
+ status += ")"
277
+ with output_lock:
278
+ _phase(f"{prefix}Writing index...", status)
279
+ return
280
+ if event == "db_write_heartbeat":
281
+ done = int(payload.get("done", 0))
282
+ total = int(payload.get("total", 0))
283
+ classes = int(payload.get("classes", 0))
284
+ methods = int(payload.get("methods", 0))
285
+ phase = str(payload.get("phase", "writing"))
286
+ elapsed_s = float(payload.get("elapsed", 0.0))
287
+ db_state["done"] = done
288
+ db_state["total"] = total
289
+ if not parallel:
290
+ click.echo(
291
+ f"\r{_spinner_char()} {prefix}Writing index... "
292
+ f"{_bar(done, total)} {done}/{total} "
293
+ f"{classes} classes / {methods} methods "
294
+ f"{phase[:18]:<18} {elapsed_s:.0f}s ",
295
+ nl=False,
296
+ )
297
+ else:
298
+ with output_lock:
299
+ click.echo(
300
+ f"\r{prefix}Writing {done}/{total} "
301
+ f"({classes} classes, {methods} methods, {elapsed_s:.0f}s) ",
302
+ nl=False,
303
+ )
304
+ db_state["shown"] = True
305
+ db_state["last_ts"] = now
306
+ return
307
+ if event == "db_write_progress":
308
+ done = int(payload.get("done", 0))
309
+ total = int(payload.get("total", 0))
310
+ classes = int(payload.get("classes", 0))
311
+ methods = int(payload.get("methods", 0))
312
+ phase = str(payload.get("phase", "writing"))
313
+ db_state["done"] = done
314
+ db_state["total"] = total
315
+ if total == 0 and done == 0:
316
+ return
317
+ if done == total or (now - db_state["last_ts"]) >= 0.25:
318
+ elapsed_s = now - db_state["started_at"]
319
+ if not parallel:
320
+ click.echo(
321
+ f"\r{_spinner_char()} {prefix}Writing index... "
322
+ f"{_bar(done, total)} {done}/{total} "
323
+ f"{classes} classes / {methods} methods "
324
+ f"{phase[:18]:<18} {elapsed_s:.0f}s ",
325
+ nl=False,
326
+ )
327
+ else:
328
+ with output_lock:
329
+ click.echo(
330
+ f"\r{prefix}Writing {done}/{total} "
331
+ f"({classes} classes, {methods} methods, {elapsed_s:.0f}s) ",
332
+ nl=False,
333
+ )
334
+ db_state["shown"] = True
335
+ db_state["last_ts"] = now
336
+ return
337
+ if event == "db_write_done":
338
+ if db_state["shown"]:
339
+ with output_lock:
340
+ click.echo()
341
+ db_state["shown"] = False
342
+ files = int(payload.get("files_indexed", db_state["done"]))
343
+ classes = int(payload.get("classes", 0))
344
+ methods = int(payload.get("methods", 0))
345
+ elapsed_s = float(payload.get("elapsed", 0.0))
346
+ with output_lock:
347
+ _phase(
348
+ f"{prefix}Writing index...",
349
+ f"{files} files, {classes} classes, {methods} methods ({elapsed_s:.1f}s)",
350
+ )
351
+ return
352
+ if event in ("resolve_calls_start",):
353
+ if parse_state["shown"] or db_state["shown"]:
354
+ with output_lock:
355
+ click.echo()
356
+ parse_state["shown"] = False
357
+ db_state["shown"] = False
245
358
  call_state["started_at"] = now
246
359
  with output_lock:
247
360
  _phase(f"{prefix}Tracing calls...", "starting...")
@@ -287,8 +400,9 @@ def _index_shard_group(
287
400
  call_state["shown"] = False
288
401
  elapsed_s = (now - call_state["started_at"]) if call_state["started_at"] else 0.0
289
402
  n = int(payload.get("calls_resolved", 0))
403
+ suffix = " partial" if payload.get("partial") else ""
290
404
  with output_lock:
291
- _phase(f"{prefix}Tracing calls...", f"{n} calls resolved ({elapsed_s:.1f}s)")
405
+ _phase(f"{prefix}Tracing calls...", f"{n} calls resolved{suffix} ({elapsed_s:.1f}s)")
292
406
  return
293
407
  if event == "resolve_types_start":
294
408
  with output_lock:
@@ -296,20 +410,26 @@ def _index_shard_group(
296
410
  return
297
411
  if event == "resolve_types_done":
298
412
  n = int(payload.get("type_relationships", 0))
413
+ suffix = " partial" if payload.get("partial") else ""
299
414
  with output_lock:
300
- _phase(f"{prefix}Analyzing types...", f"{n} type relationships")
415
+ _phase(f"{prefix}Analyzing types...", f"{n} type relationships{suffix}")
301
416
  return
302
417
 
303
418
  shard_store = sg.shard(project_id)
304
419
  indexer = JavaIndexer(shard_store)
305
420
  result = indexer.index_project(
306
- mod_path, full=full, progress=_progress, project_id=project_id, embed=embed
421
+ mod_path,
422
+ full=full,
423
+ progress=_progress,
424
+ project_id=project_id,
425
+ embed=embed,
426
+ deadline=deadline,
307
427
  )
308
428
  results.append(result)
309
429
  total_files += result.files_found
310
430
 
311
431
  # Flush any dangling progress line.
312
- if parse_state["shown"]:
432
+ if parse_state["shown"] or db_state["shown"]:
313
433
  with output_lock:
314
434
  click.echo()
315
435
 
@@ -372,7 +492,21 @@ def main() -> None:
372
492
  @main.command()
373
493
  @click.argument("path", type=click.Path(exists=True))
374
494
  @click.option("--full/--incremental", default=False, show_default=True)
375
- @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses (auto-on for repos ≤3 k files).")
495
+ @click.option("--deep/--no-deep", default=False, show_default=True, help="Run expensive global analyses when used with --complete.")
496
+ @click.option(
497
+ "--fast/--complete",
498
+ default=True,
499
+ show_default=True,
500
+ help="Fast mode returns after the core index is queryable; complete mode runs enrichment in the foreground.",
501
+ )
502
+ @click.option(
503
+ "--budget",
504
+ "budget_seconds",
505
+ default=90.0,
506
+ show_default=True,
507
+ type=float,
508
+ help="Foreground time budget in seconds for fast mode; use 0 to disable the resolver deadline.",
509
+ )
376
510
  @click.option(
377
511
  "--incremental-deep",
378
512
  is_flag=True,
@@ -381,17 +515,25 @@ def main() -> None:
381
515
  )
382
516
  @click.option(
383
517
  "--embed/--no-embed",
384
- default=True,
518
+ default=False,
385
519
  show_default=True,
386
- help="Generate vector embeddings. Uses sentence-transformers if installed (pip install codespine[ml]), otherwise falls back to hash-based vectors.",
520
+ help="Generate vector embeddings. Off by default so analyse stays fast; rerun with --embed when semantic vectors are needed.",
387
521
  )
388
522
  @click.option("--allow-running", is_flag=True, hidden=True, help="Skip MCP running check (used by MCP analyse_project tool).")
389
- def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bool, allow_running: bool) -> None:
523
+ def analyse(
524
+ path: str,
525
+ full: bool,
526
+ deep: bool,
527
+ fast: bool,
528
+ budget_seconds: float,
529
+ incremental_deep: bool,
530
+ embed: bool,
531
+ allow_running: bool,
532
+ ) -> None:
390
533
  """Index a local Java project (auto-detects workspace / Maven / Gradle layout).
391
534
 
392
- Embeddings are generated by default. If sentence-transformers is installed
393
- (pip install codespine[ml]), high-quality semantic vectors are used; otherwise
394
- a fast hash-based fallback provides basic vector search.
535
+ Fast mode indexes the core Java graph and returns quickly. Use --complete
536
+ for foreground communities, flows, dead-code, and git-coupling enrichment.
395
537
  """
396
538
  if not allow_running and _is_running():
397
539
  click.secho("Stop MCP first ('codespine stop') to index.", fg="yellow")
@@ -400,6 +542,18 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
400
542
  started = time.perf_counter()
401
543
  abs_path = os.path.abspath(path)
402
544
 
545
+ if fast and (deep or incremental_deep):
546
+ click.secho(
547
+ "Fast mode runs deep analysis in the background. Use --complete --deep to wait for it.",
548
+ fg="yellow",
549
+ )
550
+
551
+ budget_deadline = (
552
+ started + budget_seconds
553
+ if fast and budget_seconds and budget_seconds > 0
554
+ else None
555
+ )
556
+
403
557
  # Warn about hash fallback early so users know to install [ml]
404
558
  if embed:
405
559
  from codespine.search.vector import _load_model
@@ -516,7 +670,7 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
516
670
  for s_idx, group in shard_groups.items():
517
671
  f = ex.submit(
518
672
  _index_shard_group,
519
- s_idx, group, sg, full, embed, output_lock, True,
673
+ s_idx, group, sg, full, embed, budget_deadline, output_lock, True,
520
674
  )
521
675
  futures_map[f] = s_idx
522
676
 
@@ -538,7 +692,7 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
538
692
  only_shard_idx = next(iter(shard_groups))
539
693
  only_group = shard_groups[only_shard_idx]
540
694
  _, all_results, total_files_found = _index_shard_group(
541
- only_shard_idx, only_group, sg, full, embed, output_lock, False,
695
+ only_shard_idx, only_group, sg, full, embed, budget_deadline, output_lock, False,
542
696
  )
543
697
  if all_results:
544
698
  last_result = all_results[-1]
@@ -558,7 +712,9 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
558
712
  root_shard_store = sg.shard(root_project_id)
559
713
 
560
714
  # ── Cross-module call linking ──────────────────────────────────────
561
- if is_multi and len(modules_with_ids) > 1:
715
+ if fast and is_multi and len(modules_with_ids) > 1:
716
+ _phase("Cross-module linking...", "skipped (fast mode; use --complete)")
717
+ elif is_multi and len(modules_with_ids) > 1:
562
718
  xmod_label = "Cross-module linking..."
563
719
  _live_phase(xmod_label, "running")
564
720
  xmod_pids = [pid for _, pid in modules_with_ids]
@@ -575,7 +731,7 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
575
731
  dead: list[dict] = []
576
732
  coupling_pairs: list[dict] = []
577
733
 
578
- should_run_deep = deep or incremental_deep or total_files_found <= 3000
734
+ should_run_deep = (not fast) and (deep or incremental_deep or total_files_found <= 3000)
579
735
  if should_run_deep:
580
736
  comm_label = "Detecting communities..."
581
737
  _live_phase(comm_label, "running")
@@ -613,6 +769,11 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
613
769
  progress=lambda s: _live_phase(coup_label, s),
614
770
  )
615
771
  _finish_phase(coup_label, f"{len(coupling_pairs)} coupled file pairs")
772
+ elif fast:
773
+ _phase("Detecting communities...", "queued in background")
774
+ _phase("Detecting execution flows...", "queued in background")
775
+ _phase("Finding dead code...", "queued in background")
776
+ _phase("Analyzing git history...", "queued in background")
616
777
  else:
617
778
  # Run lightweight versions of flow tracing and dead code from the call
618
779
  # graph already built — no community detection or coupling (those are
@@ -674,30 +835,121 @@ def analyse(path: str, full: bool, deep: bool, incremental_deep: bool, embed: bo
674
835
  fg="green",
675
836
  )
676
837
 
677
- # Detect unresolved imports → hint about unindexed sibling projects
678
- try:
679
- unresolved = JavaIndexer.detect_unresolved_imports(root_shard_store)
680
- if unresolved:
681
- click.echo()
682
- click.secho("⚠ Unresolved imports — consider indexing these projects:", fg="yellow")
683
- for pkg, samples in sorted(unresolved.items())[:8]:
684
- click.echo(f" {pkg} (e.g. {samples[0]})")
685
- except Exception:
686
- pass # best-effort
838
+ # Detect unresolved imports → hint about unindexed sibling projects.
839
+ # This is useful, but it is still another global query, so fast mode leaves
840
+ # it out of the foreground path.
841
+ if not fast:
842
+ try:
843
+ unresolved = JavaIndexer.detect_unresolved_imports(root_shard_store)
844
+ if unresolved:
845
+ click.echo()
846
+ click.secho("⚠ Unresolved imports — consider indexing these projects:", fg="yellow")
847
+ for pkg, samples in sorted(unresolved.items())[:8]:
848
+ click.echo(f" {pkg} (e.g. {samples[0]})")
849
+ except Exception:
850
+ pass # best-effort
687
851
 
688
852
  # Publish a read replica so MCP and read-only CLI commands (search, stats…)
689
853
  # run against an isolated snapshot rather than competing with the write
690
854
  # process's buffer pool. Snapshot all open shards concurrently.
691
855
  snap_label = "Publishing read replica..."
692
- _live_phase(snap_label, "copying")
693
- root_shard_store._recycle_conn()
694
- sg.snapshot_all(background=False)
695
- _finish_phase(snap_label, "MCP will reload automatically")
856
+ for store in sg.open_shards():
857
+ recycle = getattr(store, "_recycle_conn", None)
858
+ if callable(recycle):
859
+ recycle()
860
+ if fast and _spawn_background_enrichment(abs_path):
861
+ _phase(snap_label, "core snapshot now; enrichment continues in background")
862
+ else:
863
+ _live_phase(snap_label, "copying")
864
+ sg.snapshot_all(background=False)
865
+ _finish_phase(snap_label, "MCP will reload automatically")
696
866
 
697
867
  # Restore original SIGINT handler now that we've finished cleanly.
698
868
  signal.signal(signal.SIGINT, _old_sigint_handler)
699
869
 
700
870
 
871
+ @main.command("publish-snapshot", hidden=True)
872
+ def publish_snapshot() -> None:
873
+ """Publish sharded read replicas for a recently completed analyse run."""
874
+ sg = ShardedGraphStore(read_only=False)
875
+ sg.snapshot_all(background=False)
876
+
877
+
878
+ @main.command("enrich-background", hidden=True)
879
+ @click.argument("path", type=click.Path(exists=True))
880
+ def enrich_background(path: str) -> None:
881
+ """Run expensive post-index graph enrichment outside the analyse foreground."""
882
+ abs_path = os.path.abspath(path)
883
+ LOGGER.info("Background enrichment starting for %s", abs_path)
884
+
885
+ project_roots = JavaIndexer.detect_projects_in_workspace(abs_path)
886
+ modules_with_ids: list[tuple[str, str]] = []
887
+ for proj_root in project_roots:
888
+ proj_name = os.path.basename(proj_root)
889
+ module_dirs = JavaIndexer.detect_modules(proj_root)
890
+ is_multi_module = not (len(module_dirs) == 1 and module_dirs[0] == proj_root)
891
+ if is_multi_module:
892
+ for m in module_dirs:
893
+ modules_with_ids.append((m, f"{proj_name}::{os.path.basename(m)}"))
894
+ else:
895
+ modules_with_ids.append((proj_root, proj_name))
896
+
897
+ root_basename = os.path.basename(abs_path)
898
+ root_project_id = modules_with_ids[-1][1] if modules_with_ids else root_basename
899
+ is_multi = len(modules_with_ids) > 1
900
+ xmod_pids = [pid for _, pid in modules_with_ids]
901
+
902
+ sg = ShardedGraphStore(read_only=False)
903
+ root_shard_store = sg.shard(root_project_id)
904
+
905
+ try:
906
+ # Publish the fast core graph first so MCP/search can use it while the
907
+ # more expensive enrichment keeps working.
908
+ sg.snapshot_all(background=False)
909
+
910
+ if is_multi and len(xmod_pids) > 1:
911
+ xmod_edges = link_cross_module_calls(
912
+ root_shard_store,
913
+ project_ids=xmod_pids,
914
+ progress=lambda s: LOGGER.info("Cross-module linking: %s", s),
915
+ )
916
+ LOGGER.info("Background cross-module linking wrote %d edges", xmod_edges)
917
+
918
+ communities = detect_communities(
919
+ root_shard_store,
920
+ progress=lambda s: LOGGER.info("Community detection: %s", s),
921
+ )
922
+ LOGGER.info("Background community detection found %d clusters", len(communities))
923
+
924
+ flows = trace_execution_flows(
925
+ root_shard_store,
926
+ progress=lambda s: LOGGER.info("Execution flow tracing: %s", s),
927
+ )
928
+ LOGGER.info("Background flow tracing found %d flows", len(flows))
929
+
930
+ dead = detect_dead_code(root_shard_store, limit=500)
931
+ LOGGER.info("Background dead-code scan found %d candidates", _dead_result_count(dead))
932
+
933
+ root_shard_store.clear_coupling()
934
+ coupling_project = root_basename if is_multi else root_project_id
935
+ coupling_pairs = compute_coupling(
936
+ root_shard_store,
937
+ abs_path,
938
+ coupling_project,
939
+ days=SETTINGS.default_coupling_days,
940
+ min_strength=SETTINGS.default_min_coupling_strength,
941
+ min_cochanges=SETTINGS.default_min_cochanges,
942
+ progress=lambda s: LOGGER.info("Git coupling: %s", s),
943
+ )
944
+ LOGGER.info("Background coupling analysis found %d pairs", len(coupling_pairs))
945
+
946
+ sg.snapshot_all(background=False)
947
+ LOGGER.info("Background enrichment finished for %s", abs_path)
948
+ except Exception as exc: # noqa: BLE001
949
+ LOGGER.exception("Background enrichment failed for %s: %s", abs_path, exc)
950
+ raise
951
+
952
+
701
953
  @main.command()
702
954
  @click.argument("query")
703
955
  @click.option("--k", default=20, show_default=True, type=int)
@@ -29,8 +29,10 @@ class Settings:
29
29
  rrf_k: int = 60
30
30
  semantic_candidate_pool: int = 2000
31
31
  write_batch_size: int = 500
32
- index_file_batch_size: int = 20
33
- edge_write_batch_size: int = 500
32
+ index_file_batch_size: int = 200
33
+ index_method_batch_size: int = 2000
34
+ index_symbol_batch_size: int = 2000
35
+ edge_write_batch_size: int = 5000
34
36
  default_coupling_days: int = 5
35
37
  default_min_coupling_strength: float = 0.3
36
38
  default_min_cochanges: int = 3
@@ -1,5 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
+ import time
3
4
  from collections import defaultdict
4
5
  from typing import Iterator
5
6
 
@@ -58,6 +59,7 @@ def resolve_calls(
58
59
  class_catalog: dict[str, list[str]],
59
60
  *,
60
61
  scan_counter: list[int] | None = None,
62
+ deadline: float | None = None,
61
63
  ) -> Iterator[tuple[str, str, float, str]]:
62
64
  """Resolve call names to known method ids.
63
65
 
@@ -84,6 +86,8 @@ def resolve_calls(
84
86
  class_method_index_by_fqcn[class_fqcn][key].append(method_id)
85
87
 
86
88
  for source_id, call_sites in calls.items():
89
+ if deadline is not None and time.perf_counter() >= deadline:
90
+ return
87
91
  if scan_counter is not None:
88
92
  scan_counter[0] += 1
89
93
  src_meta = method_catalog.get(source_id, {})
@@ -94,6 +98,8 @@ def resolve_calls(
94
98
  field_types = src_ctx.get("field_types", {}) or {}
95
99
 
96
100
  for call in call_sites:
101
+ if deadline is not None and time.perf_counter() >= deadline:
102
+ return
97
103
  call_name = call.name
98
104
 
99
105
  key = (call_name, int(call.arg_count))