java-codebase-rag 0.5.3__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graph_enrich.py CHANGED
@@ -334,7 +334,7 @@ def collect_annotation_meta_chain(
334
334
  ) -> dict[str, frozenset[str]]:
335
335
  """Map annotation simple name → built-in simple names reachable via meta-annotations.
336
336
 
337
- Single source of truth for Layer A: both the Kuzu writer and Lance chunk
337
+ Single source of truth for Layer A: both the LadybugDB writer and Lance chunk
338
338
  enrichment must use this; they must not derive `meta_chain` from separate
339
339
  filesystem walks. See ``PLAN-BROWNFIELD-ROLE-OVERRIDES`` §
340
340
  *Single source of truth (REQUIRED — read before implementation)*.
@@ -350,7 +350,7 @@ def annotation_meta_decls_from_graph_tables(
350
350
  """From `build_ast_graph.GraphTables.types`, map @interface simple name -> meta anns.
351
351
 
352
352
  Used for diagnostics; Layer A in production uses `collect_annotation_meta_chain`
353
- (disk) so Kuzu and Lance share one index.
353
+ (disk) so LadybugDB and Lance share one index.
354
354
  """
355
355
  decls: dict[str, tuple[str, ...]] = {}
356
356
  first_fqn: dict[str, str] = {}
@@ -1702,7 +1702,7 @@ def enrich_chunk(
1702
1702
 
1703
1703
 
1704
1704
  def symbol_id(kind: str, fqn: str, file_path: str = "", start_byte: int = 0) -> str:
1705
- """Deterministic SHA1-based id for Kuzu Symbol nodes."""
1705
+ """Deterministic SHA1-based id for LadybugDB Symbol nodes."""
1706
1706
  key = f"{kind}|{fqn}|{file_path}|{start_byte}".encode("utf-8")
1707
1707
  return hashlib.sha1(key).hexdigest()
1708
1708
 
@@ -0,0 +1,48 @@
1
+ """Raise the process soft file-descriptor limit to avoid LanceDB EMFILE.
2
+
3
+ LanceDB's merge-insert path opens many file handles concurrently; under the
4
+ default OS soft ``RLIMIT_NOFILE`` (256 on macOS processes launched by GUI /
5
+ launchd / IDE hosts, *not* the shell's raised limit) this exhausts file
6
+ descriptors and surfaces as::
7
+
8
+ RuntimeError: lance error: LanceError(IO): ... Too many open files (os error 24)
9
+ lance-io-4.0.0/src/local.rs:133:24
10
+
11
+ ``raise_fd_limit`` raises the process's *own* soft limit toward its hard limit.
12
+ ``RLIMIT_NOFILE`` is inherited across ``fork``+``exec``, so every CocoIndex /
13
+ ``cocoindex-code`` child spawned afterwards inherits the headroom. This fixes the
14
+ failure regardless of launch context (shell vs IDE vs MCP host) and regardless of
15
+ Lance's internal IO concurrency.
16
+
17
+ Never raise to ``RLIM_INFINITY`` — that breaks ``select()``/kqueue and Python
18
+ selectors on macOS; ``cap`` bounds the target to a safe value.
19
+
20
+ See https://github.com/HumanBean17/java-codebase-rag/issues/306
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
try:
    import resource
except ImportError:
    # The ``resource`` module is Unix-only. Importing it unconditionally would
    # make this module (and everything importing it, e.g. the CLI) fail to load
    # on Windows instead of being the documented no-op there.
    resource = None

# Safe ceiling well above LanceDB's appetite, comfortably below macOS libc
# quirks. The hard limit caps it further if lower (locked-down servers).
_DEFAULT_CAP = 65536


def raise_fd_limit(cap: int = _DEFAULT_CAP) -> None:
    """Raise this process's soft ``RLIMIT_NOFILE`` toward its hard limit.

    Best-effort and silent: never raises and never lowers the current soft
    limit. It is a no-op when:

    * the ``resource`` module or ``RLIMIT_NOFILE`` is unavailable (Windows);
    * the soft limit is already unlimited;
    * the soft limit already meets the target.

    The target is ``min(hard, cap)``, or ``cap`` itself when the hard limit is
    unlimited, so the soft limit is never set to ``RLIM_INFINITY``.

    Args:
        cap: Upper bound for the new soft limit.
    """
    if resource is None or not hasattr(resource, "RLIMIT_NOFILE"):
        return
    try:
        soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
        unlimited = resource.RLIM_INFINITY
        if soft == unlimited:
            # Already unbounded; there is nothing to raise.
            return
        # ``RLIM_INFINITY`` is not guaranteed to compare greater than finite
        # limits (it is negative on some platforms / Python versions), so an
        # unlimited hard limit must be special-cased rather than fed to min().
        target = cap if hard == unlimited else min(hard, cap)
        if soft >= target:
            return
        resource.setrlimit(resource.RLIMIT_NOFILE, (target, hard))
    except (ValueError, OSError):
        # Best-effort: a locked-down environment shouldn't fail the run.
        pass
java_codebase_rag/cli.py CHANGED
@@ -21,13 +21,14 @@ from java_codebase_rag.config import (
21
21
  index_dir_has_existing_artifacts,
22
22
  resolve_operator_config,
23
23
  )
24
+ from java_codebase_rag._fdlimit import raise_fd_limit
24
25
  from java_codebase_rag.pipeline import clip, run_build_ast_graph, run_cocoindex_drop, run_cocoindex_update, run_incremental_graph
25
26
  from java_ontology import VALID_UNRESOLVED_CALL_REASONS
26
27
 
27
- KUZU_INCREMENTAL_TRACKING_ISSUE_URL = "https://github.com/HumanBean17/java-codebase-rag/issues/73"
28
+ LADYBUG_INCREMENTAL_TRACKING_ISSUE_URL = "https://github.com/HumanBean17/java-codebase-rag/issues/73"
28
29
 
29
30
  _INCREMENT_WARNING_LINES = (
30
- "WARNING: AST graph (Kuzu) incremental rebuild is not yet implemented.",
31
+ "WARNING: AST graph (LadybugDB) incremental rebuild is not yet implemented.",
31
32
  "The graph reflects the index state from the last `init` or `reprocess`,",
32
33
  "which means `find`, `neighbors`, and `describe` may return stale results",
33
34
  "for files changed since then.",
@@ -37,8 +38,8 @@ _INCREMENT_WARNING_LINES = (
37
38
  "For an up-to-date graph, run:",
38
39
  " java-codebase-rag reprocess",
39
40
  "",
40
- "Track progress on Kuzu incremental rebuild:",
41
- f" {KUZU_INCREMENTAL_TRACKING_ISSUE_URL}",
41
+ "Track progress on LadybugDB incremental rebuild:",
42
+ f" {LADYBUG_INCREMENTAL_TRACKING_ISSUE_URL}",
42
43
  )
43
44
 
44
45
  _REFRESH_DEPRECATION = (
@@ -47,7 +48,7 @@ _REFRESH_DEPRECATION = (
47
48
  )
48
49
 
49
50
  _REPROCESS_DRIFT_VECTORS_ONLY = (
50
- "java-codebase-rag reprocess: rebuilt vectors only; graph (code_graph.kuzu) was NOT rebuilt "
51
+ "java-codebase-rag reprocess: rebuilt vectors only; graph (code_graph.lbug) was NOT rebuilt "
51
52
  "and may now reflect a stale source snapshot."
52
53
  )
53
54
 
@@ -178,7 +179,7 @@ def _emit(value: Any) -> None:
178
179
  print(json.dumps(payload, default=_jsonable, sort_keys=True, indent=None))
179
180
 
180
181
 
181
- def _emit_increment_kuzu_warning() -> None:
182
+ def _emit_increment_ladybug_warning() -> None:
182
183
  for line in _INCREMENT_WARNING_LINES:
183
184
  print(line, file=sys.stderr)
184
185
 
@@ -289,7 +290,7 @@ def _cmd_init(args: argparse.Namespace) -> int:
289
290
  print(file=sys.stderr, flush=True)
290
291
  g = run_build_ast_graph(
291
292
  source_root=cfg.source_root,
292
- kuzu_path=cfg.kuzu_path,
293
+ ladybug_path=cfg.ladybug_path,
293
294
  verbose=verbose,
294
295
  quiet=bool(args.quiet),
295
296
  env=env,
@@ -319,7 +320,7 @@ def _cmd_increment(args: argparse.Namespace) -> int:
319
320
  # Check for --vectors-only flag
320
321
  vectors_only = bool(getattr(args, "vectors_only", False))
321
322
  if vectors_only:
322
- _emit_increment_kuzu_warning()
323
+ _emit_increment_ladybug_warning()
323
324
 
324
325
  def work() -> int:
325
326
  env = cfg.subprocess_env()
@@ -350,7 +351,7 @@ def _cmd_increment(args: argparse.Namespace) -> int:
350
351
  # Run incremental graph update
351
352
  g = run_incremental_graph(
352
353
  source_root=cfg.source_root,
353
- kuzu_path=cfg.kuzu_path,
354
+ ladybug_path=cfg.ladybug_path,
354
355
  verbose=bool(args.verbose),
355
356
  quiet=bool(args.quiet),
356
357
  env=env,
@@ -437,7 +438,7 @@ def _cmd_reprocess(args: argparse.Namespace) -> int:
437
438
  if graph_only:
438
439
  g = run_build_ast_graph(
439
440
  source_root=cfg.source_root,
440
- kuzu_path=cfg.kuzu_path,
441
+ ladybug_path=cfg.ladybug_path,
441
442
  verbose=verbose,
442
443
  quiet=bool(args.quiet),
443
444
  env=env,
@@ -509,7 +510,7 @@ def _cmd_erase(args: argparse.Namespace) -> int:
509
510
  cfg = _resolved_from_ns(args)
510
511
  _startup_hints(cfg)
511
512
  cfg.apply_to_os_environ()
512
- to_describe: list[Path] = [cfg.kuzu_path, cfg.cocoindex_db]
513
+ to_describe: list[Path] = [cfg.ladybug_path, cfg.cocoindex_db]
513
514
  if cfg.index_dir.is_dir():
514
515
  try:
515
516
  import lancedb
@@ -546,8 +547,8 @@ def _cmd_erase(args: argparse.Namespace) -> int:
546
547
  )
547
548
  elif drop.returncode != 0:
548
549
  print(clip(drop.stderr, 4000), file=sys.stderr)
549
- if cfg.kuzu_path.exists():
550
- shutil.rmtree(cfg.kuzu_path, ignore_errors=True)
550
+ if cfg.ladybug_path.exists():
551
+ shutil.rmtree(cfg.ladybug_path, ignore_errors=True)
551
552
  if cfg.cocoindex_db.exists():
552
553
  try:
553
554
  cfg.cocoindex_db.unlink()
@@ -577,17 +578,17 @@ def _cmd_meta(args: argparse.Namespace) -> int:
577
578
  cfg = _resolved_from_ns(args)
578
579
  _startup_hints(cfg)
579
580
  cfg.apply_to_os_environ()
580
- from kuzu_queries import KuzuGraph # lazy
581
+ from ladybug_queries import LadybugGraph # lazy
581
582
 
582
- KuzuGraph._instance = None
583
- KuzuGraph._instance_path = None
583
+ LadybugGraph._instance = None
584
+ LadybugGraph._instance_path = None
584
585
  payload = server._graph_meta_output().model_dump()
585
586
  payload["embedding_model"] = cfg.embedding_model
586
587
  payload["embedding_device"] = cfg.embedding_device
587
588
  payload["embedding_model_source"] = cfg.embedding_model_source
588
589
  payload["embedding_device_source"] = cfg.embedding_device_source
589
590
  payload["index_dir"] = str(cfg.index_dir.resolve())
590
- payload["kuzu_path"] = str(cfg.kuzu_path.resolve())
591
+ payload["ladybug_path"] = str(cfg.ladybug_path.resolve())
591
592
  payload["index_dir_source"] = cfg.index_dir_source
592
593
  payload["hints_enabled"] = cfg.hints_enabled
593
594
  payload["hints_enabled_source"] = cfg.hints_enabled_source
@@ -637,12 +638,12 @@ def _cmd_unresolved_calls_list(args: argparse.Namespace) -> int:
637
638
  cfg = _resolved_from_ns(args)
638
639
  _startup_hints(cfg)
639
640
  cfg.apply_to_os_environ()
640
- from kuzu_queries import KuzuGraph # lazy
641
+ from ladybug_queries import LadybugGraph # lazy
641
642
 
642
- if not KuzuGraph.exists():
643
+ if not LadybugGraph.exists():
643
644
  _emit({"success": False, "message": "Kuzu graph not found"})
644
645
  return 1
645
- graph = KuzuGraph.get()
646
+ graph = LadybugGraph.get()
646
647
  rows = graph.list_unresolved_call_sites(
647
648
  method_id=args.method_id,
648
649
  reason=args.reason,
@@ -658,12 +659,12 @@ def _cmd_unresolved_calls_stats(args: argparse.Namespace) -> int:
658
659
  cfg = _resolved_from_ns(args)
659
660
  _startup_hints(cfg)
660
661
  cfg.apply_to_os_environ()
661
- from kuzu_queries import KuzuGraph # lazy
662
+ from ladybug_queries import LadybugGraph # lazy
662
663
 
663
- if not KuzuGraph.exists():
664
+ if not LadybugGraph.exists():
664
665
  _emit({"success": False, "message": "Kuzu graph not found"})
665
666
  return 1
666
- graph = KuzuGraph.get()
667
+ graph = LadybugGraph.get()
667
668
  buckets = graph.stats_unresolved_call_sites(by=args.by)
668
669
  total = sum(int(r.get("n") or 0) for r in buckets)
669
670
  _emit({"success": True, "total": total, "by": args.by, "buckets": buckets})
@@ -683,12 +684,12 @@ def _cmd_analyze_pr(args: argparse.Namespace) -> int:
683
684
  _emit({"success": False, "message": "Diff is empty"})
684
685
  return 1
685
686
  import pr_analysis # lazy
686
- from kuzu_queries import KuzuGraph # lazy
687
+ from ladybug_queries import LadybugGraph # lazy
687
688
 
688
- if not KuzuGraph.exists():
689
+ if not LadybugGraph.exists():
689
690
  _emit({"success": False, "message": "Kuzu graph not found"})
690
691
  return 1
691
- graph = KuzuGraph.get()
692
+ graph = LadybugGraph.get()
692
693
  report = pr_analysis.analyze_pr_pipeline(graph, diff_text)
693
694
  _emit(pr_analysis.pr_report_to_dict(report))
694
695
  return 0
@@ -774,8 +775,9 @@ def build_parser() -> argparse.ArgumentParser:
774
775
  help="Refresh shipped artifacts (skill, agent, MCP entry) after pip upgrade.",
775
776
  description=(
776
777
  "Post-upgrade refresh: overwrites skill and agent files with the latest "
777
- "shipped versions and updates the MCP command path. Use --dry-run to "
778
- "preview changes without writing. Requires a prior `install` run."
778
+ "shipped versions and updates the MCP command path. If an index exists, "
779
+ "also runs an incremental Lance + graph catch-up (same as `increment`). "
780
+ "Use --dry-run to preview changes without writing. Requires a prior `install` run."
779
781
  ),
780
782
  )
781
783
  update.add_argument(
@@ -902,6 +904,7 @@ def build_parser() -> argparse.ArgumentParser:
902
904
 
903
905
 
904
906
  def main(argv: list[str] | None = None) -> int:
907
+ raise_fd_limit()
905
908
  raw = list(argv if argv is not None else sys.argv[1:])
906
909
  if raw and raw[0] == "refresh":
907
910
  print(_REFRESH_DEPRECATION, file=sys.stderr)
@@ -25,6 +25,27 @@ ENV_SOURCE_ROOT = "JAVA_CODEBASE_RAG_SOURCE_ROOT"
25
25
  ENV_DEBUG_CONTEXT = "JAVA_CODEBASE_RAG_DEBUG_CONTEXT"
26
26
  ENV_RUN_HEAVY = "JAVA_CODEBASE_RAG_RUN_HEAVY"
27
27
 
28
# Throttle for CocoIndex's inflight components. CocoIndex defaults to 1024
# inflight components (cocoindex/_internal/app.py:
# ``_ENV_MAX_INFLIGHT_COMPONENTS``); that many concurrent LanceDB merge-inserts
# exhausts OS file descriptors under default ulimits and fails with
# "Too many open files (os error 24)".
# NOTE: this is the env var CocoIndex actually reads. The earlier fix (#293)
# set ``COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS``, which does not exist and is never
# read by CocoIndex, so it had no effect and the EMFILE error came back (#306).
COCOINDEX_MAX_INFLIGHT_COMPONENTS_ENV = "COCOINDEX_MAX_INFLIGHT_COMPONENTS"
COCOINDEX_DEFAULT_MAX_INFLIGHT_COMPONENTS = "256"


def cocoindex_subprocess_env_defaults() -> dict[str, str]:
    """Return the env defaults that bound concurrency in CocoIndex subprocesses.

    A new dict is built on every call. Callers are expected to merge it with
    ``env.setdefault(...)`` so that a value already supplied by the operator
    takes precedence. See :issue:`306`.
    """
    defaults: dict[str, str] = {}
    defaults[COCOINDEX_MAX_INFLIGHT_COMPONENTS_ENV] = COCOINDEX_DEFAULT_MAX_INFLIGHT_COMPONENTS
    return defaults
48
+
28
49
  _DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
29
50
 
30
51
  # Matches either $VAR or ${VAR} (POSIX shell variable syntax).
@@ -67,10 +88,9 @@ def resolved_sbert_model_for_process_env(import_time_default: str) -> str:
67
88
  # Legacy env keys: never honored; detection-only hints name the replacement (if any).
68
89
  _LEGACY_ENV_HINTS: tuple[tuple[str, str], ...] = (
69
90
  ("LANCEDB_URI", "JAVA_CODEBASE_RAG_INDEX_DIR"),
70
- ("KUZU_DB_PATH", "JAVA_CODEBASE_RAG_INDEX_DIR (Kuzu lives at <index_dir>/code_graph.kuzu)"),
71
91
  ("LANCEDB_MCP_PROJECT_ROOT", "cwd or --source-root (no env replacement)"),
72
92
  ("LANCEDB_MCP_ALLOW_REFRESH", "(removed; use init / increment / reprocess / erase)"),
73
- ("LANCEDB_MCP_GRAPH_ENABLED", "(removed; graph is used when code_graph.kuzu exists)"),
93
+ ("LANCEDB_MCP_GRAPH_ENABLED", "(removed; graph is used when code_graph.lbug exists)"),
74
94
  ("LANCEDB_MCP_MICROSERVICE_ROOTS", "microservice_roots: in .java-codebase-rag.yml"),
75
95
  ("LANCEDB_MCP_DEBUG_CONTEXT", ENV_DEBUG_CONTEXT),
76
96
  ("LANCEDB_MCP_RUN_HEAVY", ENV_RUN_HEAVY),
@@ -182,7 +202,7 @@ def load_yaml_mapping(source_root: Path) -> dict[str, Any]:
182
202
  class ResolvedOperatorConfig:
183
203
  source_root: Path
184
204
  index_dir: Path
185
- kuzu_path: Path
205
+ ladybug_path: Path
186
206
  cocoindex_db: Path
187
207
  embedding_model: str
188
208
  embedding_device: str | None
@@ -193,7 +213,7 @@ class ResolvedOperatorConfig:
193
213
  hints_enabled_source: SettingSource
194
214
 
195
215
  def apply_to_os_environ(self) -> None:
196
- """Make downstream modules (server, kuzu_queries, flows) see a consistent environment.
216
+ """Make downstream modules (server, ladybug_queries, flows) see a consistent environment.
197
217
 
198
218
  When ``embedding_device`` is unset, ``SBERT_DEVICE`` is not removed from ``os.environ`` so
199
219
  a long-lived host process is not mutated for unrelated callers; subprocesses still use
@@ -286,9 +306,19 @@ def _pick_bool(
286
306
  def _resolve_index_dir_path(
287
307
  *,
288
308
  source_root: Path,
309
+ config_dir: Path,
289
310
  cli_index_dir: str | None,
290
311
  yaml_dict: dict[str, Any],
291
312
  ) -> tuple[Path, SettingSource]:
313
+ # Bases for relative paths:
314
+ # - YAML ``index_dir`` -> the config file's directory (``config_dir``),
315
+ # the SAME base used for YAML ``source_root``. Paths written in the
316
+ # config file are relative to the file, so both keys stay consistent.
317
+ # - CLI / env ``index_dir`` -> ``source_root`` (unchanged). These are not
318
+ # "in the config file"; preserving the existing base avoids a semantics
319
+ # change for operators who pass ``--index-dir`` on the command line.
320
+ # - Default ``./.java-codebase-rag`` -> ``source_root`` so the index sits
321
+ # beside the Java tree (the layout ``discover_project_root`` anchors on).
292
322
  raw_cli = cli_index_dir.strip() if isinstance(cli_index_dir, str) else None
293
323
  if raw_cli:
294
324
  p = Path(raw_cli).expanduser()
@@ -304,7 +334,7 @@ def _resolve_index_dir_path(
304
334
  idx = yaml_dict.get("index_dir")
305
335
  if isinstance(idx, str) and idx.strip():
306
336
  p = Path(idx.strip()).expanduser()
307
- out = p.resolve() if p.is_absolute() else (source_root / p).resolve()
337
+ out = p.resolve() if p.is_absolute() else (config_dir / p).resolve()
308
338
  return out, "yaml"
309
339
 
310
340
  return (source_root / ".java-codebase-rag").resolve(), "default"
@@ -348,7 +378,7 @@ def resolve_operator_config(
348
378
  root = config_dir
349
379
 
350
380
  index_dir, index_src = _resolve_index_dir_path(
351
- source_root=root, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
381
+ source_root=root, config_dir=config_dir, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
352
382
  )
353
383
  model, model_src = _pick_str(
354
384
  cli_val=cli_embedding_model,
@@ -369,12 +399,12 @@ def resolve_operator_config(
369
399
  yaml_path=("hints", "enabled"),
370
400
  default=True,
371
401
  )
372
- ku = index_dir / "code_graph.kuzu"
402
+ ku = index_dir / "code_graph.lbug"
373
403
  coco = index_dir / "cocoindex.db"
374
404
  return ResolvedOperatorConfig(
375
405
  source_root=root,
376
406
  index_dir=index_dir,
377
- kuzu_path=ku,
407
+ ladybug_path=ku,
378
408
  cocoindex_db=coco,
379
409
  embedding_model=model,
380
410
  embedding_device=device,
@@ -387,9 +417,9 @@ def resolve_operator_config(
387
417
 
388
418
 
389
419
  def index_dir_has_existing_artifacts(index_dir: Path) -> tuple[bool, list[str]]:
390
- """True if Kuzu graph dir or any Lance table already exists under index_dir."""
420
+ """True if graph dir or any Lance table already exists under index_dir."""
391
421
  paths: list[str] = []
392
- ku = index_dir / "code_graph.kuzu"
422
+ ku = index_dir / "code_graph.lbug"
393
423
  if ku.exists():
394
424
  paths.append(str(ku.resolve()))
395
425
  if index_dir.is_dir():
@@ -325,6 +325,66 @@ def select_hosts(*, non_interactive: bool, cli_agents: list[str] | None) -> list
325
325
  return [HOSTS[name] for name in selected]
326
326
 
327
327
 
328
+ def select_microservices(
329
+ java_dirs: list[Path],
330
+ *,
331
+ non_interactive: bool,
332
+ preselected: list[str] | None = None,
333
+ ) -> list[str] | None:
334
+ """Show an interactive checklist of detected microservices, all pre-checked.
335
+
336
+ Returns None when all are selected (-> microservice_roots omitted, index
337
+ everything) or a non-empty subset list. Never returns [].
338
+
339
+ Args:
340
+ java_dirs: Detected module roots (relative Path names) from
341
+ detect_java_directories. Caller must pass len >= 2.
342
+ non_interactive: If True, return None (all) without prompting.
343
+ preselected: On re-run, the prior microservice_roots subset to pre-check.
344
+ """
345
+ # Defensive guard: caller gates on len >= 2, but stay safe if called directly.
346
+ if len(java_dirs) < 2:
347
+ return None
348
+
349
+ dir_names = [str(d) for d in java_dirs]
350
+
351
+ if non_interactive:
352
+ return None
353
+
354
+ preselected_set = set(preselected) if preselected else None
355
+ choices = [
356
+ {
357
+ "name": name,
358
+ "value": name,
359
+ "checked": (name in preselected_set) if preselected_set is not None else True,
360
+ }
361
+ for name in dir_names
362
+ ]
363
+
364
+ print("Note: Select which modules to index. Toggle with Space, confirm with Enter.")
365
+ selected = prompt(
366
+ "checkbox",
367
+ "Select microservices to index:",
368
+ choices=choices,
369
+ default=dir_names, # non-TTY fallback returns all -> caller omits key
370
+ )
371
+
372
+ if not selected:
373
+ retry = prompt(
374
+ "confirm",
375
+ "At least one module is required. Re-select?",
376
+ )
377
+ if retry:
378
+ return select_microservices(java_dirs, non_interactive=False, preselected=preselected)
379
+ raise SystemExit(2)
380
+
381
+ selected_set = set(selected)
382
+ if selected_set == set(dir_names):
383
+ return None
384
+ # Preserve detection order for deterministic YAML output.
385
+ return [name for name in dir_names if name in selected_set]
386
+
387
+
328
388
  def select_scope(*, non_interactive: bool, cli_scope: str | None) -> Scope:
329
389
  """Select 'project' or 'user' scope.
330
390
 
@@ -791,7 +851,7 @@ def run_init_if_needed(
791
851
  # Run AST graph build
792
852
  g = run_build_ast_graph(
793
853
  source_root=cfg.source_root,
794
- kuzu_path=cfg.kuzu_path,
854
+ ladybug_path=cfg.ladybug_path,
795
855
  verbose=not quiet,
796
856
  quiet=quiet,
797
857
  env=env,
@@ -1182,7 +1242,7 @@ def run_update(
1182
1242
  index_dir_has_existing_artifacts,
1183
1243
  resolve_operator_config,
1184
1244
  )
1185
- from java_codebase_rag.pipeline import run_cocoindex_update
1245
+ from java_codebase_rag.pipeline import run_cocoindex_update, run_incremental_graph
1186
1246
 
1187
1247
  project_root = discover_project_root(cwd)
1188
1248
  if project_root is None:
@@ -1207,22 +1267,37 @@ def run_update(
1207
1267
  print("Run `java-codebase-rag install` to create one.")
1208
1268
  return EXIT_PARTIAL if has_artifact_failures else EXIT_SUCCESS
1209
1269
 
1210
- # Run increment (LanceDB catch-up)
1270
+ # Run increment: LanceDB catch-up + incremental graph rebuild.
1271
+ # Mirrors `java-codebase-rag increment` so both index layers stay current.
1272
+ # The "graph not implemented" warning belongs only on the vectors-only path
1273
+ # (increment --vectors-only), where the graph step is deliberately skipped.
1211
1274
  if not dry_run:
1212
- print("\nUpdating index (incremental LanceDB update)...")
1275
+ print("\nUpdating index (Lance + graph)...")
1213
1276
  cfg.apply_to_os_environ()
1214
1277
  env = cfg.subprocess_env()
1215
1278
 
1216
1279
  coco = run_cocoindex_update(env, full_reprocess=False, quiet=True)
1217
1280
  if coco.returncode != 0:
1218
- print(f"Error: Index update failed with code {coco.returncode}")
1281
+ print(f"Error: Lance index update failed with code {coco.returncode}")
1219
1282
  return 1
1220
1283
 
1221
- # Print graph staleness warning
1222
- from java_codebase_rag.cli import _INCREMENT_WARNING_LINES
1223
- print("\n" + "\n".join(_INCREMENT_WARNING_LINES))
1284
+ g = run_incremental_graph(
1285
+ source_root=cfg.source_root,
1286
+ ladybug_path=cfg.ladybug_path,
1287
+ verbose=False,
1288
+ quiet=True,
1289
+ env=env,
1290
+ )
1291
+ if g.returncode != 0:
1292
+ # Artifacts above already refreshed; the graph catch-up is best-effort
1293
+ # here. Surface a truthful, actionable message instead of leaving the
1294
+ # graph silently stale or claiming the feature is unimplemented.
1295
+ print(
1296
+ f"\nWarning: incremental graph update failed (exit {g.returncode}). "
1297
+ "Run `java-codebase-rag reprocess` for a full rebuild."
1298
+ )
1224
1299
  else:
1225
- print("\nWould run incremental index update.")
1300
+ print("\nWould run incremental index update (Lance + graph).")
1226
1301
 
1227
1302
  # Print summary
1228
1303
  print("\nUpdate complete.")
@@ -1270,6 +1345,20 @@ def run_install(
1270
1345
  except SystemExit as e:
1271
1346
  return e.code
1272
1347
 
1348
+ # Stage 1 (Case B): interactive microservice selection (only when 2+ detected)
1349
+ try:
1350
+ selected_roots = (
1351
+ select_microservices(
1352
+ java_dirs,
1353
+ non_interactive=non_interactive,
1354
+ preselected=existing_config.get("microservice_roots") if existing_config else None,
1355
+ )
1356
+ if len(java_dirs) >= 2
1357
+ else None
1358
+ )
1359
+ except SystemExit as e:
1360
+ return e.code
1361
+
1273
1362
  # Stage 2: Embedding model
1274
1363
  resolved_model = resolve_model(model, non_interactive=non_interactive)
1275
1364
 
@@ -1312,7 +1401,7 @@ def run_install(
1312
1401
  yaml_content = generate_yaml_config(
1313
1402
  source_root,
1314
1403
  resolved_model,
1315
- microservice_roots=[str(d) for d in java_dirs] if len(java_dirs) > 1 else None,
1404
+ microservice_roots=selected_roots,
1316
1405
  existing_yaml=existing_config,
1317
1406
  )
1318
1407