java-codebase-rag 0.2.2__py3-none-any.whl → 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ast_java.py CHANGED
@@ -83,7 +83,7 @@ _DTO_LOMBOK_ANNOTATIONS: frozenset[str] = frozenset({
83
83
  # Phase 11: `EDGE_SCHEMA` in `java_ontology.py` (canonical edge navigation schema; v14 re-index).
84
84
  # Phase 12: CALLS `callee_declaring_role`, supertype-walk dedup, pass3 unresolved counters (v15 re-index).
85
85
  # Bumps whenever extraction / enrichment semantics change.
86
- ONTOLOGY_VERSION = 15
86
+ ONTOLOGY_VERSION = 16
87
87
 
88
88
  ROLE_ANNOTATIONS: dict[str, str] = {
89
89
  # Spring Web
@@ -2732,6 +2732,19 @@ def infer_role(annotation_names: Iterable[str]) -> str:
2732
2732
  return "OTHER"
2733
2733
 
2734
2734
 
2735
def _type_injects_messaging(type_decl: "TypeDecl") -> bool:
    """Report whether a type receives a messaging template by injection.

    A type qualifies when the declared type name of any of its fields, or of
    any parameter of any of its constructors, is a key of
    ``_INJECTED_TYPES_TO_CAPABILITY``. Parameters of non-constructor methods
    are not considered.
    """
    # Fields are checked first, exactly like constructor parameters below.
    if any(
        field.type_name in _INJECTED_TYPES_TO_CAPABILITY
        for field in type_decl.fields
    ):
        return True
    return any(
        param.type_name in _INJECTED_TYPES_TO_CAPABILITY
        for ctor in type_decl.methods
        if ctor.is_constructor
        for param in ctor.parameters
    )
2746
+
2747
+
2735
2748
  def infer_role_for_type(type_decl: "TypeDecl") -> str:
2736
2749
  """Role inference that also detects DTO-like passive data carriers.
2737
2750
 
@@ -2763,6 +2776,11 @@ def infer_role_for_type(type_decl: "TypeDecl") -> str:
2763
2776
  if name.endswith(suffix) and name != suffix:
2764
2777
  return "DTO"
2765
2778
 
2779
+ # Types injecting messaging templates are outbound callers (CLIENT role),
2780
+ # symmetric with CONTROLLER covering both HTTP and messaging inbound.
2781
+ if _type_injects_messaging(type_decl):
2782
+ return "CLIENT"
2783
+
2766
2784
  return "OTHER"
2767
2785
 
2768
2786
 
build_ast_graph.py CHANGED
@@ -70,13 +70,13 @@ log = logging.getLogger(__name__)
70
70
 
71
71
  _VERBOSE_STDERR_LOCK = threading.Lock()
72
72
 
73
- _PASS1_START = "[pass1] starting · parsing Java files under source root"
74
- _PASS2_START = "[pass2] starting · emitting EXTENDS / IMPLEMENTS / DECLARES rows"
75
- _PASS3_START = "[pass3] starting · call resolution (outgoing calls per site)"
76
- _PASS4_START = "[pass4] starting · route and EXPOSES extraction"
77
- _PASS5_START = "[pass5] starting · imperative HTTP_CALLS / ASYNC_CALLS edges"
78
- _PASS6_START = "[pass6] starting · cross-service call-edge matching"
79
- _WRITE_START = "[write] starting · writing Kuzu graph to disk"
73
+ _PASS1_START = "[graph] pass 1 · parsing Java files"
74
+ _PASS2_START = "[graph] pass 2 · emitting EXTENDS / IMPLEMENTS / DECLARES rows"
75
+ _PASS3_START = "[graph] pass 3 · call resolution (outgoing calls per site)"
76
+ _PASS4_START = "[graph] pass 4 · route and EXPOSES extraction"
77
+ _PASS5_START = "[graph] pass 5 · imperative HTTP_CALLS / ASYNC_CALLS edges"
78
+ _PASS6_START = "[graph] pass 6 · cross-service call-edge matching"
79
+ _WRITE_START = "[graph] writing · Kuzu graph to disk"
80
80
 
81
81
 
82
82
  def _verbose_stderr_line(content: str) -> None:
@@ -104,7 +104,7 @@ class _VerbosePassHeartbeats:
104
104
  t0 = time.monotonic()
105
105
  while not stop.wait(timeout=5.0):
106
106
  elapsed = int(time.monotonic() - t0)
107
- _verbose_stderr_line(f"{tag} running {elapsed}s elapsed")
107
+ _verbose_stderr_line(f"{tag} · {elapsed}s elapsed")
108
108
 
109
109
  self._thr = threading.Thread(target=worker, name=f"hb-{tag}", daemon=True)
110
110
  self._thr.start()
@@ -476,7 +476,7 @@ def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool) -> dict[str,
476
476
  slow_sec = float(raw_slow)
477
477
  except ValueError:
478
478
  slow_sec = 0.0
479
- with _VerbosePassHeartbeats("[pass1]", verbose=verbose):
479
+ with _VerbosePassHeartbeats("[graph] pass 1", verbose=verbose):
480
480
  if verbose and slow_sec > 0:
481
481
  time.sleep(slow_sec)
482
482
  for p in iter_java_source_files(root, ignore=ignore):
@@ -521,7 +521,7 @@ def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool) -> dict[str,
521
521
  if verbose:
522
522
  elapsed = time.time() - t0
523
523
  _verbose_stderr_line(
524
- f"[pass1] parsed {n_files} files in {elapsed:.2f}s: "
524
+ f"[graph] pass 1 · parsed {n_files} files in {elapsed:.2f}s: "
525
525
  f"{len(tables.types)} types, {len(tables.members)} members, "
526
526
  f"{tables.parse_errors} parse errors, {tables.skipped_files} skipped",
527
527
  )
@@ -759,7 +759,7 @@ def pass2_edges(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
759
759
  seen_inj: set[tuple[str, str, str, str]] = set()
760
760
  if verbose:
761
761
  _verbose_stderr_line(_PASS2_START)
762
- with _VerbosePassHeartbeats("[pass2]", verbose=verbose):
762
+ with _VerbosePassHeartbeats("[graph] pass 2", verbose=verbose):
763
763
  for fqn, entry in tables.types.items():
764
764
  ast = asts.get(entry.file_path)
765
765
  if ast is None:
@@ -769,7 +769,7 @@ def pass2_edges(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
769
769
  if verbose:
770
770
  elapsed = time.time() - t0
771
771
  _verbose_stderr_line(
772
- f"[pass2] emitted {len(tables.extends_rows)} EXTENDS, "
772
+ f"[graph] pass 2 · emitted {len(tables.extends_rows)} EXTENDS, "
773
773
  f"{len(tables.implements_rows)} IMPLEMENTS, "
774
774
  f"{len(tables.injects_rows)} INJECTS, "
775
775
  f"{len(tables.phantoms)} phantoms in {elapsed:.2f}s",
@@ -1432,7 +1432,7 @@ def pass3_calls(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
1432
1432
  _verbose_stderr_line(_PASS3_START)
1433
1433
  _build_member_indexes(tables)
1434
1434
  stats = CallResolutionStats()
1435
- with _VerbosePassHeartbeats("[pass3]", verbose=verbose):
1435
+ with _VerbosePassHeartbeats("[graph] pass 3", verbose=verbose):
1436
1436
  for rel_path, file_ast in asts.items():
1437
1437
  try:
1438
1438
  _process_file_calls(file_ast, rel_path, tables, stats)
@@ -1455,7 +1455,7 @@ def pass3_calls(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
1455
1455
  )
1456
1456
  log.info(msg)
1457
1457
  if verbose:
1458
- _verbose_stderr_line(f"[pass3] {msg}")
1458
+ _verbose_stderr_line(f"[graph] pass 3 · {msg}")
1459
1459
 
1460
1460
 
1461
1461
  _PATH_VAR_SEG = re.compile(r"^\{([^:{}]+)(?::([^}]*))?\}$") # whole path segment
@@ -1586,7 +1586,7 @@ def pass4_routes(
1586
1586
  meta_chain = collect_annotation_meta_chain(prs)
1587
1587
  if verbose:
1588
1588
  _verbose_stderr_line(_PASS4_START)
1589
- with _VerbosePassHeartbeats("[pass4]", verbose=verbose):
1589
+ with _VerbosePassHeartbeats("[graph] pass 4", verbose=verbose):
1590
1590
 
1591
1591
  for ast in asts.values():
1592
1592
  stats.routes_skipped_unresolved += ast.routes_skipped_unresolved
@@ -1710,7 +1710,7 @@ def pass4_routes(
1710
1710
  )
1711
1711
  log.info(msg)
1712
1712
  if verbose:
1713
- _verbose_stderr_line(f"[pass4] {msg}")
1713
+ _verbose_stderr_line(f"[graph] pass 4 · {msg}")
1714
1714
 
1715
1715
 
1716
1716
  def pass5_imperative_edges(
@@ -1763,7 +1763,7 @@ def pass5_imperative_edges(
1763
1763
 
1764
1764
  if verbose:
1765
1765
  _verbose_stderr_line(_PASS5_START)
1766
- with _VerbosePassHeartbeats("[pass5]", verbose=verbose):
1766
+ with _VerbosePassHeartbeats("[graph] pass 5", verbose=verbose):
1767
1767
  for member in sorted(tables.members, key=lambda x: x.node_id):
1768
1768
  if member.decl.is_constructor:
1769
1769
  continue
@@ -2018,7 +2018,7 @@ def pass5_imperative_edges(
2018
2018
  http_strategy = dict(sorted(tables.call_edge_stats.http_calls_by_strategy.items()))
2019
2019
  async_strategy = dict(sorted(tables.call_edge_stats.async_calls_by_strategy.items()))
2020
2020
  _verbose_stderr_line(
2021
- f"[pass5] HTTP_CALLS: {len(tables.http_call_rows)} edges, "
2021
+ f"[graph] pass 5 · HTTP_CALLS: {len(tables.http_call_rows)} edges, "
2022
2022
  f"ASYNC_CALLS: {len(tables.async_call_rows)} edges; "
2023
2023
  f"http_by_client_kind={http_client}, async_by_client_kind={async_client}, "
2024
2024
  f"http_by_strategy={http_strategy}, async_by_strategy={async_strategy}",
@@ -2165,7 +2165,7 @@ def pass6_match_edges(
2165
2165
 
2166
2166
  if verbose:
2167
2167
  _verbose_stderr_line(_PASS6_START)
2168
- with _VerbosePassHeartbeats("[pass6]", verbose=verbose):
2168
+ with _VerbosePassHeartbeats("[graph] pass 6", verbose=verbose):
2169
2169
  for row in tables.http_call_rows:
2170
2170
  if row.match != "unresolved":
2171
2171
  continue
@@ -2317,14 +2317,14 @@ def pass6_match_edges(
2317
2317
  first_http = ", ".join(suppressed_auto_cross_http)
2318
2318
  first_async = ", ".join(suppressed_auto_cross_async)
2319
2319
  _verbose_stderr_line(
2320
- f"[pass6] cross_service_resolution=brownfield_only:\n"
2320
+ f"[graph] pass 6 · cross_service_resolution=brownfield_only:\n"
2321
2321
  f" {n_bf} cross_service edges from brownfield layers,\n"
2322
2322
  f" {suppressed_auto_cross_count} auto-cross-service candidates suppressed -> unresolved\n"
2323
2323
  f" (first 5 http: {first_http})\n"
2324
2324
  f" (first 5 async: {first_async})",
2325
2325
  )
2326
2326
  _verbose_stderr_line(
2327
- f"[pass6] http_match={dict(sorted(tables.call_edge_stats.http_calls_match_breakdown.items()))}, "
2327
+ f"[graph] pass 6 · http_match={dict(sorted(tables.call_edge_stats.http_calls_match_breakdown.items()))}, "
2328
2328
  f"async_match={dict(sorted(tables.call_edge_stats.async_calls_match_breakdown.items()))}, "
2329
2329
  f"cross_service_calls_total={tables.call_edge_stats.cross_service_calls_total}",
2330
2330
  )
@@ -3004,7 +3004,7 @@ def write_kuzu(
3004
3004
  )
3005
3005
  if verbose:
3006
3006
  _verbose_stderr_line(_WRITE_START)
3007
- with _VerbosePassHeartbeats("[write]", verbose=verbose):
3007
+ with _VerbosePassHeartbeats("[graph] writing", verbose=verbose):
3008
3008
  db_path.parent.mkdir(parents=True, exist_ok=True)
3009
3009
  db = kuzu.Database(str(db_path))
3010
3010
  conn = kuzu.Connection(db)
@@ -3018,17 +3018,17 @@ def write_kuzu(
3018
3018
  meta_chain=meta_chain,
3019
3019
  )
3020
3020
  if verbose:
3021
- _verbose_stderr_line(f"[write] nodes written in {time.time() - t0:.2f}s")
3021
+ _verbose_stderr_line(f"[graph] writing · nodes written in {time.time() - t0:.2f}s")
3022
3022
  _populate_declares_rows(tables)
3023
3023
  _populate_overrides_rows(tables)
3024
3024
  t1 = time.time()
3025
3025
  _write_edges(conn, tables)
3026
3026
  if verbose:
3027
- _verbose_stderr_line(f"[write] edges written in {time.time() - t1:.2f}s")
3027
+ _verbose_stderr_line(f"[graph] writing · edges written in {time.time() - t1:.2f}s")
3028
3028
  t2 = time.time()
3029
3029
  _write_routes_and_exposes(conn, tables)
3030
3030
  if verbose:
3031
- _verbose_stderr_line(f"[write] routes/exposes written in {time.time() - t2:.2f}s")
3031
+ _verbose_stderr_line(f"[graph] writing · routes/exposes written in {time.time() - t2:.2f}s")
3032
3032
  _write_meta(conn, tables, source_root)
3033
3033
  conn.close()
3034
3034
 
@@ -3073,7 +3073,7 @@ def main() -> int:
3073
3073
  pass6_match_edges(tables, verbose=args.verbose)
3074
3074
  write_kuzu(kuzu_path, tables, source_root=root, verbose=args.verbose)
3075
3075
  if args.verbose:
3076
- _verbose_stderr_line(f"[done] kuzu at {kuzu_path}")
3076
+ _verbose_stderr_line(f"[graph] done · kuzu at {kuzu_path}")
3077
3077
  return 0
3078
3078
 
3079
3079
 
java_codebase_rag/cli.py CHANGED
@@ -100,19 +100,25 @@ _PIPELINE_SEP = "\u00b7"
100
100
 
101
101
 
102
102
  def _pipeline_header(subcommand: str, cfg: ResolvedOperatorConfig) -> None:
103
+ from java_codebase_rag.cli_format import bold
104
+
103
105
  root = cfg.source_root.resolve()
104
106
  idx = cfg.index_dir.resolve()
105
107
  print(
106
- f"java-codebase-rag {subcommand} {_PIPELINE_SEP} source={root} {_PIPELINE_SEP} index={idx}",
108
+ bold(f"java-codebase-rag {subcommand} {_PIPELINE_SEP} source={root} {_PIPELINE_SEP} index={idx}"),
107
109
  file=sys.stderr,
108
110
  flush=True,
109
111
  )
110
112
 
111
113
 
112
114
  def _pipeline_footer(subcommand: str, started: float, exit_code: int) -> None:
115
+ from java_codebase_rag.cli_format import bold, styled_check, styled_cross
116
+
113
117
  elapsed = time.perf_counter() - started
118
+ marker = styled_check() if exit_code == 0 else styled_cross()
114
119
  print(
115
- f"java-codebase-rag {subcommand} {_PIPELINE_SEP} finished in {elapsed:.2f}s (exit={exit_code})",
120
+ f"{marker} {bold(f'java-codebase-rag {subcommand} {_PIPELINE_SEP} finished in {elapsed:.2f}s')}"
121
+ + (f" (exit={exit_code})" if exit_code != 0 else ""),
116
122
  file=sys.stderr,
117
123
  flush=True,
118
124
  )
@@ -205,6 +211,22 @@ def _add_index_embedding_flags(p: argparse.ArgumentParser) -> None:
205
211
  p.add_argument("--embedding-device", type=str, default=None, help="Override SBERT_DEVICE / YAML embedding.device")
206
212
 
207
213
 
214
def _add_verbosity_flags(p: argparse.ArgumentParser) -> None:
    """Register the mutually exclusive ``--quiet/-q`` and ``--verbose/-v`` flags.

    Both are ``store_true`` options stored as ``quiet`` and ``verbose`` on the
    parsed namespace; argparse rejects a command line that passes both.
    """
    exclusive = p.add_mutually_exclusive_group()
    flag_table = (
        (
            ("--quiet", "-q"),
            "quiet",
            "Suppress stderr progress relay; stdout payload unchanged.",
        ),
        (
            ("--verbose", "-v"),
            "verbose",
            "Show full subprocess output (Lance warnings, brownfield events, progress bars).",
        ),
    )
    for option_strings, dest_name, help_text in flag_table:
        exclusive.add_argument(
            *option_strings,
            action="store_true",
            dest=dest_name,
            help=help_text,
        )
228
+
229
+
208
230
  def _cmd_init(args: argparse.Namespace) -> int:
209
231
  cfg = _resolved_from_ns(args)
210
232
  _startup_hints(cfg)
@@ -227,10 +249,12 @@ def _cmd_init(args: argparse.Namespace) -> int:
227
249
 
228
250
  def work() -> int:
229
251
  env = cfg.subprocess_env()
252
+ verbose = bool(args.verbose)
230
253
  coco = run_cocoindex_update(
231
254
  env,
232
255
  full_reprocess=False,
233
256
  quiet=bool(args.quiet),
257
+ verbose=verbose,
234
258
  lance_project_root=None if args.quiet else cfg.source_root,
235
259
  )
236
260
  if coco.returncode != 0:
@@ -244,10 +268,13 @@ def _cmd_init(args: argparse.Namespace) -> int:
244
268
  }
245
269
  )
246
270
  return 1
271
+ if not args.quiet:
272
+ print(file=sys.stderr, flush=True)
247
273
  g = run_build_ast_graph(
248
274
  source_root=cfg.source_root,
249
275
  kuzu_path=cfg.kuzu_path,
250
- verbose=not args.quiet,
276
+ verbose=verbose,
277
+ quiet=bool(args.quiet),
251
278
  env=env,
252
279
  )
253
280
  if g.returncode != 0:
@@ -279,6 +306,7 @@ def _cmd_increment(args: argparse.Namespace) -> int:
279
306
  env,
280
307
  full_reprocess=False,
281
308
  quiet=bool(args.quiet),
309
+ verbose=bool(args.verbose),
282
310
  lance_project_root=None if args.quiet else cfg.source_root,
283
311
  )
284
312
  if coco.returncode != 0:
@@ -305,11 +333,12 @@ def _cmd_reprocess(args: argparse.Namespace) -> int:
305
333
 
306
334
  def work() -> int:
307
335
  env = cfg.subprocess_env()
336
+ verbose = bool(args.verbose)
308
337
  vectors_only = bool(getattr(args, "vectors_only", False))
309
338
  graph_only = bool(getattr(args, "graph_only", False))
310
339
 
311
340
  if vectors_only:
312
- coco = run_cocoindex_update(env, full_reprocess=True, quiet=bool(args.quiet))
341
+ coco = run_cocoindex_update(env, full_reprocess=True, quiet=bool(args.quiet), verbose=verbose)
313
342
  if _is_cocoindex_preflight_blocker(coco):
314
343
  payload: dict[str, Any] = {
315
344
  "success": False,
@@ -345,7 +374,8 @@ def _cmd_reprocess(args: argparse.Namespace) -> int:
345
374
  g = run_build_ast_graph(
346
375
  source_root=cfg.source_root,
347
376
  kuzu_path=cfg.kuzu_path,
348
- verbose=not args.quiet,
377
+ verbose=verbose,
378
+ quiet=bool(args.quiet),
349
379
  env=env,
350
380
  )
351
381
  if _is_graph_preflight_blocker(g):
@@ -381,7 +411,7 @@ def _cmd_reprocess(args: argparse.Namespace) -> int:
381
411
 
382
412
  import server # lazy: pulls sentence_transformers/torch/lancedb/kuzu
383
413
 
384
- result = asyncio.run(server.run_refresh_pipeline(quiet=bool(args.quiet)))
414
+ result = asyncio.run(server.run_refresh_pipeline(quiet=bool(args.quiet), verbose=verbose))
385
415
  payload = result.model_dump()
386
416
  _emit_reprocess_outcome(payload)
387
417
  return _reprocess_exit_code(payload)
@@ -473,6 +503,8 @@ def _cmd_meta(args: argparse.Namespace) -> int:
473
503
  payload["index_dir"] = str(cfg.index_dir.resolve())
474
504
  payload["kuzu_path"] = str(cfg.kuzu_path.resolve())
475
505
  payload["index_dir_source"] = cfg.index_dir_source
506
+ payload["hints_enabled"] = cfg.hints_enabled
507
+ payload["hints_enabled_source"] = cfg.hints_enabled_source
476
508
  _emit(payload)
477
509
  return 0 if payload.get("success") else 2
478
510
 
@@ -612,11 +644,7 @@ def build_parser() -> argparse.ArgumentParser:
612
644
  ),
613
645
  )
614
646
  _add_index_embedding_flags(init)
615
- init.add_argument(
616
- "--quiet",
617
- action="store_true",
618
- help="Suppress stderr progress relay; stdout payload unchanged.",
619
- )
647
+ _add_verbosity_flags(init)
620
648
  init.set_defaults(handler=_cmd_init)
621
649
 
622
650
  increment = subparsers.add_parser(
@@ -625,11 +653,7 @@ def build_parser() -> argparse.ArgumentParser:
625
653
  description="Runs cocoindex catch-up (no full reprocess). Does not rebuild Kuzu; see stderr warning.",
626
654
  )
627
655
  _add_index_embedding_flags(increment)
628
- increment.add_argument(
629
- "--quiet",
630
- action="store_true",
631
- help="Suppress stderr progress relay; stdout payload unchanged.",
632
- )
656
+ _add_verbosity_flags(increment)
633
657
  increment.set_defaults(handler=_cmd_increment)
634
658
 
635
659
  reprocess = subparsers.add_parser(
@@ -641,11 +665,7 @@ def build_parser() -> argparse.ArgumentParser:
641
665
  ),
642
666
  )
643
667
  _add_index_embedding_flags(reprocess)
644
- reprocess.add_argument(
645
- "--quiet",
646
- action="store_true",
647
- help="Suppress stderr progress relay; stdout payload unchanged.",
648
- )
668
+ _add_verbosity_flags(reprocess)
649
669
  _rex = reprocess.add_mutually_exclusive_group()
650
670
  _rex.add_argument(
651
671
  "--vectors-only",
@@ -667,8 +687,9 @@ def build_parser() -> argparse.ArgumentParser:
667
687
  _add_index_embedding_flags(erase)
668
688
  erase.add_argument("--yes", action="store_true", help="Confirm destructive deletion (required in CI)")
669
689
  erase.add_argument(
670
- "--quiet",
690
+ "--quiet", "-q",
671
691
  action="store_true",
692
+ dest="quiet",
672
693
  help="Suppress stderr progress relay; stdout payload unchanged.",
673
694
  )
674
695
  erase.set_defaults(handler=_cmd_erase)
@@ -0,0 +1,112 @@
1
+ """TTY-aware ANSI formatting for CLI stderr progress."""
2
+ from __future__ import annotations
3
+
4
+ import itertools
5
+ import sys
6
+ import threading
7
+ import time
8
+
9
# ANSI SGR escape sequences used to style stderr text.
_RESET = "\033[0m"  # appended after styled text to clear the attributes
_BOLD = "\033[1m"
_DIM = "\033[2m"
_GREEN = "\033[32m"
_RED = "\033[31m"
_CYAN = "\033[36m"

# Status glyphs; `styled_check` / `styled_cross` colour them on a terminal.
CHECK = "✓"
CROSS = "✗"

# Animation frames for the spinner, one character per frame.
_SPINNER_FRAMES = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"

# Byte substrings that mark a line as noise (see `is_noise_line`).
_NOISE_CONTAINS: tuple[bytes, ...] = (
    b"lance::",
    b"FutureWarning",
    b"Loading weights:",
    b'"event": "brownfield-',
    b"unknown producer source strategy",
    b"unknown client source strategy",
)


def is_noise_line(line: bytes) -> bool:
    """Return True when *line* contains any of the known noise substrings."""
    for marker in _NOISE_CONTAINS:
        if marker in line:
            return True
    return False


def stderr_is_tty() -> bool:
    """Return whether ``sys.stderr`` reports being attached to a terminal."""
    if not hasattr(sys.stderr, "isatty"):
        return False
    return sys.stderr.isatty()


def _styled(text: str, *codes: str) -> str:
    """Wrap *text* in the given escape codes, or return it bare off-terminal."""
    if stderr_is_tty():
        prefix = "".join(codes)
        return prefix + text + _RESET
    return text


def bold(text: str) -> str:
    """Return *text* in bold when stderr is a terminal."""
    return _styled(text, _BOLD)


def dim(text: str) -> str:
    """Return *text* dimmed when stderr is a terminal."""
    return _styled(text, _DIM)


def green(text: str) -> str:
    """Return *text* in green when stderr is a terminal."""
    return _styled(text, _GREEN)


def red(text: str) -> str:
    """Return *text* in red when stderr is a terminal."""
    return _styled(text, _RED)


def cyan(text: str) -> str:
    """Return *text* in cyan when stderr is a terminal."""
    return _styled(text, _CYAN)


def bold_green(text: str) -> str:
    """Return *text* in bold green when stderr is a terminal."""
    return _styled(text, _BOLD, _GREEN)


def bold_red(text: str) -> str:
    """Return *text* in bold red when stderr is a terminal."""
    return _styled(text, _BOLD, _RED)


def bold_cyan(text: str) -> str:
    """Return *text* in bold cyan when stderr is a terminal."""
    return _styled(text, _BOLD, _CYAN)


def styled_check() -> str:
    """Return the check glyph, green on a terminal and plain otherwise."""
    if stderr_is_tty():
        return green(CHECK)
    return CHECK


def styled_cross() -> str:
    """Return the cross glyph, red on a terminal and plain otherwise."""
    if stderr_is_tty():
        return red(CROSS)
    return CROSS
83
+
84
+
85
class Spinner:
    """Animated one-line progress indicator drawn on stderr until stopped.

    A daemon thread redraws the line roughly every 0.3 seconds with the next
    frame, the label and the whole seconds elapsed since the thread began.
    """

    def __init__(self, label: str) -> None:
        self._label = label
        self._stop = threading.Event()
        self._thread: threading.Thread | None = None

    def start(self) -> None:
        """Launch the background thread that draws the animation."""
        worker = threading.Thread(target=self._run, name="spinner", daemon=True)
        self._thread = worker
        worker.start()

    def stop(self) -> None:
        """Signal the thread to finish, wait briefly, then erase the line."""
        self._stop.set()
        worker = self._thread
        if worker is not None:
            worker.join(timeout=2.0)
        out = sys.stderr.buffer
        # Carriage return followed by the ANSI "erase entire line" sequence.
        out.write(b"\r\x1b[2K")
        out.flush()

    def _run(self) -> None:
        """Thread body: redraw the line until the stop event is set."""
        began = time.monotonic()
        for glyph in itertools.cycle(_SPINNER_FRAMES):
            # wait() doubles as the frame delay; True means stop was requested.
            if self._stop.wait(0.3):
                break
            seconds = time.monotonic() - began
            rendered = f"\r{glyph} {self._label} · {seconds:.0f}s"
            sys.stderr.buffer.write(rendered.encode())
            sys.stderr.buffer.flush()
@@ -3,32 +3,66 @@ from __future__ import annotations
3
3
 
4
4
  import asyncio
5
5
  import sys
6
- from pathlib import Path
7
6
 
7
+ from java_codebase_rag.cli_format import bold_cyan, is_noise_line, styled_check, styled_cross
8
8
 
9
- def emit_lance_cocoindex_start(project_root: Path) -> None:
10
- root = project_root.expanduser().resolve()
9
+
10
+ def emit_vectors_start() -> None:
11
11
  print(
12
- f"[lance] running cocoindex update (project_root={root})",
12
+ bold_cyan("[vectors]") + " running · cocoindex update",
13
13
  file=sys.stderr,
14
14
  flush=True,
15
15
  )
16
16
 
17
17
 
18
- def emit_lance_cocoindex_finish(*, elapsed_s: float, exit_code: int) -> None:
18
+ def emit_vectors_finish(*, elapsed_s: float, exit_code: int) -> None:
19
+ marker = styled_check() if exit_code == 0 else styled_cross()
19
20
  print(
20
- f"[lance] cocoindex update finished in {elapsed_s:.2f}s (exit={exit_code})",
21
+ f"{marker} {bold_cyan('[vectors]')} finished · {elapsed_s:.2f}s"
22
+ + (f" (exit={exit_code})" if exit_code != 0 else ""),
21
23
  file=sys.stderr,
22
24
  flush=True,
23
25
  )
24
26
 
25
27
 
28
class _AsyncLineFilter:
    """Buffer byte chunks and relay only non-noise lines to stderr (async drain path).

    Chunks may end mid-line, so bytes are held until a newline arrives. A line
    is dropped when ``is_noise_line`` matches it, and so is every line that
    directly follows a dropped line and starts with a space or tab (treated as
    a continuation of the noise line). ``flush`` applies the same rule to the
    unterminated tail left in the buffer.
    """

    def __init__(self) -> None:
        self._buf = bytearray()
        # True while the most recent complete line was dropped as noise.
        self._suppress_next = False

    def _is_suppressed(self, line: bytes | bytearray) -> bool:
        """Return True when *line* must be dropped; updates continuation state."""
        if is_noise_line(line):
            self._suppress_next = True
            return True
        if self._suppress_next and line[:1] in (b" ", b"\t"):
            return True
        self._suppress_next = False
        return False

    def feed(self, chunk: bytes) -> None:
        """Append *chunk* and relay every complete, non-suppressed line."""
        self._buf.extend(chunk)
        while b"\n" in self._buf:
            line, self._buf = self._buf.split(b"\n", 1)
            line += b"\n"
            if self._is_suppressed(line):
                continue
            sys.stderr.buffer.write(line)
            sys.stderr.buffer.flush()

    def flush(self) -> None:
        """Relay the unterminated tail, if any, then reset all state.

        The tail goes through the same suppression check as complete lines, so
        an indented continuation of a noise line is not leaked just because
        the stream ended before its newline.
        """
        if self._buf:
            tail = bytes(self._buf)
            if not self._is_suppressed(tail):
                sys.stderr.buffer.write(tail)
                sys.stderr.buffer.flush()
        self._buf.clear()
        self._suppress_next = False
57
+
58
+
26
59
  async def accumulate_and_relay_subprocess_streams(
27
60
  proc: asyncio.subprocess.Process,
28
61
  *,
29
62
  relay: bool,
63
+ verbose: bool = True,
30
64
  ) -> tuple[bytes, bytes]:
31
- """Read stdout and stderr until EOF; optionally copy each chunk verbatim to stderr."""
65
+ """Read stdout and stderr until EOF; optionally copy non-noise stderr chunks to stderr."""
32
66
  stdout = proc.stdout
33
67
  stderr = proc.stderr
34
68
  if stdout is None or stderr is None:
@@ -36,17 +70,29 @@ async def accumulate_and_relay_subprocess_streams(
36
70
 
37
71
  out_buf = bytearray()
38
72
  err_buf = bytearray()
73
+ filt = _AsyncLineFilter() if (relay and not verbose) else None
74
+
75
+ async def drain_stdout(reader: asyncio.StreamReader, target: bytearray) -> None:
76
+ while True:
77
+ chunk = await reader.read(65536)
78
+ if not chunk:
79
+ break
80
+ target.extend(chunk)
39
81
 
40
- async def drain(reader: asyncio.StreamReader, target: bytearray) -> None:
82
+ async def drain_stderr(reader: asyncio.StreamReader, target: bytearray) -> None:
41
83
  while True:
42
84
  chunk = await reader.read(65536)
43
85
  if not chunk:
44
86
  break
45
87
  target.extend(chunk)
46
- if relay:
88
+ if filt is not None:
89
+ filt.feed(chunk)
90
+ elif relay:
47
91
  sys.stderr.buffer.write(chunk)
48
92
  sys.stderr.buffer.flush()
49
93
 
50
- await asyncio.gather(drain(stdout, out_buf), drain(stderr, err_buf))
94
+ await asyncio.gather(drain_stdout(stdout, out_buf), drain_stderr(stderr, err_buf))
51
95
  await proc.wait()
96
+ if filt is not None:
97
+ filt.flush()
52
98
  return bytes(out_buf), bytes(err_buf)
@@ -19,7 +19,7 @@ YAML_CONFIG_FILENAMES = (".java-codebase-rag.yml", ".java-codebase-rag.yaml")
19
19
  LEGACY_YAML_FILENAMES = (".lancedb-mcp.yml", ".lancedb-mcp.yaml")
20
20
 
21
21
  ENV_INDEX_DIR = "JAVA_CODEBASE_RAG_INDEX_DIR"
22
- # Public operator contract is five names: INDEX_DIR, DEBUG_CONTEXT, RUN_HEAVY, SBERT_MODEL, SBERT_DEVICE.
22
+ # Public operator contract is six names: INDEX_DIR, DEBUG_CONTEXT, RUN_HEAVY, SBERT_MODEL, SBERT_DEVICE, HINTS_ENABLED.
23
23
  # SOURCE_ROOT is still required for MCP / subprocess Java tree resolution (see mcp.json.example); it is not folded into the headline six names above.
24
24
  ENV_SOURCE_ROOT = "JAVA_CODEBASE_RAG_SOURCE_ROOT"
25
25
  ENV_DEBUG_CONTEXT = "JAVA_CODEBASE_RAG_DEBUG_CONTEXT"
@@ -146,9 +146,11 @@ class ResolvedOperatorConfig:
146
146
  cocoindex_db: Path
147
147
  embedding_model: str
148
148
  embedding_device: str | None
149
+ hints_enabled: bool
149
150
  index_dir_source: SettingSource
150
151
  embedding_model_source: SettingSource
151
152
  embedding_device_source: SettingSource
153
+ hints_enabled_source: SettingSource
152
154
 
153
155
  def apply_to_os_environ(self) -> None:
154
156
  """Make downstream modules (server, kuzu_queries, flows) see a consistent environment.
@@ -218,6 +220,29 @@ def _pick_optional_device(
218
220
  return None, "default"
219
221
 
220
222
 
223
def _pick_bool(
    *,
    env_key: str,
    yaml_dict: dict[str, Any],
    yaml_path: tuple[str, ...],
    default: bool,
) -> tuple[bool, SettingSource]:
    """Resolve a boolean setting from the environment, then YAML, then a default.

    The environment variable wins when its stripped, lower-cased value is one
    of ``1/true/yes`` or ``0/false/no``; any other value is ignored. Otherwise
    *yaml_path* is followed through nested dicts of *yaml_dict* and the value
    is used only if it is a real ``bool``. Failing both, *default* is returned.

    Returns:
        ``(value, source)`` where *source* is ``"env"``, ``"yaml"`` or
        ``"default"``.
    """
    recognised = {
        "1": True,
        "true": True,
        "yes": True,
        "0": False,
        "false": False,
        "no": False,
    }
    token = os.environ.get(env_key, "").strip().lower()
    from_env = recognised.get(token)
    if from_env is not None:
        return from_env, "env"

    node: Any = yaml_dict
    for key in yaml_path:
        if isinstance(node, dict) and key in node:
            node = node.get(key)
        else:
            # Path does not exist in the YAML document.
            node = None
            break

    if isinstance(node, bool):
        return node, "yaml"
    return default, "default"
244
+
245
+
221
246
  def _resolve_index_dir_path(
222
247
  *,
223
248
  source_root: Path,
@@ -270,6 +295,12 @@ def resolve_operator_config(
270
295
  env_key="SBERT_DEVICE",
271
296
  yaml_dict=yaml_dict,
272
297
  )
298
+ hints, hints_src = _pick_bool(
299
+ env_key="JAVA_CODEBASE_RAG_HINTS_ENABLED",
300
+ yaml_dict=yaml_dict,
301
+ yaml_path=("hints", "enabled"),
302
+ default=True,
303
+ )
273
304
  ku = index_dir / "code_graph.kuzu"
274
305
  coco = index_dir / "cocoindex.db"
275
306
  return ResolvedOperatorConfig(
@@ -279,9 +310,11 @@ def resolve_operator_config(
279
310
  cocoindex_db=coco,
280
311
  embedding_model=model,
281
312
  embedding_device=device,
313
+ hints_enabled=hints,
282
314
  index_dir_source=index_src,
283
315
  embedding_model_source=model_src,
284
316
  embedding_device_source=device_src,
317
+ hints_enabled_source=hints_src,
285
318
  )
286
319
 
287
320
 
@@ -2,13 +2,15 @@
2
2
  from __future__ import annotations
3
3
 
4
4
  import os
5
+ import shutil
5
6
  import subprocess
6
7
  import sys
7
8
  import threading
8
9
  import time
9
10
  from pathlib import Path
10
11
 
11
- from java_codebase_rag.cli_progress import emit_lance_cocoindex_finish, emit_lance_cocoindex_start
12
+ from java_codebase_rag.cli_format import Spinner, is_noise_line, stderr_is_tty
13
+ from java_codebase_rag.cli_progress import emit_vectors_finish, emit_vectors_start
12
14
 
13
15
  COCOINDEX_TARGET = "java_index_flow_lancedb.py:JavaCodeIndexLance"
14
16
 
@@ -18,14 +20,55 @@ def bundle_dir() -> Path:
18
20
 
19
21
 
20
22
  def cocoindex_bin() -> Path:
21
- return Path(sys.executable).parent / "cocoindex"
23
+ candidate = Path(sys.executable).parent / "cocoindex"
24
+ if candidate.is_file():
25
+ return candidate
26
+ found = shutil.which("cocoindex")
27
+ if found:
28
+ return Path(found)
29
+ return candidate
30
+
31
+
32
class _LineFilter:
    """Buffer byte chunks and relay only non-noise lines to stderr.

    Chunks may end mid-line, so bytes are held until a newline arrives. A line
    is dropped when ``is_noise_line`` matches it, and so is every line that
    directly follows a dropped line and starts with a space or tab (treated as
    a continuation of the noise line). ``flush`` applies the same rule to the
    unterminated tail left in the buffer.
    """

    def __init__(self) -> None:
        self._buf = bytearray()
        # True while the most recent complete line was dropped as noise.
        self._suppress_next = False

    def _is_suppressed(self, line: bytes | bytearray) -> bool:
        """Return True when *line* must be dropped; updates continuation state."""
        if is_noise_line(line):
            self._suppress_next = True
            return True
        if self._suppress_next and line[:1] in (b" ", b"\t"):
            return True
        self._suppress_next = False
        return False

    def feed(self, chunk: bytes) -> None:
        """Append *chunk* and relay every complete, non-suppressed line."""
        self._buf.extend(chunk)
        while b"\n" in self._buf:
            line, self._buf = self._buf.split(b"\n", 1)
            line += b"\n"
            if self._is_suppressed(line):
                continue
            sys.stderr.buffer.write(line)
            sys.stderr.buffer.flush()

    def flush(self) -> None:
        """Relay the unterminated tail, if any, then reset all state.

        The tail goes through the same suppression check as complete lines, so
        an indented continuation of a noise line is not leaked just because
        the stream ended before its newline.
        """
        if self._buf:
            tail = bytes(self._buf)
            if not self._is_suppressed(tail):
                sys.stderr.buffer.write(tail)
                sys.stderr.buffer.flush()
        self._buf.clear()
        self._suppress_next = False
22
61
 
23
62
 
24
- def _popen_stream_to_stderr(
63
+ def _popen_capturing_stderr(
25
64
  proc: subprocess.Popen[bytes],
65
+ *,
66
+ verbose: bool = True,
26
67
  ) -> tuple[str, str, int]:
68
+ """Capture stdout/stderr; relay stderr through noise filter (or verbatim in verbose mode)."""
27
69
  out_buf = bytearray()
28
70
  err_buf = bytearray()
71
+ filt = _LineFilter() if not verbose else None
29
72
 
30
73
  def drain_out() -> None:
31
74
  assert proc.stdout is not None
@@ -34,8 +77,6 @@ def _popen_stream_to_stderr(
34
77
  if not chunk:
35
78
  break
36
79
  out_buf.extend(chunk)
37
- sys.stderr.buffer.write(chunk)
38
- sys.stderr.buffer.flush()
39
80
 
40
81
  def drain_err() -> None:
41
82
  assert proc.stderr is not None
@@ -44,8 +85,11 @@ def _popen_stream_to_stderr(
44
85
  if not chunk:
45
86
  break
46
87
  err_buf.extend(chunk)
47
- sys.stderr.buffer.write(chunk)
48
- sys.stderr.buffer.flush()
88
+ if filt is not None:
89
+ filt.feed(chunk)
90
+ else:
91
+ sys.stderr.buffer.write(chunk)
92
+ sys.stderr.buffer.flush()
49
93
 
50
94
  t_out = threading.Thread(target=drain_out, name="stream-stdout", daemon=True)
51
95
  t_err = threading.Thread(target=drain_err, name="stream-stderr", daemon=True)
@@ -53,6 +97,8 @@ def _popen_stream_to_stderr(
53
97
  t_err.start()
54
98
  t_out.join()
55
99
  t_err.join()
100
+ if filt is not None:
101
+ filt.flush()
56
102
  code = proc.wait()
57
103
  return out_buf.decode(errors="replace"), err_buf.decode(errors="replace"), code
58
104
 
@@ -62,6 +108,7 @@ def run_cocoindex_update(
62
108
  *,
63
109
  full_reprocess: bool,
64
110
  quiet: bool,
111
+ verbose: bool = True,
65
112
  lance_project_root: Path | None = None,
66
113
  ) -> subprocess.CompletedProcess[str]:
67
114
  exe = cocoindex_bin()
@@ -70,7 +117,7 @@ def run_cocoindex_update(
70
117
  args=[str(exe)],
71
118
  returncode=127,
72
119
  stdout="",
73
- stderr=f"cocoindex not found next to Python: {exe}",
120
+ stderr=f"cocoindex not found: {exe}",
74
121
  )
75
122
  bd = bundle_dir()
76
123
  flow = bd / "java_index_flow_lancedb.py"
@@ -96,9 +143,14 @@ def run_cocoindex_update(
96
143
  text=True,
97
144
  )
98
145
 
99
- emit_lance = lance_project_root is not None
100
- if emit_lance:
101
- emit_lance_cocoindex_start(lance_project_root)
146
+ emit_progress = lance_project_root is not None
147
+ use_spinner = emit_progress and stderr_is_tty()
148
+ if emit_progress and not use_spinner:
149
+ emit_vectors_start()
150
+ spinner: Spinner | None = None
151
+ if use_spinner:
152
+ spinner = Spinner("[vectors] running · cocoindex update")
153
+ spinner.start()
102
154
  t0 = time.perf_counter()
103
155
  code = -1
104
156
  out_s, err_s = "", ""
@@ -111,10 +163,12 @@ def run_cocoindex_update(
111
163
  stderr=subprocess.PIPE,
112
164
  bufsize=0,
113
165
  )
114
- out_s, err_s, code = _popen_stream_to_stderr(proc)
166
+ out_s, err_s, code = _popen_capturing_stderr(proc, verbose=verbose)
115
167
  finally:
116
- if emit_lance:
117
- emit_lance_cocoindex_finish(elapsed_s=time.perf_counter() - t0, exit_code=code)
168
+ if spinner is not None:
169
+ spinner.stop()
170
+ if emit_progress:
171
+ emit_vectors_finish(elapsed_s=time.perf_counter() - t0, exit_code=code)
118
172
  return subprocess.CompletedProcess(args=cmd, returncode=code, stdout=out_s, stderr=err_s)
119
173
 
120
174
 
@@ -125,7 +179,7 @@ def run_cocoindex_drop(env: dict[str, str], *, quiet: bool) -> subprocess.Comple
125
179
  args=[str(exe)],
126
180
  returncode=127,
127
181
  stdout="",
128
- stderr=f"cocoindex not found next to Python: {exe}",
182
+ stderr=f"cocoindex not found: {exe}",
129
183
  )
130
184
  bd = bundle_dir()
131
185
  cmd = [str(exe), "drop", COCOINDEX_TARGET, "-f"]
@@ -145,6 +199,7 @@ def run_build_ast_graph(
145
199
  source_root: Path,
146
200
  kuzu_path: Path,
147
201
  verbose: bool,
202
+ quiet: bool = False,
148
203
  env: dict[str, str] | None = None,
149
204
  ) -> subprocess.CompletedProcess[str]:
150
205
  builder = bundle_dir() / "build_ast_graph.py"
@@ -163,9 +218,12 @@ def run_build_ast_graph(
163
218
  "--kuzu-path",
164
219
  str(kuzu_path),
165
220
  ]
166
- if verbose:
221
+ # Three-tier: --quiet (silent) / default (filtered progress) / --verbose (raw).
222
+ # Default passes --verbose so the builder emits per-pass progress lines,
223
+ # which the parent filters via _LineFilter. --verbose bypasses the filter.
224
+ if verbose or not quiet:
167
225
  cmd.append("--verbose")
168
- if not verbose:
226
+ if quiet:
169
227
  return subprocess.run(
170
228
  cmd,
171
229
  cwd=str(source_root),
@@ -181,7 +239,11 @@ def run_build_ast_graph(
181
239
  stderr=subprocess.PIPE,
182
240
  bufsize=0,
183
241
  )
184
- out_s, err_s, code = _popen_stream_to_stderr(proc)
242
+ out_s, err_s, code = _popen_capturing_stderr(proc, verbose=verbose)
243
+ if not verbose:
244
+ from java_codebase_rag.cli_format import bold_cyan, styled_check, styled_cross
245
+ marker = styled_check() if code == 0 else styled_cross()
246
+ print(f"{marker} {bold_cyan('[graph]')} done", file=sys.stderr, flush=True)
185
247
  return subprocess.CompletedProcess(args=cmd, returncode=code, stdout=out_s, stderr=err_s)
186
248
 
187
249
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.2.2
3
+ Version: 0.3.1
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
@@ -27,7 +27,6 @@ Requires-Dist: pathspec<2,>=1.0.4
27
27
  Requires-Dist: pyarrow<24,>=23.0.1
28
28
  Requires-Dist: PyYAML<7,>=6.0.3
29
29
  Requires-Dist: sentence-transformers<6,>=5.4.0
30
- Requires-Dist: transformers<=5.5.3,>=4.48.3
31
30
  Requires-Dist: tree-sitter<0.26,>=0.25.2
32
31
  Requires-Dist: tree-sitter-java<0.24,>=0.23.5
33
32
  Requires-Dist: unidiff<1,>=0.7.3
@@ -1,6 +1,6 @@
1
- ast_java.py,sha256=QIldCwZVFlJUu3BwBjjoHYAhu5Eas4dxMaAb3MSBWDg,98174
1
+ ast_java.py,sha256=NGs34vhoSypfHbKnNRpA9aj-gO4P6bED3ASmDWEVsZk,98881
2
2
  brownfield_events.py,sha256=yxXkKDgMb3VPtaiakGzncHM_EGnda8xIue6w90yYp8s,2055
3
- build_ast_graph.py,sha256=jF3EzHxZUWOYfghI0RQHZskKmYOzBzi8eFebqlq4kNg,118019
3
+ build_ast_graph.py,sha256=oK2C94tZqCL6KVxOHkrXTLfeF29xWXuBDF49KQxCMZo,118133
4
4
  chunk_heuristics.py,sha256=aQk2NOKxzUdqoUAJUO3G3LE0MN_bYZWNLQ0tkmj5uts,1813
5
5
  graph_enrich.py,sha256=2-njD2alm7FFpLn217ZG3f3ln-zqbdtGwTghOpd44oo,62021
6
6
  index_common.py,sha256=HT6FKHFJ084eFvd3fR1j8z8gf4eWoPHVW8GXLpw464I,285
@@ -9,19 +9,20 @@ java_index_v1_common.py,sha256=nF1KrSqboF_RRvWerG9knRRFmWwsrG_CvhgnsoZ8KqA,1154
9
9
  java_ontology.py,sha256=nM-oY8_91rmUudv9hAss1AMus9BFY9s5tTpAWjlCz00,16424
10
10
  kuzu_queries.py,sha256=9bQzrU311AOw_BcUp_KSGiZgPVSaLSU7y63XfcT_vqI,90137
11
11
  mcp_hints.py,sha256=3swh05LSiWur3tm3-yssndBsLxIxFhy501kBtJI8jJ0,42509
12
- mcp_v2.py,sha256=KfOWxAUpgVFVA2xaQT-DO1wqCybor1lAi6koavuQ0Jo,78721
12
+ mcp_v2.py,sha256=JFe62sYzJ2XiE6L3wAH8XG9_Ya2oOeJQ_hkiTmXFnSE,79065
13
13
  path_filtering.py,sha256=-oX16SYLWYwX9pcV1fu3vbVTIhY1GzFflT7J1E2tqPY,17122
14
14
  pr_analysis.py,sha256=Zaq90xYgMgrReV3vCGcFhOkK61gIRMAAIgs7ev-rJG4,18410
15
15
  search_lancedb.py,sha256=-XgtpbJ_3zDLiZ_vGKXjaLpl7RlvgyzUb7oAGoWkXO0,36754
16
- server.py,sha256=DGuRC1lSa0tfzDG6Z_JdRWzuTITEZsXwAWonTTVfQzs,25993
16
+ server.py,sha256=6pw3g29o7SwVYmRZV0NxSc2d_eFg521LkUn9kUCzbJw,26470
17
17
  java_codebase_rag/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
18
- java_codebase_rag/cli.py,sha256=hCjlmAXkS80noTX_bxm6BMiLIYEz_P5xfrw9C7LvkBE,27678
19
- java_codebase_rag/cli_progress.py,sha256=Vtio3RqJ3LkRoNpxrv8iGbEiX4klkTlJX-mR4l6oeBM,1586
20
- java_codebase_rag/config.py,sha256=h07zJrV8QoLv9hIhJZ2JgUI0Rh6uPBZUiPkGDEmTg_w,11687
21
- java_codebase_rag/pipeline.py,sha256=QyKNCrBsjdFU71N9Xygti-DdtMQQsrZ8aySisux46lI,5311
22
- java_codebase_rag-0.2.2.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
23
- java_codebase_rag-0.2.2.dist-info/METADATA,sha256=VWpfMNxxjvuY2x-rJviWa4pv-OlkX7R93ew-IkFyzjM,15112
24
- java_codebase_rag-0.2.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
25
- java_codebase_rag-0.2.2.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
26
- java_codebase_rag-0.2.2.dist-info/top_level.txt,sha256=5aIYoMkvJvvfXvf4iHn2OeSIM7PZXP-0j94eNESnwMw,242
27
- java_codebase_rag-0.2.2.dist-info/RECORD,,
18
+ java_codebase_rag/cli.py,sha256=7nwrnXdRGZvRKMYcHJDR0CecYsiBt1Fu1RJwrQAIMV0,28518
19
+ java_codebase_rag/cli_format.py,sha256=arU7P9W6Fvm7X_wzR1wJ8EfyxK1rDP_ESEhdA0ub4Mo,2579
20
+ java_codebase_rag/cli_progress.py,sha256=9jCqEagYOXs32SYVA31_sOCrONvYy7cl1CrdBD2Pg44,3168
21
+ java_codebase_rag/config.py,sha256=F6NtbRAlcs9M96bhrkQVeptOkvCFdd0rt_UJFKNiRfA,12633
22
+ java_codebase_rag/pipeline.py,sha256=p0u6yJlBYip2kr7LaCUYFHI4sv9inEgXpZTzcJK_rJ8,7583
23
+ java_codebase_rag-0.3.1.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
24
+ java_codebase_rag-0.3.1.dist-info/METADATA,sha256=o4nk9F_JRE6yX0Jv7O4A45c2vHPXIGVryjxPz8dWEDQ,15068
25
+ java_codebase_rag-0.3.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
26
+ java_codebase_rag-0.3.1.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
27
+ java_codebase_rag-0.3.1.dist-info/top_level.txt,sha256=5aIYoMkvJvvfXvf4iHn2OeSIM7PZXP-0j94eNESnwMw,242
28
+ java_codebase_rag-0.3.1.dist-info/RECORD,,
mcp_v2.py CHANGED
@@ -34,6 +34,18 @@ from kuzu_queries import KuzuGraph, OVERRIDE_AXIS_COMPOSED_EDGE_TYPES
34
34
  from mcp_hints import generate_hints, MCP_HINTS_STRUCTURED_FIELD_DESCRIPTION
35
35
  from search_lancedb import TABLES, run_search
36
36
 
37
+ # Module-level flag set by server.py at startup from resolved config.
38
+ _hints_enabled: bool = True
39
+
40
+
41
+ def set_hints_enabled(enabled: bool) -> None:
42
+ global _hints_enabled
43
+ _hints_enabled = enabled
44
+
45
+
46
+ def _hints_or_skip(tool: str, payload: dict) -> tuple[list, list]:
47
+ return generate_hints(tool, payload) if _hints_enabled else ([], [])
48
+
37
49
  DeclarationSymbolKind = Literal["class", "interface", "enum", "record", "annotation", "method", "constructor"]
38
50
 
39
51
  # Stored graph edge labels for one-hop neighbors. Composed DECLARES.* and OVERRIDDEN_BY.*
@@ -937,7 +949,7 @@ def search_v2(
937
949
  "limit": limit,
938
950
  "offset": offset,
939
951
  }
940
- raw_struct, raw_advisories = generate_hints("search", hint_payload)
952
+ raw_struct, raw_advisories = _hints_or_skip("search", hint_payload)
941
953
  return SearchOutput(
942
954
  success=True,
943
955
  results=hits,
@@ -1028,7 +1040,7 @@ def find_v2(
1028
1040
  "filter": filter_dump,
1029
1041
  "has_more_results": has_more_results,
1030
1042
  }
1031
- raw_struct, raw_advisories = generate_hints("find", hint_payload)
1043
+ raw_struct, raw_advisories = _hints_or_skip("find", hint_payload)
1032
1044
  return FindOutput(
1033
1045
  success=True,
1034
1046
  results=refs,
@@ -1108,7 +1120,7 @@ def describe_v2(
1108
1120
  f"java-codebase-rag unresolved-calls list --method-id {node_id} for the full list"
1109
1121
  )
1110
1122
  record = NodeRecord(id=ref.id, kind=kind, fqn=ref.fqn, data=data, edge_summary=edge_summary)
1111
- raw_struct, raw_advisories = generate_hints("describe", {"success": True, "record": record.model_dump()})
1123
+ raw_struct, raw_advisories = _hints_or_skip("describe", {"success": True, "record": record.model_dump()})
1112
1124
  return DescribeOutput(
1113
1125
  success=True,
1114
1126
  record=record,
@@ -1438,7 +1450,7 @@ def _resolve_finalize_success(
1438
1450
  "path_prefix_seed": path_prefix_seed,
1439
1451
  "target_service_seed": target_service_seed,
1440
1452
  }
1441
- raw_struct, raw_advisories = generate_hints("resolve", hint_payload)
1453
+ raw_struct, raw_advisories = _hints_or_skip("resolve", hint_payload)
1442
1454
  out = out.model_copy(update={
1443
1455
  "advisories": raw_advisories,
1444
1456
  "hints_structured": _to_structured_hints(raw_struct),
@@ -1972,7 +1984,7 @@ def neighbors_v2(
1972
1984
  "unresolved_count": unresolved_count,
1973
1985
  "calls_row_count": calls_row_count,
1974
1986
  }
1975
- raw_struct, raw_advisories = generate_hints("neighbors", neigh_payload)
1987
+ raw_struct, raw_advisories = _hints_or_skip("neighbors", neigh_payload)
1976
1988
  return NeighborsOutput(
1977
1989
  success=True,
1978
1990
  results=sliced,
server.py CHANGED
@@ -13,10 +13,10 @@ import mcp_v2
13
13
  from index_common import SBERT_MODEL
14
14
  from java_codebase_rag.cli_progress import (
15
15
  accumulate_and_relay_subprocess_streams,
16
- emit_lance_cocoindex_finish,
17
- emit_lance_cocoindex_start,
16
+ emit_vectors_finish,
17
+ emit_vectors_start,
18
18
  )
19
- from java_codebase_rag.config import emit_legacy_env_hints_if_present, resolved_sbert_model_for_process_env
19
+ from java_codebase_rag.config import emit_legacy_env_hints_if_present, resolved_sbert_model_for_process_env, resolve_operator_config
20
20
  from kuzu_queries import KuzuGraph, resolve_kuzu_path
21
21
  from mcp.server.fastmcp import FastMCP
22
22
  from pydantic import BaseModel, Field
@@ -201,7 +201,7 @@ def list_code_index_tables_payload() -> IndexInfoOutput:
201
201
  )
202
202
 
203
203
 
204
- async def run_refresh_pipeline(*, quiet: bool = False) -> RefreshIndexOutput:
204
+ async def run_refresh_pipeline(*, quiet: bool = False, verbose: bool = True) -> RefreshIndexOutput:
205
205
  root = _project_root()
206
206
  cocoindex_bin = Path(sys.executable).parent / "cocoindex"
207
207
  if not cocoindex_bin.is_file():
@@ -245,7 +245,7 @@ async def run_refresh_pipeline(*, quiet: bool = False) -> RefreshIndexOutput:
245
245
  phases_run=[],
246
246
  )
247
247
  else:
248
- emit_lance_cocoindex_start(root)
248
+ emit_vectors_start()
249
249
  t0 = time.perf_counter()
250
250
  code_c = -1
251
251
  try:
@@ -260,7 +260,7 @@ async def run_refresh_pipeline(*, quiet: bool = False) -> RefreshIndexOutput:
260
260
  stdout=asyncio.subprocess.PIPE,
261
261
  stderr=asyncio.subprocess.PIPE,
262
262
  )
263
- out_b, err_b = await accumulate_and_relay_subprocess_streams(proc, relay=True)
263
+ out_b, err_b = await accumulate_and_relay_subprocess_streams(proc, relay=True, verbose=verbose)
264
264
  code_c = proc.returncode if proc.returncode is not None else -1
265
265
  except Exception as exc:
266
266
  return RefreshIndexOutput(
@@ -269,7 +269,7 @@ async def run_refresh_pipeline(*, quiet: bool = False) -> RefreshIndexOutput:
269
269
  phases_run=[],
270
270
  )
271
271
  finally:
272
- emit_lance_cocoindex_finish(elapsed_s=time.perf_counter() - t0, exit_code=code_c)
272
+ emit_vectors_finish(elapsed_s=time.perf_counter() - t0, exit_code=code_c)
273
273
  assert proc is not None
274
274
  out = out_b.decode(errors="replace")
275
275
  err = err_b.decode(errors="replace")
@@ -279,6 +279,8 @@ async def run_refresh_pipeline(*, quiet: bool = False) -> RefreshIndexOutput:
279
279
  graph_out = ""
280
280
  graph_err = ""
281
281
  if ok:
282
+ if not quiet:
283
+ print(file=sys.stderr, flush=True)
282
284
  builder = Path(__file__).resolve().parent / "build_ast_graph.py"
283
285
  if builder.is_file():
284
286
  try:
@@ -303,7 +305,7 @@ async def run_refresh_pipeline(*, quiet: bool = False) -> RefreshIndexOutput:
303
305
  if quiet:
304
306
  gout_b, gerr_b = await gproc.communicate()
305
307
  else:
306
- gout_b, gerr_b = await accumulate_and_relay_subprocess_streams(gproc, relay=True)
308
+ gout_b, gerr_b = await accumulate_and_relay_subprocess_streams(gproc, relay=True, verbose=verbose)
307
309
  graph_code = gproc.returncode
308
310
  graph_out = gout_b.decode(errors="replace")
309
311
  graph_err = gerr_b.decode(errors="replace")
@@ -570,6 +572,14 @@ def create_mcp_server() -> FastMCP:
570
572
 
571
573
  def main() -> None:
572
574
  emit_legacy_env_hints_if_present()
575
+
576
+ # Load YAML config and apply embedding settings to environment
577
+ # This ensures SBERT_MODEL and SBERT_DEVICE from .java-codebase-rag.yml are available
578
+ # before any tool handler runs (same behavior as CLI path)
579
+ cfg = resolve_operator_config(source_root=_project_root())
580
+ cfg.apply_to_os_environ()
581
+ mcp_v2.set_hints_enabled(cfg.hints_enabled)
582
+
573
583
  asyncio.run(create_mcp_server().run_stdio_async())
574
584
 
575
585