java-codebase-rag 0.2.2__tar.gz → 0.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76)
  1. {java_codebase_rag-0.2.2/java_codebase_rag.egg-info → java_codebase_rag-0.3.1}/PKG-INFO +1 -2
  2. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/ast_java.py +19 -1
  3. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/build_ast_graph.py +26 -26
  4. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/java_codebase_rag/cli.py +43 -22
  5. java_codebase_rag-0.3.1/java_codebase_rag/cli_format.py +112 -0
  6. java_codebase_rag-0.3.1/java_codebase_rag/cli_progress.py +98 -0
  7. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/java_codebase_rag/config.py +34 -1
  8. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/java_codebase_rag/pipeline.py +80 -18
  9. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1/java_codebase_rag.egg-info}/PKG-INFO +1 -2
  10. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/java_codebase_rag.egg-info/SOURCES.txt +1 -0
  11. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/java_codebase_rag.egg-info/requires.txt +0 -1
  12. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/mcp_v2.py +17 -5
  13. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/pyproject.toml +1 -2
  14. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/server.py +18 -8
  15. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_agent_skills_static.py +3 -3
  16. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_cli_progress_stdout_invariant.py +22 -7
  17. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_cli_quiet_parity.py +7 -7
  18. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_client_role_rename.py +4 -2
  19. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_java_codebase_rag_cli.py +89 -2
  20. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_mcp_hints.py +26 -3
  21. java_codebase_rag-0.2.2/java_codebase_rag/cli_progress.py +0 -52
  22. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/LICENSE +0 -0
  23. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/README.md +0 -0
  24. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/brownfield_events.py +0 -0
  25. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/chunk_heuristics.py +0 -0
  26. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/graph_enrich.py +0 -0
  27. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/index_common.py +0 -0
  28. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/java_codebase_rag/__init__.py +0 -0
  29. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/java_codebase_rag.egg-info/dependency_links.txt +0 -0
  30. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/java_codebase_rag.egg-info/entry_points.txt +0 -0
  31. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/java_codebase_rag.egg-info/top_level.txt +0 -0
  32. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/java_index_flow_lancedb.py +0 -0
  33. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/java_index_v1_common.py +0 -0
  34. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/java_ontology.py +0 -0
  35. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/kuzu_queries.py +0 -0
  36. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/mcp_hints.py +0 -0
  37. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/path_filtering.py +0 -0
  38. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/pr_analysis.py +0 -0
  39. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/search_lancedb.py +0 -0
  40. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/setup.cfg +0 -0
  41. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_assign_endpoint_client_extraction.py +0 -0
  42. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_ast_graph_build.py +0 -0
  43. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_ast_java_calls.py +0 -0
  44. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_ast_java_capabilities.py +0 -0
  45. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_bank_chat_brownfield_integration.py +0 -0
  46. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_brownfield_clients.py +0 -0
  47. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_brownfield_events.py +0 -0
  48. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_brownfield_overrides.py +0 -0
  49. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_brownfield_routes.py +0 -0
  50. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_call_edge_matching.py +0 -0
  51. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_call_edges_e2e.py +0 -0
  52. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_call_graph_receiver_resolution.py +0 -0
  53. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_call_graph_smoke_roundtrip.py +0 -0
  54. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_call_invariant.py +0 -0
  55. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_client_hint_recovery.py +0 -0
  56. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_client_node_extraction.py +0 -0
  57. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_cross_service_resolution_flag.py +0 -0
  58. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_edge_navigation_doc.py +0 -0
  59. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_feign_not_exposer.py +0 -0
  60. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_graph_enrich.py +0 -0
  61. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_kuzu_queries.py +0 -0
  62. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_lancedb_e2e.py +0 -0
  63. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_mcp_tools.py +0 -0
  64. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_mcp_v2.py +0 -0
  65. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_mcp_v2_compose.py +0 -0
  66. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_meta_chain_core.py +0 -0
  67. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_outgoing_call_extraction.py +0 -0
  68. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_packaging_metadata.py +0 -0
  69. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_path_filtering.py +0 -0
  70. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_pr_analysis.py +0 -0
  71. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_resolve_routes_messaging_layer_c.py +0 -0
  72. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_route_extraction.py +0 -0
  73. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_schema_consistency.py +0 -0
  74. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_search_lancedb.py +0 -0
  75. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_search_lancedb_capability.py +0 -0
  76. {java_codebase_rag-0.2.2 → java_codebase_rag-0.3.1}/tests/test_string_value_atoms.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.2.2
3
+ Version: 0.3.1
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
@@ -27,7 +27,6 @@ Requires-Dist: pathspec<2,>=1.0.4
27
27
  Requires-Dist: pyarrow<24,>=23.0.1
28
28
  Requires-Dist: PyYAML<7,>=6.0.3
29
29
  Requires-Dist: sentence-transformers<6,>=5.4.0
30
- Requires-Dist: transformers<=5.5.3,>=4.48.3
31
30
  Requires-Dist: tree-sitter<0.26,>=0.25.2
32
31
  Requires-Dist: tree-sitter-java<0.24,>=0.23.5
33
32
  Requires-Dist: unidiff<1,>=0.7.3
@@ -83,7 +83,7 @@ _DTO_LOMBOK_ANNOTATIONS: frozenset[str] = frozenset({
83
83
  # Phase 11: `EDGE_SCHEMA` in `java_ontology.py` (canonical edge navigation schema; v14 re-index).
84
84
  # Phase 12: CALLS `callee_declaring_role`, supertype-walk dedup, pass3 unresolved counters (v15 re-index).
85
85
  # Bumps whenever extraction / enrichment semantics change.
86
- ONTOLOGY_VERSION = 15
86
+ ONTOLOGY_VERSION = 16
87
87
 
88
88
  ROLE_ANNOTATIONS: dict[str, str] = {
89
89
  # Spring Web
@@ -2732,6 +2732,19 @@ def infer_role(annotation_names: Iterable[str]) -> str:
2732
2732
  return "OTHER"
2733
2733
 
2734
2734
 
2735
def _type_injects_messaging(type_decl: "TypeDecl") -> bool:
    """Report whether ``type_decl`` injects a messaging template.

    A type qualifies when the declared type of any of its fields, or of any
    parameter of any of its constructors, is a key of
    ``_INJECTED_TYPES_TO_CAPABILITY``.
    """
    known = _INJECTED_TYPES_TO_CAPABILITY
    # Fields are checked first, mirroring the field-then-constructor order.
    if any(field.type_name in known for field in type_decl.fields):
        return True
    return any(
        param.type_name in known
        for ctor in type_decl.methods
        if ctor.is_constructor
        for param in ctor.parameters
    )
2746
+
2747
+
2735
2748
  def infer_role_for_type(type_decl: "TypeDecl") -> str:
2736
2749
  """Role inference that also detects DTO-like passive data carriers.
2737
2750
 
@@ -2763,6 +2776,11 @@ def infer_role_for_type(type_decl: "TypeDecl") -> str:
2763
2776
  if name.endswith(suffix) and name != suffix:
2764
2777
  return "DTO"
2765
2778
 
2779
+ # Types injecting messaging templates are outbound callers (CLIENT role),
2780
+ # symmetric with CONTROLLER covering both HTTP and messaging inbound.
2781
+ if _type_injects_messaging(type_decl):
2782
+ return "CLIENT"
2783
+
2766
2784
  return "OTHER"
2767
2785
 
2768
2786
 
@@ -70,13 +70,13 @@ log = logging.getLogger(__name__)
70
70
 
71
71
  _VERBOSE_STDERR_LOCK = threading.Lock()
72
72
 
73
- _PASS1_START = "[pass1] starting · parsing Java files under source root"
74
- _PASS2_START = "[pass2] starting · emitting EXTENDS / IMPLEMENTS / DECLARES rows"
75
- _PASS3_START = "[pass3] starting · call resolution (outgoing calls per site)"
76
- _PASS4_START = "[pass4] starting · route and EXPOSES extraction"
77
- _PASS5_START = "[pass5] starting · imperative HTTP_CALLS / ASYNC_CALLS edges"
78
- _PASS6_START = "[pass6] starting · cross-service call-edge matching"
79
- _WRITE_START = "[write] starting · writing Kuzu graph to disk"
73
+ _PASS1_START = "[graph] pass 1 · parsing Java files"
74
+ _PASS2_START = "[graph] pass 2 · emitting EXTENDS / IMPLEMENTS / DECLARES rows"
75
+ _PASS3_START = "[graph] pass 3 · call resolution (outgoing calls per site)"
76
+ _PASS4_START = "[graph] pass 4 · route and EXPOSES extraction"
77
+ _PASS5_START = "[graph] pass 5 · imperative HTTP_CALLS / ASYNC_CALLS edges"
78
+ _PASS6_START = "[graph] pass 6 · cross-service call-edge matching"
79
+ _WRITE_START = "[graph] writing · Kuzu graph to disk"
80
80
 
81
81
 
82
82
  def _verbose_stderr_line(content: str) -> None:
@@ -104,7 +104,7 @@ class _VerbosePassHeartbeats:
104
104
  t0 = time.monotonic()
105
105
  while not stop.wait(timeout=5.0):
106
106
  elapsed = int(time.monotonic() - t0)
107
- _verbose_stderr_line(f"{tag} running {elapsed}s elapsed")
107
+ _verbose_stderr_line(f"{tag} · {elapsed}s elapsed")
108
108
 
109
109
  self._thr = threading.Thread(target=worker, name=f"hb-{tag}", daemon=True)
110
110
  self._thr.start()
@@ -476,7 +476,7 @@ def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool) -> dict[str,
476
476
  slow_sec = float(raw_slow)
477
477
  except ValueError:
478
478
  slow_sec = 0.0
479
- with _VerbosePassHeartbeats("[pass1]", verbose=verbose):
479
+ with _VerbosePassHeartbeats("[graph] pass 1", verbose=verbose):
480
480
  if verbose and slow_sec > 0:
481
481
  time.sleep(slow_sec)
482
482
  for p in iter_java_source_files(root, ignore=ignore):
@@ -521,7 +521,7 @@ def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool) -> dict[str,
521
521
  if verbose:
522
522
  elapsed = time.time() - t0
523
523
  _verbose_stderr_line(
524
- f"[pass1] parsed {n_files} files in {elapsed:.2f}s: "
524
+ f"[graph] pass 1 · parsed {n_files} files in {elapsed:.2f}s: "
525
525
  f"{len(tables.types)} types, {len(tables.members)} members, "
526
526
  f"{tables.parse_errors} parse errors, {tables.skipped_files} skipped",
527
527
  )
@@ -759,7 +759,7 @@ def pass2_edges(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
759
759
  seen_inj: set[tuple[str, str, str, str]] = set()
760
760
  if verbose:
761
761
  _verbose_stderr_line(_PASS2_START)
762
- with _VerbosePassHeartbeats("[pass2]", verbose=verbose):
762
+ with _VerbosePassHeartbeats("[graph] pass 2", verbose=verbose):
763
763
  for fqn, entry in tables.types.items():
764
764
  ast = asts.get(entry.file_path)
765
765
  if ast is None:
@@ -769,7 +769,7 @@ def pass2_edges(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
769
769
  if verbose:
770
770
  elapsed = time.time() - t0
771
771
  _verbose_stderr_line(
772
- f"[pass2] emitted {len(tables.extends_rows)} EXTENDS, "
772
+ f"[graph] pass 2 · emitted {len(tables.extends_rows)} EXTENDS, "
773
773
  f"{len(tables.implements_rows)} IMPLEMENTS, "
774
774
  f"{len(tables.injects_rows)} INJECTS, "
775
775
  f"{len(tables.phantoms)} phantoms in {elapsed:.2f}s",
@@ -1432,7 +1432,7 @@ def pass3_calls(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
1432
1432
  _verbose_stderr_line(_PASS3_START)
1433
1433
  _build_member_indexes(tables)
1434
1434
  stats = CallResolutionStats()
1435
- with _VerbosePassHeartbeats("[pass3]", verbose=verbose):
1435
+ with _VerbosePassHeartbeats("[graph] pass 3", verbose=verbose):
1436
1436
  for rel_path, file_ast in asts.items():
1437
1437
  try:
1438
1438
  _process_file_calls(file_ast, rel_path, tables, stats)
@@ -1455,7 +1455,7 @@ def pass3_calls(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
1455
1455
  )
1456
1456
  log.info(msg)
1457
1457
  if verbose:
1458
- _verbose_stderr_line(f"[pass3] {msg}")
1458
+ _verbose_stderr_line(f"[graph] pass 3 · {msg}")
1459
1459
 
1460
1460
 
1461
1461
  _PATH_VAR_SEG = re.compile(r"^\{([^:{}]+)(?::([^}]*))?\}$") # whole path segment
@@ -1586,7 +1586,7 @@ def pass4_routes(
1586
1586
  meta_chain = collect_annotation_meta_chain(prs)
1587
1587
  if verbose:
1588
1588
  _verbose_stderr_line(_PASS4_START)
1589
- with _VerbosePassHeartbeats("[pass4]", verbose=verbose):
1589
+ with _VerbosePassHeartbeats("[graph] pass 4", verbose=verbose):
1590
1590
 
1591
1591
  for ast in asts.values():
1592
1592
  stats.routes_skipped_unresolved += ast.routes_skipped_unresolved
@@ -1710,7 +1710,7 @@ def pass4_routes(
1710
1710
  )
1711
1711
  log.info(msg)
1712
1712
  if verbose:
1713
- _verbose_stderr_line(f"[pass4] {msg}")
1713
+ _verbose_stderr_line(f"[graph] pass 4 · {msg}")
1714
1714
 
1715
1715
 
1716
1716
  def pass5_imperative_edges(
@@ -1763,7 +1763,7 @@ def pass5_imperative_edges(
1763
1763
 
1764
1764
  if verbose:
1765
1765
  _verbose_stderr_line(_PASS5_START)
1766
- with _VerbosePassHeartbeats("[pass5]", verbose=verbose):
1766
+ with _VerbosePassHeartbeats("[graph] pass 5", verbose=verbose):
1767
1767
  for member in sorted(tables.members, key=lambda x: x.node_id):
1768
1768
  if member.decl.is_constructor:
1769
1769
  continue
@@ -2018,7 +2018,7 @@ def pass5_imperative_edges(
2018
2018
  http_strategy = dict(sorted(tables.call_edge_stats.http_calls_by_strategy.items()))
2019
2019
  async_strategy = dict(sorted(tables.call_edge_stats.async_calls_by_strategy.items()))
2020
2020
  _verbose_stderr_line(
2021
- f"[pass5] HTTP_CALLS: {len(tables.http_call_rows)} edges, "
2021
+ f"[graph] pass 5 · HTTP_CALLS: {len(tables.http_call_rows)} edges, "
2022
2022
  f"ASYNC_CALLS: {len(tables.async_call_rows)} edges; "
2023
2023
  f"http_by_client_kind={http_client}, async_by_client_kind={async_client}, "
2024
2024
  f"http_by_strategy={http_strategy}, async_by_strategy={async_strategy}",
@@ -2165,7 +2165,7 @@ def pass6_match_edges(
2165
2165
 
2166
2166
  if verbose:
2167
2167
  _verbose_stderr_line(_PASS6_START)
2168
- with _VerbosePassHeartbeats("[pass6]", verbose=verbose):
2168
+ with _VerbosePassHeartbeats("[graph] pass 6", verbose=verbose):
2169
2169
  for row in tables.http_call_rows:
2170
2170
  if row.match != "unresolved":
2171
2171
  continue
@@ -2317,14 +2317,14 @@ def pass6_match_edges(
2317
2317
  first_http = ", ".join(suppressed_auto_cross_http)
2318
2318
  first_async = ", ".join(suppressed_auto_cross_async)
2319
2319
  _verbose_stderr_line(
2320
- f"[pass6] cross_service_resolution=brownfield_only:\n"
2320
+ f"[graph] pass 6 · cross_service_resolution=brownfield_only:\n"
2321
2321
  f" {n_bf} cross_service edges from brownfield layers,\n"
2322
2322
  f" {suppressed_auto_cross_count} auto-cross-service candidates suppressed -> unresolved\n"
2323
2323
  f" (first 5 http: {first_http})\n"
2324
2324
  f" (first 5 async: {first_async})",
2325
2325
  )
2326
2326
  _verbose_stderr_line(
2327
- f"[pass6] http_match={dict(sorted(tables.call_edge_stats.http_calls_match_breakdown.items()))}, "
2327
+ f"[graph] pass 6 · http_match={dict(sorted(tables.call_edge_stats.http_calls_match_breakdown.items()))}, "
2328
2328
  f"async_match={dict(sorted(tables.call_edge_stats.async_calls_match_breakdown.items()))}, "
2329
2329
  f"cross_service_calls_total={tables.call_edge_stats.cross_service_calls_total}",
2330
2330
  )
@@ -3004,7 +3004,7 @@ def write_kuzu(
3004
3004
  )
3005
3005
  if verbose:
3006
3006
  _verbose_stderr_line(_WRITE_START)
3007
- with _VerbosePassHeartbeats("[write]", verbose=verbose):
3007
+ with _VerbosePassHeartbeats("[graph] writing", verbose=verbose):
3008
3008
  db_path.parent.mkdir(parents=True, exist_ok=True)
3009
3009
  db = kuzu.Database(str(db_path))
3010
3010
  conn = kuzu.Connection(db)
@@ -3018,17 +3018,17 @@ def write_kuzu(
3018
3018
  meta_chain=meta_chain,
3019
3019
  )
3020
3020
  if verbose:
3021
- _verbose_stderr_line(f"[write] nodes written in {time.time() - t0:.2f}s")
3021
+ _verbose_stderr_line(f"[graph] writing · nodes written in {time.time() - t0:.2f}s")
3022
3022
  _populate_declares_rows(tables)
3023
3023
  _populate_overrides_rows(tables)
3024
3024
  t1 = time.time()
3025
3025
  _write_edges(conn, tables)
3026
3026
  if verbose:
3027
- _verbose_stderr_line(f"[write] edges written in {time.time() - t1:.2f}s")
3027
+ _verbose_stderr_line(f"[graph] writing · edges written in {time.time() - t1:.2f}s")
3028
3028
  t2 = time.time()
3029
3029
  _write_routes_and_exposes(conn, tables)
3030
3030
  if verbose:
3031
- _verbose_stderr_line(f"[write] routes/exposes written in {time.time() - t2:.2f}s")
3031
+ _verbose_stderr_line(f"[graph] writing · routes/exposes written in {time.time() - t2:.2f}s")
3032
3032
  _write_meta(conn, tables, source_root)
3033
3033
  conn.close()
3034
3034
 
@@ -3073,7 +3073,7 @@ def main() -> int:
3073
3073
  pass6_match_edges(tables, verbose=args.verbose)
3074
3074
  write_kuzu(kuzu_path, tables, source_root=root, verbose=args.verbose)
3075
3075
  if args.verbose:
3076
- _verbose_stderr_line(f"[done] kuzu at {kuzu_path}")
3076
+ _verbose_stderr_line(f"[graph] done · kuzu at {kuzu_path}")
3077
3077
  return 0
3078
3078
 
3079
3079
 
@@ -100,19 +100,25 @@ _PIPELINE_SEP = "\u00b7"
100
100
 
101
101
 
102
102
  def _pipeline_header(subcommand: str, cfg: ResolvedOperatorConfig) -> None:
103
+ from java_codebase_rag.cli_format import bold
104
+
103
105
  root = cfg.source_root.resolve()
104
106
  idx = cfg.index_dir.resolve()
105
107
  print(
106
- f"java-codebase-rag {subcommand} {_PIPELINE_SEP} source={root} {_PIPELINE_SEP} index={idx}",
108
+ bold(f"java-codebase-rag {subcommand} {_PIPELINE_SEP} source={root} {_PIPELINE_SEP} index={idx}"),
107
109
  file=sys.stderr,
108
110
  flush=True,
109
111
  )
110
112
 
111
113
 
112
114
  def _pipeline_footer(subcommand: str, started: float, exit_code: int) -> None:
115
+ from java_codebase_rag.cli_format import bold, styled_check, styled_cross
116
+
113
117
  elapsed = time.perf_counter() - started
118
+ marker = styled_check() if exit_code == 0 else styled_cross()
114
119
  print(
115
- f"java-codebase-rag {subcommand} {_PIPELINE_SEP} finished in {elapsed:.2f}s (exit={exit_code})",
120
+ f"{marker} {bold(f'java-codebase-rag {subcommand} {_PIPELINE_SEP} finished in {elapsed:.2f}s')}"
121
+ + (f" (exit={exit_code})" if exit_code != 0 else ""),
116
122
  file=sys.stderr,
117
123
  flush=True,
118
124
  )
@@ -205,6 +211,22 @@ def _add_index_embedding_flags(p: argparse.ArgumentParser) -> None:
205
211
  p.add_argument("--embedding-device", type=str, default=None, help="Override SBERT_DEVICE / YAML embedding.device")
206
212
 
207
213
 
214
+ def _add_verbosity_flags(p: argparse.ArgumentParser) -> None:
215
+ g = p.add_mutually_exclusive_group()
216
+ g.add_argument(
217
+ "--quiet", "-q",
218
+ action="store_true",
219
+ dest="quiet",
220
+ help="Suppress stderr progress relay; stdout payload unchanged.",
221
+ )
222
+ g.add_argument(
223
+ "--verbose", "-v",
224
+ action="store_true",
225
+ dest="verbose",
226
+ help="Show full subprocess output (Lance warnings, brownfield events, progress bars).",
227
+ )
228
+
229
+
208
230
  def _cmd_init(args: argparse.Namespace) -> int:
209
231
  cfg = _resolved_from_ns(args)
210
232
  _startup_hints(cfg)
@@ -227,10 +249,12 @@ def _cmd_init(args: argparse.Namespace) -> int:
227
249
 
228
250
  def work() -> int:
229
251
  env = cfg.subprocess_env()
252
+ verbose = bool(args.verbose)
230
253
  coco = run_cocoindex_update(
231
254
  env,
232
255
  full_reprocess=False,
233
256
  quiet=bool(args.quiet),
257
+ verbose=verbose,
234
258
  lance_project_root=None if args.quiet else cfg.source_root,
235
259
  )
236
260
  if coco.returncode != 0:
@@ -244,10 +268,13 @@ def _cmd_init(args: argparse.Namespace) -> int:
244
268
  }
245
269
  )
246
270
  return 1
271
+ if not args.quiet:
272
+ print(file=sys.stderr, flush=True)
247
273
  g = run_build_ast_graph(
248
274
  source_root=cfg.source_root,
249
275
  kuzu_path=cfg.kuzu_path,
250
- verbose=not args.quiet,
276
+ verbose=verbose,
277
+ quiet=bool(args.quiet),
251
278
  env=env,
252
279
  )
253
280
  if g.returncode != 0:
@@ -279,6 +306,7 @@ def _cmd_increment(args: argparse.Namespace) -> int:
279
306
  env,
280
307
  full_reprocess=False,
281
308
  quiet=bool(args.quiet),
309
+ verbose=bool(args.verbose),
282
310
  lance_project_root=None if args.quiet else cfg.source_root,
283
311
  )
284
312
  if coco.returncode != 0:
@@ -305,11 +333,12 @@ def _cmd_reprocess(args: argparse.Namespace) -> int:
305
333
 
306
334
  def work() -> int:
307
335
  env = cfg.subprocess_env()
336
+ verbose = bool(args.verbose)
308
337
  vectors_only = bool(getattr(args, "vectors_only", False))
309
338
  graph_only = bool(getattr(args, "graph_only", False))
310
339
 
311
340
  if vectors_only:
312
- coco = run_cocoindex_update(env, full_reprocess=True, quiet=bool(args.quiet))
341
+ coco = run_cocoindex_update(env, full_reprocess=True, quiet=bool(args.quiet), verbose=verbose)
313
342
  if _is_cocoindex_preflight_blocker(coco):
314
343
  payload: dict[str, Any] = {
315
344
  "success": False,
@@ -345,7 +374,8 @@ def _cmd_reprocess(args: argparse.Namespace) -> int:
345
374
  g = run_build_ast_graph(
346
375
  source_root=cfg.source_root,
347
376
  kuzu_path=cfg.kuzu_path,
348
- verbose=not args.quiet,
377
+ verbose=verbose,
378
+ quiet=bool(args.quiet),
349
379
  env=env,
350
380
  )
351
381
  if _is_graph_preflight_blocker(g):
@@ -381,7 +411,7 @@ def _cmd_reprocess(args: argparse.Namespace) -> int:
381
411
 
382
412
  import server # lazy: pulls sentence_transformers/torch/lancedb/kuzu
383
413
 
384
- result = asyncio.run(server.run_refresh_pipeline(quiet=bool(args.quiet)))
414
+ result = asyncio.run(server.run_refresh_pipeline(quiet=bool(args.quiet), verbose=verbose))
385
415
  payload = result.model_dump()
386
416
  _emit_reprocess_outcome(payload)
387
417
  return _reprocess_exit_code(payload)
@@ -473,6 +503,8 @@ def _cmd_meta(args: argparse.Namespace) -> int:
473
503
  payload["index_dir"] = str(cfg.index_dir.resolve())
474
504
  payload["kuzu_path"] = str(cfg.kuzu_path.resolve())
475
505
  payload["index_dir_source"] = cfg.index_dir_source
506
+ payload["hints_enabled"] = cfg.hints_enabled
507
+ payload["hints_enabled_source"] = cfg.hints_enabled_source
476
508
  _emit(payload)
477
509
  return 0 if payload.get("success") else 2
478
510
 
@@ -612,11 +644,7 @@ def build_parser() -> argparse.ArgumentParser:
612
644
  ),
613
645
  )
614
646
  _add_index_embedding_flags(init)
615
- init.add_argument(
616
- "--quiet",
617
- action="store_true",
618
- help="Suppress stderr progress relay; stdout payload unchanged.",
619
- )
647
+ _add_verbosity_flags(init)
620
648
  init.set_defaults(handler=_cmd_init)
621
649
 
622
650
  increment = subparsers.add_parser(
@@ -625,11 +653,7 @@ def build_parser() -> argparse.ArgumentParser:
625
653
  description="Runs cocoindex catch-up (no full reprocess). Does not rebuild Kuzu; see stderr warning.",
626
654
  )
627
655
  _add_index_embedding_flags(increment)
628
- increment.add_argument(
629
- "--quiet",
630
- action="store_true",
631
- help="Suppress stderr progress relay; stdout payload unchanged.",
632
- )
656
+ _add_verbosity_flags(increment)
633
657
  increment.set_defaults(handler=_cmd_increment)
634
658
 
635
659
  reprocess = subparsers.add_parser(
@@ -641,11 +665,7 @@ def build_parser() -> argparse.ArgumentParser:
641
665
  ),
642
666
  )
643
667
  _add_index_embedding_flags(reprocess)
644
- reprocess.add_argument(
645
- "--quiet",
646
- action="store_true",
647
- help="Suppress stderr progress relay; stdout payload unchanged.",
648
- )
668
+ _add_verbosity_flags(reprocess)
649
669
  _rex = reprocess.add_mutually_exclusive_group()
650
670
  _rex.add_argument(
651
671
  "--vectors-only",
@@ -667,8 +687,9 @@ def build_parser() -> argparse.ArgumentParser:
667
687
  _add_index_embedding_flags(erase)
668
688
  erase.add_argument("--yes", action="store_true", help="Confirm destructive deletion (required in CI)")
669
689
  erase.add_argument(
670
- "--quiet",
690
+ "--quiet", "-q",
671
691
  action="store_true",
692
+ dest="quiet",
672
693
  help="Suppress stderr progress relay; stdout payload unchanged.",
673
694
  )
674
695
  erase.set_defaults(handler=_cmd_erase)
@@ -0,0 +1,112 @@
1
+ """TTY-aware ANSI formatting for CLI stderr progress."""
2
+ from __future__ import annotations
3
+
4
+ import itertools
5
+ import sys
6
+ import threading
7
+ import time
8
+
9
+ _RESET = "\033[0m"
10
+ _BOLD = "\033[1m"
11
+ _DIM = "\033[2m"
12
+ _GREEN = "\033[32m"
13
+ _RED = "\033[31m"
14
+ _CYAN = "\033[36m"
15
+
16
+ CHECK = "✓"
17
+ CROSS = "✗"
18
+
19
+ _SPINNER_FRAMES = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"
20
+
21
+ _NOISE_CONTAINS: tuple[bytes, ...] = (
22
+ b"lance::",
23
+ b"FutureWarning",
24
+ b"Loading weights:",
25
+ b'"event": "brownfield-',
26
+ b"unknown producer source strategy",
27
+ b"unknown client source strategy",
28
+ )
29
+
30
+
31
def is_noise_line(line: bytes) -> bool:
    """Return True when ``line`` contains any byte marker listed in ``_NOISE_CONTAINS``."""
    for marker in _NOISE_CONTAINS:
        if marker in line:
            return True
    return False
33
+
34
+
35
def stderr_is_tty() -> bool:
    """Return True only when ``sys.stderr`` is usable and attached to a terminal.

    Returns False when ``sys.stderr`` has no ``isatty`` attribute (for example
    when it is ``None``) or when the stream has been closed.
    """
    isatty = getattr(sys.stderr, "isatty", None)
    if isatty is None:
        return False
    try:
        return bool(isatty())
    except ValueError:
        # isatty() on a closed stream raises ValueError; formatting helpers
        # must degrade to plain text instead of crashing progress output.
        return False
37
+
38
+
39
def _styled(text: str, *codes: str) -> str:
    """Wrap ``text`` in the given ANSI ``codes`` followed by a reset.

    When stderr is not a TTY the text is returned untouched.
    """
    if stderr_is_tty():
        prefix = "".join(codes)
        return prefix + text + _RESET
    return text
43
+
44
+
45
def bold(text: str) -> str:
    """Return ``text`` passed through ``_styled`` with the bold ANSI code."""
    emphasized = _styled(text, _BOLD)
    return emphasized
47
+
48
+
49
def dim(text: str) -> str:
    """Return ``text`` passed through ``_styled`` with the dim ANSI code."""
    faded = _styled(text, _DIM)
    return faded
51
+
52
+
53
def green(text: str) -> str:
    """Return ``text`` passed through ``_styled`` with the green ANSI code."""
    colored = _styled(text, _GREEN)
    return colored
55
+
56
+
57
def red(text: str) -> str:
    """Return ``text`` passed through ``_styled`` with the red ANSI code."""
    colored = _styled(text, _RED)
    return colored
59
+
60
+
61
def cyan(text: str) -> str:
    """Return ``text`` passed through ``_styled`` with the cyan ANSI code."""
    colored = _styled(text, _CYAN)
    return colored
63
+
64
+
65
def bold_green(text: str) -> str:
    """Return ``text`` passed through ``_styled`` with the bold and green ANSI codes."""
    codes = (_BOLD, _GREEN)
    return _styled(text, *codes)
67
+
68
+
69
def bold_red(text: str) -> str:
    """Return ``text`` passed through ``_styled`` with the bold and red ANSI codes."""
    codes = (_BOLD, _RED)
    return _styled(text, *codes)
71
+
72
+
73
def bold_cyan(text: str) -> str:
    """Return ``text`` passed through ``_styled`` with the bold and cyan ANSI codes."""
    codes = (_BOLD, _CYAN)
    return _styled(text, *codes)
75
+
76
+
77
def styled_check() -> str:
    """Return the success marker ``CHECK``, green when stderr is a TTY.

    ``green`` already returns its argument unchanged for non-TTY stderr, so no
    separate terminal probe is needed here.
    """
    return green(CHECK)
79
+
80
+
81
def styled_cross() -> str:
    """Return the failure marker ``CROSS``, red when stderr is a TTY.

    ``red`` already returns its argument unchanged for non-TTY stderr, so no
    separate terminal probe is needed here.
    """
    return red(CROSS)
83
+
84
+
85
class Spinner:
    """Braille spinner that overwrites the current stderr line until stopped.

    A daemon thread redraws ``<frame> <label> · <elapsed>s`` roughly every
    0.3 seconds by writing raw bytes to ``sys.stderr.buffer``. The spinner can
    be driven with ``start()`` / ``stop()`` or used as a context manager.
    """

    def __init__(self, label: str) -> None:
        self._label = label
        self._stop = threading.Event()
        self._thread: threading.Thread | None = None

    def __enter__(self) -> "Spinner":
        self.start()
        return self

    def __exit__(self, *exc_info: object) -> None:
        self.stop()

    def start(self) -> None:
        """Start the animation thread; does nothing if it is already running."""
        if self._thread is not None:
            # A second start() must not orphan the thread already drawing.
            return
        # Clear a previous stop request so a stopped spinner can be restarted.
        self._stop.clear()
        self._thread = threading.Thread(target=self._run, name="spinner", daemon=True)
        self._thread.start()

    def stop(self) -> None:
        """Stop the animation and erase the spinner line.

        Safe to call repeatedly or without a prior ``start()``; in those cases
        nothing is written to stderr.
        """
        thread, self._thread = self._thread, None
        if thread is None:
            return
        self._stop.set()
        thread.join(timeout=2.0)
        # Carriage return + "erase entire line" removes the last drawn frame.
        sys.stderr.buffer.write(b"\r\x1b[2K")
        sys.stderr.buffer.flush()

    def _run(self) -> None:
        """Draw frames until the stop event is set."""
        frames = itertools.cycle(_SPINNER_FRAMES)
        t0 = time.monotonic()
        # Event.wait doubles as the frame delay and the stop check.
        while not self._stop.wait(0.3):
            elapsed = time.monotonic() - t0
            frame = next(frames)
            line = f"\r{frame} {self._label} · {elapsed:.0f}s"
            sys.stderr.buffer.write(line.encode())
            sys.stderr.buffer.flush()
@@ -0,0 +1,98 @@
1
+ """CLI-owned stderr progress lines (shared by server reprocess path and pipeline helpers)."""
2
+ from __future__ import annotations
3
+
4
+ import asyncio
5
+ import sys
6
+
7
+ from java_codebase_rag.cli_format import bold_cyan, is_noise_line, styled_check, styled_cross
8
+
9
+
10
def emit_vectors_start() -> None:
    """Print the ``[vectors]`` stage start line to stderr and flush it."""
    banner = bold_cyan("[vectors]") + " running · cocoindex update"
    print(banner, file=sys.stderr, flush=True)
16
+
17
+
18
def emit_vectors_finish(*, elapsed_s: float, exit_code: int) -> None:
    """Print the ``[vectors]`` stage completion line to stderr and flush it.

    The line starts with a check marker for exit code 0 and a cross marker
    otherwise; the ``(exit=N)`` suffix is appended only for non-zero codes.
    """
    succeeded = exit_code == 0
    marker = styled_check() if succeeded else styled_cross()
    suffix = "" if succeeded else f" (exit={exit_code})"
    message = f"{marker} {bold_cyan('[vectors]')} finished · {elapsed_s:.2f}s" + suffix
    print(message, file=sys.stderr, flush=True)
26
+
27
+
28
class _AsyncLineFilter:
    """Buffers byte chunks and relays only non-noise lines to stderr (async drain path).

    A line is dropped when ``is_noise_line`` matches it. Lines that follow a
    dropped line and begin with a space or tab are treated as its continuation
    and dropped as well, until the first line that is neither.
    """

    def __init__(self) -> None:
        self._buf = bytearray()  # trailing partial line; never contains b"\n" between calls
        self._suppress_next = False  # True while inside a noise line's continuation run

    def _should_relay(self, line: bytes | bytearray) -> bool:
        """Decide whether ``line`` is relayed, updating the continuation state."""
        if is_noise_line(line):
            self._suppress_next = True
            return False
        if self._suppress_next and line[:1] in (b" ", b"\t"):
            return False
        self._suppress_next = False
        return True

    def feed(self, chunk: bytes) -> None:
        """Append ``chunk`` and relay every complete, non-noise line it finishes."""
        self._buf.extend(chunk)
        if b"\n" not in chunk:
            # The buffer held no newline before, so no line was completed.
            return
        # Split once instead of re-splitting (and copying) the rest per line.
        *complete, tail = self._buf.split(b"\n")
        self._buf = tail
        kept = []
        for body in complete:
            line = body + b"\n"
            if self._should_relay(line):
                kept.append(line)
        if kept:
            sys.stderr.buffer.write(b"".join(kept))
            sys.stderr.buffer.flush()

    def flush(self) -> None:
        """Relay any unterminated final line, then reset the filter state.

        The trailing fragment goes through the same noise and continuation
        checks as complete lines, so an indented remainder of a suppressed
        noise line is not leaked.
        """
        tail = bytes(self._buf)
        self._buf.clear()
        relay = bool(tail) and self._should_relay(tail)
        self._suppress_next = False
        if relay:
            sys.stderr.buffer.write(tail)
            sys.stderr.buffer.flush()
57
+
58
+
59
async def accumulate_and_relay_subprocess_streams(
    proc: asyncio.subprocess.Process,
    *,
    relay: bool,
    verbose: bool = True,
) -> tuple[bytes, bytes]:
    """Drain the child's stdout and stderr to EOF and return both as bytes.

    stdout is only captured. stderr is captured and, when ``relay`` is true,
    also copied to this process's stderr: verbatim if ``verbose`` is true,
    otherwise through ``_AsyncLineFilter``. Waits for the process to exit
    before returning ``(stdout_bytes, stderr_bytes)``.

    Raises:
        RuntimeError: if either pipe is missing on ``proc``.
    """
    out_reader, err_reader = proc.stdout, proc.stderr
    if out_reader is None or err_reader is None:
        raise RuntimeError("subprocess must be created with stdout=PIPE and stderr=PIPE")

    line_filter = _AsyncLineFilter() if (relay and not verbose) else None

    def forward(chunk: bytes) -> None:
        # Filtered relay takes precedence; raw relay is the verbose path.
        if line_filter is not None:
            line_filter.feed(chunk)
        elif relay:
            sys.stderr.buffer.write(chunk)
            sys.stderr.buffer.flush()

    async def pump(reader: asyncio.StreamReader, sink: bytearray, on_chunk=None) -> None:
        # An empty read signals EOF on the stream.
        while chunk := await reader.read(65536):
            sink.extend(chunk)
            if on_chunk is not None:
                on_chunk(chunk)

    captured_out = bytearray()
    captured_err = bytearray()
    # Both pipes are drained concurrently.
    await asyncio.gather(
        pump(out_reader, captured_out),
        pump(err_reader, captured_err, forward),
    )
    await proc.wait()
    if line_filter is not None:
        line_filter.flush()
    return bytes(captured_out), bytes(captured_err)