java-codebase-rag 0.2.1__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {java_codebase_rag-0.2.1/java_codebase_rag.egg-info → java_codebase_rag-0.3.0}/PKG-INFO +1 -1
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/ast_java.py +19 -1
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/build_ast_graph.py +26 -26
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/java_codebase_rag/cli.py +43 -22
- java_codebase_rag-0.3.0/java_codebase_rag/cli_format.py +112 -0
- java_codebase_rag-0.3.0/java_codebase_rag/cli_progress.py +98 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/java_codebase_rag/config.py +34 -1
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/java_codebase_rag/pipeline.py +70 -15
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0/java_codebase_rag.egg-info}/PKG-INFO +1 -1
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/java_codebase_rag.egg-info/SOURCES.txt +1 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/mcp_v2.py +17 -5
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/pyproject.toml +1 -1
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/server.py +18 -8
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_agent_skills_static.py +6 -7
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_cli_progress_stdout_invariant.py +22 -7
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_cli_quiet_parity.py +7 -7
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_client_role_rename.py +4 -2
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_java_codebase_rag_cli.py +88 -1
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_mcp_hints.py +26 -3
- java_codebase_rag-0.2.1/java_codebase_rag/cli_progress.py +0 -52
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/LICENSE +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/README.md +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/brownfield_events.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/chunk_heuristics.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/graph_enrich.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/index_common.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/java_codebase_rag/__init__.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/java_codebase_rag.egg-info/dependency_links.txt +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/java_codebase_rag.egg-info/entry_points.txt +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/java_codebase_rag.egg-info/requires.txt +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/java_codebase_rag.egg-info/top_level.txt +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/java_index_flow_lancedb.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/java_index_v1_common.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/java_ontology.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/kuzu_queries.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/mcp_hints.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/path_filtering.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/pr_analysis.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/search_lancedb.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/setup.cfg +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_assign_endpoint_client_extraction.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_ast_graph_build.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_ast_java_calls.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_ast_java_capabilities.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_bank_chat_brownfield_integration.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_brownfield_clients.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_brownfield_events.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_brownfield_overrides.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_brownfield_routes.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_call_edge_matching.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_call_edges_e2e.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_call_graph_receiver_resolution.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_call_graph_smoke_roundtrip.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_call_invariant.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_client_hint_recovery.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_client_node_extraction.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_cross_service_resolution_flag.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_edge_navigation_doc.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_feign_not_exposer.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_graph_enrich.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_kuzu_queries.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_lancedb_e2e.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_mcp_tools.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_mcp_v2.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_mcp_v2_compose.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_meta_chain_core.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_outgoing_call_extraction.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_packaging_metadata.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_path_filtering.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_pr_analysis.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_resolve_routes_messaging_layer_c.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_route_extraction.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_schema_consistency.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_search_lancedb.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_search_lancedb_capability.py +0 -0
- {java_codebase_rag-0.2.1 → java_codebase_rag-0.3.0}/tests/test_string_value_atoms.py +0 -0
|
@@ -83,7 +83,7 @@ _DTO_LOMBOK_ANNOTATIONS: frozenset[str] = frozenset({
|
|
|
83
83
|
# Phase 11: `EDGE_SCHEMA` in `java_ontology.py` (canonical edge navigation schema; v14 re-index).
|
|
84
84
|
# Phase 12: CALLS `callee_declaring_role`, supertype-walk dedup, pass3 unresolved counters (v15 re-index).
|
|
85
85
|
# Bumps whenever extraction / enrichment semantics change.
|
|
86
|
-
ONTOLOGY_VERSION = 15
|
|
86
|
+
ONTOLOGY_VERSION = 16
|
|
87
87
|
|
|
88
88
|
ROLE_ANNOTATIONS: dict[str, str] = {
|
|
89
89
|
# Spring Web
|
|
@@ -2732,6 +2732,19 @@ def infer_role(annotation_names: Iterable[str]) -> str:
|
|
|
2732
2732
|
return "OTHER"
|
|
2733
2733
|
|
|
2734
2734
|
|
|
2735
|
+
def _type_injects_messaging(type_decl: "TypeDecl") -> bool:
|
|
2736
|
+
"""True when the type injects a messaging template via field or constructor."""
|
|
2737
|
+
for fld in type_decl.fields:
|
|
2738
|
+
if fld.type_name in _INJECTED_TYPES_TO_CAPABILITY:
|
|
2739
|
+
return True
|
|
2740
|
+
for method in type_decl.methods:
|
|
2741
|
+
if method.is_constructor:
|
|
2742
|
+
for p in method.parameters:
|
|
2743
|
+
if p.type_name in _INJECTED_TYPES_TO_CAPABILITY:
|
|
2744
|
+
return True
|
|
2745
|
+
return False
|
|
2746
|
+
|
|
2747
|
+
|
|
2735
2748
|
def infer_role_for_type(type_decl: "TypeDecl") -> str:
|
|
2736
2749
|
"""Role inference that also detects DTO-like passive data carriers.
|
|
2737
2750
|
|
|
@@ -2763,6 +2776,11 @@ def infer_role_for_type(type_decl: "TypeDecl") -> str:
|
|
|
2763
2776
|
if name.endswith(suffix) and name != suffix:
|
|
2764
2777
|
return "DTO"
|
|
2765
2778
|
|
|
2779
|
+
# Types injecting messaging templates are outbound callers (CLIENT role),
|
|
2780
|
+
# symmetric with CONTROLLER covering both HTTP and messaging inbound.
|
|
2781
|
+
if _type_injects_messaging(type_decl):
|
|
2782
|
+
return "CLIENT"
|
|
2783
|
+
|
|
2766
2784
|
return "OTHER"
|
|
2767
2785
|
|
|
2768
2786
|
|
|
@@ -70,13 +70,13 @@ log = logging.getLogger(__name__)
|
|
|
70
70
|
|
|
71
71
|
_VERBOSE_STDERR_LOCK = threading.Lock()
|
|
72
72
|
|
|
73
|
-
_PASS1_START = "[
|
|
74
|
-
_PASS2_START = "[
|
|
75
|
-
_PASS3_START = "[
|
|
76
|
-
_PASS4_START = "[
|
|
77
|
-
_PASS5_START = "[
|
|
78
|
-
_PASS6_START = "[
|
|
79
|
-
_WRITE_START = "[
|
|
73
|
+
_PASS1_START = "[graph] pass 1 · parsing Java files"
|
|
74
|
+
_PASS2_START = "[graph] pass 2 · emitting EXTENDS / IMPLEMENTS / DECLARES rows"
|
|
75
|
+
_PASS3_START = "[graph] pass 3 · call resolution (outgoing calls per site)"
|
|
76
|
+
_PASS4_START = "[graph] pass 4 · route and EXPOSES extraction"
|
|
77
|
+
_PASS5_START = "[graph] pass 5 · imperative HTTP_CALLS / ASYNC_CALLS edges"
|
|
78
|
+
_PASS6_START = "[graph] pass 6 · cross-service call-edge matching"
|
|
79
|
+
_WRITE_START = "[graph] writing · Kuzu graph to disk"
|
|
80
80
|
|
|
81
81
|
|
|
82
82
|
def _verbose_stderr_line(content: str) -> None:
|
|
@@ -104,7 +104,7 @@ class _VerbosePassHeartbeats:
|
|
|
104
104
|
t0 = time.monotonic()
|
|
105
105
|
while not stop.wait(timeout=5.0):
|
|
106
106
|
elapsed = int(time.monotonic() - t0)
|
|
107
|
-
_verbose_stderr_line(f"{tag}
|
|
107
|
+
_verbose_stderr_line(f"{tag} · {elapsed}s elapsed")
|
|
108
108
|
|
|
109
109
|
self._thr = threading.Thread(target=worker, name=f"hb-{tag}", daemon=True)
|
|
110
110
|
self._thr.start()
|
|
@@ -476,7 +476,7 @@ def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool) -> dict[str,
|
|
|
476
476
|
slow_sec = float(raw_slow)
|
|
477
477
|
except ValueError:
|
|
478
478
|
slow_sec = 0.0
|
|
479
|
-
with _VerbosePassHeartbeats("[
|
|
479
|
+
with _VerbosePassHeartbeats("[graph] pass 1", verbose=verbose):
|
|
480
480
|
if verbose and slow_sec > 0:
|
|
481
481
|
time.sleep(slow_sec)
|
|
482
482
|
for p in iter_java_source_files(root, ignore=ignore):
|
|
@@ -521,7 +521,7 @@ def pass1_parse(root: Path, tables: GraphTables, *, verbose: bool) -> dict[str,
|
|
|
521
521
|
if verbose:
|
|
522
522
|
elapsed = time.time() - t0
|
|
523
523
|
_verbose_stderr_line(
|
|
524
|
-
f"[
|
|
524
|
+
f"[graph] pass 1 · parsed {n_files} files in {elapsed:.2f}s: "
|
|
525
525
|
f"{len(tables.types)} types, {len(tables.members)} members, "
|
|
526
526
|
f"{tables.parse_errors} parse errors, {tables.skipped_files} skipped",
|
|
527
527
|
)
|
|
@@ -759,7 +759,7 @@ def pass2_edges(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
|
|
|
759
759
|
seen_inj: set[tuple[str, str, str, str]] = set()
|
|
760
760
|
if verbose:
|
|
761
761
|
_verbose_stderr_line(_PASS2_START)
|
|
762
|
-
with _VerbosePassHeartbeats("[
|
|
762
|
+
with _VerbosePassHeartbeats("[graph] pass 2", verbose=verbose):
|
|
763
763
|
for fqn, entry in tables.types.items():
|
|
764
764
|
ast = asts.get(entry.file_path)
|
|
765
765
|
if ast is None:
|
|
@@ -769,7 +769,7 @@ def pass2_edges(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
|
|
|
769
769
|
if verbose:
|
|
770
770
|
elapsed = time.time() - t0
|
|
771
771
|
_verbose_stderr_line(
|
|
772
|
-
f"[
|
|
772
|
+
f"[graph] pass 2 · emitted {len(tables.extends_rows)} EXTENDS, "
|
|
773
773
|
f"{len(tables.implements_rows)} IMPLEMENTS, "
|
|
774
774
|
f"{len(tables.injects_rows)} INJECTS, "
|
|
775
775
|
f"{len(tables.phantoms)} phantoms in {elapsed:.2f}s",
|
|
@@ -1432,7 +1432,7 @@ def pass3_calls(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
|
|
|
1432
1432
|
_verbose_stderr_line(_PASS3_START)
|
|
1433
1433
|
_build_member_indexes(tables)
|
|
1434
1434
|
stats = CallResolutionStats()
|
|
1435
|
-
with _VerbosePassHeartbeats("[
|
|
1435
|
+
with _VerbosePassHeartbeats("[graph] pass 3", verbose=verbose):
|
|
1436
1436
|
for rel_path, file_ast in asts.items():
|
|
1437
1437
|
try:
|
|
1438
1438
|
_process_file_calls(file_ast, rel_path, tables, stats)
|
|
@@ -1455,7 +1455,7 @@ def pass3_calls(tables: GraphTables, asts: dict[str, JavaFileAst], *, verbose: b
|
|
|
1455
1455
|
)
|
|
1456
1456
|
log.info(msg)
|
|
1457
1457
|
if verbose:
|
|
1458
|
-
_verbose_stderr_line(f"[
|
|
1458
|
+
_verbose_stderr_line(f"[graph] pass 3 · {msg}")
|
|
1459
1459
|
|
|
1460
1460
|
|
|
1461
1461
|
_PATH_VAR_SEG = re.compile(r"^\{([^:{}]+)(?::([^}]*))?\}$") # whole path segment
|
|
@@ -1586,7 +1586,7 @@ def pass4_routes(
|
|
|
1586
1586
|
meta_chain = collect_annotation_meta_chain(prs)
|
|
1587
1587
|
if verbose:
|
|
1588
1588
|
_verbose_stderr_line(_PASS4_START)
|
|
1589
|
-
with _VerbosePassHeartbeats("[
|
|
1589
|
+
with _VerbosePassHeartbeats("[graph] pass 4", verbose=verbose):
|
|
1590
1590
|
|
|
1591
1591
|
for ast in asts.values():
|
|
1592
1592
|
stats.routes_skipped_unresolved += ast.routes_skipped_unresolved
|
|
@@ -1710,7 +1710,7 @@ def pass4_routes(
|
|
|
1710
1710
|
)
|
|
1711
1711
|
log.info(msg)
|
|
1712
1712
|
if verbose:
|
|
1713
|
-
_verbose_stderr_line(f"[
|
|
1713
|
+
_verbose_stderr_line(f"[graph] pass 4 · {msg}")
|
|
1714
1714
|
|
|
1715
1715
|
|
|
1716
1716
|
def pass5_imperative_edges(
|
|
@@ -1763,7 +1763,7 @@ def pass5_imperative_edges(
|
|
|
1763
1763
|
|
|
1764
1764
|
if verbose:
|
|
1765
1765
|
_verbose_stderr_line(_PASS5_START)
|
|
1766
|
-
with _VerbosePassHeartbeats("[
|
|
1766
|
+
with _VerbosePassHeartbeats("[graph] pass 5", verbose=verbose):
|
|
1767
1767
|
for member in sorted(tables.members, key=lambda x: x.node_id):
|
|
1768
1768
|
if member.decl.is_constructor:
|
|
1769
1769
|
continue
|
|
@@ -2018,7 +2018,7 @@ def pass5_imperative_edges(
|
|
|
2018
2018
|
http_strategy = dict(sorted(tables.call_edge_stats.http_calls_by_strategy.items()))
|
|
2019
2019
|
async_strategy = dict(sorted(tables.call_edge_stats.async_calls_by_strategy.items()))
|
|
2020
2020
|
_verbose_stderr_line(
|
|
2021
|
-
f"[
|
|
2021
|
+
f"[graph] pass 5 · HTTP_CALLS: {len(tables.http_call_rows)} edges, "
|
|
2022
2022
|
f"ASYNC_CALLS: {len(tables.async_call_rows)} edges; "
|
|
2023
2023
|
f"http_by_client_kind={http_client}, async_by_client_kind={async_client}, "
|
|
2024
2024
|
f"http_by_strategy={http_strategy}, async_by_strategy={async_strategy}",
|
|
@@ -2165,7 +2165,7 @@ def pass6_match_edges(
|
|
|
2165
2165
|
|
|
2166
2166
|
if verbose:
|
|
2167
2167
|
_verbose_stderr_line(_PASS6_START)
|
|
2168
|
-
with _VerbosePassHeartbeats("[
|
|
2168
|
+
with _VerbosePassHeartbeats("[graph] pass 6", verbose=verbose):
|
|
2169
2169
|
for row in tables.http_call_rows:
|
|
2170
2170
|
if row.match != "unresolved":
|
|
2171
2171
|
continue
|
|
@@ -2317,14 +2317,14 @@ def pass6_match_edges(
|
|
|
2317
2317
|
first_http = ", ".join(suppressed_auto_cross_http)
|
|
2318
2318
|
first_async = ", ".join(suppressed_auto_cross_async)
|
|
2319
2319
|
_verbose_stderr_line(
|
|
2320
|
-
f"[
|
|
2320
|
+
f"[graph] pass 6 · cross_service_resolution=brownfield_only:\n"
|
|
2321
2321
|
f" {n_bf} cross_service edges from brownfield layers,\n"
|
|
2322
2322
|
f" {suppressed_auto_cross_count} auto-cross-service candidates suppressed -> unresolved\n"
|
|
2323
2323
|
f" (first 5 http: {first_http})\n"
|
|
2324
2324
|
f" (first 5 async: {first_async})",
|
|
2325
2325
|
)
|
|
2326
2326
|
_verbose_stderr_line(
|
|
2327
|
-
f"[
|
|
2327
|
+
f"[graph] pass 6 · http_match={dict(sorted(tables.call_edge_stats.http_calls_match_breakdown.items()))}, "
|
|
2328
2328
|
f"async_match={dict(sorted(tables.call_edge_stats.async_calls_match_breakdown.items()))}, "
|
|
2329
2329
|
f"cross_service_calls_total={tables.call_edge_stats.cross_service_calls_total}",
|
|
2330
2330
|
)
|
|
@@ -3004,7 +3004,7 @@ def write_kuzu(
|
|
|
3004
3004
|
)
|
|
3005
3005
|
if verbose:
|
|
3006
3006
|
_verbose_stderr_line(_WRITE_START)
|
|
3007
|
-
with _VerbosePassHeartbeats("[
|
|
3007
|
+
with _VerbosePassHeartbeats("[graph] writing", verbose=verbose):
|
|
3008
3008
|
db_path.parent.mkdir(parents=True, exist_ok=True)
|
|
3009
3009
|
db = kuzu.Database(str(db_path))
|
|
3010
3010
|
conn = kuzu.Connection(db)
|
|
@@ -3018,17 +3018,17 @@ def write_kuzu(
|
|
|
3018
3018
|
meta_chain=meta_chain,
|
|
3019
3019
|
)
|
|
3020
3020
|
if verbose:
|
|
3021
|
-
_verbose_stderr_line(f"[
|
|
3021
|
+
_verbose_stderr_line(f"[graph] writing · nodes written in {time.time() - t0:.2f}s")
|
|
3022
3022
|
_populate_declares_rows(tables)
|
|
3023
3023
|
_populate_overrides_rows(tables)
|
|
3024
3024
|
t1 = time.time()
|
|
3025
3025
|
_write_edges(conn, tables)
|
|
3026
3026
|
if verbose:
|
|
3027
|
-
_verbose_stderr_line(f"[
|
|
3027
|
+
_verbose_stderr_line(f"[graph] writing · edges written in {time.time() - t1:.2f}s")
|
|
3028
3028
|
t2 = time.time()
|
|
3029
3029
|
_write_routes_and_exposes(conn, tables)
|
|
3030
3030
|
if verbose:
|
|
3031
|
-
_verbose_stderr_line(f"[
|
|
3031
|
+
_verbose_stderr_line(f"[graph] writing · routes/exposes written in {time.time() - t2:.2f}s")
|
|
3032
3032
|
_write_meta(conn, tables, source_root)
|
|
3033
3033
|
conn.close()
|
|
3034
3034
|
|
|
@@ -3073,7 +3073,7 @@ def main() -> int:
|
|
|
3073
3073
|
pass6_match_edges(tables, verbose=args.verbose)
|
|
3074
3074
|
write_kuzu(kuzu_path, tables, source_root=root, verbose=args.verbose)
|
|
3075
3075
|
if args.verbose:
|
|
3076
|
-
_verbose_stderr_line(f"[
|
|
3076
|
+
_verbose_stderr_line(f"[graph] done · kuzu at {kuzu_path}")
|
|
3077
3077
|
return 0
|
|
3078
3078
|
|
|
3079
3079
|
|
|
@@ -100,19 +100,25 @@ _PIPELINE_SEP = "\u00b7"
|
|
|
100
100
|
|
|
101
101
|
|
|
102
102
|
def _pipeline_header(subcommand: str, cfg: ResolvedOperatorConfig) -> None:
|
|
103
|
+
from java_codebase_rag.cli_format import bold
|
|
104
|
+
|
|
103
105
|
root = cfg.source_root.resolve()
|
|
104
106
|
idx = cfg.index_dir.resolve()
|
|
105
107
|
print(
|
|
106
|
-
f"java-codebase-rag {subcommand} {_PIPELINE_SEP} source={root} {_PIPELINE_SEP} index={idx}",
|
|
108
|
+
bold(f"java-codebase-rag {subcommand} {_PIPELINE_SEP} source={root} {_PIPELINE_SEP} index={idx}"),
|
|
107
109
|
file=sys.stderr,
|
|
108
110
|
flush=True,
|
|
109
111
|
)
|
|
110
112
|
|
|
111
113
|
|
|
112
114
|
def _pipeline_footer(subcommand: str, started: float, exit_code: int) -> None:
|
|
115
|
+
from java_codebase_rag.cli_format import bold, styled_check, styled_cross
|
|
116
|
+
|
|
113
117
|
elapsed = time.perf_counter() - started
|
|
118
|
+
marker = styled_check() if exit_code == 0 else styled_cross()
|
|
114
119
|
print(
|
|
115
|
-
f"java-codebase-rag {subcommand} {_PIPELINE_SEP} finished in {elapsed:.2f}s
|
|
120
|
+
f"{marker} {bold(f'java-codebase-rag {subcommand} {_PIPELINE_SEP} finished in {elapsed:.2f}s')}"
|
|
121
|
+
+ (f" (exit={exit_code})" if exit_code != 0 else ""),
|
|
116
122
|
file=sys.stderr,
|
|
117
123
|
flush=True,
|
|
118
124
|
)
|
|
@@ -205,6 +211,22 @@ def _add_index_embedding_flags(p: argparse.ArgumentParser) -> None:
|
|
|
205
211
|
p.add_argument("--embedding-device", type=str, default=None, help="Override SBERT_DEVICE / YAML embedding.device")
|
|
206
212
|
|
|
207
213
|
|
|
214
|
+
def _add_verbosity_flags(p: argparse.ArgumentParser) -> None:
|
|
215
|
+
g = p.add_mutually_exclusive_group()
|
|
216
|
+
g.add_argument(
|
|
217
|
+
"--quiet", "-q",
|
|
218
|
+
action="store_true",
|
|
219
|
+
dest="quiet",
|
|
220
|
+
help="Suppress stderr progress relay; stdout payload unchanged.",
|
|
221
|
+
)
|
|
222
|
+
g.add_argument(
|
|
223
|
+
"--verbose", "-v",
|
|
224
|
+
action="store_true",
|
|
225
|
+
dest="verbose",
|
|
226
|
+
help="Show full subprocess output (Lance warnings, brownfield events, progress bars).",
|
|
227
|
+
)
|
|
228
|
+
|
|
229
|
+
|
|
208
230
|
def _cmd_init(args: argparse.Namespace) -> int:
|
|
209
231
|
cfg = _resolved_from_ns(args)
|
|
210
232
|
_startup_hints(cfg)
|
|
@@ -227,10 +249,12 @@ def _cmd_init(args: argparse.Namespace) -> int:
|
|
|
227
249
|
|
|
228
250
|
def work() -> int:
|
|
229
251
|
env = cfg.subprocess_env()
|
|
252
|
+
verbose = bool(args.verbose)
|
|
230
253
|
coco = run_cocoindex_update(
|
|
231
254
|
env,
|
|
232
255
|
full_reprocess=False,
|
|
233
256
|
quiet=bool(args.quiet),
|
|
257
|
+
verbose=verbose,
|
|
234
258
|
lance_project_root=None if args.quiet else cfg.source_root,
|
|
235
259
|
)
|
|
236
260
|
if coco.returncode != 0:
|
|
@@ -244,10 +268,13 @@ def _cmd_init(args: argparse.Namespace) -> int:
|
|
|
244
268
|
}
|
|
245
269
|
)
|
|
246
270
|
return 1
|
|
271
|
+
if not args.quiet:
|
|
272
|
+
print(file=sys.stderr, flush=True)
|
|
247
273
|
g = run_build_ast_graph(
|
|
248
274
|
source_root=cfg.source_root,
|
|
249
275
|
kuzu_path=cfg.kuzu_path,
|
|
250
|
-
verbose=
|
|
276
|
+
verbose=verbose,
|
|
277
|
+
quiet=bool(args.quiet),
|
|
251
278
|
env=env,
|
|
252
279
|
)
|
|
253
280
|
if g.returncode != 0:
|
|
@@ -279,6 +306,7 @@ def _cmd_increment(args: argparse.Namespace) -> int:
|
|
|
279
306
|
env,
|
|
280
307
|
full_reprocess=False,
|
|
281
308
|
quiet=bool(args.quiet),
|
|
309
|
+
verbose=bool(args.verbose),
|
|
282
310
|
lance_project_root=None if args.quiet else cfg.source_root,
|
|
283
311
|
)
|
|
284
312
|
if coco.returncode != 0:
|
|
@@ -305,11 +333,12 @@ def _cmd_reprocess(args: argparse.Namespace) -> int:
|
|
|
305
333
|
|
|
306
334
|
def work() -> int:
|
|
307
335
|
env = cfg.subprocess_env()
|
|
336
|
+
verbose = bool(args.verbose)
|
|
308
337
|
vectors_only = bool(getattr(args, "vectors_only", False))
|
|
309
338
|
graph_only = bool(getattr(args, "graph_only", False))
|
|
310
339
|
|
|
311
340
|
if vectors_only:
|
|
312
|
-
coco = run_cocoindex_update(env, full_reprocess=True, quiet=bool(args.quiet))
|
|
341
|
+
coco = run_cocoindex_update(env, full_reprocess=True, quiet=bool(args.quiet), verbose=verbose)
|
|
313
342
|
if _is_cocoindex_preflight_blocker(coco):
|
|
314
343
|
payload: dict[str, Any] = {
|
|
315
344
|
"success": False,
|
|
@@ -345,7 +374,8 @@ def _cmd_reprocess(args: argparse.Namespace) -> int:
|
|
|
345
374
|
g = run_build_ast_graph(
|
|
346
375
|
source_root=cfg.source_root,
|
|
347
376
|
kuzu_path=cfg.kuzu_path,
|
|
348
|
-
verbose=
|
|
377
|
+
verbose=verbose,
|
|
378
|
+
quiet=bool(args.quiet),
|
|
349
379
|
env=env,
|
|
350
380
|
)
|
|
351
381
|
if _is_graph_preflight_blocker(g):
|
|
@@ -381,7 +411,7 @@ def _cmd_reprocess(args: argparse.Namespace) -> int:
|
|
|
381
411
|
|
|
382
412
|
import server # lazy: pulls sentence_transformers/torch/lancedb/kuzu
|
|
383
413
|
|
|
384
|
-
result = asyncio.run(server.run_refresh_pipeline(quiet=bool(args.quiet)))
|
|
414
|
+
result = asyncio.run(server.run_refresh_pipeline(quiet=bool(args.quiet), verbose=verbose))
|
|
385
415
|
payload = result.model_dump()
|
|
386
416
|
_emit_reprocess_outcome(payload)
|
|
387
417
|
return _reprocess_exit_code(payload)
|
|
@@ -473,6 +503,8 @@ def _cmd_meta(args: argparse.Namespace) -> int:
|
|
|
473
503
|
payload["index_dir"] = str(cfg.index_dir.resolve())
|
|
474
504
|
payload["kuzu_path"] = str(cfg.kuzu_path.resolve())
|
|
475
505
|
payload["index_dir_source"] = cfg.index_dir_source
|
|
506
|
+
payload["hints_enabled"] = cfg.hints_enabled
|
|
507
|
+
payload["hints_enabled_source"] = cfg.hints_enabled_source
|
|
476
508
|
_emit(payload)
|
|
477
509
|
return 0 if payload.get("success") else 2
|
|
478
510
|
|
|
@@ -612,11 +644,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
612
644
|
),
|
|
613
645
|
)
|
|
614
646
|
_add_index_embedding_flags(init)
|
|
615
|
-
init.add_argument(
|
|
616
|
-
"--quiet",
|
|
617
|
-
action="store_true",
|
|
618
|
-
help="Suppress stderr progress relay; stdout payload unchanged.",
|
|
619
|
-
)
|
|
647
|
+
_add_verbosity_flags(init)
|
|
620
648
|
init.set_defaults(handler=_cmd_init)
|
|
621
649
|
|
|
622
650
|
increment = subparsers.add_parser(
|
|
@@ -625,11 +653,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
625
653
|
description="Runs cocoindex catch-up (no full reprocess). Does not rebuild Kuzu; see stderr warning.",
|
|
626
654
|
)
|
|
627
655
|
_add_index_embedding_flags(increment)
|
|
628
|
-
increment.add_argument(
|
|
629
|
-
"--quiet",
|
|
630
|
-
action="store_true",
|
|
631
|
-
help="Suppress stderr progress relay; stdout payload unchanged.",
|
|
632
|
-
)
|
|
656
|
+
_add_verbosity_flags(increment)
|
|
633
657
|
increment.set_defaults(handler=_cmd_increment)
|
|
634
658
|
|
|
635
659
|
reprocess = subparsers.add_parser(
|
|
@@ -641,11 +665,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
641
665
|
),
|
|
642
666
|
)
|
|
643
667
|
_add_index_embedding_flags(reprocess)
|
|
644
|
-
reprocess.add_argument(
|
|
645
|
-
"--quiet",
|
|
646
|
-
action="store_true",
|
|
647
|
-
help="Suppress stderr progress relay; stdout payload unchanged.",
|
|
648
|
-
)
|
|
668
|
+
_add_verbosity_flags(reprocess)
|
|
649
669
|
_rex = reprocess.add_mutually_exclusive_group()
|
|
650
670
|
_rex.add_argument(
|
|
651
671
|
"--vectors-only",
|
|
@@ -667,8 +687,9 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
667
687
|
_add_index_embedding_flags(erase)
|
|
668
688
|
erase.add_argument("--yes", action="store_true", help="Confirm destructive deletion (required in CI)")
|
|
669
689
|
erase.add_argument(
|
|
670
|
-
"--quiet",
|
|
690
|
+
"--quiet", "-q",
|
|
671
691
|
action="store_true",
|
|
692
|
+
dest="quiet",
|
|
672
693
|
help="Suppress stderr progress relay; stdout payload unchanged.",
|
|
673
694
|
)
|
|
674
695
|
erase.set_defaults(handler=_cmd_erase)
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""TTY-aware ANSI formatting for CLI stderr progress."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import itertools
|
|
5
|
+
import sys
|
|
6
|
+
import threading
|
|
7
|
+
import time
|
|
8
|
+
|
|
9
|
+
_RESET = "\033[0m"
|
|
10
|
+
_BOLD = "\033[1m"
|
|
11
|
+
_DIM = "\033[2m"
|
|
12
|
+
_GREEN = "\033[32m"
|
|
13
|
+
_RED = "\033[31m"
|
|
14
|
+
_CYAN = "\033[36m"
|
|
15
|
+
|
|
16
|
+
CHECK = "✓"
|
|
17
|
+
CROSS = "✗"
|
|
18
|
+
|
|
19
|
+
_SPINNER_FRAMES = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"
|
|
20
|
+
|
|
21
|
+
_NOISE_CONTAINS: tuple[bytes, ...] = (
|
|
22
|
+
b"lance::",
|
|
23
|
+
b"FutureWarning",
|
|
24
|
+
b"Loading weights:",
|
|
25
|
+
b'"event": "brownfield-',
|
|
26
|
+
b"unknown producer source strategy",
|
|
27
|
+
b"unknown client source strategy",
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def is_noise_line(line: bytes) -> bool:
|
|
32
|
+
return any(p in line for p in _NOISE_CONTAINS)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def stderr_is_tty() -> bool:
|
|
36
|
+
return hasattr(sys.stderr, "isatty") and sys.stderr.isatty()
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _styled(text: str, *codes: str) -> str:
|
|
40
|
+
if not stderr_is_tty():
|
|
41
|
+
return text
|
|
42
|
+
return "".join(codes) + text + _RESET
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def bold(text: str) -> str:
|
|
46
|
+
return _styled(text, _BOLD)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def dim(text: str) -> str:
|
|
50
|
+
return _styled(text, _DIM)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def green(text: str) -> str:
|
|
54
|
+
return _styled(text, _GREEN)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def red(text: str) -> str:
|
|
58
|
+
return _styled(text, _RED)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def cyan(text: str) -> str:
|
|
62
|
+
return _styled(text, _CYAN)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def bold_green(text: str) -> str:
|
|
66
|
+
return _styled(text, _BOLD, _GREEN)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def bold_red(text: str) -> str:
|
|
70
|
+
return _styled(text, _BOLD, _RED)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def bold_cyan(text: str) -> str:
|
|
74
|
+
return _styled(text, _BOLD, _CYAN)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def styled_check() -> str:
|
|
78
|
+
return green(CHECK) if stderr_is_tty() else CHECK
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def styled_cross() -> str:
|
|
82
|
+
return red(CROSS) if stderr_is_tty() else CROSS
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class Spinner:
|
|
86
|
+
"""Braille spinner that overwrites the current stderr line until stopped."""
|
|
87
|
+
|
|
88
|
+
def __init__(self, label: str) -> None:
|
|
89
|
+
self._label = label
|
|
90
|
+
self._stop = threading.Event()
|
|
91
|
+
self._thread: threading.Thread | None = None
|
|
92
|
+
|
|
93
|
+
def start(self) -> None:
|
|
94
|
+
self._thread = threading.Thread(target=self._run, name="spinner", daemon=True)
|
|
95
|
+
self._thread.start()
|
|
96
|
+
|
|
97
|
+
def stop(self) -> None:
|
|
98
|
+
self._stop.set()
|
|
99
|
+
if self._thread is not None:
|
|
100
|
+
self._thread.join(timeout=2.0)
|
|
101
|
+
sys.stderr.buffer.write(b"\r\x1b[2K")
|
|
102
|
+
sys.stderr.buffer.flush()
|
|
103
|
+
|
|
104
|
+
def _run(self) -> None:
|
|
105
|
+
frames = itertools.cycle(_SPINNER_FRAMES)
|
|
106
|
+
t0 = time.monotonic()
|
|
107
|
+
while not self._stop.wait(0.3):
|
|
108
|
+
elapsed = time.monotonic() - t0
|
|
109
|
+
frame = next(frames)
|
|
110
|
+
line = f"\r{frame} {self._label} · {elapsed:.0f}s"
|
|
111
|
+
sys.stderr.buffer.write(line.encode())
|
|
112
|
+
sys.stderr.buffer.flush()
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
"""CLI-owned stderr progress lines (shared by server reprocess path and pipeline helpers)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import asyncio
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
from java_codebase_rag.cli_format import bold_cyan, is_noise_line, styled_check, styled_cross
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def emit_vectors_start() -> None:
|
|
11
|
+
print(
|
|
12
|
+
bold_cyan("[vectors]") + " running · cocoindex update",
|
|
13
|
+
file=sys.stderr,
|
|
14
|
+
flush=True,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def emit_vectors_finish(*, elapsed_s: float, exit_code: int) -> None:
|
|
19
|
+
marker = styled_check() if exit_code == 0 else styled_cross()
|
|
20
|
+
print(
|
|
21
|
+
f"{marker} {bold_cyan('[vectors]')} finished · {elapsed_s:.2f}s"
|
|
22
|
+
+ (f" (exit={exit_code})" if exit_code != 0 else ""),
|
|
23
|
+
file=sys.stderr,
|
|
24
|
+
flush=True,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class _AsyncLineFilter:
|
|
29
|
+
"""Buffers byte chunks and relays only non-noise lines to stderr (async drain path)."""
|
|
30
|
+
|
|
31
|
+
def __init__(self) -> None:
|
|
32
|
+
self._buf = bytearray()
|
|
33
|
+
self._suppress_next = False
|
|
34
|
+
|
|
35
|
+
def feed(self, chunk: bytes) -> None:
|
|
36
|
+
self._buf.extend(chunk)
|
|
37
|
+
while b"\n" in self._buf:
|
|
38
|
+
line, self._buf = self._buf.split(b"\n", 1)
|
|
39
|
+
line += b"\n"
|
|
40
|
+
noise = is_noise_line(line)
|
|
41
|
+
if noise:
|
|
42
|
+
self._suppress_next = True
|
|
43
|
+
continue
|
|
44
|
+
if self._suppress_next and line[:1] in (b" ", b"\t"):
|
|
45
|
+
continue
|
|
46
|
+
self._suppress_next = False
|
|
47
|
+
sys.stderr.buffer.write(line)
|
|
48
|
+
sys.stderr.buffer.flush()
|
|
49
|
+
|
|
50
|
+
def flush(self) -> None:
|
|
51
|
+
if self._buf:
|
|
52
|
+
if not is_noise_line(self._buf):
|
|
53
|
+
sys.stderr.buffer.write(bytes(self._buf))
|
|
54
|
+
sys.stderr.buffer.flush()
|
|
55
|
+
self._buf.clear()
|
|
56
|
+
self._suppress_next = False
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
async def accumulate_and_relay_subprocess_streams(
|
|
60
|
+
proc: asyncio.subprocess.Process,
|
|
61
|
+
*,
|
|
62
|
+
relay: bool,
|
|
63
|
+
verbose: bool = True,
|
|
64
|
+
) -> tuple[bytes, bytes]:
|
|
65
|
+
"""Read stdout and stderr until EOF; optionally copy non-noise stderr chunks to stderr."""
|
|
66
|
+
stdout = proc.stdout
|
|
67
|
+
stderr = proc.stderr
|
|
68
|
+
if stdout is None or stderr is None:
|
|
69
|
+
raise RuntimeError("subprocess must be created with stdout=PIPE and stderr=PIPE")
|
|
70
|
+
|
|
71
|
+
out_buf = bytearray()
|
|
72
|
+
err_buf = bytearray()
|
|
73
|
+
filt = _AsyncLineFilter() if (relay and not verbose) else None
|
|
74
|
+
|
|
75
|
+
async def drain_stdout(reader: asyncio.StreamReader, target: bytearray) -> None:
|
|
76
|
+
while True:
|
|
77
|
+
chunk = await reader.read(65536)
|
|
78
|
+
if not chunk:
|
|
79
|
+
break
|
|
80
|
+
target.extend(chunk)
|
|
81
|
+
|
|
82
|
+
async def drain_stderr(reader: asyncio.StreamReader, target: bytearray) -> None:
|
|
83
|
+
while True:
|
|
84
|
+
chunk = await reader.read(65536)
|
|
85
|
+
if not chunk:
|
|
86
|
+
break
|
|
87
|
+
target.extend(chunk)
|
|
88
|
+
if filt is not None:
|
|
89
|
+
filt.feed(chunk)
|
|
90
|
+
elif relay:
|
|
91
|
+
sys.stderr.buffer.write(chunk)
|
|
92
|
+
sys.stderr.buffer.flush()
|
|
93
|
+
|
|
94
|
+
await asyncio.gather(drain_stdout(stdout, out_buf), drain_stderr(stderr, err_buf))
|
|
95
|
+
await proc.wait()
|
|
96
|
+
if filt is not None:
|
|
97
|
+
filt.flush()
|
|
98
|
+
return bytes(out_buf), bytes(err_buf)
|