java-codebase-rag 0.5.3__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ast_java.py +24 -7
- build_ast_graph.py +153 -94
- graph_enrich.py +3 -3
- java_codebase_rag/_fdlimit.py +48 -0
- java_codebase_rag/cli.py +31 -28
- java_codebase_rag/config.py +40 -10
- java_codebase_rag/installer.py +99 -10
- java_codebase_rag/lance_optimize.py +148 -0
- java_codebase_rag/pipeline.py +63 -9
- {java_codebase_rag-0.5.3.dist-info → java_codebase_rag-0.6.1.dist-info}/METADATA +6 -5
- java_codebase_rag-0.6.1.dist-info/RECORD +36 -0
- {java_codebase_rag-0.5.3.dist-info → java_codebase_rag-0.6.1.dist-info}/top_level.txt +1 -1
- java_index_flow_lancedb.py +22 -4
- java_ontology.py +5 -2
- ladybug_queries.py +1995 -0
- mcp_v2.py +51 -26
- pr_analysis.py +1 -1
- search_lancedb.py +8 -8
- server.py +116 -68
- user_rag/__init__.py +1 -0
- user_rag/cli.py +175 -0
- java_codebase_rag-0.5.3.dist-info/RECORD +0 -31
- {java_codebase_rag-0.5.3.dist-info → java_codebase_rag-0.6.1.dist-info}/WHEEL +0 -0
- {java_codebase_rag-0.5.3.dist-info → java_codebase_rag-0.6.1.dist-info}/entry_points.txt +0 -0
- {java_codebase_rag-0.5.3.dist-info → java_codebase_rag-0.6.1.dist-info}/licenses/LICENSE +0 -0
mcp_v2.py
CHANGED
|
@@ -30,7 +30,7 @@ from sentence_transformers import SentenceTransformer
|
|
|
30
30
|
from index_common import SBERT_MODEL
|
|
31
31
|
from java_codebase_rag.config import resolved_sbert_model_for_process_env
|
|
32
32
|
from java_ontology import EDGE_SCHEMA, ResolveReason
|
|
33
|
-
from
|
|
33
|
+
from ladybug_queries import LadybugGraph, OVERRIDE_AXIS_COMPOSED_EDGE_TYPES
|
|
34
34
|
from mcp_hints import generate_hints, MCP_HINTS_STRUCTURED_FIELD_DESCRIPTION
|
|
35
35
|
from search_lancedb import TABLES, run_search
|
|
36
36
|
|
|
@@ -48,6 +48,22 @@ def _hints_or_skip(tool: str, payload: dict) -> tuple[list, list]:
|
|
|
48
48
|
|
|
49
49
|
DeclarationSymbolKind = Literal["class", "interface", "enum", "record", "annotation", "method", "constructor"]
|
|
50
50
|
|
|
51
|
+
# Closed value taxonomies surfaced to MCP consumers as enums. Sources of truth:
|
|
52
|
+
# Role — VALID_ROLES in java_ontology.py + the "OTHER" inference fallback (ast_java.infer_role)
|
|
53
|
+
# Framework — hardcoded literals across ast_java.py / build_ast_graph.py
|
|
54
|
+
# SourceLayer — exhaustive classifier build_ast_graph._client_source_layer / _producer_source_layer
|
|
55
|
+
# ClientKind — VALID_CLIENT_KINDS in java_ontology.py (every producer validated at index time)
|
|
56
|
+
# ProducerKind — VALID_PRODUCER_KINDS in java_ontology.py (every producer validated at index time)
|
|
57
|
+
# Keep these in sync with the indexing-side taxonomies if they change.
|
|
58
|
+
Role = Literal[
|
|
59
|
+
"CONTROLLER", "SERVICE", "REPOSITORY", "COMPONENT", "CONFIG",
|
|
60
|
+
"ENTITY", "CLIENT", "MAPPER", "DTO", "OTHER",
|
|
61
|
+
]
|
|
62
|
+
Framework = Literal["spring_mvc", "webflux", "kafka", "rabbitmq", "jms", "stream", "feign", ""]
|
|
63
|
+
SourceLayer = Literal["builtin", "layer_a_meta", "layer_b_ann", "layer_b_fqn", "layer_c_source"]
|
|
64
|
+
ClientKind = Literal["feign_method", "rest_template", "web_client"]
|
|
65
|
+
ProducerKind = Literal["kafka_send", "stream_bridge_send"]
|
|
66
|
+
|
|
51
67
|
# Stored graph edge labels for one-hop neighbors. Composed DECLARES.* and OVERRIDDEN_BY.*
|
|
52
68
|
# dot-keys are separate ComposedEdgeType literals (2-hop traversal). Stored OVERRIDES is an EdgeType.
|
|
53
69
|
EdgeType = Literal[
|
|
@@ -133,21 +149,30 @@ class NodeFilter(BaseModel):
|
|
|
133
149
|
|
|
134
150
|
microservice: str | None = None
|
|
135
151
|
module: str | None = None
|
|
136
|
-
source_layer:
|
|
137
|
-
role:
|
|
138
|
-
exclude_roles: list[
|
|
152
|
+
source_layer: SourceLayer | None = None
|
|
153
|
+
role: Role | None = None
|
|
154
|
+
exclude_roles: list[Role] | None = None
|
|
139
155
|
annotation: str | None = None
|
|
140
156
|
capability: str | None = None
|
|
141
157
|
fqn_prefix: str | None = None
|
|
142
158
|
symbol_kind: DeclarationSymbolKind | None = None
|
|
143
159
|
symbol_kinds: list[DeclarationSymbolKind] | None = None
|
|
144
|
-
http_method: str | None =
|
|
160
|
+
http_method: str | None = Field(
|
|
161
|
+
default=None,
|
|
162
|
+
description="HTTP verb (commonly GET/POST/PUT/DELETE/PATCH; user route annotations may yield others).",
|
|
163
|
+
)
|
|
145
164
|
path_prefix: str | None = None
|
|
146
|
-
framework:
|
|
147
|
-
client_kind:
|
|
165
|
+
framework: Framework | None = None
|
|
166
|
+
client_kind: ClientKind | None = Field(
|
|
167
|
+
default=None,
|
|
168
|
+
description="Outbound HTTP client kind: feign_method, rest_template, or web_client.",
|
|
169
|
+
)
|
|
148
170
|
target_service: str | None = None
|
|
149
171
|
target_path_prefix: str | None = None
|
|
150
|
-
producer_kind:
|
|
172
|
+
producer_kind: ProducerKind | None = Field(
|
|
173
|
+
default=None,
|
|
174
|
+
description="Outbound async producer kind: kafka_send or stream_bridge_send.",
|
|
175
|
+
)
|
|
151
176
|
topic_prefix: str | None = None
|
|
152
177
|
|
|
153
178
|
|
|
@@ -157,9 +182,9 @@ class EdgeFilter(BaseModel):
|
|
|
157
182
|
min_confidence: float | None = None
|
|
158
183
|
exclude_strategies: list[str] | None = None
|
|
159
184
|
include_strategies: list[str] | None = None
|
|
160
|
-
callee_declaring_role:
|
|
161
|
-
callee_declaring_roles: list[
|
|
162
|
-
exclude_callee_declaring_roles: list[
|
|
185
|
+
callee_declaring_role: Role | None = None
|
|
186
|
+
callee_declaring_roles: list[Role] | None = None
|
|
187
|
+
exclude_callee_declaring_roles: list[Role] | None = None
|
|
163
188
|
|
|
164
189
|
@model_validator(mode="after")
|
|
165
190
|
def _strategy_axes_mutually_exclusive(self) -> EdgeFilter:
|
|
@@ -604,7 +629,7 @@ def _node_kind_from_id(
|
|
|
604
629
|
|
|
605
630
|
|
|
606
631
|
def _resolve_node_kind(
|
|
607
|
-
graph:
|
|
632
|
+
graph: LadybugGraph,
|
|
608
633
|
node_id: str,
|
|
609
634
|
) -> Literal["symbol", "route", "client", "producer", "unresolved_call_site"]:
|
|
610
635
|
try:
|
|
@@ -733,7 +758,7 @@ def _node_ref_from_row(kind: Literal["symbol", "route", "client", "producer"], r
|
|
|
733
758
|
|
|
734
759
|
|
|
735
760
|
def _load_node_record(
|
|
736
|
-
graph:
|
|
761
|
+
graph: LadybugGraph, node_id: str, kind: Literal["symbol", "route", "client", "producer"],
|
|
737
762
|
) -> dict[str, Any] | None:
|
|
738
763
|
if kind == "symbol":
|
|
739
764
|
projection = (
|
|
@@ -807,7 +832,7 @@ def _merge_overrides_edge_summary(
|
|
|
807
832
|
|
|
808
833
|
|
|
809
834
|
def _edge_summary_for_node(
|
|
810
|
-
graph:
|
|
835
|
+
graph: LadybugGraph, node_id: str, *, kind: str, row: dict[str, Any]
|
|
811
836
|
) -> dict[str, dict[str, int]]:
|
|
812
837
|
summary = dict(graph.edge_counts_for(node_id))
|
|
813
838
|
sym_kind = str(row.get("kind") or "")
|
|
@@ -887,7 +912,7 @@ def search_v2(
|
|
|
887
912
|
offset: int = 0,
|
|
888
913
|
path_contains: str | None = None,
|
|
889
914
|
filter: NodeFilter | dict[str, Any] | str | None = None,
|
|
890
|
-
graph:
|
|
915
|
+
graph: LadybugGraph | None = None,
|
|
891
916
|
) -> SearchOutput:
|
|
892
917
|
try:
|
|
893
918
|
raw_filter = _coerce_filter(filter)
|
|
@@ -967,10 +992,10 @@ def find_v2(
|
|
|
967
992
|
filter: NodeFilter | dict[str, Any] | str,
|
|
968
993
|
limit: int = 25,
|
|
969
994
|
offset: int = 0,
|
|
970
|
-
graph:
|
|
995
|
+
graph: LadybugGraph | None = None,
|
|
971
996
|
) -> FindOutput:
|
|
972
997
|
try:
|
|
973
|
-
g = graph or
|
|
998
|
+
g = graph or LadybugGraph.get()
|
|
974
999
|
raw_filter = _coerce_filter(filter)
|
|
975
1000
|
if raw_filter is None:
|
|
976
1001
|
raw_filter = {}
|
|
@@ -1063,10 +1088,10 @@ _DESCRIBE_UCS_ID_MESSAGE = (
|
|
|
1063
1088
|
def describe_v2(
|
|
1064
1089
|
id: str | None = None,
|
|
1065
1090
|
fqn: str | None = None,
|
|
1066
|
-
graph:
|
|
1091
|
+
graph: LadybugGraph | None = None,
|
|
1067
1092
|
) -> DescribeOutput:
|
|
1068
1093
|
try:
|
|
1069
|
-
g = graph or
|
|
1094
|
+
g = graph or LadybugGraph.get()
|
|
1070
1095
|
has_id = bool(id and str(id).strip())
|
|
1071
1096
|
has_fqn = bool(fqn and str(fqn).strip())
|
|
1072
1097
|
if not has_id and not has_fqn:
|
|
@@ -1171,7 +1196,7 @@ def _resolve_parse_microservice_route(identifier: str) -> tuple[str, str, str] |
|
|
|
1171
1196
|
|
|
1172
1197
|
|
|
1173
1198
|
def _resolve_symbol_candidates(
|
|
1174
|
-
g:
|
|
1199
|
+
g: LadybugGraph,
|
|
1175
1200
|
identifier: str,
|
|
1176
1201
|
) -> list[tuple[NodeRef, ResolveReason, int]]:
|
|
1177
1202
|
out: list[tuple[NodeRef, ResolveReason, int]] = []
|
|
@@ -1213,7 +1238,7 @@ def _resolve_symbol_candidates(
|
|
|
1213
1238
|
|
|
1214
1239
|
|
|
1215
1240
|
def _resolve_route_candidates(
|
|
1216
|
-
g:
|
|
1241
|
+
g: LadybugGraph,
|
|
1217
1242
|
identifier: str,
|
|
1218
1243
|
) -> list[tuple[NodeRef, ResolveReason, int]]:
|
|
1219
1244
|
out: list[tuple[NodeRef, ResolveReason, int]] = []
|
|
@@ -1265,7 +1290,7 @@ def _resolve_route_candidates(
|
|
|
1265
1290
|
|
|
1266
1291
|
|
|
1267
1292
|
def _resolve_client_candidates(
|
|
1268
|
-
g:
|
|
1293
|
+
g: LadybugGraph,
|
|
1269
1294
|
identifier: str,
|
|
1270
1295
|
) -> list[tuple[NodeRef, ResolveReason, int]]:
|
|
1271
1296
|
out: list[tuple[NodeRef, ResolveReason, int]] = []
|
|
@@ -1304,7 +1329,7 @@ def _resolve_client_candidates(
|
|
|
1304
1329
|
|
|
1305
1330
|
|
|
1306
1331
|
def _resolve_producer_candidates(
|
|
1307
|
-
g:
|
|
1332
|
+
g: LadybugGraph,
|
|
1308
1333
|
identifier: str,
|
|
1309
1334
|
) -> list[tuple[NodeRef, ResolveReason, int]]:
|
|
1310
1335
|
out: list[tuple[NodeRef, ResolveReason, int]] = []
|
|
@@ -1462,7 +1487,7 @@ def _resolve_finalize_success(
|
|
|
1462
1487
|
def resolve_v2(
|
|
1463
1488
|
identifier: str,
|
|
1464
1489
|
hint_kind: Literal["symbol", "route", "client", "producer"] | None = None,
|
|
1465
|
-
graph:
|
|
1490
|
+
graph: LadybugGraph | None = None,
|
|
1466
1491
|
) -> ResolveOutput:
|
|
1467
1492
|
try:
|
|
1468
1493
|
trimmed, err = _resolve_validate_identifier(identifier)
|
|
@@ -1481,7 +1506,7 @@ def resolve_v2(
|
|
|
1481
1506
|
if "*" in trimmed or "?" in trimmed:
|
|
1482
1507
|
return _resolve_finalize_success(trimmed, hint_kind, [])
|
|
1483
1508
|
|
|
1484
|
-
g = graph or
|
|
1509
|
+
g = graph or LadybugGraph.get()
|
|
1485
1510
|
raw: list[tuple[NodeRef, ResolveReason, int]] = []
|
|
1486
1511
|
for kind in _resolve_kinds_to_search(hint_kind):
|
|
1487
1512
|
if kind == "symbol":
|
|
@@ -1726,7 +1751,7 @@ def neighbors_v2(
|
|
|
1726
1751
|
declares_composed = [k for k in composed_keys if k in _MEMBER_COMPOSED_EDGE_TYPES]
|
|
1727
1752
|
override_composed = [k for k in composed_keys if k in _OVERRIDE_COMPOSED_EDGE_TYPES]
|
|
1728
1753
|
ordered_composed = declares_composed + override_composed
|
|
1729
|
-
g = graph or
|
|
1754
|
+
g = graph or LadybugGraph.get()
|
|
1730
1755
|
try:
|
|
1731
1756
|
raw_filter = _coerce_filter(filter)
|
|
1732
1757
|
nf = (
|
pr_analysis.py
CHANGED
|
@@ -12,7 +12,7 @@ from typing import Any
|
|
|
12
12
|
from unidiff import PatchSet
|
|
13
13
|
from unidiff.errors import UnidiffParseError
|
|
14
14
|
|
|
15
|
-
from
|
|
15
|
+
from ladybug_queries import SymbolHit, find_symbols_in_file_range, _row_to_symbol
|
|
16
16
|
|
|
17
17
|
|
|
18
18
|
@dataclass
|
search_lancedb.py
CHANGED
|
@@ -675,16 +675,16 @@ def _graph_expand_merge(
|
|
|
675
675
|
limit: int,
|
|
676
676
|
extra_predicates: list[str],
|
|
677
677
|
expand_depth: int,
|
|
678
|
-
|
|
678
|
+
ladybug_path: str | None,
|
|
679
679
|
) -> list[dict]:
|
|
680
680
|
"""Expand vector top-k through the Kuzu graph and fuse (RRF) with the original list."""
|
|
681
681
|
# Lazy import so the module works without kuzu installed when graph_expand=False.
|
|
682
682
|
try:
|
|
683
|
-
from
|
|
683
|
+
from ladybug_queries import LadybugGraph
|
|
684
684
|
except Exception:
|
|
685
685
|
return vector_rows
|
|
686
686
|
|
|
687
|
-
if not
|
|
687
|
+
if not LadybugGraph.exists(ladybug_path):
|
|
688
688
|
return vector_rows
|
|
689
689
|
|
|
690
690
|
seed_fqns = sorted({r.get("primary_type_fqn") for r in vector_rows if r.get("primary_type_fqn")})
|
|
@@ -692,7 +692,7 @@ def _graph_expand_merge(
|
|
|
692
692
|
return vector_rows
|
|
693
693
|
|
|
694
694
|
try:
|
|
695
|
-
graph =
|
|
695
|
+
graph = LadybugGraph.get(ladybug_path)
|
|
696
696
|
structural = graph.expand_fqns(seed_fqns, depth=expand_depth)
|
|
697
697
|
method_pairs = graph.expand_methods(
|
|
698
698
|
seed_fqns, depth=expand_depth, exclude_external=True,
|
|
@@ -804,7 +804,7 @@ def run_search(
|
|
|
804
804
|
package_prefix: str | None = None,
|
|
805
805
|
graph_expand: bool = False,
|
|
806
806
|
expand_depth: int = 1,
|
|
807
|
-
|
|
807
|
+
ladybug_path: str | None = None,
|
|
808
808
|
context_neighbors: int = 0,
|
|
809
809
|
role_in: list[str] | None = None,
|
|
810
810
|
exclude_roles: list[str] | None = None,
|
|
@@ -890,7 +890,7 @@ def run_search(
|
|
|
890
890
|
limit=need,
|
|
891
891
|
extra_predicates=extra_java,
|
|
892
892
|
expand_depth=expand_depth,
|
|
893
|
-
|
|
893
|
+
ladybug_path=ladybug_path,
|
|
894
894
|
)
|
|
895
895
|
|
|
896
896
|
window = rows[offset : offset + limit]
|
|
@@ -966,7 +966,7 @@ def main() -> None:
|
|
|
966
966
|
parser.add_argument("--package-prefix", default=None)
|
|
967
967
|
parser.add_argument("--graph-expand", action="store_true")
|
|
968
968
|
parser.add_argument("--expand-depth", type=int, default=1)
|
|
969
|
-
parser.add_argument("--
|
|
969
|
+
parser.add_argument("--ladybug-path", default=None)
|
|
970
970
|
parser.add_argument(
|
|
971
971
|
"--context-neighbors", type=int, default=0,
|
|
972
972
|
help="Attach N adjacent chunks per hit as surrounding context (Java only).",
|
|
@@ -1010,7 +1010,7 @@ def main() -> None:
|
|
|
1010
1010
|
package_prefix=args.package_prefix,
|
|
1011
1011
|
graph_expand=args.graph_expand,
|
|
1012
1012
|
expand_depth=args.expand_depth,
|
|
1013
|
-
|
|
1013
|
+
ladybug_path=args.ladybug_path,
|
|
1014
1014
|
context_neighbors=args.context_neighbors,
|
|
1015
1015
|
)
|
|
1016
1016
|
except Exception as e:
|