java-codebase-rag 0.5.3__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ast_java.py +24 -7
- build_ast_graph.py +153 -94
- graph_enrich.py +3 -3
- java_codebase_rag/_fdlimit.py +48 -0
- java_codebase_rag/cli.py +31 -28
- java_codebase_rag/config.py +40 -10
- java_codebase_rag/installer.py +99 -10
- java_codebase_rag/lance_optimize.py +148 -0
- java_codebase_rag/pipeline.py +63 -9
- {java_codebase_rag-0.5.3.dist-info → java_codebase_rag-0.6.1.dist-info}/METADATA +6 -5
- java_codebase_rag-0.6.1.dist-info/RECORD +36 -0
- {java_codebase_rag-0.5.3.dist-info → java_codebase_rag-0.6.1.dist-info}/top_level.txt +1 -1
- java_index_flow_lancedb.py +22 -4
- java_ontology.py +5 -2
- ladybug_queries.py +1995 -0
- mcp_v2.py +51 -26
- pr_analysis.py +1 -1
- search_lancedb.py +8 -8
- server.py +116 -68
- user_rag/__init__.py +1 -0
- user_rag/cli.py +175 -0
- java_codebase_rag-0.5.3.dist-info/RECORD +0 -31
- {java_codebase_rag-0.5.3.dist-info → java_codebase_rag-0.6.1.dist-info}/WHEEL +0 -0
- {java_codebase_rag-0.5.3.dist-info → java_codebase_rag-0.6.1.dist-info}/entry_points.txt +0 -0
- {java_codebase_rag-0.5.3.dist-info → java_codebase_rag-0.6.1.dist-info}/licenses/LICENSE +0 -0
server.py
CHANGED
|
@@ -7,7 +7,7 @@ import os
|
|
|
7
7
|
import sys
|
|
8
8
|
import time
|
|
9
9
|
from pathlib import Path
|
|
10
|
-
from typing import
|
|
10
|
+
from typing import Literal
|
|
11
11
|
|
|
12
12
|
import mcp_v2
|
|
13
13
|
from index_common import SBERT_MODEL
|
|
@@ -16,27 +16,29 @@ from java_codebase_rag.cli_progress import (
|
|
|
16
16
|
emit_vectors_finish,
|
|
17
17
|
emit_vectors_start,
|
|
18
18
|
)
|
|
19
|
+
from java_codebase_rag._fdlimit import raise_fd_limit
|
|
19
20
|
from java_codebase_rag.config import (
|
|
21
|
+
cocoindex_subprocess_env_defaults,
|
|
20
22
|
discover_project_root,
|
|
21
23
|
emit_legacy_env_hints_if_present,
|
|
22
24
|
resolved_sbert_model_for_process_env,
|
|
23
25
|
resolve_operator_config,
|
|
24
26
|
)
|
|
25
|
-
from
|
|
27
|
+
from ladybug_queries import LadybugGraph, resolve_ladybug_path
|
|
26
28
|
from mcp.server.fastmcp import FastMCP
|
|
27
29
|
from pydantic import BaseModel, Field
|
|
28
30
|
from search_lancedb import TABLES
|
|
29
31
|
|
|
30
32
|
_COCOINDEX_TARGET = "java_index_flow_lancedb.py:JavaCodeIndexLance"
|
|
31
33
|
_INSTRUCTIONS = (
|
|
32
|
-
"Java codebase graph navigator
|
|
34
|
+
"Java codebase graph navigator over an indexed Java codebase. "
|
|
33
35
|
"Tools: search (NL/code locate), find (structured NodeFilter), describe (one node + edge_summary: stored edge-label counts and optional composed keys for type Symbols and override-axis virtual keys for method Symbols), "
|
|
34
36
|
"neighbors (one hop; you MUST pass direction in|out AND edge_types list — no defaults), "
|
|
35
|
-
"resolve (identifier-shaped lookup for symbol/route/client/producer — three statuses one|many|none). "
|
|
36
|
-
"NodeFilter `filter` is a JSON object (preferred); a JSON-encoded string is also accepted as a fallback. "
|
|
37
|
+
"resolve (identifier-shaped lookup for symbol/route/client/producer — three statuses: one | many | none). "
|
|
37
38
|
"Unknown filter keys and populated fields not applicable to the effective node kind fail with success=false and message. "
|
|
39
|
+
"Successful responses from any tool may include `hints_structured` (tool call suggestions with a `reason` field) and `advisories` (pure informational text) when hints are enabled. "
|
|
38
40
|
"Edge labels: EXTENDS, IMPLEMENTS, INJECTS, OVERRIDES, DECLARES, DECLARES_CLIENT, DECLARES_PRODUCER, CALLS, EXPOSES, HTTP_CALLS, ASYNC_CALLS; "
|
|
39
|
-
"type Symbols may also use composed neighbors edge_types DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES (out only). "
|
|
41
|
+
"type Symbols may also use composed neighbors edge_types DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES (out only, type Symbol origin). "
|
|
40
42
|
"Reprocess/init, meta, tables, diagnose-ignore, analyze-pr: use java-codebase-rag CLI — not MCP."
|
|
41
43
|
)
|
|
42
44
|
|
|
@@ -85,6 +87,7 @@ class RefreshIndexOutput(BaseModel):
|
|
|
85
87
|
graph_stdout: str = ""
|
|
86
88
|
graph_stderr: str = ""
|
|
87
89
|
phases_run: list[Literal["vectors", "graph"]] = Field(default_factory=list)
|
|
90
|
+
optimize_error: str | None = None
|
|
88
91
|
|
|
89
92
|
|
|
90
93
|
class IndexInfoOutput(BaseModel):
|
|
@@ -120,19 +123,15 @@ class ScopeManager:
|
|
|
120
123
|
print("[scope] No microservice detected (at project root)", file=sys.stderr)
|
|
121
124
|
print("[scope] Queries will span all microservices", file=sys.stderr)
|
|
122
125
|
|
|
123
|
-
def apply_auto_scope(self, node_filter:
|
|
126
|
+
def apply_auto_scope(self, node_filter: mcp_v2.NodeFilter | None) -> mcp_v2.NodeFilter | None:
|
|
124
127
|
"""Apply auto-detected scope to filter if no explicit microservice is set."""
|
|
125
128
|
if self.default_scope is None:
|
|
126
129
|
return node_filter
|
|
127
|
-
# Convert to dict for manipulation
|
|
128
130
|
if node_filter is None:
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
if "microservice" not in filter_dict:
|
|
134
|
-
filter_dict["microservice"] = self.default_scope
|
|
135
|
-
return filter_dict
|
|
131
|
+
return mcp_v2.NodeFilter(microservice=self.default_scope)
|
|
132
|
+
if node_filter.microservice is None:
|
|
133
|
+
return node_filter.model_copy(update={"microservice": self.default_scope})
|
|
134
|
+
return node_filter
|
|
136
135
|
|
|
137
136
|
|
|
138
137
|
def _resolve_lancedb_uri() -> str:
|
|
@@ -156,45 +155,67 @@ def _project_root() -> Path:
|
|
|
156
155
|
return discovered if discovered is not None else Path.cwd().resolve()
|
|
157
156
|
|
|
158
157
|
|
|
158
|
+
def _source_root_for_operator_config() -> Path | None:
    """Pick the ``source_root`` argument the MCP server passes to ``resolve_operator_config``.

    If ``JAVA_CODEBASE_RAG_SOURCE_ROOT`` is set to a non-blank value, it is an
    explicit operator override: it wins and suppresses the YAML
    ``source_root`` field, the same way CLI ``--source-root`` does. The value
    is user-expanded and resolved to an absolute path.

    Otherwise ``None`` is returned, which lets ``resolve_operator_config`` do
    its OWN walk-up discovery and HONOR the YAML ``source_root`` field, in
    line with the CLI (``init`` / ``increment`` / ``reprocess``) path.

    Never hand ``_project_root()`` (the walk-up-discovered directory) to
    ``resolve_operator_config`` from here. Any non-``None`` value is treated
    as an "explicit source root", which skips the YAML ``source_root`` field;
    that is what caused the MCP server and the CLI to derive different
    ``source_root`` / ``index_dir`` values from one config file (the
    init-vs-MCP index_dir divergence). ``_project_root()`` remains only as the
    fallback used by ``_resolve_lancedb_uri()``.
    """
    override = os.environ.get("JAVA_CODEBASE_RAG_SOURCE_ROOT", "").strip()
    if not override:
        # Unset or whitespace-only: defer to resolve_operator_config's discovery.
        return None
    return Path(override).expanduser().resolve()
|
|
177
|
+
|
|
178
|
+
|
|
159
179
|
def _cocoindex_subprocess_env(project_root: Path) -> dict[str, str]:
|
|
160
180
|
sub_env = os.environ.copy()
|
|
161
181
|
sub_env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] = str(project_root)
|
|
162
182
|
idx = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
|
|
163
183
|
if idx:
|
|
164
184
|
sub_env["JAVA_CODEBASE_RAG_INDEX_DIR"] = str(Path(idx).expanduser().resolve())
|
|
165
|
-
#
|
|
166
|
-
# See: https://github.com/HumanBean17/java-codebase-rag/issues/
|
|
167
|
-
|
|
185
|
+
# Cap CocoIndex concurrency to avoid EMFILE ("too many open files") under
|
|
186
|
+
# default OS fd limits. See: https://github.com/HumanBean17/java-codebase-rag/issues/306
|
|
187
|
+
for _k, _v in cocoindex_subprocess_env_defaults().items():
|
|
188
|
+
sub_env.setdefault(_k, _v)
|
|
168
189
|
return sub_env
|
|
169
190
|
|
|
170
191
|
|
|
171
192
|
def _graph_enabled() -> bool:
|
|
172
|
-
return
|
|
193
|
+
return LadybugGraph.exists()
|
|
173
194
|
|
|
174
195
|
|
|
175
196
|
def _graph_meta_output() -> GraphMetaOutput:
|
|
176
|
-
if not
|
|
197
|
+
if not LadybugGraph.exists():
|
|
177
198
|
return GraphMetaOutput(
|
|
178
199
|
success=True,
|
|
179
200
|
enabled=False,
|
|
180
|
-
db_path=
|
|
181
|
-
message="
|
|
201
|
+
db_path=resolve_ladybug_path(),
|
|
202
|
+
message="Ladybug graph not present; run java-codebase-rag reprocess or build_ast_graph.py",
|
|
182
203
|
)
|
|
183
204
|
try:
|
|
184
|
-
graph =
|
|
205
|
+
graph = LadybugGraph.get()
|
|
185
206
|
meta = graph.meta()
|
|
186
207
|
except Exception as e:
|
|
187
208
|
return GraphMetaOutput(
|
|
188
209
|
success=False,
|
|
189
210
|
enabled=_graph_enabled(),
|
|
190
|
-
db_path=
|
|
191
|
-
message=f"
|
|
211
|
+
db_path=resolve_ladybug_path(),
|
|
212
|
+
message=f"Ladybug open failed: {e}",
|
|
192
213
|
)
|
|
193
214
|
if "error" in meta:
|
|
194
215
|
return GraphMetaOutput(
|
|
195
216
|
success=False,
|
|
196
217
|
enabled=_graph_enabled(),
|
|
197
|
-
db_path=meta.get("db_path",
|
|
218
|
+
db_path=meta.get("db_path", resolve_ladybug_path()),
|
|
198
219
|
message=str(meta["error"]),
|
|
199
220
|
)
|
|
200
221
|
try:
|
|
@@ -212,7 +233,7 @@ def _graph_meta_output() -> GraphMetaOutput:
|
|
|
212
233
|
return GraphMetaOutput(
|
|
213
234
|
success=True,
|
|
214
235
|
enabled=_graph_enabled(),
|
|
215
|
-
db_path=meta.get("db_path",
|
|
236
|
+
db_path=meta.get("db_path", resolve_ladybug_path()),
|
|
216
237
|
ontology_version=int(meta.get("ontology_version") or 0),
|
|
217
238
|
built_at=int(meta.get("built_at") or 0),
|
|
218
239
|
source_root=str(meta.get("source_root") or ""),
|
|
@@ -326,9 +347,29 @@ async def run_refresh_pipeline(*, quiet: bool = False, verbose: bool = True) ->
|
|
|
326
347
|
graph_code: int | None = None
|
|
327
348
|
graph_out = ""
|
|
328
349
|
graph_err = ""
|
|
350
|
+
optimize_error: str | None = None
|
|
329
351
|
if ok:
|
|
330
352
|
if not quiet:
|
|
331
353
|
print(file=sys.stderr, flush=True)
|
|
354
|
+
# Serialized post-flow Lance optimize: the flow disabled its background
|
|
355
|
+
# optimize, so with cocoindex returned exit 0 there are no concurrent
|
|
356
|
+
# writers — this is the safe window to compact. An optimize failure is
|
|
357
|
+
# surfaced via optimize_error / stderr and must NOT flip the success of
|
|
358
|
+
# a vectors phase that succeeded; the index is still searchable.
|
|
359
|
+
try:
|
|
360
|
+
from java_codebase_rag.lance_optimize import optimize_lance_tables
|
|
361
|
+
|
|
362
|
+
idx_raw = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
|
|
363
|
+
if idx_raw and not idx_raw.startswith(("s3://", "gs://", "az://")):
|
|
364
|
+
idx_dir = Path(idx_raw).expanduser().resolve()
|
|
365
|
+
elif idx_raw:
|
|
366
|
+
idx_dir = Path(idx_raw)
|
|
367
|
+
else:
|
|
368
|
+
idx_dir = (root / ".java-codebase-rag").resolve()
|
|
369
|
+
await optimize_lance_tables(idx_dir, quiet=quiet)
|
|
370
|
+
except Exception as exc:
|
|
371
|
+
optimize_error = f"lance optimize failed: {exc}"
|
|
372
|
+
print(f"java-codebase-rag: {optimize_error}", file=sys.stderr)
|
|
332
373
|
builder = Path(__file__).resolve().parent / "build_ast_graph.py"
|
|
333
374
|
if builder.is_file():
|
|
334
375
|
try:
|
|
@@ -337,8 +378,8 @@ async def run_refresh_pipeline(*, quiet: bool = False, verbose: bool = True) ->
|
|
|
337
378
|
str(builder),
|
|
338
379
|
"--source-root",
|
|
339
380
|
str(root),
|
|
340
|
-
"--
|
|
341
|
-
|
|
381
|
+
"--ladybug-path",
|
|
382
|
+
resolve_ladybug_path(),
|
|
342
383
|
]
|
|
343
384
|
if not quiet:
|
|
344
385
|
graph_args.append("--verbose")
|
|
@@ -365,6 +406,10 @@ async def run_refresh_pipeline(*, quiet: bool = False, verbose: bool = True) ->
|
|
|
365
406
|
message = f"cocoindex exit {proc.returncode}"
|
|
366
407
|
elif graph_code is not None and graph_code != 0:
|
|
367
408
|
message = f"graph builder exit {graph_code}"
|
|
409
|
+
# Surface a post-flow optimize failure in the message too (success is not
|
|
410
|
+
# flipped — the vectors phase succeeded and the index is still usable).
|
|
411
|
+
if optimize_error is not None:
|
|
412
|
+
message = optimize_error if message is None else f"{message}; {optimize_error}"
|
|
368
413
|
return RefreshIndexOutput(
|
|
369
414
|
success=ok and (graph_code is None or graph_code == 0),
|
|
370
415
|
exit_code=proc.returncode,
|
|
@@ -375,6 +420,7 @@ async def run_refresh_pipeline(*, quiet: bool = False, verbose: bool = True) ->
|
|
|
375
420
|
graph_stdout=graph_out[-4000:] if len(graph_out) > 4000 else graph_out,
|
|
376
421
|
graph_stderr=graph_err[-4000:] if len(graph_err) > 4000 else graph_err,
|
|
377
422
|
phases_run=phases_run,
|
|
423
|
+
optimize_error=optimize_error,
|
|
378
424
|
)
|
|
379
425
|
|
|
380
426
|
|
|
@@ -384,14 +430,15 @@ def create_mcp_server() -> FastMCP:
|
|
|
384
430
|
@mcp.tool(
|
|
385
431
|
name="search",
|
|
386
432
|
description=(
|
|
387
|
-
"Ranked chunk retrieval
|
|
388
|
-
"results are score-ranked, not boolean-matched.
|
|
389
|
-
"
|
|
433
|
+
"Ranked chunk retrieval over content tables (java/sql/yaml); `query` is opaque text (natural language or code "
|
|
434
|
+
"fragments) and results are score-ranked, not boolean-matched. For graph-structured listing "
|
|
435
|
+
"(symbols/routes/clients/producers) use `find`, not `search`. Optional `filter` uses the same NodeFilter "
|
|
436
|
+
"schema as `find` but only **symbol-applicable** fields apply — others return success=false. Wildcards "
|
|
390
437
|
"(`*`, `?`) in prefix fields are rejected—use ranked `query` text instead. There is **no** "
|
|
391
438
|
"structured DSL inside `query`; structured predicates belong in `find`. "
|
|
392
439
|
"For identifier-shaped lookups (FQN, id prefix, route/client identifiers, …), use `resolve` first; "
|
|
393
440
|
"use `search` for natural-language or ranked fuzzy discovery. "
|
|
394
|
-
"Successful responses echo `limit`/`offset
|
|
441
|
+
"Successful responses echo `limit`/`offset`."
|
|
395
442
|
),
|
|
396
443
|
)
|
|
397
444
|
async def search(
|
|
@@ -402,7 +449,7 @@ def create_mcp_server() -> FastMCP:
|
|
|
402
449
|
),
|
|
403
450
|
hybrid: bool = Field(
|
|
404
451
|
default=False,
|
|
405
|
-
description="If true, fuse FTS + vector
|
|
452
|
+
description="If true, fuse FTS + vector. Requires a single table (java/sql/yaml); hybrid with table='all' returns success=false.",
|
|
406
453
|
),
|
|
407
454
|
limit: int = Field(default=5, ge=1, le=50, description="Max hits to return"),
|
|
408
455
|
offset: int = Field(default=0, ge=0, le=500, description="Skip this many hits (pagination)"),
|
|
@@ -410,11 +457,11 @@ def create_mcp_server() -> FastMCP:
|
|
|
410
457
|
default=None,
|
|
411
458
|
description="Substring match on file path (pre-filter from index)",
|
|
412
459
|
),
|
|
413
|
-
filter:
|
|
460
|
+
filter: mcp_v2.NodeFilter | None = Field(
|
|
414
461
|
default=None,
|
|
415
462
|
description=(
|
|
416
|
-
"Optional NodeFilter post-filter on symbol-oriented hit rows.
|
|
417
|
-
"applicable to symbols return success=false.
|
|
463
|
+
"Optional NodeFilter post-filter on symbol-oriented hit rows. An empty object or omitted means no "
|
|
464
|
+
"predicate. Unknown keys or populated fields not applicable to symbols return success=false."
|
|
418
465
|
),
|
|
419
466
|
),
|
|
420
467
|
) -> mcp_v2.SearchOutput:
|
|
@@ -439,9 +486,11 @@ def create_mcp_server() -> FastMCP:
|
|
|
439
486
|
"**route** — microservice, module, http_method, path_prefix, framework; **client** — microservice, module, "
|
|
440
487
|
"source_layer, client_kind, target_service, target_path_prefix, http_method; **producer** — microservice, "
|
|
441
488
|
"module, source_layer, producer_kind, topic_prefix. "
|
|
489
|
+
"`role` is singular and `exclude_roles` plural; `capability` is a functional tag assigned during indexing. "
|
|
490
|
+
"`fqn_prefix` is a prefix predicate — for exact FQN or id lookup use `resolve`/`describe`. "
|
|
442
491
|
"Wildcards in prefix fields are rejected. An empty filter (`{}`) or `filter=None` means no predicate (all nodes of "
|
|
443
492
|
"that kind; use pagination). Unknown keys or inapplicable populated fields return success=false. "
|
|
444
|
-
"Successful responses echo `limit`/`offset
|
|
493
|
+
"Successful responses echo `limit`/`offset`."
|
|
445
494
|
),
|
|
446
495
|
)
|
|
447
496
|
async def find(
|
|
@@ -452,11 +501,10 @@ def create_mcp_server() -> FastMCP:
|
|
|
452
501
|
"'producer' = outbound async producers."
|
|
453
502
|
)
|
|
454
503
|
),
|
|
455
|
-
filter:
|
|
504
|
+
filter: mcp_v2.NodeFilter = Field(
|
|
456
505
|
...,
|
|
457
506
|
description=(
|
|
458
|
-
"Required NodeFilter
|
|
459
|
-
"Prefer a JSON object; a JSON-encoded string is accepted."
|
|
507
|
+
"Required NodeFilter object (extra keys forbidden). Fields must be applicable to `kind`."
|
|
460
508
|
),
|
|
461
509
|
),
|
|
462
510
|
limit: int = Field(default=25, ge=1, le=500, description="Max nodes to return"),
|
|
@@ -468,17 +516,14 @@ def create_mcp_server() -> FastMCP:
|
|
|
468
516
|
@mcp.tool(
|
|
469
517
|
name="describe",
|
|
470
518
|
description=(
|
|
471
|
-
"Full node record plus `edge_summary` (in/out counts per stored edge label
|
|
472
|
-
"composed keys DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER,
|
|
473
|
-
"method Symbols
|
|
474
|
-
"
|
|
475
|
-
"
|
|
476
|
-
"(out only; composed keys include via_id in attrs). The stored `OVERRIDES` relationship "
|
|
477
|
-
"is also a normal edge label (e.g. direction in from declaration toward overriders). "
|
|
519
|
+
"Full node record plus `edge_summary` (in/out counts per stored edge label). For type Symbols, `edge_summary` "
|
|
520
|
+
"also exposes composed keys (DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES); for "
|
|
521
|
+
"non-static method Symbols it adds override-axis virtual keys (OVERRIDDEN_BY and its composed forms, plus an "
|
|
522
|
+
"`OVERRIDES` map merging stored `[:OVERRIDES]` counts with the dispatch-up rollup). These composed/override keys "
|
|
523
|
+
"are out-only and navigable via `neighbors`; the stored `OVERRIDES` is also a normal edge label (in toward declaration). "
|
|
478
524
|
"Pass `id` for any kind, or exact `fqn` for Symbol lookup (`id` wins when both are set). "
|
|
479
525
|
"`describe(fqn=…)` keeps the first graph row when multiple symbols share that FQN; when an FQN may collide, "
|
|
480
|
-
"prefer `resolve(identifier=…, hint_kind='symbol')` first, then `describe(id=…)` on the chosen node.
|
|
481
|
-
"Successful responses may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text)."
|
|
526
|
+
"prefer `resolve(identifier=…, hint_kind='symbol')` first, then `describe(id=…)` on the chosen node."
|
|
482
527
|
),
|
|
483
528
|
)
|
|
484
529
|
async def describe(
|
|
@@ -502,18 +547,19 @@ def create_mcp_server() -> FastMCP:
|
|
|
502
547
|
@mcp.tool(
|
|
503
548
|
name="neighbors",
|
|
504
549
|
description=(
|
|
505
|
-
"Graph walk: **direction** (`in` | `out`) and non-empty **edge_types** are required (
|
|
506
|
-
"type Symbol origins may also pass composed
|
|
507
|
-
"
|
|
508
|
-
"
|
|
509
|
-
"
|
|
550
|
+
"Graph walk: **direction** (`in` | `out`) and non-empty **edge_types** are required (one hop over stored edge "
|
|
551
|
+
"labels; type/method Symbol origins may also pass composed or override-axis keys — see `edge_types`). From a "
|
|
552
|
+
"type Symbol, `direction='out'` with EXPOSES yields route nodes and HTTP_CALLS/ASYNC_CALLS yield client/producer "
|
|
553
|
+
"nodes; `direction='in'` reverses each relationship. "
|
|
554
|
+
"`direction` and `edge_types` have no defaults; an empty `edge_types` fails. The CALLS-only features — "
|
|
555
|
+
"`edge_filter`, `include_unresolved`, `dedup_calls` — each require `edge_types=['CALLS']`; `edge_filter` and "
|
|
556
|
+
"`include_unresolved` are mutually exclusive. Violating a precondition (wrong CALLS context, composed/override "
|
|
557
|
+
"keys on an ineligible origin or with `direction='in'`, wildcards in prefix fields, unknown filter keys) returns "
|
|
558
|
+
"success=false with a message; `dedup_calls` with other edge_types is a silent no-op. "
|
|
510
559
|
"Optional `filter` applies to each neighbor endpoint row; populated fields must be applicable to that "
|
|
511
|
-
"neighbor's kind—mixed-kind result sets fail on the first inapplicable neighbor (strict frame). "
|
|
512
|
-
"
|
|
513
|
-
"
|
|
514
|
-
"callee_declaring_role). Wildcards in prefix fields are rejected. Unknown filter keys return success=false. "
|
|
515
|
-
"Successful responses echo `requested_edge_types` and may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text). "
|
|
516
|
-
"Each edge's `attrs.strategy` indicates resolution quality (brownfield/fallback vs primary paths)."
|
|
560
|
+
"neighbor's kind—mixed-kind result sets fail on the first inapplicable neighbor (per-neighbor strict frame). "
|
|
561
|
+
"Each edge's `attrs.strategy` indicates resolution quality (brownfield/fallback vs primary paths). "
|
|
562
|
+
"Successful responses echo `requested_edge_types`."
|
|
517
563
|
),
|
|
518
564
|
)
|
|
519
565
|
async def neighbors(
|
|
@@ -544,19 +590,19 @@ def create_mcp_server() -> FastMCP:
|
|
|
544
590
|
le=1000,
|
|
545
591
|
description="Skip this many edges after merge (pagination)",
|
|
546
592
|
),
|
|
547
|
-
filter:
|
|
593
|
+
filter: mcp_v2.NodeFilter | None = Field(
|
|
548
594
|
default=None,
|
|
549
595
|
description=(
|
|
550
|
-
"Optional NodeFilter on the neighbor node.
|
|
551
|
-
"
|
|
596
|
+
"Optional NodeFilter on the neighbor node. An empty object or omitted means no predicate. "
|
|
597
|
+
"Same applicability rules as `find` for that node's kind."
|
|
552
598
|
),
|
|
553
599
|
),
|
|
554
|
-
edge_filter:
|
|
600
|
+
edge_filter: mcp_v2.EdgeFilter | None = Field(
|
|
555
601
|
default=None,
|
|
556
602
|
description=(
|
|
557
603
|
"Optional EdgeFilter on CALLS edge attributes (edge_types=['CALLS'] only). Use "
|
|
558
604
|
"callee_declaring_role for callee stereotype projection — not NodeFilter.role on method neighbors. "
|
|
559
|
-
"Mutually exclusive with include_unresolved.
|
|
605
|
+
"Mutually exclusive with include_unresolved."
|
|
560
606
|
),
|
|
561
607
|
),
|
|
562
608
|
include_unresolved: bool = Field(
|
|
@@ -598,10 +644,11 @@ def create_mcp_server() -> FastMCP:
|
|
|
598
644
|
"status=one (single node), many (≥2 ranked candidates with reason), or none "
|
|
599
645
|
"(no match — fall back to search(query=...) for natural language or fuzzy text). "
|
|
600
646
|
"Optional hint_kind narrows to symbol, route, client, or producer. "
|
|
601
|
-
"Successful responses may include hints_structured (tool call suggestions with `reason` field) and advisories (pure informational text) — same contract as other v2 tools. "
|
|
602
647
|
"Malformed empty/whitespace identifier returns success=false. "
|
|
603
648
|
"Examples: resolve('com.foo.Bar', hint_kind='symbol'); "
|
|
604
649
|
"resolve('GET /api/v1/customers', hint_kind='route'); "
|
|
650
|
+
"resolve('PaymentClient', hint_kind='client'); "
|
|
651
|
+
"resolve('order.created', hint_kind='producer'); "
|
|
605
652
|
"resolve('the client that handles assignments') → none (use search instead)."
|
|
606
653
|
),
|
|
607
654
|
)
|
|
@@ -622,12 +669,13 @@ def create_mcp_server() -> FastMCP:
|
|
|
622
669
|
|
|
623
670
|
|
|
624
671
|
def main() -> None:
|
|
672
|
+
raise_fd_limit()
|
|
625
673
|
emit_legacy_env_hints_if_present()
|
|
626
674
|
|
|
627
675
|
# Load YAML config and apply embedding settings to environment
|
|
628
676
|
# This ensures SBERT_MODEL and SBERT_DEVICE from .java-codebase-rag.yml are available
|
|
629
677
|
# before any tool handler runs (same behavior as CLI path)
|
|
630
|
-
cfg = resolve_operator_config(source_root=
|
|
678
|
+
cfg = resolve_operator_config(source_root=_source_root_for_operator_config())
|
|
631
679
|
cfg.apply_to_os_environ()
|
|
632
680
|
mcp_v2.set_hints_enabled(cfg.hints_enabled)
|
|
633
681
|
|
user_rag/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
|
user_rag/cli.py
ADDED
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import argparse
|
|
4
|
+
import asyncio
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import pprint
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import pr_analysis
|
|
13
|
+
import server
|
|
14
|
+
from path_filtering import LayeredIgnore
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def _jsonable(value: Any) -> Any:
    """Fallback converter for ``json.dumps(default=...)``.

    Objects exposing ``model_dump`` are converted by calling it; ``Path``
    objects become their string form.

    Raises:
        TypeError: for any other type, so unexpected objects are not
            silently stringified.
    """
    if hasattr(value, "model_dump"):
        converted = value.model_dump()
    elif isinstance(value, Path):
        converted = str(value)
    else:
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")
    return converted
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _to_payload(value: Any) -> Any:
    """Return ``value.model_dump()`` when available, otherwise ``value`` unchanged."""
    return value.model_dump() if hasattr(value, "model_dump") else value
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _emit(value: Any) -> None:
    """Print ``value`` to stdout in a form suited to the consumer.

    The value is first normalised with ``_to_payload``. When stdout is not a
    terminal, a single-line JSON document with sorted keys is written
    (``_jsonable`` handles non-native types); on a terminal, a
    ``pprint``-formatted rendering with sorted dict keys is written instead.
    """
    payload = _to_payload(value)
    if not sys.stdout.isatty():
        # Piped / redirected output: compact JSON.
        print(json.dumps(payload, default=_jsonable, sort_keys=True, indent=None))
        return
    print(pprint.pformat(payload, sort_dicts=True))
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _parse_common_graph_flags(parser: argparse.ArgumentParser) -> None:
    """Register the path-override options shared by every subcommand.

    Adds ``--source-root``, ``--kuzu-path`` and ``--lancedb-path`` to
    ``parser``; each takes a string and defaults to ``None``.
    """
    for flag in ("--source-root", "--kuzu-path", "--lancedb-path"):
        parser.add_argument(flag, type=str, default=None)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _apply_graph_env(args: argparse.Namespace) -> None:
    """Export the path overrides found on ``args`` as environment variables.

    Each of ``args.source_root``, ``args.kuzu_path`` and ``args.lancedb_path``
    that is truthy is user-expanded, resolved to an absolute path and written
    to ``LANCEDB_MCP_PROJECT_ROOT``, ``KUZU_DB_PATH`` and ``LANCEDB_URI``
    respectively. Falsy values leave the environment untouched.
    """
    # NOTE(review): the server.py hunks in this same release read
    # JAVA_CODEBASE_RAG_SOURCE_ROOT / JAVA_CODEBASE_RAG_INDEX_DIR and import
    # ladybug_queries rather than kuzu_queries -- confirm these variable names
    # and the KuzuGraph reset are still what the server side consumes.

    def _absolute(raw: str) -> str:
        return str(Path(raw).expanduser().resolve())

    if args.source_root:
        os.environ["LANCEDB_MCP_PROJECT_ROOT"] = _absolute(args.source_root)

    if args.kuzu_path:
        os.environ["KUZU_DB_PATH"] = _absolute(args.kuzu_path)
        # Drop the cached singleton so the next access picks up the override path.
        from kuzu_queries import KuzuGraph

        KuzuGraph._instance = None
        KuzuGraph._instance_path = None

    if args.lancedb_path:
        os.environ["LANCEDB_URI"] = _absolute(args.lancedb_path)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _cmd_refresh(args: argparse.Namespace) -> int:
    """Run the refresh pipeline and emit its result.

    Returns:
        0 when the pipeline reports success; otherwise 1 when the result
        carries an ``exit_code`` (a launched subprocess failed) and 2 when it
        does not (an internal error before anything was launched).
    """
    _apply_graph_env(args)
    outcome = asyncio.run(server.run_refresh_pipeline(quiet=bool(args.quiet)))
    payload = outcome.model_dump()
    # The payload is emitted on success and on failure alike.
    _emit(payload)
    if payload.get("success"):
        return 0
    if payload.get("exit_code") is None:
        return 2
    return 1
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _cmd_meta(args: argparse.Namespace) -> int:
    """Emit the graph metadata record; return 0 if it reports success, else 2."""
    _apply_graph_env(args)
    payload = server._graph_meta_output().model_dump()
    _emit(payload)
    if payload.get("success"):
        return 0
    return 2
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _cmd_tables(args: argparse.Namespace) -> int:
    """Emit the code-index tables payload; always returns 0."""
    _apply_graph_env(args)
    tables = server.list_code_index_tables_payload()
    _emit(tables.model_dump())
    return 0
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _cmd_diagnose_ignore(args: argparse.Namespace) -> int:
    """Emit the ``LayeredIgnore`` diagnosis for ``args.path``.

    A relative path is interpreted against the project root reported by
    ``server._project_root()``; an absolute path is used as given. Either way
    the path is resolved before being diagnosed.

    Returns:
        0 after emitting the diagnosis, or 1 (after emitting a failure
        payload) when resolving the path raises ``OSError``.
    """
    _apply_graph_env(args)
    # Must come after _apply_graph_env so relative paths resolve from --source-root.
    project_root = server._project_root()
    requested = Path(args.path)
    try:
        if requested.is_absolute():
            target = requested.resolve()
        else:
            target = (project_root / requested).resolve()
    except OSError as exc:
        _emit({"success": False, "message": f"Invalid path: {exc}"})
        return 1
    _emit(LayeredIgnore(project_root).diagnose_dict(target))
    return 0
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _read_diff_text(args: argparse.Namespace) -> str:
    """Load the diff text selected on the command line.

    ``args.diff_file`` takes precedence and is read as UTF-8; otherwise, when
    ``args.diff_stdin`` is set, all of standard input is read.

    Raises:
        ValueError: when neither source was selected.
    """
    if not args.diff_file:
        if not args.diff_stdin:
            raise ValueError("Provide exactly one of --diff-file or --diff-stdin")
        return sys.stdin.read()
    return Path(args.diff_file).read_text(encoding="utf-8")
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def _cmd_analyze_pr(args: argparse.Namespace) -> int:
    """Analyze a PR diff against the graph and emit the report.

    Returns:
        0 after emitting the report. 1, after emitting a failure payload,
        when the diff cannot be read, is empty or whitespace-only, or when
        ``KuzuGraph.exists()`` is false.
    """
    _apply_graph_env(args)

    try:
        diff_text = _read_diff_text(args)
    except Exception as exc:
        _emit({"success": False, "message": str(exc)})
        return 1

    if not diff_text.strip():
        _emit({"success": False, "message": "Diff is empty"})
        return 1

    # Imported here rather than at module level, as in the original.
    from kuzu_queries import KuzuGraph

    if not KuzuGraph.exists():
        _emit({"success": False, "message": "Kuzu graph not found"})
        return 1

    report = pr_analysis.analyze_pr_pipeline(KuzuGraph.get(), diff_text)
    _emit(pr_analysis.pr_report_to_dict(report))
    return 0
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def build_parser() -> argparse.ArgumentParser:
    """Build the ``java-codebase-rag`` argument parser.

    Subcommands: ``refresh`` (adds ``--quiet``), ``meta``, ``tables``,
    ``diagnose-ignore`` (adds a positional ``path``) and ``analyze-pr`` (adds
    a required, mutually exclusive ``--diff-file`` / ``--diff-stdin`` pair).
    Every subcommand also gets the common path-override flags and stores its
    handler function under ``handler``.
    """
    parser = argparse.ArgumentParser(prog="java-codebase-rag")
    commands = parser.add_subparsers(dest="subcommand")

    def _subcommand(name: str) -> argparse.ArgumentParser:
        # Every subcommand starts with the shared path-override flags.
        sub = commands.add_parser(name)
        _parse_common_graph_flags(sub)
        return sub

    refresh = _subcommand("refresh")
    refresh.add_argument("--quiet", action="store_true")
    refresh.set_defaults(handler=_cmd_refresh)

    _subcommand("meta").set_defaults(handler=_cmd_meta)

    _subcommand("tables").set_defaults(handler=_cmd_tables)

    diagnose = _subcommand("diagnose-ignore")
    diagnose.add_argument("path", type=str)
    diagnose.set_defaults(handler=_cmd_diagnose_ignore)

    analyze = _subcommand("analyze-pr")
    diff_source = analyze.add_mutually_exclusive_group(required=True)
    diff_source.add_argument("--diff-file", type=str)
    diff_source.add_argument("--diff-stdin", action="store_true")
    analyze.set_defaults(handler=_cmd_analyze_pr)

    return parser
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: parse ``argv`` and run the selected subcommand.

    Returns:
        The handler's return value as an ``int``. 2 when no subcommand was
        given (help is printed to stderr) or when the handler raises (a
        failure payload is emitted).
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    handler = getattr(args, "handler", None)
    if handler is None:
        # No subcommand selected.
        parser.print_help(sys.stderr)
        return 2

    try:
        status = int(handler(args))
    except Exception as exc:  # pragma: no cover - defensive top-level guard
        _emit({"success": False, "exit_code": 2, "message": f"internal error: {exc}"})
        return 2
    return status
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
if __name__ == "__main__":
|
|
175
|
+
raise SystemExit(main())
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
ast_java.py,sha256=OKoH7oX6L7AEEd6UY-spK8BPtWYY1T_4esrTC5VtoK8,98881
|
|
2
|
-
brownfield_events.py,sha256=yxXkKDgMb3VPtaiakGzncHM_EGnda8xIue6w90yYp8s,2055
|
|
3
|
-
build_ast_graph.py,sha256=1uqgFK2ebBdEc2QcAYK5vU4afOb95jU3zht5FracCkI,148683
|
|
4
|
-
chunk_heuristics.py,sha256=aQk2NOKxzUdqoUAJUO3G3LE0MN_bYZWNLQ0tkmj5uts,1813
|
|
5
|
-
graph_enrich.py,sha256=m3cksCHLqLHhA0Y-TLodbm09YfSJZjlTDN0Z51DiP2c,63317
|
|
6
|
-
index_common.py,sha256=HT6FKHFJ084eFvd3fR1j8z8gf4eWoPHVW8GXLpw464I,285
|
|
7
|
-
java_index_flow_lancedb.py,sha256=LMmfMSdE2d-ujxuJ2-hss7BhkrUMxHNyZuqsiGITuAI,12057
|
|
8
|
-
java_index_v1_common.py,sha256=nF1KrSqboF_RRvWerG9knRRFmWwsrG_CvhgnsoZ8KqA,1154
|
|
9
|
-
java_ontology.py,sha256=nM-oY8_91rmUudv9hAss1AMus9BFY9s5tTpAWjlCz00,16424
|
|
10
|
-
kuzu_queries.py,sha256=9bQzrU311AOw_BcUp_KSGiZgPVSaLSU7y63XfcT_vqI,90137
|
|
11
|
-
mcp_hints.py,sha256=3swh05LSiWur3tm3-yssndBsLxIxFhy501kBtJI8jJ0,42509
|
|
12
|
-
mcp_v2.py,sha256=JFe62sYzJ2XiE6L3wAH8XG9_Ya2oOeJQ_hkiTmXFnSE,79065
|
|
13
|
-
path_filtering.py,sha256=-oX16SYLWYwX9pcV1fu3vbVTIhY1GzFflT7J1E2tqPY,17122
|
|
14
|
-
pr_analysis.py,sha256=Zaq90xYgMgrReV3vCGcFhOkK61gIRMAAIgs7ev-rJG4,18410
|
|
15
|
-
search_lancedb.py,sha256=-XgtpbJ_3zDLiZ_vGKXjaLpl7RlvgyzUb7oAGoWkXO0,36754
|
|
16
|
-
server.py,sha256=1ZEDkRAOMs0ORncMh9CP2ICCTGEuAe2qmptytQ4QYYU,28862
|
|
17
|
-
java_codebase_rag/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
18
|
-
java_codebase_rag/cli.py,sha256=WW-DsskSGr-d0JXBLkj4IdAa2OsAcLz5e54_DWvD9Sk,33872
|
|
19
|
-
java_codebase_rag/cli_format.py,sha256=arU7P9W6Fvm7X_wzR1wJ8EfyxK1rDP_ESEhdA0ub4Mo,2579
|
|
20
|
-
java_codebase_rag/cli_progress.py,sha256=9jCqEagYOXs32SYVA31_sOCrONvYy7cl1CrdBD2Pg44,3168
|
|
21
|
-
java_codebase_rag/config.py,sha256=1BkRQsdY2ohZ8IWmbTG3WHgotVVUIrRTN537A1QAoCQ,15352
|
|
22
|
-
java_codebase_rag/installer.py,sha256=flj330ZPSBrO2iw_yuNFBILHOTVbarMufYwqjZ8JzN0,42778
|
|
23
|
-
java_codebase_rag/pipeline.py,sha256=D9SNdffcmJLoKHnNZLWZzfor1fI4bkkpJkU0KFsqfdA,9722
|
|
24
|
-
java_codebase_rag/install_data/agents/explorer-rag-enhanced.md,sha256=APl9d-No12qZNZLjU7mwNRwxHIgnT3ZtQZiD4clWlyU,14413
|
|
25
|
-
java_codebase_rag/install_data/skills/explore-codebase/SKILL.md,sha256=pIM-Xdwq_fXkhhBJCdb-fA2nes5c_mMPcdUXb7Adyxo,12040
|
|
26
|
-
java_codebase_rag-0.5.3.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
|
|
27
|
-
java_codebase_rag-0.5.3.dist-info/METADATA,sha256=iI08-selyGz8kYjgqBsWbt4Z9e7MeQd_aF7kHFPu65Q,16807
|
|
28
|
-
java_codebase_rag-0.5.3.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
29
|
-
java_codebase_rag-0.5.3.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
|
|
30
|
-
java_codebase_rag-0.5.3.dist-info/top_level.txt,sha256=5aIYoMkvJvvfXvf4iHn2OeSIM7PZXP-0j94eNESnwMw,242
|
|
31
|
-
java_codebase_rag-0.5.3.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|