java-codebase-rag 0.5.3__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
server.py CHANGED
@@ -7,7 +7,7 @@ import os
7
7
  import sys
8
8
  import time
9
9
  from pathlib import Path
10
- from typing import Any, Literal
10
+ from typing import Literal
11
11
 
12
12
  import mcp_v2
13
13
  from index_common import SBERT_MODEL
@@ -16,27 +16,29 @@ from java_codebase_rag.cli_progress import (
16
16
  emit_vectors_finish,
17
17
  emit_vectors_start,
18
18
  )
19
+ from java_codebase_rag._fdlimit import raise_fd_limit
19
20
  from java_codebase_rag.config import (
21
+ cocoindex_subprocess_env_defaults,
20
22
  discover_project_root,
21
23
  emit_legacy_env_hints_if_present,
22
24
  resolved_sbert_model_for_process_env,
23
25
  resolve_operator_config,
24
26
  )
25
- from kuzu_queries import KuzuGraph, resolve_kuzu_path
27
+ from ladybug_queries import LadybugGraph, resolve_ladybug_path
26
28
  from mcp.server.fastmcp import FastMCP
27
29
  from pydantic import BaseModel, Field
28
30
  from search_lancedb import TABLES
29
31
 
30
32
  _COCOINDEX_TARGET = "java_index_flow_lancedb.py:JavaCodeIndexLance"
31
33
  _INSTRUCTIONS = (
32
- "Java codebase graph navigator (LanceDB + Kuzu). "
34
+ "Java codebase graph navigator over an indexed Java codebase. "
33
35
  "Tools: search (NL/code locate), find (structured NodeFilter), describe (one node + edge_summary: stored edge-label counts and optional composed keys for type Symbols and override-axis virtual keys for method Symbols), "
34
36
  "neighbors (one hop; you MUST pass direction in|out AND edge_types list — no defaults), "
35
- "resolve (identifier-shaped lookup for symbol/route/client/producer — three statuses one|many|none). "
36
- "NodeFilter `filter` is a JSON object (preferred); a JSON-encoded string is also accepted as a fallback. "
37
+ "resolve (identifier-shaped lookup for symbol/route/client/producer — three statuses: one | many | none). "
37
38
  "Unknown filter keys and populated fields not applicable to the effective node kind fail with success=false and message. "
39
+ "Successful responses from any tool may include `hints_structured` (tool call suggestions with a `reason` field) and `advisories` (pure informational text) when hints are enabled. "
38
40
  "Edge labels: EXTENDS, IMPLEMENTS, INJECTS, OVERRIDES, DECLARES, DECLARES_CLIENT, DECLARES_PRODUCER, CALLS, EXPOSES, HTTP_CALLS, ASYNC_CALLS; "
39
- "type Symbols may also use composed neighbors edge_types DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES (out only). "
41
+ "type Symbols may also use composed neighbors edge_types DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES (out only, type Symbol origin). "
40
42
  "Reprocess/init, meta, tables, diagnose-ignore, analyze-pr: use java-codebase-rag CLI — not MCP."
41
43
  )
42
44
 
@@ -85,6 +87,7 @@ class RefreshIndexOutput(BaseModel):
85
87
  graph_stdout: str = ""
86
88
  graph_stderr: str = ""
87
89
  phases_run: list[Literal["vectors", "graph"]] = Field(default_factory=list)
90
+ optimize_error: str | None = None
88
91
 
89
92
 
90
93
  class IndexInfoOutput(BaseModel):
@@ -120,19 +123,15 @@ class ScopeManager:
120
123
  print("[scope] No microservice detected (at project root)", file=sys.stderr)
121
124
  print("[scope] Queries will span all microservices", file=sys.stderr)
122
125
 
123
- def apply_auto_scope(self, node_filter: dict[str, Any] | None) -> dict[str, Any] | None:
126
+ def apply_auto_scope(self, node_filter: mcp_v2.NodeFilter | None) -> mcp_v2.NodeFilter | None:
124
127
  """Apply auto-detected scope to filter if no explicit microservice is set."""
125
128
  if self.default_scope is None:
126
129
  return node_filter
127
- # Convert to dict for manipulation
128
130
  if node_filter is None:
129
- filter_dict = {}
130
- else:
131
- filter_dict = dict(node_filter)
132
- # Only inject if user didn't specify microservice
133
- if "microservice" not in filter_dict:
134
- filter_dict["microservice"] = self.default_scope
135
- return filter_dict
131
+ return mcp_v2.NodeFilter(microservice=self.default_scope)
132
+ if node_filter.microservice is None:
133
+ return node_filter.model_copy(update={"microservice": self.default_scope})
134
+ return node_filter
136
135
 
137
136
 
138
137
  def _resolve_lancedb_uri() -> str:
@@ -156,45 +155,67 @@ def _project_root() -> Path:
156
155
  return discovered if discovered is not None else Path.cwd().resolve()
157
156
 
158
157
 
158
+ def _source_root_for_operator_config() -> Path | None:
159
+ """``source_root`` arg to hand ``resolve_operator_config`` from the MCP server.
160
+
161
+ Returns ``JAVA_CODEBASE_RAG_SOURCE_ROOT`` when set (an explicit operator
162
+ override that wins and suppresses the YAML ``source_root`` field, exactly
163
+ like CLI ``--source-root``), otherwise ``None`` — so
164
+ ``resolve_operator_config`` runs its OWN walk-up discovery and HONORS the
165
+ YAML ``source_root`` field, matching the CLI (``init`` / ``increment`` /
166
+ ``reprocess``) path.
167
+
168
+ Do NOT pass ``_project_root()`` (the walk-up-discovered dir) here: a
169
+ non-``None`` value routes into the "explicit source root" branch that
170
+ skips the YAML ``source_root`` field, which made the MCP server and the
171
+ CLI resolve different ``source_root`` / ``index_dir`` from the same config
172
+ file (the init-vs-MCP index_dir divergence). ``_project_root()`` is kept
173
+ only for the ``_resolve_lancedb_uri()`` fallback below.
174
+ """
175
+ env = os.environ.get("JAVA_CODEBASE_RAG_SOURCE_ROOT", "").strip()
176
+ return Path(env).expanduser().resolve() if env else None
177
+
178
+
159
179
  def _cocoindex_subprocess_env(project_root: Path) -> dict[str, str]:
160
180
  sub_env = os.environ.copy()
161
181
  sub_env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] = str(project_root)
162
182
  idx = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
163
183
  if idx:
164
184
  sub_env["JAVA_CODEBASE_RAG_INDEX_DIR"] = str(Path(idx).expanduser().resolve())
165
- # Set CocoIndex concurrency limits to prevent "too many open files" error
166
- # See: https://github.com/HumanBean17/java-codebase-rag/issues/293
167
- sub_env.setdefault("COCOINDEX_SOURCE_MAX_INFLIGHT_ROWS", "256")
185
+ # Cap CocoIndex concurrency to avoid EMFILE ("too many open files") under
186
+ # default OS fd limits. See: https://github.com/HumanBean17/java-codebase-rag/issues/306
187
+ for _k, _v in cocoindex_subprocess_env_defaults().items():
188
+ sub_env.setdefault(_k, _v)
168
189
  return sub_env
169
190
 
170
191
 
171
192
  def _graph_enabled() -> bool:
172
- return KuzuGraph.exists()
193
+ return LadybugGraph.exists()
173
194
 
174
195
 
175
196
  def _graph_meta_output() -> GraphMetaOutput:
176
- if not KuzuGraph.exists():
197
+ if not LadybugGraph.exists():
177
198
  return GraphMetaOutput(
178
199
  success=True,
179
200
  enabled=False,
180
- db_path=resolve_kuzu_path(),
181
- message="Kuzu graph not present; run java-codebase-rag reprocess or build_ast_graph.py",
201
+ db_path=resolve_ladybug_path(),
202
+ message="Ladybug graph not present; run java-codebase-rag reprocess or build_ast_graph.py",
182
203
  )
183
204
  try:
184
- graph = KuzuGraph.get()
205
+ graph = LadybugGraph.get()
185
206
  meta = graph.meta()
186
207
  except Exception as e:
187
208
  return GraphMetaOutput(
188
209
  success=False,
189
210
  enabled=_graph_enabled(),
190
- db_path=resolve_kuzu_path(),
191
- message=f"Kuzu open failed: {e}",
211
+ db_path=resolve_ladybug_path(),
212
+ message=f"Ladybug open failed: {e}",
192
213
  )
193
214
  if "error" in meta:
194
215
  return GraphMetaOutput(
195
216
  success=False,
196
217
  enabled=_graph_enabled(),
197
- db_path=meta.get("db_path", resolve_kuzu_path()),
218
+ db_path=meta.get("db_path", resolve_ladybug_path()),
198
219
  message=str(meta["error"]),
199
220
  )
200
221
  try:
@@ -212,7 +233,7 @@ def _graph_meta_output() -> GraphMetaOutput:
212
233
  return GraphMetaOutput(
213
234
  success=True,
214
235
  enabled=_graph_enabled(),
215
- db_path=meta.get("db_path", resolve_kuzu_path()),
236
+ db_path=meta.get("db_path", resolve_ladybug_path()),
216
237
  ontology_version=int(meta.get("ontology_version") or 0),
217
238
  built_at=int(meta.get("built_at") or 0),
218
239
  source_root=str(meta.get("source_root") or ""),
@@ -326,9 +347,29 @@ async def run_refresh_pipeline(*, quiet: bool = False, verbose: bool = True) ->
326
347
  graph_code: int | None = None
327
348
  graph_out = ""
328
349
  graph_err = ""
350
+ optimize_error: str | None = None
329
351
  if ok:
330
352
  if not quiet:
331
353
  print(file=sys.stderr, flush=True)
354
+ # Serialized post-flow Lance optimize: the flow disabled its background
355
+ # optimize, so with cocoindex returned exit 0 there are no concurrent
356
+ # writers — this is the safe window to compact. An optimize failure is
357
+ # surfaced via optimize_error / stderr and must NOT flip the success of
358
+ # a vectors phase that succeeded; the index is still searchable.
359
+ try:
360
+ from java_codebase_rag.lance_optimize import optimize_lance_tables
361
+
362
+ idx_raw = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
363
+ if idx_raw and not idx_raw.startswith(("s3://", "gs://", "az://")):
364
+ idx_dir = Path(idx_raw).expanduser().resolve()
365
+ elif idx_raw:
366
+ idx_dir = Path(idx_raw)
367
+ else:
368
+ idx_dir = (root / ".java-codebase-rag").resolve()
369
+ await optimize_lance_tables(idx_dir, quiet=quiet)
370
+ except Exception as exc:
371
+ optimize_error = f"lance optimize failed: {exc}"
372
+ print(f"java-codebase-rag: {optimize_error}", file=sys.stderr)
332
373
  builder = Path(__file__).resolve().parent / "build_ast_graph.py"
333
374
  if builder.is_file():
334
375
  try:
@@ -337,8 +378,8 @@ async def run_refresh_pipeline(*, quiet: bool = False, verbose: bool = True) ->
337
378
  str(builder),
338
379
  "--source-root",
339
380
  str(root),
340
- "--kuzu-path",
341
- resolve_kuzu_path(),
381
+ "--ladybug-path",
382
+ resolve_ladybug_path(),
342
383
  ]
343
384
  if not quiet:
344
385
  graph_args.append("--verbose")
@@ -365,6 +406,10 @@ async def run_refresh_pipeline(*, quiet: bool = False, verbose: bool = True) ->
365
406
  message = f"cocoindex exit {proc.returncode}"
366
407
  elif graph_code is not None and graph_code != 0:
367
408
  message = f"graph builder exit {graph_code}"
409
+ # Surface a post-flow optimize failure in the message too (success is not
410
+ # flipped — the vectors phase succeeded and the index is still usable).
411
+ if optimize_error is not None:
412
+ message = optimize_error if message is None else f"{message}; {optimize_error}"
368
413
  return RefreshIndexOutput(
369
414
  success=ok and (graph_code is None or graph_code == 0),
370
415
  exit_code=proc.returncode,
@@ -375,6 +420,7 @@ async def run_refresh_pipeline(*, quiet: bool = False, verbose: bool = True) ->
375
420
  graph_stdout=graph_out[-4000:] if len(graph_out) > 4000 else graph_out,
376
421
  graph_stderr=graph_err[-4000:] if len(graph_err) > 4000 else graph_err,
377
422
  phases_run=phases_run,
423
+ optimize_error=optimize_error,
378
424
  )
379
425
 
380
426
 
@@ -384,14 +430,15 @@ def create_mcp_server() -> FastMCP:
384
430
  @mcp.tool(
385
431
  name="search",
386
432
  description=(
387
- "Ranked chunk retrieval: `query` is opaque text (natural language or code fragments); "
388
- "results are score-ranked, not boolean-matched. Optional `filter` uses the same NodeFilter "
389
- "schema as `find` but only **symbol-applicable** fields apply (strict frame). Wildcards "
433
+ "Ranked chunk retrieval over content tables (java/sql/yaml); `query` is opaque text (natural language or code "
434
+ "fragments) and results are score-ranked, not boolean-matched. For graph-structured listing "
435
+ "(symbols/routes/clients/producers) use `find`, not `search`. Optional `filter` uses the same NodeFilter "
436
+ "schema as `find` but only **symbol-applicable** fields apply — others return success=false. Wildcards "
390
437
  "(`*`, `?`) in prefix fields are rejected—use ranked `query` text instead. There is **no** "
391
438
  "structured DSL inside `query`; structured predicates belong in `find`. "
392
439
  "For identifier-shaped lookups (FQN, id prefix, route/client identifiers, …), use `resolve` first; "
393
440
  "use `search` for natural-language or ranked fuzzy discovery. "
394
- "Successful responses echo `limit`/`offset` and may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text)."
441
+ "Successful responses echo `limit`/`offset`."
395
442
  ),
396
443
  )
397
444
  async def search(
@@ -402,7 +449,7 @@ def create_mcp_server() -> FastMCP:
402
449
  ),
403
450
  hybrid: bool = Field(
404
451
  default=False,
405
- description="If true, fuse FTS + vector (single-table java/sql/yaml only)",
452
+ description="If true, fuse FTS + vector. Requires a single table (java/sql/yaml); hybrid with table='all' returns success=false.",
406
453
  ),
407
454
  limit: int = Field(default=5, ge=1, le=50, description="Max hits to return"),
408
455
  offset: int = Field(default=0, ge=0, le=500, description="Skip this many hits (pagination)"),
@@ -410,11 +457,11 @@ def create_mcp_server() -> FastMCP:
410
457
  default=None,
411
458
  description="Substring match on file path (pre-filter from index)",
412
459
  ),
413
- filter: dict[str, Any] | str | None = Field(
460
+ filter: mcp_v2.NodeFilter | None = Field(
414
461
  default=None,
415
462
  description=(
416
- "Optional NodeFilter post-filter on symbol-oriented hit rows. Unknown keys or populated fields not "
417
- "applicable to symbols return success=false. Prefer a JSON object; a JSON-encoded string is accepted."
463
+ "Optional NodeFilter post-filter on symbol-oriented hit rows. An empty object or omitted means no "
464
+ "predicate. Unknown keys or populated fields not applicable to symbols return success=false."
418
465
  ),
419
466
  ),
420
467
  ) -> mcp_v2.SearchOutput:
@@ -439,9 +486,11 @@ def create_mcp_server() -> FastMCP:
439
486
  "**route** — microservice, module, http_method, path_prefix, framework; **client** — microservice, module, "
440
487
  "source_layer, client_kind, target_service, target_path_prefix, http_method; **producer** — microservice, "
441
488
  "module, source_layer, producer_kind, topic_prefix. "
489
+ "`role` is singular and `exclude_roles` plural; `capability` is a functional tag assigned during indexing. "
490
+ "`fqn_prefix` is a prefix predicate — for exact FQN or id lookup use `resolve`/`describe`. "
442
491
  "Wildcards in prefix fields are rejected. An empty filter (`{}`) or `filter=None` means no predicate (all nodes of "
443
492
  "that kind; use pagination). Unknown keys or inapplicable populated fields return success=false. "
444
- "Successful responses echo `limit`/`offset` and may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text)."
493
+ "Successful responses echo `limit`/`offset`."
445
494
  ),
446
495
  )
447
496
  async def find(
@@ -452,11 +501,10 @@ def create_mcp_server() -> FastMCP:
452
501
  "'producer' = outbound async producers."
453
502
  )
454
503
  ),
455
- filter: dict[str, Any] | str = Field(
504
+ filter: mcp_v2.NodeFilter = Field(
456
505
  ...,
457
506
  description=(
458
- "Required NodeFilter dict (extra keys forbidden). Fields must be applicable to `kind`. "
459
- "Prefer a JSON object; a JSON-encoded string is accepted."
507
+ "Required NodeFilter object (extra keys forbidden). Fields must be applicable to `kind`."
460
508
  ),
461
509
  ),
462
510
  limit: int = Field(default=25, ge=1, le=500, description="Max nodes to return"),
@@ -468,17 +516,14 @@ def create_mcp_server() -> FastMCP:
468
516
  @mcp.tool(
469
517
  name="describe",
470
518
  description=(
471
- "Full node record plus `edge_summary` (in/out counts per stored edge label, plus optional describe-time keys). Type Symbols may add "
472
- "composed keys DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, and DECLARES.EXPOSES (navigable on type Symbols via neighbors, out only); "
473
- "method Symbols may add override-axis virtual keys (OVERRIDDEN_BY, OVERRIDDEN_BY.DECLARES_CLIENT, OVERRIDDEN_BY.DECLARES_PRODUCER, "
474
- "OVERRIDDEN_BY.EXPOSES, plus an `OVERRIDES` map entry that merges stored `[:OVERRIDES]` counts with the dispatch-up rollup per direction). "
475
- "Override-axis virtual keys are navigable via neighbors on non-static method Symbol origins "
476
- "(out only; composed keys include via_id in attrs). The stored `OVERRIDES` relationship "
477
- "is also a normal edge label (e.g. direction in from declaration toward overriders). "
519
+ "Full node record plus `edge_summary` (in/out counts per stored edge label). For type Symbols, `edge_summary` "
520
+ "also exposes composed keys (DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES); for "
521
+ "non-static method Symbols it adds override-axis virtual keys (OVERRIDDEN_BY and its composed forms, plus an "
522
+ "`OVERRIDES` map merging stored `[:OVERRIDES]` counts with the dispatch-up rollup). These composed/override keys "
523
+ "are out-only and navigable via `neighbors`; the stored `OVERRIDES` is also a normal edge label (in toward declaration). "
478
524
  "Pass `id` for any kind, or exact `fqn` for Symbol lookup (`id` wins when both are set). "
479
525
  "`describe(fqn=…)` keeps the first graph row when multiple symbols share that FQN; when an FQN may collide, "
480
- "prefer `resolve(identifier=…, hint_kind='symbol')` first, then `describe(id=…)` on the chosen node. "
481
- "Successful responses may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text)."
526
+ "prefer `resolve(identifier=…, hint_kind='symbol')` first, then `describe(id=…)` on the chosen node."
482
527
  ),
483
528
  )
484
529
  async def describe(
@@ -502,18 +547,19 @@ def create_mcp_server() -> FastMCP:
502
547
  @mcp.tool(
503
548
  name="neighbors",
504
549
  description=(
505
- "Graph walk: **direction** (`in` | `out`) and non-empty **edge_types** are required (stored labels for one hop; "
506
- "type Symbol origins may also pass composed DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, or DECLARES.EXPOSES "
507
- "for 2-hop member rollups; method Symbol origins may pass OVERRIDDEN_BY, OVERRIDDEN_BY.DECLARES_CLIENT, "
508
- "OVERRIDDEN_BY.DECLARES_PRODUCER, OVERRIDDEN_BY.EXPOSES for override-axis rollups — out only, via_id in "
509
- "attrs on composed keys). "
550
+ "Graph walk: **direction** (`in` | `out`) and non-empty **edge_types** are required (one hop over stored edge "
551
+ "labels; type/method Symbol origins may also pass composed or override-axis keys — see `edge_types`). From a "
552
+ "type Symbol, `direction='out'` with EXPOSES yields route nodes and HTTP_CALLS/ASYNC_CALLS yield client/producer "
553
+ "nodes; `direction='in'` reverses each relationship. "
554
+ "`direction` and `edge_types` have no defaults; an empty `edge_types` fails. The CALLS-only features — "
555
+ "`edge_filter`, `include_unresolved`, `dedup_calls` — each require `edge_types=['CALLS']`; `edge_filter` and "
556
+ "`include_unresolved` are mutually exclusive. Violating a precondition (wrong CALLS context, composed/override "
557
+ "keys on an ineligible origin or with `direction='in'`, wildcards in prefix fields, unknown filter keys) returns "
558
+ "success=false with a message; `dedup_calls` with other edge_types is a silent no-op. "
510
559
  "Optional `filter` applies to each neighbor endpoint row; populated fields must be applicable to that "
511
- "neighbor's kind—mixed-kind result sets fail on the first inapplicable neighbor (strict frame). "
512
- "Optional `edge_filter` requires edge_types=['CALLS'] only (no composed dot-keys or extra stored "
513
- "labels); projects the ordered CALLS stream by edge attributes (min_confidence, strategies, "
514
- "callee_declaring_role). Wildcards in prefix fields are rejected. Unknown filter keys return success=false. "
515
- "Successful responses echo `requested_edge_types` and may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text). "
516
- "Each edge's `attrs.strategy` indicates resolution quality (brownfield/fallback vs primary paths)."
560
+ "neighbor's kind—mixed-kind result sets fail on the first inapplicable neighbor (per-neighbor strict frame). "
561
+ "Each edge's `attrs.strategy` indicates resolution quality (brownfield/fallback vs primary paths). "
562
+ "Successful responses echo `requested_edge_types`."
517
563
  ),
518
564
  )
519
565
  async def neighbors(
@@ -544,19 +590,19 @@ def create_mcp_server() -> FastMCP:
544
590
  le=1000,
545
591
  description="Skip this many edges after merge (pagination)",
546
592
  ),
547
- filter: dict[str, Any] | str | None = Field(
593
+ filter: mcp_v2.NodeFilter | None = Field(
548
594
  default=None,
549
595
  description=(
550
- "Optional NodeFilter on the neighbor node. Same applicability rules as `find` for that node's kind. "
551
- "Prefer a JSON object; a JSON-encoded string is accepted."
596
+ "Optional NodeFilter on the neighbor node. An empty object or omitted means no predicate. "
597
+ "Same applicability rules as `find` for that node's kind."
552
598
  ),
553
599
  ),
554
- edge_filter: dict[str, Any] | str | None = Field(
600
+ edge_filter: mcp_v2.EdgeFilter | None = Field(
555
601
  default=None,
556
602
  description=(
557
603
  "Optional EdgeFilter on CALLS edge attributes (edge_types=['CALLS'] only). Use "
558
604
  "callee_declaring_role for callee stereotype projection — not NodeFilter.role on method neighbors. "
559
- "Mutually exclusive with include_unresolved. Prefer a JSON object; a JSON-encoded string is accepted."
605
+ "Mutually exclusive with include_unresolved."
560
606
  ),
561
607
  ),
562
608
  include_unresolved: bool = Field(
@@ -598,10 +644,11 @@ def create_mcp_server() -> FastMCP:
598
644
  "status=one (single node), many (≥2 ranked candidates with reason), or none "
599
645
  "(no match — fall back to search(query=...) for natural language or fuzzy text). "
600
646
  "Optional hint_kind narrows to symbol, route, client, or producer. "
601
- "Successful responses may include hints_structured (tool call suggestions with `reason` field) and advisories (pure informational text) — same contract as other v2 tools. "
602
647
  "Malformed empty/whitespace identifier returns success=false. "
603
648
  "Examples: resolve('com.foo.Bar', hint_kind='symbol'); "
604
649
  "resolve('GET /api/v1/customers', hint_kind='route'); "
650
+ "resolve('PaymentClient', hint_kind='client'); "
651
+ "resolve('order.created', hint_kind='producer'); "
605
652
  "resolve('the client that handles assignments') → none (use search instead)."
606
653
  ),
607
654
  )
@@ -622,12 +669,13 @@ def create_mcp_server() -> FastMCP:
622
669
 
623
670
 
624
671
  def main() -> None:
672
+ raise_fd_limit()
625
673
  emit_legacy_env_hints_if_present()
626
674
 
627
675
  # Load YAML config and apply embedding settings to environment
628
676
  # This ensures SBERT_MODEL and SBERT_DEVICE from .java-codebase-rag.yml are available
629
677
  # before any tool handler runs (same behavior as CLI path)
630
- cfg = resolve_operator_config(source_root=_project_root())
678
+ cfg = resolve_operator_config(source_root=_source_root_for_operator_config())
631
679
  cfg.apply_to_os_environ()
632
680
  mcp_v2.set_hints_enabled(cfg.hints_enabled)
633
681
 
user_rag/__init__.py ADDED
@@ -0,0 +1 @@
1
+
user_rag/cli.py ADDED
@@ -0,0 +1,175 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import asyncio
5
+ import json
6
+ import os
7
+ import pprint
8
+ import sys
9
+ from pathlib import Path
10
+ from typing import Any
11
+
12
+ import pr_analysis
13
+ import server
14
+ from path_filtering import LayeredIgnore
15
+
16
+
17
+ def _jsonable(value: Any) -> Any:
18
+ if hasattr(value, "model_dump"):
19
+ return value.model_dump()
20
+ if isinstance(value, Path):
21
+ return str(value)
22
+ raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")
23
+
24
+
25
+ def _to_payload(value: Any) -> Any:
26
+ if hasattr(value, "model_dump"):
27
+ return value.model_dump()
28
+ return value
29
+
30
+
31
+ def _emit(value: Any) -> None:
32
+ payload = _to_payload(value)
33
+ if sys.stdout.isatty():
34
+ print(pprint.pformat(payload, sort_dicts=True))
35
+ return
36
+ print(json.dumps(payload, default=_jsonable, sort_keys=True, indent=None))
37
+
38
+
39
+ def _parse_common_graph_flags(parser: argparse.ArgumentParser) -> None:
40
+ parser.add_argument("--source-root", type=str, default=None)
41
+ parser.add_argument("--kuzu-path", type=str, default=None)
42
+ parser.add_argument("--lancedb-path", type=str, default=None)
43
+
44
+
45
+ def _apply_graph_env(args: argparse.Namespace) -> None:
46
+ if args.source_root:
47
+ os.environ["LANCEDB_MCP_PROJECT_ROOT"] = str(Path(args.source_root).expanduser().resolve())
48
+ if args.kuzu_path:
49
+ os.environ["KUZU_DB_PATH"] = str(Path(args.kuzu_path).expanduser().resolve())
50
+ # Reset singleton to pick up override path.
51
+ from kuzu_queries import KuzuGraph
52
+
53
+ KuzuGraph._instance = None
54
+ KuzuGraph._instance_path = None
55
+ if args.lancedb_path:
56
+ os.environ["LANCEDB_URI"] = str(Path(args.lancedb_path).expanduser().resolve())
57
+
58
+
59
+ def _cmd_refresh(args: argparse.Namespace) -> int:
60
+ """Return 1 for launched-subprocess failures, 2 for internal pre-launch errors."""
61
+ _apply_graph_env(args)
62
+ result = asyncio.run(server.run_refresh_pipeline(quiet=bool(args.quiet)))
63
+ payload = result.model_dump()
64
+ if payload.get("success"):
65
+ _emit(payload)
66
+ return 0
67
+ _emit(payload)
68
+ return 2 if payload.get("exit_code") is None else 1
69
+
70
+
71
+ def _cmd_meta(args: argparse.Namespace) -> int:
72
+ _apply_graph_env(args)
73
+ payload = server._graph_meta_output().model_dump()
74
+ _emit(payload)
75
+ return 0 if payload.get("success") else 2
76
+
77
+
78
+ def _cmd_tables(args: argparse.Namespace) -> int:
79
+ _apply_graph_env(args)
80
+ payload = server.list_code_index_tables_payload().model_dump()
81
+ _emit(payload)
82
+ return 0
83
+
84
+
85
+ def _cmd_diagnose_ignore(args: argparse.Namespace) -> int:
86
+ _apply_graph_env(args)
87
+ # Keep this after _apply_graph_env so relative paths resolve from --source-root.
88
+ root = server._project_root()
89
+ raw = Path(args.path)
90
+ try:
91
+ abs_path = raw.resolve() if raw.is_absolute() else (root / raw).resolve()
92
+ except OSError as exc:
93
+ _emit({"success": False, "message": f"Invalid path: {exc}"})
94
+ return 1
95
+ li = LayeredIgnore(root)
96
+ _emit(li.diagnose_dict(abs_path))
97
+ return 0
98
+
99
+
100
+ def _read_diff_text(args: argparse.Namespace) -> str:
101
+ if args.diff_file:
102
+ return Path(args.diff_file).read_text(encoding="utf-8")
103
+ if args.diff_stdin:
104
+ return sys.stdin.read()
105
+ raise ValueError("Provide exactly one of --diff-file or --diff-stdin")
106
+
107
+
108
+ def _cmd_analyze_pr(args: argparse.Namespace) -> int:
109
+ _apply_graph_env(args)
110
+ try:
111
+ diff_text = _read_diff_text(args)
112
+ except Exception as exc:
113
+ _emit({"success": False, "message": str(exc)})
114
+ return 1
115
+ if not diff_text.strip():
116
+ _emit({"success": False, "message": "Diff is empty"})
117
+ return 1
118
+ from kuzu_queries import KuzuGraph
119
+
120
+ if not KuzuGraph.exists():
121
+ _emit({"success": False, "message": "Kuzu graph not found"})
122
+ return 1
123
+ graph = KuzuGraph.get()
124
+ report = pr_analysis.analyze_pr_pipeline(graph, diff_text)
125
+ _emit(pr_analysis.pr_report_to_dict(report))
126
+ return 0
127
+
128
+
129
+ def build_parser() -> argparse.ArgumentParser:
130
+ parser = argparse.ArgumentParser(prog="java-codebase-rag")
131
+ subparsers = parser.add_subparsers(dest="subcommand")
132
+
133
+ refresh = subparsers.add_parser("refresh")
134
+ _parse_common_graph_flags(refresh)
135
+ refresh.add_argument("--quiet", action="store_true")
136
+ refresh.set_defaults(handler=_cmd_refresh)
137
+
138
+ meta = subparsers.add_parser("meta")
139
+ _parse_common_graph_flags(meta)
140
+ meta.set_defaults(handler=_cmd_meta)
141
+
142
+ tables = subparsers.add_parser("tables")
143
+ _parse_common_graph_flags(tables)
144
+ tables.set_defaults(handler=_cmd_tables)
145
+
146
+ diagnose = subparsers.add_parser("diagnose-ignore")
147
+ _parse_common_graph_flags(diagnose)
148
+ diagnose.add_argument("path", type=str)
149
+ diagnose.set_defaults(handler=_cmd_diagnose_ignore)
150
+
151
+ analyze = subparsers.add_parser("analyze-pr")
152
+ _parse_common_graph_flags(analyze)
153
+ group = analyze.add_mutually_exclusive_group(required=True)
154
+ group.add_argument("--diff-file", type=str)
155
+ group.add_argument("--diff-stdin", action="store_true")
156
+ analyze.set_defaults(handler=_cmd_analyze_pr)
157
+ return parser
158
+
159
+
160
+ def main(argv: list[str] | None = None) -> int:
161
+ parser = build_parser()
162
+ args = parser.parse_args(argv)
163
+ handler = getattr(args, "handler", None)
164
+ if handler is None:
165
+ parser.print_help(sys.stderr)
166
+ return 2
167
+ try:
168
+ return int(handler(args))
169
+ except Exception as exc: # pragma: no cover - defensive top-level guard
170
+ _emit({"success": False, "exit_code": 2, "message": f"internal error: {exc}"})
171
+ return 2
172
+
173
+
174
+ if __name__ == "__main__":
175
+ raise SystemExit(main())
@@ -1,31 +0,0 @@
1
- ast_java.py,sha256=OKoH7oX6L7AEEd6UY-spK8BPtWYY1T_4esrTC5VtoK8,98881
2
- brownfield_events.py,sha256=yxXkKDgMb3VPtaiakGzncHM_EGnda8xIue6w90yYp8s,2055
3
- build_ast_graph.py,sha256=1uqgFK2ebBdEc2QcAYK5vU4afOb95jU3zht5FracCkI,148683
4
- chunk_heuristics.py,sha256=aQk2NOKxzUdqoUAJUO3G3LE0MN_bYZWNLQ0tkmj5uts,1813
5
- graph_enrich.py,sha256=m3cksCHLqLHhA0Y-TLodbm09YfSJZjlTDN0Z51DiP2c,63317
6
- index_common.py,sha256=HT6FKHFJ084eFvd3fR1j8z8gf4eWoPHVW8GXLpw464I,285
7
- java_index_flow_lancedb.py,sha256=LMmfMSdE2d-ujxuJ2-hss7BhkrUMxHNyZuqsiGITuAI,12057
8
- java_index_v1_common.py,sha256=nF1KrSqboF_RRvWerG9knRRFmWwsrG_CvhgnsoZ8KqA,1154
9
- java_ontology.py,sha256=nM-oY8_91rmUudv9hAss1AMus9BFY9s5tTpAWjlCz00,16424
10
- kuzu_queries.py,sha256=9bQzrU311AOw_BcUp_KSGiZgPVSaLSU7y63XfcT_vqI,90137
11
- mcp_hints.py,sha256=3swh05LSiWur3tm3-yssndBsLxIxFhy501kBtJI8jJ0,42509
12
- mcp_v2.py,sha256=JFe62sYzJ2XiE6L3wAH8XG9_Ya2oOeJQ_hkiTmXFnSE,79065
13
- path_filtering.py,sha256=-oX16SYLWYwX9pcV1fu3vbVTIhY1GzFflT7J1E2tqPY,17122
14
- pr_analysis.py,sha256=Zaq90xYgMgrReV3vCGcFhOkK61gIRMAAIgs7ev-rJG4,18410
15
- search_lancedb.py,sha256=-XgtpbJ_3zDLiZ_vGKXjaLpl7RlvgyzUb7oAGoWkXO0,36754
16
- server.py,sha256=1ZEDkRAOMs0ORncMh9CP2ICCTGEuAe2qmptytQ4QYYU,28862
17
- java_codebase_rag/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
18
- java_codebase_rag/cli.py,sha256=WW-DsskSGr-d0JXBLkj4IdAa2OsAcLz5e54_DWvD9Sk,33872
19
- java_codebase_rag/cli_format.py,sha256=arU7P9W6Fvm7X_wzR1wJ8EfyxK1rDP_ESEhdA0ub4Mo,2579
20
- java_codebase_rag/cli_progress.py,sha256=9jCqEagYOXs32SYVA31_sOCrONvYy7cl1CrdBD2Pg44,3168
21
- java_codebase_rag/config.py,sha256=1BkRQsdY2ohZ8IWmbTG3WHgotVVUIrRTN537A1QAoCQ,15352
22
- java_codebase_rag/installer.py,sha256=flj330ZPSBrO2iw_yuNFBILHOTVbarMufYwqjZ8JzN0,42778
23
- java_codebase_rag/pipeline.py,sha256=D9SNdffcmJLoKHnNZLWZzfor1fI4bkkpJkU0KFsqfdA,9722
24
- java_codebase_rag/install_data/agents/explorer-rag-enhanced.md,sha256=APl9d-No12qZNZLjU7mwNRwxHIgnT3ZtQZiD4clWlyU,14413
25
- java_codebase_rag/install_data/skills/explore-codebase/SKILL.md,sha256=pIM-Xdwq_fXkhhBJCdb-fA2nes5c_mMPcdUXb7Adyxo,12040
26
- java_codebase_rag-0.5.3.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
27
- java_codebase_rag-0.5.3.dist-info/METADATA,sha256=iI08-selyGz8kYjgqBsWbt4Z9e7MeQd_aF7kHFPu65Q,16807
28
- java_codebase_rag-0.5.3.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
29
- java_codebase_rag-0.5.3.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
30
- java_codebase_rag-0.5.3.dist-info/top_level.txt,sha256=5aIYoMkvJvvfXvf4iHn2OeSIM7PZXP-0j94eNESnwMw,242
31
- java_codebase_rag-0.5.3.dist-info/RECORD,,