java-codebase-rag 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ast_java.py +1 -1
- build_ast_graph.py +833 -64
- graph_enrich.py +32 -0
- java_codebase_rag/cli.py +74 -5
- java_codebase_rag/config.py +70 -2
- java_codebase_rag/pipeline.py +55 -0
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.4.0.dist-info}/METADATA +10 -4
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.4.0.dist-info}/RECORD +13 -13
- server.py +58 -6
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.4.0.dist-info}/WHEEL +0 -0
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.4.0.dist-info}/entry_points.txt +0 -0
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.4.0.dist-info}/top_level.txt +0 -0
graph_enrich.py
CHANGED
|
@@ -1565,6 +1565,38 @@ def microservice_for_path(
|
|
|
1565
1565
|
return ""
|
|
1566
1566
|
|
|
1567
1567
|
|
|
1568
|
+
def detect_microservice_from_path(cwd: Path, source_root: Path) -> str | None:
|
|
1569
|
+
"""Detect microservice from cwd for query-time auto-scope.
|
|
1570
|
+
|
|
1571
|
+
Returns None if cwd is outside source_root, cwd IS source_root (system level),
|
|
1572
|
+
or no microservice is detected. Otherwise returns the microservice name.
|
|
1573
|
+
"""
|
|
1574
|
+
cwd_resolved = cwd.resolve()
|
|
1575
|
+
source_resolved = source_root.resolve()
|
|
1576
|
+
|
|
1577
|
+
# Check if cwd is outside source_root
|
|
1578
|
+
try:
|
|
1579
|
+
cwd_resolved.relative_to(source_resolved)
|
|
1580
|
+
except ValueError:
|
|
1581
|
+
return None
|
|
1582
|
+
|
|
1583
|
+
# Check if cwd IS source_root (at system level, no specific scope)
|
|
1584
|
+
if cwd_resolved == source_resolved:
|
|
1585
|
+
return None
|
|
1586
|
+
|
|
1587
|
+
# Check if cwd itself matches a YAML override (directory name matches microservice_roots)
|
|
1588
|
+
overrides = load_microservice_overrides(source_resolved)
|
|
1589
|
+
if overrides and cwd_resolved.name in overrides:
|
|
1590
|
+
return cwd_resolved.name
|
|
1591
|
+
|
|
1592
|
+
# microservice_for_path walks _bounded_parents which excludes the path itself.
|
|
1593
|
+
# For query-time detection we need cwd included in the walk, so pass a synthetic
|
|
1594
|
+
# child path so that cwd appears as a parent in the build-marker scan.
|
|
1595
|
+
synthetic = cwd_resolved / "__scope_probe__"
|
|
1596
|
+
ms = microservice_for_path(str(synthetic), source_resolved)
|
|
1597
|
+
return ms if ms else None
|
|
1598
|
+
|
|
1599
|
+
|
|
1568
1600
|
# ---------- chunk enrichment ----------
|
|
1569
1601
|
|
|
1570
1602
|
|
java_codebase_rag/cli.py
CHANGED
|
@@ -21,7 +21,7 @@ from java_codebase_rag.config import (
|
|
|
21
21
|
index_dir_has_existing_artifacts,
|
|
22
22
|
resolve_operator_config,
|
|
23
23
|
)
|
|
24
|
-
from java_codebase_rag.pipeline import clip, run_build_ast_graph, run_cocoindex_drop, run_cocoindex_update
|
|
24
|
+
from java_codebase_rag.pipeline import clip, run_build_ast_graph, run_cocoindex_drop, run_cocoindex_update, run_incremental_graph
|
|
25
25
|
from java_ontology import VALID_UNRESOLVED_CALL_REASONS
|
|
26
26
|
|
|
27
27
|
KUZU_INCREMENTAL_TRACKING_ISSUE_URL = "https://github.com/HumanBean17/java-codebase-rag/issues/73"
|
|
@@ -229,6 +229,23 @@ def _add_verbosity_flags(p: argparse.ArgumentParser) -> None:
|
|
|
229
229
|
|
|
230
230
|
def _cmd_init(args: argparse.Namespace) -> int:
|
|
231
231
|
cfg = _resolved_from_ns(args)
|
|
232
|
+
# Check for parent config or index
|
|
233
|
+
from java_codebase_rag.config import discover_project_root, find_yaml_config_file
|
|
234
|
+
parent_config_dir = discover_project_root(cfg.source_root.parent)
|
|
235
|
+
if parent_config_dir is not None:
|
|
236
|
+
parent_config = find_yaml_config_file(parent_config_dir)
|
|
237
|
+
if parent_config is not None:
|
|
238
|
+
print(
|
|
239
|
+
f"Warning: found existing config at {parent_config}. "
|
|
240
|
+
f"Creating a new project here will create a separate index.",
|
|
241
|
+
file=sys.stderr,
|
|
242
|
+
)
|
|
243
|
+
else:
|
|
244
|
+
print(
|
|
245
|
+
f"Warning: found existing index at {parent_config_dir / '.java-codebase-rag'}. "
|
|
246
|
+
f"Creating a new project here will create a separate index.",
|
|
247
|
+
file=sys.stderr,
|
|
248
|
+
)
|
|
232
249
|
_startup_hints(cfg)
|
|
233
250
|
cfg.apply_to_os_environ()
|
|
234
251
|
occupied, paths = index_dir_has_existing_artifacts(cfg.index_dir)
|
|
@@ -298,7 +315,11 @@ def _cmd_increment(args: argparse.Namespace) -> int:
|
|
|
298
315
|
cfg = _resolved_from_ns(args)
|
|
299
316
|
_startup_hints(cfg)
|
|
300
317
|
cfg.apply_to_os_environ()
|
|
301
|
-
|
|
318
|
+
|
|
319
|
+
# Check for --vectors-only flag
|
|
320
|
+
vectors_only = bool(getattr(args, "vectors_only", False))
|
|
321
|
+
if vectors_only:
|
|
322
|
+
_emit_increment_kuzu_warning()
|
|
302
323
|
|
|
303
324
|
def work() -> int:
|
|
304
325
|
env = cfg.subprocess_env()
|
|
@@ -320,7 +341,50 @@ def _cmd_increment(args: argparse.Namespace) -> int:
|
|
|
320
341
|
}
|
|
321
342
|
)
|
|
322
343
|
return 1
|
|
323
|
-
|
|
344
|
+
|
|
345
|
+
# If --vectors-only is set, skip graph update
|
|
346
|
+
if vectors_only:
|
|
347
|
+
_emit({"success": True, "message": "increment completed (Lance only; graph may be stale — see stderr)"})
|
|
348
|
+
return 0
|
|
349
|
+
|
|
350
|
+
# Run incremental graph update
|
|
351
|
+
g = run_incremental_graph(
|
|
352
|
+
source_root=cfg.source_root,
|
|
353
|
+
kuzu_path=cfg.kuzu_path,
|
|
354
|
+
verbose=bool(args.verbose),
|
|
355
|
+
quiet=bool(args.quiet),
|
|
356
|
+
env=env,
|
|
357
|
+
)
|
|
358
|
+
|
|
359
|
+
# Check if incremental fell back to full rebuild
|
|
360
|
+
if g.returncode == 0 and g.stdout:
|
|
361
|
+
# Parse stdout to check for full_fallback mode
|
|
362
|
+
# The incremental_rebuild function returns a JSON payload with mode field
|
|
363
|
+
try:
|
|
364
|
+
result = json.loads(g.stdout.strip())
|
|
365
|
+
if result.get("mode") == "full_fallback":
|
|
366
|
+
print(
|
|
367
|
+
"[increment] fell back to full graph rebuild — this is normal after schema changes or first run",
|
|
368
|
+
file=sys.stderr,
|
|
369
|
+
flush=True,
|
|
370
|
+
)
|
|
371
|
+
except (json.JSONDecodeError, ValueError):
|
|
372
|
+
# If parsing fails, continue silently
|
|
373
|
+
pass
|
|
374
|
+
|
|
375
|
+
if g.returncode != 0:
|
|
376
|
+
_emit(
|
|
377
|
+
{
|
|
378
|
+
"success": False,
|
|
379
|
+
"exit_code": g.returncode,
|
|
380
|
+
"stdout": clip(g.stdout, 4000),
|
|
381
|
+
"stderr": clip(g.stderr, 4000),
|
|
382
|
+
"message": f"graph builder exit {g.returncode}",
|
|
383
|
+
}
|
|
384
|
+
)
|
|
385
|
+
return 1
|
|
386
|
+
|
|
387
|
+
_emit({"success": True, "message": "increment completed (Lance + graph updated)"})
|
|
324
388
|
return 0
|
|
325
389
|
|
|
326
390
|
return _run_with_pipeline_progress("increment", cfg, quiet=bool(args.quiet), work=work)
|
|
@@ -615,7 +679,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
615
679
|
"--quiet suppresses that stream; stdout remains the machine-readable payload.\n\n"
|
|
616
680
|
"Lifecycle (manage the index):\n"
|
|
617
681
|
" init Create a fresh index from a Java repository.\n"
|
|
618
|
-
" increment Pick up changes since the last index update (Lance
|
|
682
|
+
" increment Pick up changes since the last index update (Lance + graph).\n"
|
|
619
683
|
" reprocess Full vector + graph rebuild (default); optional --vectors-only / --graph-only.\n"
|
|
620
684
|
" erase Delete the index from disk.\n\n"
|
|
621
685
|
"Introspection (inspect the index):\n"
|
|
@@ -650,10 +714,15 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
650
714
|
increment = subparsers.add_parser(
|
|
651
715
|
"increment",
|
|
652
716
|
help="Pick up changes since the last index update.",
|
|
653
|
-
description="Runs cocoindex catch-up
|
|
717
|
+
description="Runs cocoindex catch-up and incremental Kuzu graph update. Use --vectors-only to skip graph update.",
|
|
654
718
|
)
|
|
655
719
|
_add_index_embedding_flags(increment)
|
|
656
720
|
_add_verbosity_flags(increment)
|
|
721
|
+
increment.add_argument(
|
|
722
|
+
"--vectors-only",
|
|
723
|
+
action="store_true",
|
|
724
|
+
help="Run only cocoindex catch-up (Lance); skip graph update.",
|
|
725
|
+
)
|
|
657
726
|
increment.set_defaults(handler=_cmd_increment)
|
|
658
727
|
|
|
659
728
|
reprocess = subparsers.add_parser(
|
java_codebase_rag/config.py
CHANGED
|
@@ -123,6 +123,46 @@ def find_yaml_config_file(source_root: Path) -> Path | None:
|
|
|
123
123
|
return None
|
|
124
124
|
|
|
125
125
|
|
|
126
|
+
def _has_index_dir(directory: Path) -> bool:
    """True if *directory* contains a non-empty ``.java-codebase-rag/`` index directory.

    A missing, empty, or non-directory ``.java-codebase-rag`` entry yields
    ``False``. So does an index directory that cannot be listed (for example
    permission denied, or removed between the two checks): this probe runs
    against arbitrary ancestor directories, so an unreadable one must not
    abort the caller.
    """
    idx = directory / ".java-codebase-rag"
    try:
        # any() stops at the first entry; we only need to know one exists.
        return idx.is_dir() and any(idx.iterdir())
    except OSError:
        return False
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def discover_project_root(start: Path) -> Path | None:
    """Walk up from *start* to find the directory containing a config file or index.

    Looks for ``.java-codebase-rag.yml`` / ``.java-codebase-rag.yaml`` (preferred)
    or the ``.java-codebase-rag/`` index directory as a project boundary marker.

    First match wins (closest to *start*). Config file takes priority over index
    directory at the same level. Stops at $HOME inclusive: $HOME itself is
    checked, but the walk does not continue past it. If the home directory
    cannot be determined, the walk is bounded only by the filesystem root.

    Returns the matching directory, or ``None`` if no marker is found.
    """
    start = start.resolve()
    try:
        home: Path | None = Path.home().resolve()
    except RuntimeError:
        # Path.home() raises when no home directory can be resolved (e.g. an
        # unknown UID with HOME unset). Discovery should still work there.
        home = None

    # ``start`` first, then each ancestor up to and including the root.
    for candidate in (start, *start.parents):
        # Config file is the primary anchor.
        if find_yaml_config_file(candidate) is not None:
            return candidate
        # Index directory is the secondary anchor (indexes without config).
        if _has_index_dir(candidate):
            return candidate
        # Home has just been checked; do not walk past it.
        if candidate == home:
            return None
    return None
|
|
164
|
+
|
|
165
|
+
|
|
126
166
|
def load_yaml_mapping(source_root: Path) -> dict[str, Any]:
|
|
127
167
|
path = find_yaml_config_file(source_root)
|
|
128
168
|
if path is None:
|
|
@@ -277,8 +317,36 @@ def resolve_operator_config(
|
|
|
277
317
|
cli_embedding_model: str | None = None,
|
|
278
318
|
cli_embedding_device: str | None = None,
|
|
279
319
|
) -> ResolvedOperatorConfig:
|
|
280
|
-
|
|
281
|
-
|
|
320
|
+
# Phase 1: Find the config file directory
|
|
321
|
+
if source_root is not None:
|
|
322
|
+
# CLI flag provided: use it as both config_dir and effective source_root
|
|
323
|
+
# (skip YAML source_root check - CLI wins)
|
|
324
|
+
root = source_root.expanduser().resolve()
|
|
325
|
+
config_dir = root
|
|
326
|
+
yaml_dict = load_yaml_mapping(config_dir)
|
|
327
|
+
else:
|
|
328
|
+
# Check env var first
|
|
329
|
+
env_raw = os.environ.get(ENV_SOURCE_ROOT, "").strip()
|
|
330
|
+
if env_raw:
|
|
331
|
+
root = Path(env_raw).expanduser().resolve()
|
|
332
|
+
config_dir = root
|
|
333
|
+
yaml_dict = load_yaml_mapping(config_dir)
|
|
334
|
+
else:
|
|
335
|
+
# Walk up to find config dir
|
|
336
|
+
discovered = discover_project_root(Path.cwd())
|
|
337
|
+
config_dir = discovered if discovered is not None else Path.cwd().resolve()
|
|
338
|
+
# Load YAML from config dir
|
|
339
|
+
yaml_dict = load_yaml_mapping(config_dir)
|
|
340
|
+
|
|
341
|
+
# Phase 2: Resolve effective source root
|
|
342
|
+
# Check for YAML source_root field (resolved relative to config dir)
|
|
343
|
+
yaml_source_root = yaml_dict.get("source_root")
|
|
344
|
+
if isinstance(yaml_source_root, str) and yaml_source_root.strip():
|
|
345
|
+
yroot = Path(yaml_source_root.strip()).expanduser()
|
|
346
|
+
root = yroot.resolve() if yroot.is_absolute() else (config_dir / yroot).resolve()
|
|
347
|
+
else:
|
|
348
|
+
root = config_dir
|
|
349
|
+
|
|
282
350
|
index_dir, index_src = _resolve_index_dir_path(
|
|
283
351
|
source_root=root, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
|
|
284
352
|
)
|
java_codebase_rag/pipeline.py
CHANGED
|
@@ -247,5 +247,60 @@ def run_build_ast_graph(
|
|
|
247
247
|
return subprocess.CompletedProcess(args=cmd, returncode=code, stdout=out_s, stderr=err_s)
|
|
248
248
|
|
|
249
249
|
|
|
250
|
+
def run_incremental_graph(
    *,
    source_root: Path,
    kuzu_path: Path,
    verbose: bool,
    quiet: bool = False,
    env: dict[str, str] | None = None,
) -> subprocess.CompletedProcess[str]:
    """Invoke ``build_ast_graph.py --incremental`` as a child process.

    The builder runs with *source_root* as its working directory and with
    *env* as its environment (a copy of ``os.environ`` when *env* is falsy).

    Returns a ``CompletedProcess`` carrying the builder's exit code and
    captured stdout/stderr. If the builder script is missing, no process is
    started and a synthetic result with return code 126 is returned.
    """
    script = bundle_dir() / "build_ast_graph.py"
    if not script.is_file():
        return subprocess.CompletedProcess(
            args=[],
            returncode=126,
            stdout="",
            stderr=f"build_ast_graph.py not found under {script.parent}",
        )

    argv: list[str] = [sys.executable, str(script)]
    argv += ["--source-root", str(source_root)]
    argv += ["--kuzu-path", str(kuzu_path)]
    argv.append("--incremental")

    # Three tiers: --quiet (silent) / default (filtered progress) / --verbose
    # (raw). The default tier also passes --verbose so the builder emits its
    # per-pass progress lines; per the original note the parent filters them
    # (via _LineFilter), and --verbose bypasses that filter.
    if not quiet or verbose:
        argv.append("--verbose")

    workdir = str(source_root)
    child_env = env or os.environ.copy()

    if quiet:
        # Silent tier: capture everything and hand the result straight back.
        return subprocess.run(
            argv,
            cwd=workdir,
            env=child_env,
            capture_output=True,
            text=True,
        )

    child = subprocess.Popen(
        argv,
        cwd=workdir,
        env=child_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        bufsize=0,
    )
    captured_out, captured_err, exit_code = _popen_capturing_stderr(child, verbose=verbose)

    if not verbose:
        from java_codebase_rag.cli_format import bold_cyan, styled_check, styled_cross

        if exit_code == 0:
            marker = styled_check()
        else:
            marker = styled_cross()
        print(f"{marker} {bold_cyan('[increment]')} done", file=sys.stderr, flush=True)

    return subprocess.CompletedProcess(
        args=argv,
        returncode=exit_code,
        stdout=captured_out,
        stderr=captured_err,
    )
|
|
303
|
+
|
|
304
|
+
|
|
250
305
|
def clip(s: str, n: int) -> str:
    """Return at most the last *n* characters of *s*.

    Strings no longer than *n* are returned unchanged. A non-positive *n*
    yields the empty string.
    """
    if n <= 0:
        # ``s[-0:]`` is the whole string, so the slice below cannot express
        # "keep nothing"; handle the empty budget explicitly.
        return ""
    return s[-n:] if len(s) > n else s
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: java-codebase-rag
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.4.0
|
|
4
4
|
Summary: MCP server for semantic + structural search over Java codebases
|
|
5
5
|
Author: HumanBean17
|
|
6
6
|
License-Expression: MIT
|
|
@@ -29,7 +29,12 @@ Requires-Dist: PyYAML<7,>=6.0.3
|
|
|
29
29
|
Requires-Dist: sentence-transformers<6,>=5.4.0
|
|
30
30
|
Requires-Dist: tree-sitter<0.26,>=0.25.2
|
|
31
31
|
Requires-Dist: tree-sitter-java<0.24,>=0.23.5
|
|
32
|
+
Requires-Dist: pydantic<3,>=2.0
|
|
32
33
|
Requires-Dist: unidiff<1,>=0.7.3
|
|
34
|
+
Provides-Extra: dev
|
|
35
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
36
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
37
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
33
38
|
Dynamic: license-file
|
|
34
39
|
|
|
35
40
|
# java-codebase-rag
|
|
@@ -126,7 +131,9 @@ With the package installed, the console script `java-codebase-rag-mcp` is on you
|
|
|
126
131
|
claude mcp add --transport stdio java-codebase-rag -- java-codebase-rag-mcp
|
|
127
132
|
```
|
|
128
133
|
|
|
129
|
-
|
|
134
|
+
**Zero-env-var configuration:** The tool automatically walks up the directory tree to find `.java-codebase-rag.yml`, so you don't need to set `JAVA_CODEBASE_RAG_SOURCE_ROOT` when working from within a project. Just place the config file at your project root and the tool will find it. See [`mcp.json.example`](./mcp.json.example) for the minimal configuration.
|
|
135
|
+
|
|
136
|
+
If you need to override defaults, you can set env vars (`JAVA_CODEBASE_RAG_INDEX_DIR`, `JAVA_CODEBASE_RAG_SOURCE_ROOT`, `SBERT_MODEL`, …) in `.mcp.json` or your shell profile. For a full configuration template, see [`mcp.json.example`](./mcp.json.example). Official docs: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings).
|
|
130
137
|
|
|
131
138
|
### Claude Desktop
|
|
132
139
|
|
|
@@ -200,7 +207,7 @@ Run `java-codebase-rag --help` to list grouped subcommands. Operator playbook wi
|
|
|
200
207
|
| Group | Subcommand | What it does |
|
|
201
208
|
|---|---|---|
|
|
202
209
|
| Lifecycle | `init` | First-time index. Refuses if artifacts already exist. |
|
|
203
|
-
| Lifecycle | `increment` | CocoIndex catch-up
|
|
210
|
+
| Lifecycle | `increment` | CocoIndex catch-up + incremental Kuzu update. `--vectors-only` for Lance only. |
|
|
204
211
|
| Lifecycle | `reprocess` | Full Lance + Kuzu rebuild. `--vectors-only` / `--graph-only` for a single phase. |
|
|
205
212
|
| Lifecycle | `erase` | Delete index artifacts. Requires `--yes` or TTY confirm. |
|
|
206
213
|
| Introspection | `meta`, `tables`, `diagnose-ignore`, `unresolved-calls` | Health, table listing, ignore-layer diagnostics, receiver-failure call sites. |
|
|
@@ -244,5 +251,4 @@ The default embedding model is `sentence-transformers/all-MiniLM-L6-v2` (downloa
|
|
|
244
251
|
|
|
245
252
|
- `get_service_topology` — microservice-level summary aggregating `HTTP_CALLS` / `ASYNC_CALLS`.
|
|
246
253
|
- Agentic routing layer (query classifier → vector / graph / both).
|
|
247
|
-
- Incremental Kuzu updates (per-changed-file) — see [`propose/TIER2-INCREMENTAL-REBUILD-PROPOSE.md`](./propose/TIER2-INCREMENTAL-REBUILD-PROPOSE.md) and [`propose/INDEX-AUTO-MODE-PROPOSE.md`](./propose/INDEX-AUTO-MODE-PROPOSE.md).
|
|
248
254
|
- Optional `codegraph_nodes` LanceDB table embedding symbol summaries so the graph itself is vector-searchable.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
ast_java.py,sha256=
|
|
1
|
+
ast_java.py,sha256=OKoH7oX6L7AEEd6UY-spK8BPtWYY1T_4esrTC5VtoK8,98881
|
|
2
2
|
brownfield_events.py,sha256=yxXkKDgMb3VPtaiakGzncHM_EGnda8xIue6w90yYp8s,2055
|
|
3
|
-
build_ast_graph.py,sha256=
|
|
3
|
+
build_ast_graph.py,sha256=KY5rpqWR7UafvAcIv0ubSz6jiYA8I5ZGqm_SKIFJulE,148770
|
|
4
4
|
chunk_heuristics.py,sha256=aQk2NOKxzUdqoUAJUO3G3LE0MN_bYZWNLQ0tkmj5uts,1813
|
|
5
|
-
graph_enrich.py,sha256=
|
|
5
|
+
graph_enrich.py,sha256=m3cksCHLqLHhA0Y-TLodbm09YfSJZjlTDN0Z51DiP2c,63317
|
|
6
6
|
index_common.py,sha256=HT6FKHFJ084eFvd3fR1j8z8gf4eWoPHVW8GXLpw464I,285
|
|
7
7
|
java_index_flow_lancedb.py,sha256=LMmfMSdE2d-ujxuJ2-hss7BhkrUMxHNyZuqsiGITuAI,12057
|
|
8
8
|
java_index_v1_common.py,sha256=nF1KrSqboF_RRvWerG9knRRFmWwsrG_CvhgnsoZ8KqA,1154
|
|
@@ -13,16 +13,16 @@ mcp_v2.py,sha256=JFe62sYzJ2XiE6L3wAH8XG9_Ya2oOeJQ_hkiTmXFnSE,79065
|
|
|
13
13
|
path_filtering.py,sha256=-oX16SYLWYwX9pcV1fu3vbVTIhY1GzFflT7J1E2tqPY,17122
|
|
14
14
|
pr_analysis.py,sha256=Zaq90xYgMgrReV3vCGcFhOkK61gIRMAAIgs7ev-rJG4,18410
|
|
15
15
|
search_lancedb.py,sha256=-XgtpbJ_3zDLiZ_vGKXjaLpl7RlvgyzUb7oAGoWkXO0,36754
|
|
16
|
-
server.py,sha256=
|
|
16
|
+
server.py,sha256=c4Bo0FXPoKP2AwIVP_wiv0XENkmKchLHf0QrQPUUgq4,28645
|
|
17
17
|
java_codebase_rag/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
18
|
-
java_codebase_rag/cli.py,sha256=
|
|
18
|
+
java_codebase_rag/cli.py,sha256=h4-86RRZAlCSARuhMhLbqnUiwxAagBnrY5rj4IF0yIo,31238
|
|
19
19
|
java_codebase_rag/cli_format.py,sha256=arU7P9W6Fvm7X_wzR1wJ8EfyxK1rDP_ESEhdA0ub4Mo,2579
|
|
20
20
|
java_codebase_rag/cli_progress.py,sha256=9jCqEagYOXs32SYVA31_sOCrONvYy7cl1CrdBD2Pg44,3168
|
|
21
|
-
java_codebase_rag/config.py,sha256=
|
|
22
|
-
java_codebase_rag/pipeline.py,sha256=
|
|
23
|
-
java_codebase_rag-0.
|
|
24
|
-
java_codebase_rag-0.
|
|
25
|
-
java_codebase_rag-0.
|
|
26
|
-
java_codebase_rag-0.
|
|
27
|
-
java_codebase_rag-0.
|
|
28
|
-
java_codebase_rag-0.
|
|
21
|
+
java_codebase_rag/config.py,sha256=1BkRQsdY2ohZ8IWmbTG3WHgotVVUIrRTN537A1QAoCQ,15352
|
|
22
|
+
java_codebase_rag/pipeline.py,sha256=nMXwX9r7HG9yPstrm7y_vfOMUZuDmw5_1lJTAfR-jwI,9488
|
|
23
|
+
java_codebase_rag-0.4.0.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
|
|
24
|
+
java_codebase_rag-0.4.0.dist-info/METADATA,sha256=Je_Zr3MB5ANZNolBRvHOpjQvEO_Y9GBFes4sXYiI_Uw,15422
|
|
25
|
+
java_codebase_rag-0.4.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
26
|
+
java_codebase_rag-0.4.0.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
|
|
27
|
+
java_codebase_rag-0.4.0.dist-info/top_level.txt,sha256=5aIYoMkvJvvfXvf4iHn2OeSIM7PZXP-0j94eNESnwMw,242
|
|
28
|
+
java_codebase_rag-0.4.0.dist-info/RECORD,,
|
server.py
CHANGED
|
@@ -16,7 +16,12 @@ from java_codebase_rag.cli_progress import (
|
|
|
16
16
|
emit_vectors_finish,
|
|
17
17
|
emit_vectors_start,
|
|
18
18
|
)
|
|
19
|
-
from java_codebase_rag.config import
|
|
19
|
+
from java_codebase_rag.config import (
|
|
20
|
+
discover_project_root,
|
|
21
|
+
emit_legacy_env_hints_if_present,
|
|
22
|
+
resolved_sbert_model_for_process_env,
|
|
23
|
+
resolve_operator_config,
|
|
24
|
+
)
|
|
20
25
|
from kuzu_queries import KuzuGraph, resolve_kuzu_path
|
|
21
26
|
from mcp.server.fastmcp import FastMCP
|
|
22
27
|
from pydantic import BaseModel, Field
|
|
@@ -91,10 +96,49 @@ class IndexInfoOutput(BaseModel):
|
|
|
91
96
|
graph: GraphMetaOutput
|
|
92
97
|
|
|
93
98
|
|
|
99
|
+
# Module-level scope manager, initialized in main()
|
|
100
|
+
_scope_manager: ScopeManager | None = None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class ScopeManager:
|
|
104
|
+
"""Manages automatic microservice scope detection and injection."""
|
|
105
|
+
|
|
106
|
+
def __init__(self, source_root: Path):
|
|
107
|
+
self.source_root = source_root
|
|
108
|
+
self.default_scope: str | None = self._detect_scope()
|
|
109
|
+
self._log_detection()
|
|
110
|
+
|
|
111
|
+
def _detect_scope(self) -> str | None:
|
|
112
|
+
from graph_enrich import detect_microservice_from_path
|
|
113
|
+
return detect_microservice_from_path(Path.cwd(), self.source_root)
|
|
114
|
+
|
|
115
|
+
def _log_detection(self) -> None:
|
|
116
|
+
if self.default_scope:
|
|
117
|
+
print(f"[scope] Detected microservice: {self.default_scope}", file=sys.stderr)
|
|
118
|
+
print(f"[scope] Queries scoped to {self.default_scope}", file=sys.stderr)
|
|
119
|
+
else:
|
|
120
|
+
print("[scope] No microservice detected (at project root)", file=sys.stderr)
|
|
121
|
+
print("[scope] Queries will span all microservices", file=sys.stderr)
|
|
122
|
+
|
|
123
|
+
def apply_auto_scope(self, node_filter: dict[str, Any] | None) -> dict[str, Any] | None:
|
|
124
|
+
"""Apply auto-detected scope to filter if no explicit microservice is set."""
|
|
125
|
+
if self.default_scope is None:
|
|
126
|
+
return node_filter
|
|
127
|
+
# Convert to dict for manipulation
|
|
128
|
+
if node_filter is None:
|
|
129
|
+
filter_dict = {}
|
|
130
|
+
else:
|
|
131
|
+
filter_dict = dict(node_filter)
|
|
132
|
+
# Only inject if user didn't specify microservice
|
|
133
|
+
if "microservice" not in filter_dict:
|
|
134
|
+
filter_dict["microservice"] = self.default_scope
|
|
135
|
+
return filter_dict
|
|
136
|
+
|
|
137
|
+
|
|
94
138
|
def _resolve_lancedb_uri() -> str:
|
|
95
139
|
raw = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
|
|
96
140
|
if not raw:
|
|
97
|
-
raw = str((
|
|
141
|
+
raw = str((_project_root() / ".java-codebase-rag").resolve())
|
|
98
142
|
p = Path(raw).expanduser()
|
|
99
143
|
if not str(raw).startswith(("s3://", "gs://", "az://")):
|
|
100
144
|
try:
|
|
@@ -108,7 +152,8 @@ def _project_root() -> Path:
|
|
|
108
152
|
env = os.environ.get("JAVA_CODEBASE_RAG_SOURCE_ROOT", "").strip()
|
|
109
153
|
if env:
|
|
110
154
|
return Path(env).expanduser().resolve()
|
|
111
|
-
|
|
155
|
+
discovered = discover_project_root(Path.cwd())
|
|
156
|
+
return discovered if discovered is not None else Path.cwd().resolve()
|
|
112
157
|
|
|
113
158
|
|
|
114
159
|
def _cocoindex_subprocess_env(project_root: Path) -> dict[str, str]:
|
|
@@ -370,6 +415,7 @@ def create_mcp_server() -> FastMCP:
|
|
|
370
415
|
),
|
|
371
416
|
),
|
|
372
417
|
) -> mcp_v2.SearchOutput:
|
|
418
|
+
scoped_filter = _scope_manager.apply_auto_scope(filter) if _scope_manager else filter
|
|
373
419
|
return await asyncio.to_thread(
|
|
374
420
|
mcp_v2.search_v2,
|
|
375
421
|
query,
|
|
@@ -378,7 +424,7 @@ def create_mcp_server() -> FastMCP:
|
|
|
378
424
|
limit,
|
|
379
425
|
offset,
|
|
380
426
|
path_contains,
|
|
381
|
-
|
|
427
|
+
scoped_filter,
|
|
382
428
|
None,
|
|
383
429
|
)
|
|
384
430
|
|
|
@@ -413,7 +459,8 @@ def create_mcp_server() -> FastMCP:
|
|
|
413
459
|
limit: int = Field(default=25, ge=1, le=500, description="Max nodes to return"),
|
|
414
460
|
offset: int = Field(default=0, ge=0, le=499, description="Skip this many nodes (pagination)"),
|
|
415
461
|
) -> mcp_v2.FindOutput:
|
|
416
|
-
|
|
462
|
+
scoped_filter = _scope_manager.apply_auto_scope(filter) if _scope_manager else filter
|
|
463
|
+
return await asyncio.to_thread(mcp_v2.find_v2, kind, scoped_filter, limit, offset, None)
|
|
417
464
|
|
|
418
465
|
@mcp.tool(
|
|
419
466
|
name="describe",
|
|
@@ -525,6 +572,7 @@ def create_mcp_server() -> FastMCP:
|
|
|
525
572
|
),
|
|
526
573
|
),
|
|
527
574
|
) -> mcp_v2.NeighborsOutput:
|
|
575
|
+
scoped_filter = _scope_manager.apply_auto_scope(filter) if _scope_manager else filter
|
|
528
576
|
return await asyncio.to_thread(
|
|
529
577
|
mcp_v2.neighbors_v2,
|
|
530
578
|
ids,
|
|
@@ -532,7 +580,7 @@ def create_mcp_server() -> FastMCP:
|
|
|
532
580
|
edge_types,
|
|
533
581
|
limit,
|
|
534
582
|
offset,
|
|
535
|
-
|
|
583
|
+
scoped_filter,
|
|
536
584
|
edge_filter,
|
|
537
585
|
include_unresolved,
|
|
538
586
|
dedup_calls,
|
|
@@ -580,6 +628,10 @@ def main() -> None:
|
|
|
580
628
|
cfg.apply_to_os_environ()
|
|
581
629
|
mcp_v2.set_hints_enabled(cfg.hints_enabled)
|
|
582
630
|
|
|
631
|
+
# Initialize scope manager for automatic microservice detection
|
|
632
|
+
global _scope_manager
|
|
633
|
+
_scope_manager = ScopeManager(cfg.source_root)
|
|
634
|
+
|
|
583
635
|
asyncio.run(create_mcp_server().run_stdio_async())
|
|
584
636
|
|
|
585
637
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|