java-codebase-rag 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ast_java.py +1 -1
- build_ast_graph.py +833 -64
- graph_enrich.py +32 -0
- java_codebase_rag/cli.py +123 -5
- java_codebase_rag/config.py +70 -2
- java_codebase_rag/install_data/agents/explorer-rag-enhanced.md +306 -0
- java_codebase_rag/install_data/skills/explore-codebase/SKILL.md +204 -0
- java_codebase_rag/installer.py +930 -0
- java_codebase_rag/pipeline.py +55 -0
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.5.0.dist-info}/METADATA +11 -4
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.5.0.dist-info}/RECORD +16 -13
- server.py +58 -6
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.5.0.dist-info}/WHEEL +0 -0
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.5.0.dist-info}/entry_points.txt +0 -0
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {java_codebase_rag-0.3.1.dist-info → java_codebase_rag-0.5.0.dist-info}/top_level.txt +0 -0
java_codebase_rag/pipeline.py
CHANGED
|
@@ -247,5 +247,60 @@ def run_build_ast_graph(
|
|
|
247
247
|
return subprocess.CompletedProcess(args=cmd, returncode=code, stdout=out_s, stderr=err_s)
|
|
248
248
|
|
|
249
249
|
|
|
250
|
+
def run_incremental_graph(
|
|
251
|
+
*,
|
|
252
|
+
source_root: Path,
|
|
253
|
+
kuzu_path: Path,
|
|
254
|
+
verbose: bool,
|
|
255
|
+
quiet: bool = False,
|
|
256
|
+
env: dict[str, str] | None = None,
|
|
257
|
+
) -> subprocess.CompletedProcess[str]:
|
|
258
|
+
"""Run incremental graph rebuild by passing --incremental flag to build_ast_graph.py."""
|
|
259
|
+
builder = bundle_dir() / "build_ast_graph.py"
|
|
260
|
+
if not builder.is_file():
|
|
261
|
+
return subprocess.CompletedProcess(
|
|
262
|
+
args=[],
|
|
263
|
+
returncode=126,
|
|
264
|
+
stdout="",
|
|
265
|
+
stderr=f"build_ast_graph.py not found under {builder.parent}",
|
|
266
|
+
)
|
|
267
|
+
cmd: list[str] = [
|
|
268
|
+
sys.executable,
|
|
269
|
+
str(builder),
|
|
270
|
+
"--source-root",
|
|
271
|
+
str(source_root),
|
|
272
|
+
"--kuzu-path",
|
|
273
|
+
str(kuzu_path),
|
|
274
|
+
"--incremental",
|
|
275
|
+
]
|
|
276
|
+
# Three-tier: --quiet (silent) / default (filtered progress) / --verbose (raw).
|
|
277
|
+
# Default passes --verbose so the builder emits per-pass progress lines,
|
|
278
|
+
# which the parent filters via _LineFilter. --verbose bypasses the filter.
|
|
279
|
+
if verbose or not quiet:
|
|
280
|
+
cmd.append("--verbose")
|
|
281
|
+
if quiet:
|
|
282
|
+
return subprocess.run(
|
|
283
|
+
cmd,
|
|
284
|
+
cwd=str(source_root),
|
|
285
|
+
env=env or os.environ.copy(),
|
|
286
|
+
capture_output=True,
|
|
287
|
+
text=True,
|
|
288
|
+
)
|
|
289
|
+
proc = subprocess.Popen(
|
|
290
|
+
cmd,
|
|
291
|
+
cwd=str(source_root),
|
|
292
|
+
env=env or os.environ.copy(),
|
|
293
|
+
stdout=subprocess.PIPE,
|
|
294
|
+
stderr=subprocess.PIPE,
|
|
295
|
+
bufsize=0,
|
|
296
|
+
)
|
|
297
|
+
out_s, err_s, code = _popen_capturing_stderr(proc, verbose=verbose)
|
|
298
|
+
if not verbose:
|
|
299
|
+
from java_codebase_rag.cli_format import bold_cyan, styled_check, styled_cross
|
|
300
|
+
marker = styled_check() if code == 0 else styled_cross()
|
|
301
|
+
print(f"{marker} {bold_cyan('[increment]')} done", file=sys.stderr, flush=True)
|
|
302
|
+
return subprocess.CompletedProcess(args=cmd, returncode=code, stdout=out_s, stderr=err_s)
|
|
303
|
+
|
|
304
|
+
|
|
250
305
|
def clip(s: str, n: int) -> str:
|
|
251
306
|
return s[-n:] if len(s) > n else s
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: java-codebase-rag
|
|
3
|
-
Version: 0.3.1
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: MCP server for semantic + structural search over Java codebases
|
|
5
5
|
Author: HumanBean17
|
|
6
6
|
License-Expression: MIT
|
|
@@ -25,11 +25,17 @@ Requires-Dist: mcp<2,>=1.27.0
|
|
|
25
25
|
Requires-Dist: numpy<2.5,>=1.26.4
|
|
26
26
|
Requires-Dist: pathspec<2,>=1.0.4
|
|
27
27
|
Requires-Dist: pyarrow<24,>=23.0.1
|
|
28
|
+
Requires-Dist: pydantic<3,>=2.0
|
|
28
29
|
Requires-Dist: PyYAML<7,>=6.0.3
|
|
30
|
+
Requires-Dist: questionary<3,>=2.0
|
|
29
31
|
Requires-Dist: sentence-transformers<6,>=5.4.0
|
|
30
32
|
Requires-Dist: tree-sitter<0.26,>=0.25.2
|
|
31
33
|
Requires-Dist: tree-sitter-java<0.24,>=0.23.5
|
|
32
34
|
Requires-Dist: unidiff<1,>=0.7.3
|
|
35
|
+
Provides-Extra: dev
|
|
36
|
+
Requires-Dist: pytest>=7; extra == "dev"
|
|
37
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
38
|
+
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
33
39
|
Dynamic: license-file
|
|
34
40
|
|
|
35
41
|
# java-codebase-rag
|
|
@@ -126,7 +132,9 @@ With the package installed, the console script `java-codebase-rag-mcp` is on you
|
|
|
126
132
|
claude mcp add --transport stdio java-codebase-rag -- java-codebase-rag-mcp
|
|
127
133
|
```
|
|
128
134
|
|
|
129
|
-
|
|
135
|
+
**Zero-env-var configuration:** The tool automatically walks up the directory tree to find `.java-codebase-rag.yml`, so you don't need to set `JAVA_CODEBASE_RAG_SOURCE_ROOT` when working from within a project. Just place the config file at your project root and the tool will find it. See [`mcp.json.example`](./mcp.json.example) for the minimal configuration.
|
|
136
|
+
|
|
137
|
+
If you need to override defaults, you can set env vars (`JAVA_CODEBASE_RAG_INDEX_DIR`, `JAVA_CODEBASE_RAG_SOURCE_ROOT`, `SBERT_MODEL`, …) in `.mcp.json` or your shell profile. For a full configuration template, see [`mcp.json.example`](./mcp.json.example). Official docs: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings).
|
|
130
138
|
|
|
131
139
|
### Claude Desktop
|
|
132
140
|
|
|
@@ -200,7 +208,7 @@ Run `java-codebase-rag --help` to list grouped subcommands. Operator playbook wi
|
|
|
200
208
|
| Group | Subcommand | What it does |
|
|
201
209
|
|---|---|---|
|
|
202
210
|
| Lifecycle | `init` | First-time index. Refuses if artifacts already exist. |
|
|
203
|
-
| Lifecycle | `increment` | CocoIndex catch-up
|
|
211
|
+
| Lifecycle | `increment` | CocoIndex catch-up + incremental Kuzu update. `--vectors-only` for Lance only. |
|
|
204
212
|
| Lifecycle | `reprocess` | Full Lance + Kuzu rebuild. `--vectors-only` / `--graph-only` for a single phase. |
|
|
205
213
|
| Lifecycle | `erase` | Delete index artifacts. Requires `--yes` or TTY confirm. |
|
|
206
214
|
| Introspection | `meta`, `tables`, `diagnose-ignore`, `unresolved-calls` | Health, table listing, ignore-layer diagnostics, receiver-failure call sites. |
|
|
@@ -244,5 +252,4 @@ The default embedding model is `sentence-transformers/all-MiniLM-L6-v2` (downloa
|
|
|
244
252
|
|
|
245
253
|
- `get_service_topology` — microservice-level summary aggregating `HTTP_CALLS` / `ASYNC_CALLS`.
|
|
246
254
|
- Agentic routing layer (query classifier → vector / graph / both).
|
|
247
|
-
- Incremental Kuzu updates (per-changed-file) — see [`propose/TIER2-INCREMENTAL-REBUILD-PROPOSE.md`](./propose/TIER2-INCREMENTAL-REBUILD-PROPOSE.md) and [`propose/INDEX-AUTO-MODE-PROPOSE.md`](./propose/INDEX-AUTO-MODE-PROPOSE.md).
|
|
248
255
|
- Optional `codegraph_nodes` LanceDB table embedding symbol summaries so the graph itself is vector-searchable.
|
|
@@ -1,8 +1,8 @@
|
|
|
1
|
-
ast_java.py,sha256=
|
|
1
|
+
ast_java.py,sha256=OKoH7oX6L7AEEd6UY-spK8BPtWYY1T_4esrTC5VtoK8,98881
|
|
2
2
|
brownfield_events.py,sha256=yxXkKDgMb3VPtaiakGzncHM_EGnda8xIue6w90yYp8s,2055
|
|
3
|
-
build_ast_graph.py,sha256=
|
|
3
|
+
build_ast_graph.py,sha256=KY5rpqWR7UafvAcIv0ubSz6jiYA8I5ZGqm_SKIFJulE,148770
|
|
4
4
|
chunk_heuristics.py,sha256=aQk2NOKxzUdqoUAJUO3G3LE0MN_bYZWNLQ0tkmj5uts,1813
|
|
5
|
-
graph_enrich.py,sha256=
|
|
5
|
+
graph_enrich.py,sha256=m3cksCHLqLHhA0Y-TLodbm09YfSJZjlTDN0Z51DiP2c,63317
|
|
6
6
|
index_common.py,sha256=HT6FKHFJ084eFvd3fR1j8z8gf4eWoPHVW8GXLpw464I,285
|
|
7
7
|
java_index_flow_lancedb.py,sha256=LMmfMSdE2d-ujxuJ2-hss7BhkrUMxHNyZuqsiGITuAI,12057
|
|
8
8
|
java_index_v1_common.py,sha256=nF1KrSqboF_RRvWerG9knRRFmWwsrG_CvhgnsoZ8KqA,1154
|
|
@@ -13,16 +13,19 @@ mcp_v2.py,sha256=JFe62sYzJ2XiE6L3wAH8XG9_Ya2oOeJQ_hkiTmXFnSE,79065
|
|
|
13
13
|
path_filtering.py,sha256=-oX16SYLWYwX9pcV1fu3vbVTIhY1GzFflT7J1E2tqPY,17122
|
|
14
14
|
pr_analysis.py,sha256=Zaq90xYgMgrReV3vCGcFhOkK61gIRMAAIgs7ev-rJG4,18410
|
|
15
15
|
search_lancedb.py,sha256=-XgtpbJ_3zDLiZ_vGKXjaLpl7RlvgyzUb7oAGoWkXO0,36754
|
|
16
|
-
server.py,sha256=
|
|
16
|
+
server.py,sha256=c4Bo0FXPoKP2AwIVP_wiv0XENkmKchLHf0QrQPUUgq4,28645
|
|
17
17
|
java_codebase_rag/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
18
|
-
java_codebase_rag/cli.py,sha256=
|
|
18
|
+
java_codebase_rag/cli.py,sha256=uI0g1jluwuOG-aRNz-9wmH8WY6ICGO3A9O-HZXm_x2M,32882
|
|
19
19
|
java_codebase_rag/cli_format.py,sha256=arU7P9W6Fvm7X_wzR1wJ8EfyxK1rDP_ESEhdA0ub4Mo,2579
|
|
20
20
|
java_codebase_rag/cli_progress.py,sha256=9jCqEagYOXs32SYVA31_sOCrONvYy7cl1CrdBD2Pg44,3168
|
|
21
|
-
java_codebase_rag/config.py,sha256=
|
|
22
|
-
java_codebase_rag/
|
|
23
|
-
java_codebase_rag
|
|
24
|
-
java_codebase_rag-
|
|
25
|
-
java_codebase_rag-
|
|
26
|
-
java_codebase_rag-0.
|
|
27
|
-
java_codebase_rag-0.
|
|
28
|
-
java_codebase_rag-0.
|
|
21
|
+
java_codebase_rag/config.py,sha256=1BkRQsdY2ohZ8IWmbTG3WHgotVVUIrRTN537A1QAoCQ,15352
|
|
22
|
+
java_codebase_rag/installer.py,sha256=_dYScFzoI1XpmYblzpuZ3bftUJflhRs3wn6suebHI6o,29286
|
|
23
|
+
java_codebase_rag/pipeline.py,sha256=nMXwX9r7HG9yPstrm7y_vfOMUZuDmw5_1lJTAfR-jwI,9488
|
|
24
|
+
java_codebase_rag/install_data/agents/explorer-rag-enhanced.md,sha256=APl9d-No12qZNZLjU7mwNRwxHIgnT3ZtQZiD4clWlyU,14413
|
|
25
|
+
java_codebase_rag/install_data/skills/explore-codebase/SKILL.md,sha256=pIM-Xdwq_fXkhhBJCdb-fA2nes5c_mMPcdUXb7Adyxo,12040
|
|
26
|
+
java_codebase_rag-0.5.0.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
|
|
27
|
+
java_codebase_rag-0.5.0.dist-info/METADATA,sha256=C_DfrPUqDvCk4X_vgUaA45SiZn4OwYKlBIWIkPIHCQE,15457
|
|
28
|
+
java_codebase_rag-0.5.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
|
|
29
|
+
java_codebase_rag-0.5.0.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
|
|
30
|
+
java_codebase_rag-0.5.0.dist-info/top_level.txt,sha256=5aIYoMkvJvvfXvf4iHn2OeSIM7PZXP-0j94eNESnwMw,242
|
|
31
|
+
java_codebase_rag-0.5.0.dist-info/RECORD,,
|
server.py
CHANGED
|
@@ -16,7 +16,12 @@ from java_codebase_rag.cli_progress import (
|
|
|
16
16
|
emit_vectors_finish,
|
|
17
17
|
emit_vectors_start,
|
|
18
18
|
)
|
|
19
|
-
from java_codebase_rag.config import
|
|
19
|
+
from java_codebase_rag.config import (
|
|
20
|
+
discover_project_root,
|
|
21
|
+
emit_legacy_env_hints_if_present,
|
|
22
|
+
resolved_sbert_model_for_process_env,
|
|
23
|
+
resolve_operator_config,
|
|
24
|
+
)
|
|
20
25
|
from kuzu_queries import KuzuGraph, resolve_kuzu_path
|
|
21
26
|
from mcp.server.fastmcp import FastMCP
|
|
22
27
|
from pydantic import BaseModel, Field
|
|
@@ -91,10 +96,49 @@ class IndexInfoOutput(BaseModel):
|
|
|
91
96
|
graph: GraphMetaOutput
|
|
92
97
|
|
|
93
98
|
|
|
99
|
+
# Module-level scope manager, initialized in main()
|
|
100
|
+
_scope_manager: ScopeManager | None = None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class ScopeManager:
|
|
104
|
+
"""Manages automatic microservice scope detection and injection."""
|
|
105
|
+
|
|
106
|
+
def __init__(self, source_root: Path):
|
|
107
|
+
self.source_root = source_root
|
|
108
|
+
self.default_scope: str | None = self._detect_scope()
|
|
109
|
+
self._log_detection()
|
|
110
|
+
|
|
111
|
+
def _detect_scope(self) -> str | None:
|
|
112
|
+
from graph_enrich import detect_microservice_from_path
|
|
113
|
+
return detect_microservice_from_path(Path.cwd(), self.source_root)
|
|
114
|
+
|
|
115
|
+
def _log_detection(self) -> None:
|
|
116
|
+
if self.default_scope:
|
|
117
|
+
print(f"[scope] Detected microservice: {self.default_scope}", file=sys.stderr)
|
|
118
|
+
print(f"[scope] Queries scoped to {self.default_scope}", file=sys.stderr)
|
|
119
|
+
else:
|
|
120
|
+
print("[scope] No microservice detected (at project root)", file=sys.stderr)
|
|
121
|
+
print("[scope] Queries will span all microservices", file=sys.stderr)
|
|
122
|
+
|
|
123
|
+
def apply_auto_scope(self, node_filter: dict[str, Any] | None) -> dict[str, Any] | None:
|
|
124
|
+
"""Apply auto-detected scope to filter if no explicit microservice is set."""
|
|
125
|
+
if self.default_scope is None:
|
|
126
|
+
return node_filter
|
|
127
|
+
# Convert to dict for manipulation
|
|
128
|
+
if node_filter is None:
|
|
129
|
+
filter_dict = {}
|
|
130
|
+
else:
|
|
131
|
+
filter_dict = dict(node_filter)
|
|
132
|
+
# Only inject if user didn't specify microservice
|
|
133
|
+
if "microservice" not in filter_dict:
|
|
134
|
+
filter_dict["microservice"] = self.default_scope
|
|
135
|
+
return filter_dict
|
|
136
|
+
|
|
137
|
+
|
|
94
138
|
def _resolve_lancedb_uri() -> str:
|
|
95
139
|
raw = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
|
|
96
140
|
if not raw:
|
|
97
|
-
raw = str((
|
|
141
|
+
raw = str((_project_root() / ".java-codebase-rag").resolve())
|
|
98
142
|
p = Path(raw).expanduser()
|
|
99
143
|
if not str(raw).startswith(("s3://", "gs://", "az://")):
|
|
100
144
|
try:
|
|
@@ -108,7 +152,8 @@ def _project_root() -> Path:
|
|
|
108
152
|
env = os.environ.get("JAVA_CODEBASE_RAG_SOURCE_ROOT", "").strip()
|
|
109
153
|
if env:
|
|
110
154
|
return Path(env).expanduser().resolve()
|
|
111
|
-
|
|
155
|
+
discovered = discover_project_root(Path.cwd())
|
|
156
|
+
return discovered if discovered is not None else Path.cwd().resolve()
|
|
112
157
|
|
|
113
158
|
|
|
114
159
|
def _cocoindex_subprocess_env(project_root: Path) -> dict[str, str]:
|
|
@@ -370,6 +415,7 @@ def create_mcp_server() -> FastMCP:
|
|
|
370
415
|
),
|
|
371
416
|
),
|
|
372
417
|
) -> mcp_v2.SearchOutput:
|
|
418
|
+
scoped_filter = _scope_manager.apply_auto_scope(filter) if _scope_manager else filter
|
|
373
419
|
return await asyncio.to_thread(
|
|
374
420
|
mcp_v2.search_v2,
|
|
375
421
|
query,
|
|
@@ -378,7 +424,7 @@ def create_mcp_server() -> FastMCP:
|
|
|
378
424
|
limit,
|
|
379
425
|
offset,
|
|
380
426
|
path_contains,
|
|
381
|
-
|
|
427
|
+
scoped_filter,
|
|
382
428
|
None,
|
|
383
429
|
)
|
|
384
430
|
|
|
@@ -413,7 +459,8 @@ def create_mcp_server() -> FastMCP:
|
|
|
413
459
|
limit: int = Field(default=25, ge=1, le=500, description="Max nodes to return"),
|
|
414
460
|
offset: int = Field(default=0, ge=0, le=499, description="Skip this many nodes (pagination)"),
|
|
415
461
|
) -> mcp_v2.FindOutput:
|
|
416
|
-
|
|
462
|
+
scoped_filter = _scope_manager.apply_auto_scope(filter) if _scope_manager else filter
|
|
463
|
+
return await asyncio.to_thread(mcp_v2.find_v2, kind, scoped_filter, limit, offset, None)
|
|
417
464
|
|
|
418
465
|
@mcp.tool(
|
|
419
466
|
name="describe",
|
|
@@ -525,6 +572,7 @@ def create_mcp_server() -> FastMCP:
|
|
|
525
572
|
),
|
|
526
573
|
),
|
|
527
574
|
) -> mcp_v2.NeighborsOutput:
|
|
575
|
+
scoped_filter = _scope_manager.apply_auto_scope(filter) if _scope_manager else filter
|
|
528
576
|
return await asyncio.to_thread(
|
|
529
577
|
mcp_v2.neighbors_v2,
|
|
530
578
|
ids,
|
|
@@ -532,7 +580,7 @@ def create_mcp_server() -> FastMCP:
|
|
|
532
580
|
edge_types,
|
|
533
581
|
limit,
|
|
534
582
|
offset,
|
|
535
|
-
|
|
583
|
+
scoped_filter,
|
|
536
584
|
edge_filter,
|
|
537
585
|
include_unresolved,
|
|
538
586
|
dedup_calls,
|
|
@@ -580,6 +628,10 @@ def main() -> None:
|
|
|
580
628
|
cfg.apply_to_os_environ()
|
|
581
629
|
mcp_v2.set_hints_enabled(cfg.hints_enabled)
|
|
582
630
|
|
|
631
|
+
# Initialize scope manager for automatic microservice detection
|
|
632
|
+
global _scope_manager
|
|
633
|
+
_scope_manager = ScopeManager(cfg.source_root)
|
|
634
|
+
|
|
583
635
|
asyncio.run(create_mcp_server().run_stdio_async())
|
|
584
636
|
|
|
585
637
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|