java-codebase-rag 0.3.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -247,5 +247,60 @@ def run_build_ast_graph(
247
247
  return subprocess.CompletedProcess(args=cmd, returncode=code, stdout=out_s, stderr=err_s)
248
248
 
249
249
 
250
def run_incremental_graph(
    *,
    source_root: Path,
    kuzu_path: Path,
    verbose: bool,
    quiet: bool = False,
    env: dict[str, str] | None = None,
) -> subprocess.CompletedProcess[str]:
    """Invoke build_ast_graph.py with ``--incremental`` and return its result.

    Args:
        source_root: Passed as ``--source-root`` and used as the child's
            working directory.
        kuzu_path: Passed as ``--kuzu-path``.
        verbose: Forwarded to ``_popen_capturing_stderr``; also suppresses the
            final ``[increment] done`` status line when true.
        quiet: When true the child runs with its output fully captured and
            nothing is printed by this function.
        env: Environment for the child; a copy of ``os.environ`` is used when
            this is ``None`` or empty.

    Returns:
        A ``CompletedProcess`` describing the run. If the builder script is
        missing, a synthetic result with return code 126 and an explanatory
        ``stderr`` is returned without spawning anything.
    """
    script = bundle_dir() / "build_ast_graph.py"
    if not script.is_file():
        missing_msg = f"build_ast_graph.py not found under {script.parent}"
        return subprocess.CompletedProcess(args=[], returncode=126, stdout="", stderr=missing_msg)

    workdir = str(source_root)
    cmd: list[str] = [
        sys.executable,
        str(script),
        "--source-root",
        workdir,
        "--kuzu-path",
        str(kuzu_path),
        "--incremental",
    ]

    # Output tiers: quiet (silent), default (filtered progress), verbose (raw).
    # The builder gets --verbose in the default tier too, so that it emits the
    # per-pass progress lines the parent then filters; only an exclusively
    # quiet run omits the flag.
    builder_should_be_chatty = verbose or not quiet
    if builder_should_be_chatty:
        cmd.append("--verbose")

    child_env = env or os.environ.copy()

    if quiet:
        return subprocess.run(
            cmd,
            cwd=workdir,
            env=child_env,
            capture_output=True,
            text=True,
        )

    child = subprocess.Popen(
        cmd,
        cwd=workdir,
        env=child_env,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        bufsize=0,
    )
    captured_out, captured_err, exit_code = _popen_capturing_stderr(child, verbose=verbose)

    if not verbose:
        # Imported lazily so the formatting helpers are only loaded when a
        # status line is actually printed.
        from java_codebase_rag.cli_format import bold_cyan, styled_check, styled_cross

        if exit_code == 0:
            status_mark = styled_check()
        else:
            status_mark = styled_cross()
        print(f"{status_mark} {bold_cyan('[increment]')} done", file=sys.stderr, flush=True)

    return subprocess.CompletedProcess(
        args=cmd,
        returncode=exit_code,
        stdout=captured_out,
        stderr=captured_err,
    )
303
+
304
+
250
305
  def clip(s: str, n: int) -> str:
251
306
  return s[-n:] if len(s) > n else s
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.3.1
3
+ Version: 0.5.0
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
@@ -25,11 +25,17 @@ Requires-Dist: mcp<2,>=1.27.0
25
25
  Requires-Dist: numpy<2.5,>=1.26.4
26
26
  Requires-Dist: pathspec<2,>=1.0.4
27
27
  Requires-Dist: pyarrow<24,>=23.0.1
28
+ Requires-Dist: pydantic<3,>=2.0
28
29
  Requires-Dist: PyYAML<7,>=6.0.3
30
+ Requires-Dist: questionary<3,>=2.0
29
31
  Requires-Dist: sentence-transformers<6,>=5.4.0
30
32
  Requires-Dist: tree-sitter<0.26,>=0.25.2
31
33
  Requires-Dist: tree-sitter-java<0.24,>=0.23.5
32
34
  Requires-Dist: unidiff<1,>=0.7.3
35
+ Provides-Extra: dev
36
+ Requires-Dist: pytest>=7; extra == "dev"
37
+ Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
38
+ Requires-Dist: ruff>=0.4; extra == "dev"
33
39
  Dynamic: license-file
34
40
 
35
41
  # java-codebase-rag
@@ -126,7 +132,9 @@ With the package installed, the console script `java-codebase-rag-mcp` is on you
126
132
  claude mcp add --transport stdio java-codebase-rag -- java-codebase-rag-mcp
127
133
  ```
128
134
 
129
- Then set env vars (`JAVA_CODEBASE_RAG_INDEX_DIR`, `JAVA_CODEBASE_RAG_SOURCE_ROOT`, `SBERT_MODEL`, …) in `.mcp.json` or your shell profile. For a project-scoped `.mcp.json` template, see [`mcp.json.example`](./mcp.json.example). Official docs: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings).
135
+ **Zero-env-var configuration:** The tool automatically walks up the directory tree to find `.java-codebase-rag.yml`, so you don't need to set `JAVA_CODEBASE_RAG_SOURCE_ROOT` when working from within a project. Just place the config file at your project root and the tool will find it. See [`mcp.json.example`](./mcp.json.example) for the minimal configuration.
136
+
137
+ If you need to override defaults, you can set env vars (`JAVA_CODEBASE_RAG_INDEX_DIR`, `JAVA_CODEBASE_RAG_SOURCE_ROOT`, `SBERT_MODEL`, …) in `.mcp.json` or your shell profile. For a full configuration template, see [`mcp.json.example`](./mcp.json.example). Official docs: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings).
130
138
 
131
139
  ### Claude Desktop
132
140
 
@@ -200,7 +208,7 @@ Run `java-codebase-rag --help` to list grouped subcommands. Operator playbook wi
200
208
  | Group | Subcommand | What it does |
201
209
  |---|---|---|
202
210
  | Lifecycle | `init` | First-time index. Refuses if artifacts already exist. |
203
- | Lifecycle | `increment` | CocoIndex catch-up (Lance only); Kuzu stays stale until `reprocess`. |
211
+ | Lifecycle | `increment` | CocoIndex catch-up + incremental Kuzu update. `--vectors-only` for Lance only. |
204
212
  | Lifecycle | `reprocess` | Full Lance + Kuzu rebuild. `--vectors-only` / `--graph-only` for a single phase. |
205
213
  | Lifecycle | `erase` | Delete index artifacts. Requires `--yes` or TTY confirm. |
206
214
  | Introspection | `meta`, `tables`, `diagnose-ignore`, `unresolved-calls` | Health, table listing, ignore-layer diagnostics, receiver-failure call sites. |
@@ -244,5 +252,4 @@ The default embedding model is `sentence-transformers/all-MiniLM-L6-v2` (downloa
244
252
 
245
253
  - `get_service_topology` — microservice-level summary aggregating `HTTP_CALLS` / `ASYNC_CALLS`.
246
254
  - Agentic routing layer (query classifier → vector / graph / both).
247
- - Incremental Kuzu updates (per-changed-file) — see [`propose/TIER2-INCREMENTAL-REBUILD-PROPOSE.md`](./propose/TIER2-INCREMENTAL-REBUILD-PROPOSE.md) and [`propose/INDEX-AUTO-MODE-PROPOSE.md`](./propose/INDEX-AUTO-MODE-PROPOSE.md).
248
255
  - Optional `codegraph_nodes` LanceDB table embedding symbol summaries so the graph itself is vector-searchable.
@@ -1,8 +1,8 @@
1
- ast_java.py,sha256=NGs34vhoSypfHbKnNRpA9aj-gO4P6bED3ASmDWEVsZk,98881
1
+ ast_java.py,sha256=OKoH7oX6L7AEEd6UY-spK8BPtWYY1T_4esrTC5VtoK8,98881
2
2
  brownfield_events.py,sha256=yxXkKDgMb3VPtaiakGzncHM_EGnda8xIue6w90yYp8s,2055
3
- build_ast_graph.py,sha256=oK2C94tZqCL6KVxOHkrXTLfeF29xWXuBDF49KQxCMZo,118133
3
+ build_ast_graph.py,sha256=KY5rpqWR7UafvAcIv0ubSz6jiYA8I5ZGqm_SKIFJulE,148770
4
4
  chunk_heuristics.py,sha256=aQk2NOKxzUdqoUAJUO3G3LE0MN_bYZWNLQ0tkmj5uts,1813
5
- graph_enrich.py,sha256=2-njD2alm7FFpLn217ZG3f3ln-zqbdtGwTghOpd44oo,62021
5
+ graph_enrich.py,sha256=m3cksCHLqLHhA0Y-TLodbm09YfSJZjlTDN0Z51DiP2c,63317
6
6
  index_common.py,sha256=HT6FKHFJ084eFvd3fR1j8z8gf4eWoPHVW8GXLpw464I,285
7
7
  java_index_flow_lancedb.py,sha256=LMmfMSdE2d-ujxuJ2-hss7BhkrUMxHNyZuqsiGITuAI,12057
8
8
  java_index_v1_common.py,sha256=nF1KrSqboF_RRvWerG9knRRFmWwsrG_CvhgnsoZ8KqA,1154
@@ -13,16 +13,19 @@ mcp_v2.py,sha256=JFe62sYzJ2XiE6L3wAH8XG9_Ya2oOeJQ_hkiTmXFnSE,79065
13
13
  path_filtering.py,sha256=-oX16SYLWYwX9pcV1fu3vbVTIhY1GzFflT7J1E2tqPY,17122
14
14
  pr_analysis.py,sha256=Zaq90xYgMgrReV3vCGcFhOkK61gIRMAAIgs7ev-rJG4,18410
15
15
  search_lancedb.py,sha256=-XgtpbJ_3zDLiZ_vGKXjaLpl7RlvgyzUb7oAGoWkXO0,36754
16
- server.py,sha256=6pw3g29o7SwVYmRZV0NxSc2d_eFg521LkUn9kUCzbJw,26470
16
+ server.py,sha256=c4Bo0FXPoKP2AwIVP_wiv0XENkmKchLHf0QrQPUUgq4,28645
17
17
  java_codebase_rag/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
18
- java_codebase_rag/cli.py,sha256=7nwrnXdRGZvRKMYcHJDR0CecYsiBt1Fu1RJwrQAIMV0,28518
18
+ java_codebase_rag/cli.py,sha256=uI0g1jluwuOG-aRNz-9wmH8WY6ICGO3A9O-HZXm_x2M,32882
19
19
  java_codebase_rag/cli_format.py,sha256=arU7P9W6Fvm7X_wzR1wJ8EfyxK1rDP_ESEhdA0ub4Mo,2579
20
20
  java_codebase_rag/cli_progress.py,sha256=9jCqEagYOXs32SYVA31_sOCrONvYy7cl1CrdBD2Pg44,3168
21
- java_codebase_rag/config.py,sha256=F6NtbRAlcs9M96bhrkQVeptOkvCFdd0rt_UJFKNiRfA,12633
22
- java_codebase_rag/pipeline.py,sha256=p0u6yJlBYip2kr7LaCUYFHI4sv9inEgXpZTzcJK_rJ8,7583
23
- java_codebase_rag-0.3.1.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
24
- java_codebase_rag-0.3.1.dist-info/METADATA,sha256=o4nk9F_JRE6yX0Jv7O4A45c2vHPXIGVryjxPz8dWEDQ,15068
25
- java_codebase_rag-0.3.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
26
- java_codebase_rag-0.3.1.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
27
- java_codebase_rag-0.3.1.dist-info/top_level.txt,sha256=5aIYoMkvJvvfXvf4iHn2OeSIM7PZXP-0j94eNESnwMw,242
28
- java_codebase_rag-0.3.1.dist-info/RECORD,,
21
+ java_codebase_rag/config.py,sha256=1BkRQsdY2ohZ8IWmbTG3WHgotVVUIrRTN537A1QAoCQ,15352
22
+ java_codebase_rag/installer.py,sha256=_dYScFzoI1XpmYblzpuZ3bftUJflhRs3wn6suebHI6o,29286
23
+ java_codebase_rag/pipeline.py,sha256=nMXwX9r7HG9yPstrm7y_vfOMUZuDmw5_1lJTAfR-jwI,9488
24
+ java_codebase_rag/install_data/agents/explorer-rag-enhanced.md,sha256=APl9d-No12qZNZLjU7mwNRwxHIgnT3ZtQZiD4clWlyU,14413
25
+ java_codebase_rag/install_data/skills/explore-codebase/SKILL.md,sha256=pIM-Xdwq_fXkhhBJCdb-fA2nes5c_mMPcdUXb7Adyxo,12040
26
+ java_codebase_rag-0.5.0.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
27
+ java_codebase_rag-0.5.0.dist-info/METADATA,sha256=C_DfrPUqDvCk4X_vgUaA45SiZn4OwYKlBIWIkPIHCQE,15457
28
+ java_codebase_rag-0.5.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
29
+ java_codebase_rag-0.5.0.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
30
+ java_codebase_rag-0.5.0.dist-info/top_level.txt,sha256=5aIYoMkvJvvfXvf4iHn2OeSIM7PZXP-0j94eNESnwMw,242
31
+ java_codebase_rag-0.5.0.dist-info/RECORD,,
server.py CHANGED
@@ -16,7 +16,12 @@ from java_codebase_rag.cli_progress import (
16
16
  emit_vectors_finish,
17
17
  emit_vectors_start,
18
18
  )
19
- from java_codebase_rag.config import emit_legacy_env_hints_if_present, resolved_sbert_model_for_process_env, resolve_operator_config
19
+ from java_codebase_rag.config import (
20
+ discover_project_root,
21
+ emit_legacy_env_hints_if_present,
22
+ resolved_sbert_model_for_process_env,
23
+ resolve_operator_config,
24
+ )
20
25
  from kuzu_queries import KuzuGraph, resolve_kuzu_path
21
26
  from mcp.server.fastmcp import FastMCP
22
27
  from pydantic import BaseModel, Field
@@ -91,10 +96,49 @@ class IndexInfoOutput(BaseModel):
91
96
  graph: GraphMetaOutput
92
97
 
93
98
 
99
+ # Module-level scope manager, initialized in main()
100
+ _scope_manager: ScopeManager | None = None
101
+
102
+
103
+ class ScopeManager:
104
+ """Manages automatic microservice scope detection and injection."""
105
+
106
+ def __init__(self, source_root: Path):
107
+ self.source_root = source_root
108
+ self.default_scope: str | None = self._detect_scope()
109
+ self._log_detection()
110
+
111
+ def _detect_scope(self) -> str | None:
112
+ from graph_enrich import detect_microservice_from_path
113
+ return detect_microservice_from_path(Path.cwd(), self.source_root)
114
+
115
+ def _log_detection(self) -> None:
116
+ if self.default_scope:
117
+ print(f"[scope] Detected microservice: {self.default_scope}", file=sys.stderr)
118
+ print(f"[scope] Queries scoped to {self.default_scope}", file=sys.stderr)
119
+ else:
120
+ print("[scope] No microservice detected (at project root)", file=sys.stderr)
121
+ print("[scope] Queries will span all microservices", file=sys.stderr)
122
+
123
+ def apply_auto_scope(self, node_filter: dict[str, Any] | None) -> dict[str, Any] | None:
124
+ """Apply auto-detected scope to filter if no explicit microservice is set."""
125
+ if self.default_scope is None:
126
+ return node_filter
127
+ # Convert to dict for manipulation
128
+ if node_filter is None:
129
+ filter_dict = {}
130
+ else:
131
+ filter_dict = dict(node_filter)
132
+ # Only inject if user didn't specify microservice
133
+ if "microservice" not in filter_dict:
134
+ filter_dict["microservice"] = self.default_scope
135
+ return filter_dict
136
+
137
+
94
138
  def _resolve_lancedb_uri() -> str:
95
139
  raw = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
96
140
  if not raw:
97
- raw = str((Path.cwd() / ".java-codebase-rag").resolve())
141
+ raw = str((_project_root() / ".java-codebase-rag").resolve())
98
142
  p = Path(raw).expanduser()
99
143
  if not str(raw).startswith(("s3://", "gs://", "az://")):
100
144
  try:
@@ -108,7 +152,8 @@ def _project_root() -> Path:
108
152
  env = os.environ.get("JAVA_CODEBASE_RAG_SOURCE_ROOT", "").strip()
109
153
  if env:
110
154
  return Path(env).expanduser().resolve()
111
- return Path.cwd().resolve()
155
+ discovered = discover_project_root(Path.cwd())
156
+ return discovered if discovered is not None else Path.cwd().resolve()
112
157
 
113
158
 
114
159
  def _cocoindex_subprocess_env(project_root: Path) -> dict[str, str]:
@@ -370,6 +415,7 @@ def create_mcp_server() -> FastMCP:
370
415
  ),
371
416
  ),
372
417
  ) -> mcp_v2.SearchOutput:
418
+ scoped_filter = _scope_manager.apply_auto_scope(filter) if _scope_manager else filter
373
419
  return await asyncio.to_thread(
374
420
  mcp_v2.search_v2,
375
421
  query,
@@ -378,7 +424,7 @@ def create_mcp_server() -> FastMCP:
378
424
  limit,
379
425
  offset,
380
426
  path_contains,
381
- filter,
427
+ scoped_filter,
382
428
  None,
383
429
  )
384
430
 
@@ -413,7 +459,8 @@ def create_mcp_server() -> FastMCP:
413
459
  limit: int = Field(default=25, ge=1, le=500, description="Max nodes to return"),
414
460
  offset: int = Field(default=0, ge=0, le=499, description="Skip this many nodes (pagination)"),
415
461
  ) -> mcp_v2.FindOutput:
416
- return await asyncio.to_thread(mcp_v2.find_v2, kind, filter, limit, offset, None)
462
+ scoped_filter = _scope_manager.apply_auto_scope(filter) if _scope_manager else filter
463
+ return await asyncio.to_thread(mcp_v2.find_v2, kind, scoped_filter, limit, offset, None)
417
464
 
418
465
  @mcp.tool(
419
466
  name="describe",
@@ -525,6 +572,7 @@ def create_mcp_server() -> FastMCP:
525
572
  ),
526
573
  ),
527
574
  ) -> mcp_v2.NeighborsOutput:
575
+ scoped_filter = _scope_manager.apply_auto_scope(filter) if _scope_manager else filter
528
576
  return await asyncio.to_thread(
529
577
  mcp_v2.neighbors_v2,
530
578
  ids,
@@ -532,7 +580,7 @@ def create_mcp_server() -> FastMCP:
532
580
  edge_types,
533
581
  limit,
534
582
  offset,
535
- filter,
583
+ scoped_filter,
536
584
  edge_filter,
537
585
  include_unresolved,
538
586
  dedup_calls,
@@ -580,6 +628,10 @@ def main() -> None:
580
628
  cfg.apply_to_os_environ()
581
629
  mcp_v2.set_hints_enabled(cfg.hints_enabled)
582
630
 
631
+ # Initialize scope manager for automatic microservice detection
632
+ global _scope_manager
633
+ _scope_manager = ScopeManager(cfg.source_root)
634
+
583
635
  asyncio.run(create_mcp_server().run_stdio_async())
584
636
 
585
637