java-codebase-rag 0.3.1__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
graph_enrich.py CHANGED
@@ -1565,6 +1565,38 @@ def microservice_for_path(
1565
1565
  return ""
1566
1566
 
1567
1567
 
1568
+ def detect_microservice_from_path(cwd: Path, source_root: Path) -> str | None:
1569
+ """Detect microservice from cwd for query-time auto-scope.
1570
+
1571
+ Returns None if cwd is outside source_root, cwd IS source_root (system level),
1572
+ or no microservice is detected. Otherwise returns the microservice name.
1573
+ """
1574
+ cwd_resolved = cwd.resolve()
1575
+ source_resolved = source_root.resolve()
1576
+
1577
+ # Check if cwd is outside source_root
1578
+ try:
1579
+ cwd_resolved.relative_to(source_resolved)
1580
+ except ValueError:
1581
+ return None
1582
+
1583
+ # Check if cwd IS source_root (at system level, no specific scope)
1584
+ if cwd_resolved == source_resolved:
1585
+ return None
1586
+
1587
+ # Check if cwd itself matches a YAML override (directory name matches microservice_roots)
1588
+ overrides = load_microservice_overrides(source_resolved)
1589
+ if overrides and cwd_resolved.name in overrides:
1590
+ return cwd_resolved.name
1591
+
1592
+ # microservice_for_path walks _bounded_parents which excludes the path itself.
1593
+ # For query-time detection we need cwd included in the walk, so pass a synthetic
1594
+ # child path so that cwd appears as a parent in the build-marker scan.
1595
+ synthetic = cwd_resolved / "__scope_probe__"
1596
+ ms = microservice_for_path(str(synthetic), source_resolved)
1597
+ return ms if ms else None
1598
+
1599
+
1568
1600
  # ---------- chunk enrichment ----------
1569
1601
 
1570
1602
 
java_codebase_rag/cli.py CHANGED
@@ -21,7 +21,7 @@ from java_codebase_rag.config import (
21
21
  index_dir_has_existing_artifacts,
22
22
  resolve_operator_config,
23
23
  )
24
- from java_codebase_rag.pipeline import clip, run_build_ast_graph, run_cocoindex_drop, run_cocoindex_update
24
+ from java_codebase_rag.pipeline import clip, run_build_ast_graph, run_cocoindex_drop, run_cocoindex_update, run_incremental_graph
25
25
  from java_ontology import VALID_UNRESOLVED_CALL_REASONS
26
26
 
27
27
  KUZU_INCREMENTAL_TRACKING_ISSUE_URL = "https://github.com/HumanBean17/java-codebase-rag/issues/73"
@@ -229,6 +229,23 @@ def _add_verbosity_flags(p: argparse.ArgumentParser) -> None:
229
229
 
230
230
  def _cmd_init(args: argparse.Namespace) -> int:
231
231
  cfg = _resolved_from_ns(args)
232
+ # Check for parent config or index
233
+ from java_codebase_rag.config import discover_project_root, find_yaml_config_file
234
+ parent_config_dir = discover_project_root(cfg.source_root.parent)
235
+ if parent_config_dir is not None:
236
+ parent_config = find_yaml_config_file(parent_config_dir)
237
+ if parent_config is not None:
238
+ print(
239
+ f"Warning: found existing config at {parent_config}. "
240
+ f"Creating a new project here will create a separate index.",
241
+ file=sys.stderr,
242
+ )
243
+ else:
244
+ print(
245
+ f"Warning: found existing index at {parent_config_dir / '.java-codebase-rag'}. "
246
+ f"Creating a new project here will create a separate index.",
247
+ file=sys.stderr,
248
+ )
232
249
  _startup_hints(cfg)
233
250
  cfg.apply_to_os_environ()
234
251
  occupied, paths = index_dir_has_existing_artifacts(cfg.index_dir)
@@ -298,7 +315,11 @@ def _cmd_increment(args: argparse.Namespace) -> int:
298
315
  cfg = _resolved_from_ns(args)
299
316
  _startup_hints(cfg)
300
317
  cfg.apply_to_os_environ()
301
- _emit_increment_kuzu_warning()
318
+
319
+ # Check for --vectors-only flag
320
+ vectors_only = bool(getattr(args, "vectors_only", False))
321
+ if vectors_only:
322
+ _emit_increment_kuzu_warning()
302
323
 
303
324
  def work() -> int:
304
325
  env = cfg.subprocess_env()
@@ -320,7 +341,50 @@ def _cmd_increment(args: argparse.Namespace) -> int:
320
341
  }
321
342
  )
322
343
  return 1
323
- _emit({"success": True, "message": "increment completed (Lance only; graph may be stale — see stderr)"})
344
+
345
+ # If --vectors-only is set, skip graph update
346
+ if vectors_only:
347
+ _emit({"success": True, "message": "increment completed (Lance only; graph may be stale — see stderr)"})
348
+ return 0
349
+
350
+ # Run incremental graph update
351
+ g = run_incremental_graph(
352
+ source_root=cfg.source_root,
353
+ kuzu_path=cfg.kuzu_path,
354
+ verbose=bool(args.verbose),
355
+ quiet=bool(args.quiet),
356
+ env=env,
357
+ )
358
+
359
+ # Check if incremental fell back to full rebuild
360
+ if g.returncode == 0 and g.stdout:
361
+ # Parse stdout to check for full_fallback mode
362
+ # The incremental_rebuild function returns a JSON payload with mode field
363
+ try:
364
+ result = json.loads(g.stdout.strip())
365
+ if result.get("mode") == "full_fallback":
366
+ print(
367
+ "[increment] fell back to full graph rebuild — this is normal after schema changes or first run",
368
+ file=sys.stderr,
369
+ flush=True,
370
+ )
371
+ except (json.JSONDecodeError, ValueError):
372
+ # If parsing fails, continue silently
373
+ pass
374
+
375
+ if g.returncode != 0:
376
+ _emit(
377
+ {
378
+ "success": False,
379
+ "exit_code": g.returncode,
380
+ "stdout": clip(g.stdout, 4000),
381
+ "stderr": clip(g.stderr, 4000),
382
+ "message": f"graph builder exit {g.returncode}",
383
+ }
384
+ )
385
+ return 1
386
+
387
+ _emit({"success": True, "message": "increment completed (Lance + graph updated)"})
324
388
  return 0
325
389
 
326
390
  return _run_with_pipeline_progress("increment", cfg, quiet=bool(args.quiet), work=work)
@@ -615,7 +679,7 @@ def build_parser() -> argparse.ArgumentParser:
615
679
  "--quiet suppresses that stream; stdout remains the machine-readable payload.\n\n"
616
680
  "Lifecycle (manage the index):\n"
617
681
  " init Create a fresh index from a Java repository.\n"
618
- " increment Pick up changes since the last index update (Lance only).\n"
682
+ " increment Pick up changes since the last index update (Lance + graph).\n"
619
683
  " reprocess Full vector + graph rebuild (default); optional --vectors-only / --graph-only.\n"
620
684
  " erase Delete the index from disk.\n\n"
621
685
  "Introspection (inspect the index):\n"
@@ -650,10 +714,15 @@ def build_parser() -> argparse.ArgumentParser:
650
714
  increment = subparsers.add_parser(
651
715
  "increment",
652
716
  help="Pick up changes since the last index update.",
653
- description="Runs cocoindex catch-up (no full reprocess). Does not rebuild Kuzu; see stderr warning.",
717
+ description="Runs cocoindex catch-up and incremental Kuzu graph update. Use --vectors-only to skip graph update.",
654
718
  )
655
719
  _add_index_embedding_flags(increment)
656
720
  _add_verbosity_flags(increment)
721
+ increment.add_argument(
722
+ "--vectors-only",
723
+ action="store_true",
724
+ help="Run only cocoindex catch-up (Lance); skip graph update.",
725
+ )
657
726
  increment.set_defaults(handler=_cmd_increment)
658
727
 
659
728
  reprocess = subparsers.add_parser(
@@ -123,6 +123,46 @@ def find_yaml_config_file(source_root: Path) -> Path | None:
123
123
  return None
124
124
 
125
125
 
126
+ def _has_index_dir(directory: Path) -> bool:
127
+ """True if *directory* contains a non-empty ``.java-codebase-rag/`` index directory."""
128
+ idx = directory / ".java-codebase-rag"
129
+ return idx.is_dir() and any(idx.iterdir())
130
+
131
+
132
def discover_project_root(start: Path) -> Path | None:
    """Find the nearest ancestor of *start* (inclusive) that marks a project.

    A directory qualifies when it contains ``.java-codebase-rag.yml`` /
    ``.java-codebase-rag.yaml`` (checked first) or a ``.java-codebase-rag/``
    index directory. The closest match to *start* wins.

    The walk checks ``$HOME`` itself but never goes above it; when *start* is
    not under ``$HOME`` the walk ends at the filesystem root.

    Returns:
        The matching directory, or ``None`` if no marker was found.
    """
    origin = start.resolve()
    home_dir = Path.home().resolve()

    # origin.parents runs up to the filesystem root, so the loop ends there
    # on its own when $HOME is never encountered.
    for candidate in (origin, *origin.parents):
        # Config file is the primary anchor; the index directory is the
        # fallback for indexes created without a config file.
        if find_yaml_config_file(candidate) is not None or _has_index_dir(candidate):
            return candidate
        if candidate == home_dir:
            return None
    return None
164
+
165
+
126
166
  def load_yaml_mapping(source_root: Path) -> dict[str, Any]:
127
167
  path = find_yaml_config_file(source_root)
128
168
  if path is None:
@@ -277,8 +317,36 @@ def resolve_operator_config(
277
317
  cli_embedding_model: str | None = None,
278
318
  cli_embedding_device: str | None = None,
279
319
  ) -> ResolvedOperatorConfig:
280
- root = (source_root or Path.cwd()).expanduser().resolve()
281
- yaml_dict = load_yaml_mapping(root)
320
+ # Phase 1: Find the config file directory
321
+ if source_root is not None:
322
+ # CLI flag provided: use it as both config_dir and effective source_root
323
+ # (skip YAML source_root check - CLI wins)
324
+ root = source_root.expanduser().resolve()
325
+ config_dir = root
326
+ yaml_dict = load_yaml_mapping(config_dir)
327
+ else:
328
+ # Check env var first
329
+ env_raw = os.environ.get(ENV_SOURCE_ROOT, "").strip()
330
+ if env_raw:
331
+ root = Path(env_raw).expanduser().resolve()
332
+ config_dir = root
333
+ yaml_dict = load_yaml_mapping(config_dir)
334
+ else:
335
+ # Walk up to find config dir
336
+ discovered = discover_project_root(Path.cwd())
337
+ config_dir = discovered if discovered is not None else Path.cwd().resolve()
338
+ # Load YAML from config dir
339
+ yaml_dict = load_yaml_mapping(config_dir)
340
+
341
+ # Phase 2: Resolve effective source root
342
+ # Check for YAML source_root field (resolved relative to config dir)
343
+ yaml_source_root = yaml_dict.get("source_root")
344
+ if isinstance(yaml_source_root, str) and yaml_source_root.strip():
345
+ yroot = Path(yaml_source_root.strip()).expanduser()
346
+ root = yroot.resolve() if yroot.is_absolute() else (config_dir / yroot).resolve()
347
+ else:
348
+ root = config_dir
349
+
282
350
  index_dir, index_src = _resolve_index_dir_path(
283
351
  source_root=root, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
284
352
  )
@@ -247,5 +247,60 @@ def run_build_ast_graph(
247
247
  return subprocess.CompletedProcess(args=cmd, returncode=code, stdout=out_s, stderr=err_s)
248
248
 
249
249
 
250
def run_incremental_graph(
    *,
    source_root: Path,
    kuzu_path: Path,
    verbose: bool,
    quiet: bool = False,
    env: dict[str, str] | None = None,
) -> subprocess.CompletedProcess[str]:
    """Invoke ``build_ast_graph.py --incremental`` as a child process.

    Args:
        source_root: Passed as ``--source-root`` and used as the child's cwd.
        kuzu_path: Passed as ``--kuzu-path``.
        verbose: Forwarded to ``_popen_capturing_stderr``; also suppresses
            the final "done" marker line.
        quiet: Capture all child output silently instead of streaming it.
        env: Environment for the child; a copy of ``os.environ`` when falsy.

    Returns:
        A ``CompletedProcess`` carrying the child's exit code and captured
        output. If the builder script is missing, a synthetic result with
        return code 126 is returned without spawning anything.
    """
    script = bundle_dir() / "build_ast_graph.py"
    if not script.is_file():
        return subprocess.CompletedProcess(
            args=[],
            returncode=126,
            stdout="",
            stderr=f"build_ast_graph.py not found under {script.parent}",
        )

    cmd: list[str] = [
        sys.executable,
        str(script),
        "--source-root",
        str(source_root),
        "--kuzu-path",
        str(kuzu_path),
        "--incremental",
    ]
    # Three tiers: --quiet (silent) / default (filtered progress) / --verbose (raw).
    # The default tier still passes --verbose to the builder so it emits
    # per-pass progress lines for the parent to filter.
    if verbose or not quiet:
        cmd.append("--verbose")

    child_kwargs = {"cwd": str(source_root), "env": env or os.environ.copy()}

    if quiet:
        return subprocess.run(cmd, capture_output=True, text=True, **child_kwargs)

    child = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        bufsize=0,
        **child_kwargs,
    )
    captured_out, captured_err, exit_code = _popen_capturing_stderr(child, verbose=verbose)
    if not verbose:
        from java_codebase_rag.cli_format import bold_cyan, styled_check, styled_cross

        status = styled_check() if exit_code == 0 else styled_cross()
        print(f"{status} {bold_cyan('[increment]')} done", file=sys.stderr, flush=True)
    return subprocess.CompletedProcess(
        args=cmd, returncode=exit_code, stdout=captured_out, stderr=captured_err
    )
303
+
304
+
250
305
def clip(s: str, n: int) -> str:
    """Return at most the last *n* characters of *s*.

    A non-positive *n* yields an empty string. Without this guard ``s[-0:]``
    is the whole string, and a negative *n* would drop a prefix instead of
    clipping to a budget.
    """
    if n <= 0:
        return ""
    return s[-n:] if len(s) > n else s
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.3.1
3
+ Version: 0.4.0
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
@@ -29,7 +29,12 @@ Requires-Dist: PyYAML<7,>=6.0.3
29
29
  Requires-Dist: sentence-transformers<6,>=5.4.0
30
30
  Requires-Dist: tree-sitter<0.26,>=0.25.2
31
31
  Requires-Dist: tree-sitter-java<0.24,>=0.23.5
32
+ Requires-Dist: pydantic<3,>=2.0
32
33
  Requires-Dist: unidiff<1,>=0.7.3
34
+ Provides-Extra: dev
35
+ Requires-Dist: pytest>=7; extra == "dev"
36
+ Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
37
+ Requires-Dist: ruff>=0.4; extra == "dev"
33
38
  Dynamic: license-file
34
39
 
35
40
  # java-codebase-rag
@@ -126,7 +131,9 @@ With the package installed, the console script `java-codebase-rag-mcp` is on you
126
131
  claude mcp add --transport stdio java-codebase-rag -- java-codebase-rag-mcp
127
132
  ```
128
133
 
129
- Then set env vars (`JAVA_CODEBASE_RAG_INDEX_DIR`, `JAVA_CODEBASE_RAG_SOURCE_ROOT`, `SBERT_MODEL`, …) in `.mcp.json` or your shell profile. For a project-scoped `.mcp.json` template, see [`mcp.json.example`](./mcp.json.example). Official docs: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings).
134
+ **Zero-env-var configuration:** The tool automatically walks up the directory tree to find `.java-codebase-rag.yml`, so you don't need to set `JAVA_CODEBASE_RAG_SOURCE_ROOT` when working from within a project. Just place the config file at your project root and the tool will find it. See [`mcp.json.example`](./mcp.json.example) for the minimal configuration.
135
+
136
+ If you need to override defaults, you can set env vars (`JAVA_CODEBASE_RAG_INDEX_DIR`, `JAVA_CODEBASE_RAG_SOURCE_ROOT`, `SBERT_MODEL`, …) in `.mcp.json` or your shell profile. For a full configuration template, see [`mcp.json.example`](./mcp.json.example). Official docs: [Claude Code settings](https://docs.anthropic.com/en/docs/claude-code/settings).
130
137
 
131
138
  ### Claude Desktop
132
139
 
@@ -200,7 +207,7 @@ Run `java-codebase-rag --help` to list grouped subcommands. Operator playbook wi
200
207
  | Group | Subcommand | What it does |
201
208
  |---|---|---|
202
209
  | Lifecycle | `init` | First-time index. Refuses if artifacts already exist. |
203
- | Lifecycle | `increment` | CocoIndex catch-up (Lance only); Kuzu stays stale until `reprocess`. |
210
+ | Lifecycle | `increment` | CocoIndex catch-up + incremental Kuzu update. `--vectors-only` for Lance only. |
204
211
  | Lifecycle | `reprocess` | Full Lance + Kuzu rebuild. `--vectors-only` / `--graph-only` for a single phase. |
205
212
  | Lifecycle | `erase` | Delete index artifacts. Requires `--yes` or TTY confirm. |
206
213
  | Introspection | `meta`, `tables`, `diagnose-ignore`, `unresolved-calls` | Health, table listing, ignore-layer diagnostics, receiver-failure call sites. |
@@ -244,5 +251,4 @@ The default embedding model is `sentence-transformers/all-MiniLM-L6-v2` (downloa
244
251
 
245
252
  - `get_service_topology` — microservice-level summary aggregating `HTTP_CALLS` / `ASYNC_CALLS`.
246
253
  - Agentic routing layer (query classifier → vector / graph / both).
247
- - Incremental Kuzu updates (per-changed-file) — see [`propose/TIER2-INCREMENTAL-REBUILD-PROPOSE.md`](./propose/TIER2-INCREMENTAL-REBUILD-PROPOSE.md) and [`propose/INDEX-AUTO-MODE-PROPOSE.md`](./propose/INDEX-AUTO-MODE-PROPOSE.md).
248
254
  - Optional `codegraph_nodes` LanceDB table embedding symbol summaries so the graph itself is vector-searchable.
@@ -1,8 +1,8 @@
1
- ast_java.py,sha256=NGs34vhoSypfHbKnNRpA9aj-gO4P6bED3ASmDWEVsZk,98881
1
+ ast_java.py,sha256=OKoH7oX6L7AEEd6UY-spK8BPtWYY1T_4esrTC5VtoK8,98881
2
2
  brownfield_events.py,sha256=yxXkKDgMb3VPtaiakGzncHM_EGnda8xIue6w90yYp8s,2055
3
- build_ast_graph.py,sha256=oK2C94tZqCL6KVxOHkrXTLfeF29xWXuBDF49KQxCMZo,118133
3
+ build_ast_graph.py,sha256=KY5rpqWR7UafvAcIv0ubSz6jiYA8I5ZGqm_SKIFJulE,148770
4
4
  chunk_heuristics.py,sha256=aQk2NOKxzUdqoUAJUO3G3LE0MN_bYZWNLQ0tkmj5uts,1813
5
- graph_enrich.py,sha256=2-njD2alm7FFpLn217ZG3f3ln-zqbdtGwTghOpd44oo,62021
5
+ graph_enrich.py,sha256=m3cksCHLqLHhA0Y-TLodbm09YfSJZjlTDN0Z51DiP2c,63317
6
6
  index_common.py,sha256=HT6FKHFJ084eFvd3fR1j8z8gf4eWoPHVW8GXLpw464I,285
7
7
  java_index_flow_lancedb.py,sha256=LMmfMSdE2d-ujxuJ2-hss7BhkrUMxHNyZuqsiGITuAI,12057
8
8
  java_index_v1_common.py,sha256=nF1KrSqboF_RRvWerG9knRRFmWwsrG_CvhgnsoZ8KqA,1154
@@ -13,16 +13,16 @@ mcp_v2.py,sha256=JFe62sYzJ2XiE6L3wAH8XG9_Ya2oOeJQ_hkiTmXFnSE,79065
13
13
  path_filtering.py,sha256=-oX16SYLWYwX9pcV1fu3vbVTIhY1GzFflT7J1E2tqPY,17122
14
14
  pr_analysis.py,sha256=Zaq90xYgMgrReV3vCGcFhOkK61gIRMAAIgs7ev-rJG4,18410
15
15
  search_lancedb.py,sha256=-XgtpbJ_3zDLiZ_vGKXjaLpl7RlvgyzUb7oAGoWkXO0,36754
16
- server.py,sha256=6pw3g29o7SwVYmRZV0NxSc2d_eFg521LkUn9kUCzbJw,26470
16
+ server.py,sha256=c4Bo0FXPoKP2AwIVP_wiv0XENkmKchLHf0QrQPUUgq4,28645
17
17
  java_codebase_rag/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
18
- java_codebase_rag/cli.py,sha256=7nwrnXdRGZvRKMYcHJDR0CecYsiBt1Fu1RJwrQAIMV0,28518
18
+ java_codebase_rag/cli.py,sha256=h4-86RRZAlCSARuhMhLbqnUiwxAagBnrY5rj4IF0yIo,31238
19
19
  java_codebase_rag/cli_format.py,sha256=arU7P9W6Fvm7X_wzR1wJ8EfyxK1rDP_ESEhdA0ub4Mo,2579
20
20
  java_codebase_rag/cli_progress.py,sha256=9jCqEagYOXs32SYVA31_sOCrONvYy7cl1CrdBD2Pg44,3168
21
- java_codebase_rag/config.py,sha256=F6NtbRAlcs9M96bhrkQVeptOkvCFdd0rt_UJFKNiRfA,12633
22
- java_codebase_rag/pipeline.py,sha256=p0u6yJlBYip2kr7LaCUYFHI4sv9inEgXpZTzcJK_rJ8,7583
23
- java_codebase_rag-0.3.1.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
24
- java_codebase_rag-0.3.1.dist-info/METADATA,sha256=o4nk9F_JRE6yX0Jv7O4A45c2vHPXIGVryjxPz8dWEDQ,15068
25
- java_codebase_rag-0.3.1.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
26
- java_codebase_rag-0.3.1.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
27
- java_codebase_rag-0.3.1.dist-info/top_level.txt,sha256=5aIYoMkvJvvfXvf4iHn2OeSIM7PZXP-0j94eNESnwMw,242
28
- java_codebase_rag-0.3.1.dist-info/RECORD,,
21
+ java_codebase_rag/config.py,sha256=1BkRQsdY2ohZ8IWmbTG3WHgotVVUIrRTN537A1QAoCQ,15352
22
+ java_codebase_rag/pipeline.py,sha256=nMXwX9r7HG9yPstrm7y_vfOMUZuDmw5_1lJTAfR-jwI,9488
23
+ java_codebase_rag-0.4.0.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
24
+ java_codebase_rag-0.4.0.dist-info/METADATA,sha256=Je_Zr3MB5ANZNolBRvHOpjQvEO_Y9GBFes4sXYiI_Uw,15422
25
+ java_codebase_rag-0.4.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
26
+ java_codebase_rag-0.4.0.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
27
+ java_codebase_rag-0.4.0.dist-info/top_level.txt,sha256=5aIYoMkvJvvfXvf4iHn2OeSIM7PZXP-0j94eNESnwMw,242
28
+ java_codebase_rag-0.4.0.dist-info/RECORD,,
server.py CHANGED
@@ -16,7 +16,12 @@ from java_codebase_rag.cli_progress import (
16
16
  emit_vectors_finish,
17
17
  emit_vectors_start,
18
18
  )
19
- from java_codebase_rag.config import emit_legacy_env_hints_if_present, resolved_sbert_model_for_process_env, resolve_operator_config
19
+ from java_codebase_rag.config import (
20
+ discover_project_root,
21
+ emit_legacy_env_hints_if_present,
22
+ resolved_sbert_model_for_process_env,
23
+ resolve_operator_config,
24
+ )
20
25
  from kuzu_queries import KuzuGraph, resolve_kuzu_path
21
26
  from mcp.server.fastmcp import FastMCP
22
27
  from pydantic import BaseModel, Field
@@ -91,10 +96,49 @@ class IndexInfoOutput(BaseModel):
91
96
  graph: GraphMetaOutput
92
97
 
93
98
 
99
+ # Module-level scope manager, initialized in main()
100
+ _scope_manager: ScopeManager | None = None
101
+
102
+
103
+ class ScopeManager:
104
+ """Manages automatic microservice scope detection and injection."""
105
+
106
+ def __init__(self, source_root: Path):
107
+ self.source_root = source_root
108
+ self.default_scope: str | None = self._detect_scope()
109
+ self._log_detection()
110
+
111
+ def _detect_scope(self) -> str | None:
112
+ from graph_enrich import detect_microservice_from_path
113
+ return detect_microservice_from_path(Path.cwd(), self.source_root)
114
+
115
+ def _log_detection(self) -> None:
116
+ if self.default_scope:
117
+ print(f"[scope] Detected microservice: {self.default_scope}", file=sys.stderr)
118
+ print(f"[scope] Queries scoped to {self.default_scope}", file=sys.stderr)
119
+ else:
120
+ print("[scope] No microservice detected (at project root)", file=sys.stderr)
121
+ print("[scope] Queries will span all microservices", file=sys.stderr)
122
+
123
+ def apply_auto_scope(self, node_filter: dict[str, Any] | None) -> dict[str, Any] | None:
124
+ """Apply auto-detected scope to filter if no explicit microservice is set."""
125
+ if self.default_scope is None:
126
+ return node_filter
127
+ # Convert to dict for manipulation
128
+ if node_filter is None:
129
+ filter_dict = {}
130
+ else:
131
+ filter_dict = dict(node_filter)
132
+ # Only inject if user didn't specify microservice
133
+ if "microservice" not in filter_dict:
134
+ filter_dict["microservice"] = self.default_scope
135
+ return filter_dict
136
+
137
+
94
138
  def _resolve_lancedb_uri() -> str:
95
139
  raw = os.environ.get("JAVA_CODEBASE_RAG_INDEX_DIR", "").strip()
96
140
  if not raw:
97
- raw = str((Path.cwd() / ".java-codebase-rag").resolve())
141
+ raw = str((_project_root() / ".java-codebase-rag").resolve())
98
142
  p = Path(raw).expanduser()
99
143
  if not str(raw).startswith(("s3://", "gs://", "az://")):
100
144
  try:
@@ -108,7 +152,8 @@ def _project_root() -> Path:
108
152
  env = os.environ.get("JAVA_CODEBASE_RAG_SOURCE_ROOT", "").strip()
109
153
  if env:
110
154
  return Path(env).expanduser().resolve()
111
- return Path.cwd().resolve()
155
+ discovered = discover_project_root(Path.cwd())
156
+ return discovered if discovered is not None else Path.cwd().resolve()
112
157
 
113
158
 
114
159
  def _cocoindex_subprocess_env(project_root: Path) -> dict[str, str]:
@@ -370,6 +415,7 @@ def create_mcp_server() -> FastMCP:
370
415
  ),
371
416
  ),
372
417
  ) -> mcp_v2.SearchOutput:
418
+ scoped_filter = _scope_manager.apply_auto_scope(filter) if _scope_manager else filter
373
419
  return await asyncio.to_thread(
374
420
  mcp_v2.search_v2,
375
421
  query,
@@ -378,7 +424,7 @@ def create_mcp_server() -> FastMCP:
378
424
  limit,
379
425
  offset,
380
426
  path_contains,
381
- filter,
427
+ scoped_filter,
382
428
  None,
383
429
  )
384
430
 
@@ -413,7 +459,8 @@ def create_mcp_server() -> FastMCP:
413
459
  limit: int = Field(default=25, ge=1, le=500, description="Max nodes to return"),
414
460
  offset: int = Field(default=0, ge=0, le=499, description="Skip this many nodes (pagination)"),
415
461
  ) -> mcp_v2.FindOutput:
416
- return await asyncio.to_thread(mcp_v2.find_v2, kind, filter, limit, offset, None)
462
+ scoped_filter = _scope_manager.apply_auto_scope(filter) if _scope_manager else filter
463
+ return await asyncio.to_thread(mcp_v2.find_v2, kind, scoped_filter, limit, offset, None)
417
464
 
418
465
  @mcp.tool(
419
466
  name="describe",
@@ -525,6 +572,7 @@ def create_mcp_server() -> FastMCP:
525
572
  ),
526
573
  ),
527
574
  ) -> mcp_v2.NeighborsOutput:
575
+ scoped_filter = _scope_manager.apply_auto_scope(filter) if _scope_manager else filter
528
576
  return await asyncio.to_thread(
529
577
  mcp_v2.neighbors_v2,
530
578
  ids,
@@ -532,7 +580,7 @@ def create_mcp_server() -> FastMCP:
532
580
  edge_types,
533
581
  limit,
534
582
  offset,
535
- filter,
583
+ scoped_filter,
536
584
  edge_filter,
537
585
  include_unresolved,
538
586
  dedup_calls,
@@ -580,6 +628,10 @@ def main() -> None:
580
628
  cfg.apply_to_os_environ()
581
629
  mcp_v2.set_hints_enabled(cfg.hints_enabled)
582
630
 
631
+ # Initialize scope manager for automatic microservice detection
632
+ global _scope_manager
633
+ _scope_manager = ScopeManager(cfg.source_root)
634
+
583
635
  asyncio.run(create_mcp_server().run_stdio_async())
584
636
 
585
637