java-codebase-rag 0.6.0__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ast_java.py CHANGED
@@ -13,6 +13,7 @@ Python with no tree-sitter dependency.
13
13
  from __future__ import annotations
14
14
 
15
15
  import posixpath
16
+ import sys
16
17
  from dataclasses import dataclass, field
17
18
  from functools import lru_cache
18
19
  from typing import Iterable
@@ -1642,9 +1643,17 @@ def _parse_codebase_http_client_annotation(
1642
1643
  pairs, _ = _annotation_kv_nodes(ann, src)
1643
1644
  client_kind = ""
1644
1645
  if "clientKind" in pairs:
1645
- val, _kind = _annotation_value(pairs["clientKind"], src)
1646
- if val and _kind == "enum":
1647
- client_kind = str(val)
1646
+ val, vkind = _annotation_value(pairs["clientKind"], src)
1647
+ if val and vkind == "enum":
1648
+ kind_val = str(val)
1649
+ from java_ontology import VALID_CLIENT_KINDS # deferred: java_ontology imports ast_java
1650
+ if kind_val in VALID_CLIENT_KINDS:
1651
+ client_kind = kind_val
1652
+ else:
1653
+ print(
1654
+ f"[lancedb-mcp] CodebaseHttpClient: invalid clientKind {kind_val!r} — ignored",
1655
+ file=sys.stderr,
1656
+ )
1648
1657
  target_service = ""
1649
1658
  if "targetService" in pairs:
1650
1659
  atoms = _string_value_atoms(pairs["targetService"], src, ctx)
@@ -1714,9 +1723,17 @@ def _parse_codebase_producer_annotation(
1714
1723
  client_kind = "kafka_send"
1715
1724
  kind_node = pairs.get("producerKind") or pairs.get("clientKind")
1716
1725
  if kind_node is not None:
1717
- val, _kind = _annotation_value(kind_node, src)
1718
- if val and _kind == "enum":
1719
- client_kind = str(val)
1726
+ val, vkind = _annotation_value(kind_node, src)
1727
+ if val and vkind == "enum":
1728
+ kind_val = str(val)
1729
+ from java_ontology import VALID_PRODUCER_KINDS # deferred: java_ontology imports ast_java
1730
+ if kind_val in VALID_PRODUCER_KINDS:
1731
+ client_kind = kind_val
1732
+ else:
1733
+ print(
1734
+ f"[lancedb-mcp] CodebaseProducer: invalid producerKind {kind_val!r} — ignored",
1735
+ file=sys.stderr,
1736
+ )
1720
1737
  topic = ""
1721
1738
  if "topic" in pairs:
1722
1739
  atoms = _string_value_atoms(pairs["topic"], src, ctx)
build_ast_graph.py CHANGED
@@ -3668,10 +3668,17 @@ def incremental_rebuild(
3668
3668
 
3669
3669
 
3670
3670
  def _init_hash_tracker(source_root: Path, ladybug_path: Path) -> int:
3671
- """Initialize hash tracker for all Java files. Returns number of files hashed."""
3671
+ """Initialize hash tracker for all Java files. Returns number of files hashed.
3672
+
3673
+ Called right after a full graph rebuild (``write_ladybug``), so the store must
3674
+ mirror exactly the files that were just indexed. We deliberately do NOT
3675
+ ``load()`` the existing store: ``update`` re-hashes every current file anyway,
3676
+ and preserving old entries would leave stale hashes for files that no longer
3677
+ exist (deleted or now-ignored). Those ghosts would be re-detected as "removed"
3678
+ on every subsequent ``increment``, sustaining an endless full-rebuild loop.
3679
+ """
3672
3680
  index_dir = ladybug_path.parent
3673
3681
  tracker = FileHashTracker(index_dir)
3674
- tracker.load()
3675
3682
  ignore = LayeredIgnore(source_root)
3676
3683
  all_files: set[str] = set()
3677
3684
  source_root_resolved = source_root.resolve()
@@ -3742,7 +3749,7 @@ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTa
3742
3749
 
3743
3750
  # Write declares_client edges
3744
3751
  for row in tables.declares_client_rows:
3745
- source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="")).file_path
3752
+ source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="", node_id="")).file_path
3746
3753
  conn.execute(_CREATE_DECLARES_CLIENT, {
3747
3754
  "sid": row.symbol_id,
3748
3755
  "cid": row.client_id,
@@ -3753,7 +3760,7 @@ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTa
3753
3760
 
3754
3761
  # Write declares_producer edges
3755
3762
  for row in tables.declares_producer_rows:
3756
- source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="")).file_path
3763
+ source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="", node_id="")).file_path
3757
3764
  conn.execute(_CREATE_DECLARES_PRODUCER, {
3758
3765
  "sid": row.symbol_id,
3759
3766
  "pid": row.producer_id,
java_codebase_rag/cli.py CHANGED
@@ -6,6 +6,7 @@ from __future__ import annotations
6
6
  import argparse
7
7
  import asyncio
8
8
  import json
9
+ import os
9
10
  import pprint
10
11
  import shutil
11
12
  import sys
@@ -930,5 +931,21 @@ def main(argv: list[str] | None = None) -> int:
930
931
  return 2
931
932
 
932
933
 
934
+ def _console_script_main() -> None:
935
+ """Real CLI entry: terminate without interpreter finalization.
936
+
937
+ A pyarrow/lance worker thread (loaded via lancedb in lifecycle commands) can
938
+ outlive CPython finalization in a one-shot CLI subprocess and trip
939
+ ``PyGILState_Release`` (SIGABRT, exit -6). Flushing + ``os._exit`` skips that
940
+ racy teardown — the command has already done its work and emitted its result.
941
+ ``main()`` stays return-based so in-process test callers (``cli.main(...)``)
942
+ keep working.
943
+ """
944
+ rc = main()
945
+ sys.stdout.flush()
946
+ sys.stderr.flush()
947
+ os._exit(rc)
948
+
949
+
933
950
  if __name__ == "__main__":
934
- raise SystemExit(main())
951
+ _console_script_main()
@@ -52,14 +52,36 @@ _DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
52
52
  _UNRESOLVED_VAR_RE = re.compile(r"\$(\w+|\{[^}]+\})")
53
53
 
54
54
 
55
- def maybe_expand_embedding_model_path(value: str) -> str:
56
- """Expand ``~`` and ``$VAR`` when *value* is path-shaped.
55
+ def maybe_expand_embedding_model_path(
56
+ value: str,
57
+ *,
58
+ config_dir: Path | None = None,
59
+ source_root: Path | None = None,
60
+ source: SettingSource | None = None,
61
+ ) -> str:
62
+ """Expand ``~`` / ``$VAR`` for path-shaped values and resolve relatives to absolute.
57
63
 
58
64
  Path-shape: starts with ``/``, ``./``, ``../``, ``~``, or contains ``$``.
59
65
  Plain ``org/name`` (hub id) does not match and is passed through unchanged.
60
66
 
61
- Used for ``embedding.model`` after precedence resolution and for runtime
62
- ``SBERT_MODEL`` reads (e.g. MCP) so the string matches ``ResolvedOperatorConfig``.
67
+ Relative resolution mirrors :func:`_resolve_index_dir_path` so a committed
68
+ config is portable regardless of process CWD:
69
+
70
+ * YAML values (``source == "yaml"``) resolve against ``config_dir`` (the
71
+ directory holding ``.java-codebase-rag.yml``).
72
+ * CLI / env values resolve against ``source_root``.
73
+
74
+ Only a result that still starts with ``./`` or ``../`` *after* ``~`` /
75
+ ``$VAR`` expansion is re-based — so hub ids (``org/name``), absolute paths,
76
+ ``~/``-expanded paths, and an env var that already yielded an absolute path
77
+ are all left untouched.
78
+
79
+ When no base is supplied (the runtime ``SBERT_MODEL`` read via
80
+ :func:`resolved_sbert_model_for_process_env`), relative resolution is
81
+ skipped: the value is returned ``expandvars`` / ``expanduser``-expanded but
82
+ not re-based, matching the prior best-effort behavior. The main resolution
83
+ path (:func:`resolve_operator_config`) supplies a base, so the absolute path
84
+ it stores is what downstream loaders receive.
63
85
  """
64
86
  needs_expand = value.startswith(("/", "./", "../", "~")) or "$" in value
65
87
  if not needs_expand:
@@ -70,9 +92,31 @@ def maybe_expand_embedding_model_path(value: str) -> str:
70
92
  f"java-codebase-rag: path-shaped model string contains unresolved variable: {expanded}",
71
93
  file=sys.stderr,
72
94
  )
95
+ if expanded.startswith(("./", "../")):
96
+ base = _embedding_model_base(
97
+ source=source, config_dir=config_dir, source_root=source_root
98
+ )
99
+ if base is not None:
100
+ return str((base / expanded).resolve())
73
101
  return expanded
74
102
 
75
103
 
104
+ def _embedding_model_base(
105
+ *,
106
+ source: SettingSource | None,
107
+ config_dir: Path | None,
108
+ source_root: Path | None,
109
+ ) -> Path | None:
110
+ """Base directory for a relative ``embedding.model``.
111
+
112
+ Mirrors :func:`_resolve_index_dir_path`: YAML values anchor on the config
113
+ file's directory; CLI / env values anchor on the resolved ``source_root``.
114
+ """
115
+ if source == "yaml":
116
+ return config_dir
117
+ return source_root
118
+
119
+
76
120
  def resolved_sbert_model_for_process_env(import_time_default: str) -> str:
77
121
  """``SBERT_MODEL`` from the process environment, with the same expansion as YAML/CLI resolution.
78
122
 
@@ -306,9 +350,19 @@ def _pick_bool(
306
350
  def _resolve_index_dir_path(
307
351
  *,
308
352
  source_root: Path,
353
+ config_dir: Path,
309
354
  cli_index_dir: str | None,
310
355
  yaml_dict: dict[str, Any],
311
356
  ) -> tuple[Path, SettingSource]:
357
+ # Bases for relative paths:
358
+ # - YAML ``index_dir`` -> the config file's directory (``config_dir``),
359
+ # the SAME base used for YAML ``source_root``. Paths written in the
360
+ # config file are relative to the file, so both keys stay consistent.
361
+ # - CLI / env ``index_dir`` -> ``source_root`` (unchanged). These are not
362
+ # "in the config file"; preserving the existing base avoids a semantics
363
+ # change for operators who pass ``--index-dir`` on the command line.
364
+ # - Default ``./.java-codebase-rag`` -> ``source_root`` so the index sits
365
+ # beside the Java tree (the layout ``discover_project_root`` anchors on).
312
366
  raw_cli = cli_index_dir.strip() if isinstance(cli_index_dir, str) else None
313
367
  if raw_cli:
314
368
  p = Path(raw_cli).expanduser()
@@ -324,7 +378,7 @@ def _resolve_index_dir_path(
324
378
  idx = yaml_dict.get("index_dir")
325
379
  if isinstance(idx, str) and idx.strip():
326
380
  p = Path(idx.strip()).expanduser()
327
- out = p.resolve() if p.is_absolute() else (source_root / p).resolve()
381
+ out = p.resolve() if p.is_absolute() else (config_dir / p).resolve()
328
382
  return out, "yaml"
329
383
 
330
384
  return (source_root / ".java-codebase-rag").resolve(), "default"
@@ -368,7 +422,7 @@ def resolve_operator_config(
368
422
  root = config_dir
369
423
 
370
424
  index_dir, index_src = _resolve_index_dir_path(
371
- source_root=root, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
425
+ source_root=root, config_dir=config_dir, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
372
426
  )
373
427
  model, model_src = _pick_str(
374
428
  cli_val=cli_embedding_model,
@@ -377,7 +431,12 @@ def resolve_operator_config(
377
431
  yaml_path=("embedding", "model"),
378
432
  default=_DEFAULT_EMBEDDING_MODEL,
379
433
  )
380
- model = maybe_expand_embedding_model_path(model)
434
+ model = maybe_expand_embedding_model_path(
435
+ model,
436
+ config_dir=config_dir,
437
+ source_root=root,
438
+ source=model_src,
439
+ )
381
440
  device, device_src = _pick_optional_device(
382
441
  cli_val=cli_embedding_device,
383
442
  env_key="SBERT_DEVICE",
@@ -759,6 +759,11 @@ def generate_yaml_config(
759
759
  else:
760
760
  config["embedding"].pop("model", None)
761
761
 
762
+ # Seed cross-service resolution safe-by-default: only evidence-backed cross-service
763
+ # edges survive (see _is_brownfield_sourced in build_ast_graph). setdefault preserves
764
+ # an explicit user choice (e.g. `auto`) on re-run update.
765
+ config.setdefault("cross_service_resolution", "brownfield_only")
766
+
762
767
  # Keys NOT written by installer (preserved if present):
763
768
  # - source_root (config.py resolves from walk-up discovery)
764
769
  # - index_dir (config.py defaults to <source_root>/.java-codebase-rag)
@@ -1250,9 +1255,14 @@ def run_update(
1250
1255
  print("Skipping index update.")
1251
1256
  return EXIT_PARTIAL if has_artifact_failures else EXIT_SUCCESS
1252
1257
 
1253
- # Resolve configuration
1258
+ # Resolve configuration. Pass source_root=None so the YAML ``source_root``
1259
+ # field is honored exactly like increment/init/reprocess — passing the
1260
+ # discovered config dir here routes resolve_operator_config into the
1261
+ # explicit-override branch that SKIPS the YAML field, which made `update`
1262
+ # point cocoindex at the config dir (no Java) against the real index and
1263
+ # mass-delete it. Discovery still runs against the CLI's cwd.
1254
1264
  try:
1255
- cfg = resolve_operator_config(source_root=project_root, cli_index_dir=None)
1265
+ cfg = resolve_operator_config(source_root=None, cli_index_dir=None)
1256
1266
  index_dir = cfg.index_dir
1257
1267
  except Exception as e:
1258
1268
  print(f"\nWarning: Failed to resolve configuration: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.6.0
3
+ Version: 0.6.2
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
@@ -35,6 +35,7 @@ Requires-Dist: unidiff<1,>=0.7.3
35
35
  Provides-Extra: dev
36
36
  Requires-Dist: pytest>=7; extra == "dev"
37
37
  Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
38
+ Requires-Dist: pytest-xdist>=3; extra == "dev"
38
39
  Requires-Dist: ruff>=0.4; extra == "dev"
39
40
  Dynamic: license-file
40
41
 
@@ -1,33 +1,33 @@
1
- ast_java.py,sha256=TMesuv4SYqzkwfKxf_Pps0KaPLZNZOrhU8mL20bwqeQ,98882
1
+ ast_java.py,sha256=NQgZzstbsMq-PdowoD6r_ixJKxEEFzTP9xUzqDpiXeU,99661
2
2
  brownfield_events.py,sha256=yxXkKDgMb3VPtaiakGzncHM_EGnda8xIue6w90yYp8s,2055
3
- build_ast_graph.py,sha256=GNbjiIAwsXaJQ9Je6gbR-dB9SbnaLThya2pEw3tggQs,152396
3
+ build_ast_graph.py,sha256=OKigswkUmWwUAKXXRNH4zplw2VonIdWUWzVjC-t5roo,152893
4
4
  chunk_heuristics.py,sha256=aQk2NOKxzUdqoUAJUO3G3LE0MN_bYZWNLQ0tkmj5uts,1813
5
5
  graph_enrich.py,sha256=POT4LwSkTsrjUmP67bsm2UezUam70cunuPDYDh-v1Bs,63332
6
6
  index_common.py,sha256=HT6FKHFJ084eFvd3fR1j8z8gf4eWoPHVW8GXLpw464I,285
7
7
  java_index_flow_lancedb.py,sha256=MH9iTNF6HDHDTt5Jn7TOVE5hQ4WUPNt7PlQoh1tuh9o,13212
8
8
  java_index_v1_common.py,sha256=nF1KrSqboF_RRvWerG9knRRFmWwsrG_CvhgnsoZ8KqA,1154
9
- java_ontology.py,sha256=FcnOq1XWhUP03OfnTkRStslqrNyukzUKH7VNuK6Bme4,16425
9
+ java_ontology.py,sha256=71bCLDNvMy0SpZPzSR5apJ0qJXNd6y5ggkLdBEw_PFo,16682
10
10
  ladybug_queries.py,sha256=912j9VAYDjcU4ReVorWQ6R4DZl0tteKic-Pqu0jyBS0,90837
11
11
  mcp_hints.py,sha256=3swh05LSiWur3tm3-yssndBsLxIxFhy501kBtJI8jJ0,42509
12
- mcp_v2.py,sha256=64UDrQ27hAQtlz3pFp9A3Xlk95bUjYZ4VBscsyAPCIY,79116
12
+ mcp_v2.py,sha256=o94GJI7j6dLJDIA3R_1ZiQhjzQfMAEW3etdeZYnHOUc,80637
13
13
  path_filtering.py,sha256=-oX16SYLWYwX9pcV1fu3vbVTIhY1GzFflT7J1E2tqPY,17122
14
14
  pr_analysis.py,sha256=3-5L8_G5XupdJsl9RN73Lq-ejPoK11B3m_VzAx2fGG8,18413
15
15
  search_lancedb.py,sha256=scG6HBUrsgIeSWFrGcLcGdhWv1qODOx4JOBMAlLDY_E,36793
16
- server.py,sha256=uGKT0PdM-bVrzIsfbxF6ZuHGcuRMSSlvkJk0e7Ff43Y,30556
16
+ server.py,sha256=Js3XDpV7ThAtj352StH6QdhHutf1D5qUkbR-8k3jO8g,31303
17
17
  java_codebase_rag/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
18
18
  java_codebase_rag/_fdlimit.py,sha256=WroFdfSNbcriKok6q8znTf74dqlznxea_1Fd5bHl_3o,1930
19
- java_codebase_rag/cli.py,sha256=a5IFLWAsh77mfLv1Z9OdpvLaYvj4i0KR3_kLtL-ans8,34156
19
+ java_codebase_rag/cli.py,sha256=HkzCP8-G3WlCzoXcVCI2K3forDOMpTmUSoxgx3jbKk4,34774
20
20
  java_codebase_rag/cli_format.py,sha256=arU7P9W6Fvm7X_wzR1wJ8EfyxK1rDP_ESEhdA0ub4Mo,2579
21
21
  java_codebase_rag/cli_progress.py,sha256=9jCqEagYOXs32SYVA31_sOCrONvYy7cl1CrdBD2Pg44,3168
22
- java_codebase_rag/config.py,sha256=u4OomvglTWHUmMpcxN8wPRnRGfXVp3qK_GJ5pY96O98,16267
23
- java_codebase_rag/installer.py,sha256=DlBuVVWbHXgcjaQkuXUeT9fNdmk7XZefVT3zzw47k18,45965
22
+ java_codebase_rag/config.py,sha256=bfwYI4R8PU9YV_M4r8-03iaUZ_0TW-qN_NuhIsDXy2M,18769
23
+ java_codebase_rag/installer.py,sha256=sE0l85K_o291PdpF1vpesefR9VgdvvVeARXrpTxa30A,46689
24
24
  java_codebase_rag/lance_optimize.py,sha256=MzACYlgwxmkJCK64qQLyIAdizSq5BARqaMYSZONlc1I,6069
25
25
  java_codebase_rag/pipeline.py,sha256=UcgluFAW9Ghnas8u40x45bVic0mQv6rjzcliDKsnYJI,11936
26
26
  java_codebase_rag/install_data/agents/explorer-rag-enhanced.md,sha256=APl9d-No12qZNZLjU7mwNRwxHIgnT3ZtQZiD4clWlyU,14413
27
27
  java_codebase_rag/install_data/skills/explore-codebase/SKILL.md,sha256=pIM-Xdwq_fXkhhBJCdb-fA2nes5c_mMPcdUXb7Adyxo,12040
28
- java_codebase_rag-0.6.0.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
29
- java_codebase_rag-0.6.0.dist-info/METADATA,sha256=GoMO3zFTb98w4rVV5SMXpcLK-irlDs7aUH0wBGlv5cQ,16887
30
- java_codebase_rag-0.6.0.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
31
- java_codebase_rag-0.6.0.dist-info/entry_points.txt,sha256=mVVQJa0n73OWfhHXYCDoPRrWin_LJhH2Rn0CkJ2iax4,101
32
- java_codebase_rag-0.6.0.dist-info/top_level.txt,sha256=syQgi8XPBwY2ws_NZ1uRCxTf_s41NpshwEHNdcdnk3A,245
33
- java_codebase_rag-0.6.0.dist-info/RECORD,,
28
+ java_codebase_rag-0.6.2.dist-info/licenses/LICENSE,sha256=gxvtiHtuviR_q8ZAjWw-QTcF3DyPzg6ZY-lQrr8OPpw,1068
29
+ java_codebase_rag-0.6.2.dist-info/METADATA,sha256=X92kaZ5TbEacz0sznWtUtpYEJvBdWIXX0s8MlqOeRyg,16934
30
+ java_codebase_rag-0.6.2.dist-info/WHEEL,sha256=aeYiig01lYGDzBgS8HxWXOg3uV61G9ijOsup-k9o1sk,91
31
+ java_codebase_rag-0.6.2.dist-info/entry_points.txt,sha256=wsPZwot0Ui4JI3TIgW8LcbN8bNtKFbwQAlHAAJXfYgQ,117
32
+ java_codebase_rag-0.6.2.dist-info/top_level.txt,sha256=syQgi8XPBwY2ws_NZ1uRCxTf_s41NpshwEHNdcdnk3A,245
33
+ java_codebase_rag-0.6.2.dist-info/RECORD,,
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ java-codebase-rag = java_codebase_rag.cli:_console_script_main
3
+ java-codebase-rag-mcp = server:main
java_ontology.py CHANGED
@@ -15,7 +15,10 @@ from ast_java import (
15
15
  _TYPE_ANN_TO_CAPABILITY,
16
16
  )
17
17
 
18
- # Roles: Spring stereotype values plus DTO from `infer_role_for_type`.
18
+ # Roles assignable by indexing: Spring stereotype values plus DTO. ``OTHER`` is the
19
+ # built-in inference fallback (ast_java.infer_role when nothing matches) and is
20
+ # deliberately excluded here — it is a read-side value (the mcp_v2 ``Role`` enum
21
+ # includes it) but not a role a user may set via @CodebaseRole / role_overrides.
19
22
  VALID_ROLES: frozenset[str] = frozenset((*ROLE_ANNOTATIONS.values(), "DTO"))
20
23
 
21
24
  VALID_CAPABILITIES: frozenset[str] = frozenset(
mcp_v2.py CHANGED
@@ -48,6 +48,22 @@ def _hints_or_skip(tool: str, payload: dict) -> tuple[list, list]:
48
48
 
49
49
  DeclarationSymbolKind = Literal["class", "interface", "enum", "record", "annotation", "method", "constructor"]
50
50
 
51
+ # Closed value taxonomies surfaced to MCP consumers as enums. Sources of truth:
52
+ # Role — VALID_ROLES in java_ontology.py + the "OTHER" inference fallback (ast_java.infer_role)
53
+ # Framework — hardcoded literals across ast_java.py / build_ast_graph.py
54
+ # SourceLayer — exhaustive classifier build_ast_graph._client_source_layer / _producer_source_layer
55
+ # ClientKind — VALID_CLIENT_KINDS in java_ontology.py (every client validated at index time)
56
+ # ProducerKind — VALID_PRODUCER_KINDS in java_ontology.py (every producer validated at index time)
57
+ # Keep these in sync with the indexing-side taxonomies if they change.
58
+ Role = Literal[
59
+ "CONTROLLER", "SERVICE", "REPOSITORY", "COMPONENT", "CONFIG",
60
+ "ENTITY", "CLIENT", "MAPPER", "DTO", "OTHER",
61
+ ]
62
+ Framework = Literal["spring_mvc", "webflux", "kafka", "rabbitmq", "jms", "stream", "feign", ""]
63
+ SourceLayer = Literal["builtin", "layer_a_meta", "layer_b_ann", "layer_b_fqn", "layer_c_source"]
64
+ ClientKind = Literal["feign_method", "rest_template", "web_client"]
65
+ ProducerKind = Literal["kafka_send", "stream_bridge_send"]
66
+
51
67
  # Stored graph edge labels for one-hop neighbors. Composed DECLARES.* and OVERRIDDEN_BY.*
52
68
  # dot-keys are separate ComposedEdgeType literals (2-hop traversal). Stored OVERRIDES is an EdgeType.
53
69
  EdgeType = Literal[
@@ -133,21 +149,30 @@ class NodeFilter(BaseModel):
133
149
 
134
150
  microservice: str | None = None
135
151
  module: str | None = None
136
- source_layer: str | None = None
137
- role: str | None = None
138
- exclude_roles: list[str] | None = None
152
+ source_layer: SourceLayer | None = None
153
+ role: Role | None = None
154
+ exclude_roles: list[Role] | None = None
139
155
  annotation: str | None = None
140
156
  capability: str | None = None
141
157
  fqn_prefix: str | None = None
142
158
  symbol_kind: DeclarationSymbolKind | None = None
143
159
  symbol_kinds: list[DeclarationSymbolKind] | None = None
144
- http_method: str | None = None
160
+ http_method: str | None = Field(
161
+ default=None,
162
+ description="HTTP verb (commonly GET/POST/PUT/DELETE/PATCH; user route annotations may yield others).",
163
+ )
145
164
  path_prefix: str | None = None
146
- framework: str | None = None
147
- client_kind: str | None = None
165
+ framework: Framework | None = None
166
+ client_kind: ClientKind | None = Field(
167
+ default=None,
168
+ description="Outbound HTTP client kind: feign_method, rest_template, or web_client.",
169
+ )
148
170
  target_service: str | None = None
149
171
  target_path_prefix: str | None = None
150
- producer_kind: str | None = None
172
+ producer_kind: ProducerKind | None = Field(
173
+ default=None,
174
+ description="Outbound async producer kind: kafka_send or stream_bridge_send.",
175
+ )
151
176
  topic_prefix: str | None = None
152
177
 
153
178
 
@@ -157,9 +182,9 @@ class EdgeFilter(BaseModel):
157
182
  min_confidence: float | None = None
158
183
  exclude_strategies: list[str] | None = None
159
184
  include_strategies: list[str] | None = None
160
- callee_declaring_role: str | None = None
161
- callee_declaring_roles: list[str] | None = None
162
- exclude_callee_declaring_roles: list[str] | None = None
185
+ callee_declaring_role: Role | None = None
186
+ callee_declaring_roles: list[Role] | None = None
187
+ exclude_callee_declaring_roles: list[Role] | None = None
163
188
 
164
189
  @model_validator(mode="after")
165
190
  def _strategy_axes_mutually_exclusive(self) -> EdgeFilter:
server.py CHANGED
@@ -7,7 +7,7 @@ import os
7
7
  import sys
8
8
  import time
9
9
  from pathlib import Path
10
- from typing import Any, Literal
10
+ from typing import Literal
11
11
 
12
12
  import mcp_v2
13
13
  from index_common import SBERT_MODEL
@@ -31,14 +31,14 @@ from search_lancedb import TABLES
31
31
 
32
32
  _COCOINDEX_TARGET = "java_index_flow_lancedb.py:JavaCodeIndexLance"
33
33
  _INSTRUCTIONS = (
34
- "Java codebase graph navigator (LanceDB + Ladybug). "
34
+ "Java codebase graph navigator over an indexed Java codebase. "
35
35
  "Tools: search (NL/code locate), find (structured NodeFilter), describe (one node + edge_summary: stored edge-label counts and optional composed keys for type Symbols and override-axis virtual keys for method Symbols), "
36
36
  "neighbors (one hop; you MUST pass direction in|out AND edge_types list — no defaults), "
37
- "resolve (identifier-shaped lookup for symbol/route/client/producer — three statuses one|many/none). "
38
- "NodeFilter `filter` is a JSON object (preferred); a JSON-encoded string is also accepted as a fallback. "
37
+ "resolve (identifier-shaped lookup for symbol/route/client/producer — three statuses: one | many | none). "
39
38
  "Unknown filter keys and populated fields not applicable to the effective node kind fail with success=false and message. "
39
+ "Successful responses from any tool may include `hints_structured` (tool call suggestions with a `reason` field) and `advisories` (pure informational text) when hints are enabled. "
40
40
  "Edge labels: EXTENDS, IMPLEMENTS, INJECTS, OVERRIDES, DECLARES, DECLARES_CLIENT, DECLARES_PRODUCER, CALLS, EXPOSES, HTTP_CALLS, ASYNC_CALLS; "
41
- "type Symbols may also use composed neighbors edge_types DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES (out only). "
41
+ "type Symbols may also use composed neighbors edge_types DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES (out only, type Symbol origin). "
42
42
  "Reprocess/init, meta, tables, diagnose-ignore, analyze-pr: use java-codebase-rag CLI — not MCP."
43
43
  )
44
44
 
@@ -123,19 +123,15 @@ class ScopeManager:
123
123
  print("[scope] No microservice detected (at project root)", file=sys.stderr)
124
124
  print("[scope] Queries will span all microservices", file=sys.stderr)
125
125
 
126
- def apply_auto_scope(self, node_filter: dict[str, Any] | None) -> dict[str, Any] | None:
126
+ def apply_auto_scope(self, node_filter: mcp_v2.NodeFilter | None) -> mcp_v2.NodeFilter | None:
127
127
  """Apply auto-detected scope to filter if no explicit microservice is set."""
128
128
  if self.default_scope is None:
129
129
  return node_filter
130
- # Convert to dict for manipulation
131
130
  if node_filter is None:
132
- filter_dict = {}
133
- else:
134
- filter_dict = dict(node_filter)
135
- # Only inject if user didn't specify microservice
136
- if "microservice" not in filter_dict:
137
- filter_dict["microservice"] = self.default_scope
138
- return filter_dict
131
+ return mcp_v2.NodeFilter(microservice=self.default_scope)
132
+ if node_filter.microservice is None:
133
+ return node_filter.model_copy(update={"microservice": self.default_scope})
134
+ return node_filter
139
135
 
140
136
 
141
137
  def _resolve_lancedb_uri() -> str:
@@ -159,6 +155,27 @@ def _project_root() -> Path:
159
155
  return discovered if discovered is not None else Path.cwd().resolve()
160
156
 
161
157
 
158
+ def _source_root_for_operator_config() -> Path | None:
159
+ """``source_root`` arg to hand ``resolve_operator_config`` from the MCP server.
160
+
161
+ Returns ``JAVA_CODEBASE_RAG_SOURCE_ROOT`` when set (an explicit operator
162
+ override that wins and suppresses the YAML ``source_root`` field, exactly
163
+ like CLI ``--source-root``), otherwise ``None`` — so
164
+ ``resolve_operator_config`` runs its OWN walk-up discovery and HONORS the
165
+ YAML ``source_root`` field, matching the CLI (``init`` / ``increment`` /
166
+ ``reprocess``) path.
167
+
168
+ Do NOT pass ``_project_root()`` (the walk-up-discovered dir) here: a
169
+ non-``None`` value routes into the "explicit source root" branch that
170
+ skips the YAML ``source_root`` field, which made the MCP server and the
171
+ CLI resolve different ``source_root`` / ``index_dir`` from the same config
172
+ file (the init-vs-MCP index_dir divergence). ``_project_root()`` is kept
173
+ only for the ``_resolve_lancedb_uri()`` fallback below.
174
+ """
175
+ env = os.environ.get("JAVA_CODEBASE_RAG_SOURCE_ROOT", "").strip()
176
+ return Path(env).expanduser().resolve() if env else None
177
+
178
+
162
179
  def _cocoindex_subprocess_env(project_root: Path) -> dict[str, str]:
163
180
  sub_env = os.environ.copy()
164
181
  sub_env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] = str(project_root)
@@ -413,14 +430,15 @@ def create_mcp_server() -> FastMCP:
413
430
  @mcp.tool(
414
431
  name="search",
415
432
  description=(
416
- "Ranked chunk retrieval: `query` is opaque text (natural language or code fragments); "
417
- "results are score-ranked, not boolean-matched. Optional `filter` uses the same NodeFilter "
418
- "schema as `find` but only **symbol-applicable** fields apply (strict frame). Wildcards "
433
+ "Ranked chunk retrieval over content tables (java/sql/yaml); `query` is opaque text (natural language or code "
434
+ "fragments) and results are score-ranked, not boolean-matched. For graph-structured listing "
435
+ "(symbols/routes/clients/producers) use `find`, not `search`. Optional `filter` uses the same NodeFilter "
436
+ "schema as `find` but only **symbol-applicable** fields apply — others return success=false. Wildcards "
419
437
  "(`*`, `?`) in prefix fields are rejected—use ranked `query` text instead. There is **no** "
420
438
  "structured DSL inside `query`; structured predicates belong in `find`. "
421
439
  "For identifier-shaped lookups (FQN, id prefix, route/client identifiers, …), use `resolve` first; "
422
440
  "use `search` for natural-language or ranked fuzzy discovery. "
423
- "Successful responses echo `limit`/`offset` and may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text)."
441
+ "Successful responses echo `limit`/`offset`."
424
442
  ),
425
443
  )
426
444
  async def search(
@@ -431,7 +449,7 @@ def create_mcp_server() -> FastMCP:
431
449
  ),
432
450
  hybrid: bool = Field(
433
451
  default=False,
434
- description="If true, fuse FTS + vector (single-table java/sql/yaml only)",
452
+ description="If true, fuse FTS + vector. Requires a single table (java/sql/yaml); hybrid with table='all' returns success=false.",
435
453
  ),
436
454
  limit: int = Field(default=5, ge=1, le=50, description="Max hits to return"),
437
455
  offset: int = Field(default=0, ge=0, le=500, description="Skip this many hits (pagination)"),
@@ -439,11 +457,11 @@ def create_mcp_server() -> FastMCP:
439
457
  default=None,
440
458
  description="Substring match on file path (pre-filter from index)",
441
459
  ),
442
- filter: dict[str, Any] | str | None = Field(
460
+ filter: mcp_v2.NodeFilter | None = Field(
443
461
  default=None,
444
462
  description=(
445
- "Optional NodeFilter post-filter on symbol-oriented hit rows. Unknown keys or populated fields not "
446
- "applicable to symbols return success=false. Prefer a JSON object; a JSON-encoded string is accepted."
463
+ "Optional NodeFilter post-filter on symbol-oriented hit rows. An empty object or omitted means no "
464
+ "predicate. Unknown keys or populated fields not applicable to symbols return success=false."
447
465
  ),
448
466
  ),
449
467
  ) -> mcp_v2.SearchOutput:
@@ -468,9 +486,11 @@ def create_mcp_server() -> FastMCP:
468
486
  "**route** — microservice, module, http_method, path_prefix, framework; **client** — microservice, module, "
469
487
  "source_layer, client_kind, target_service, target_path_prefix, http_method; **producer** — microservice, "
470
488
  "module, source_layer, producer_kind, topic_prefix. "
489
+ "`role` is singular and `exclude_roles` plural; `capability` is a functional tag assigned during indexing. "
490
+ "`fqn_prefix` is a prefix predicate — for exact FQN or id lookup use `resolve`/`describe`. "
471
491
  "Wildcards in prefix fields are rejected. An empty filter (`{}`) or `filter=None` means no predicate (all nodes of "
472
492
  "that kind; use pagination). Unknown keys or inapplicable populated fields return success=false. "
473
- "Successful responses echo `limit`/`offset` and may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text)."
493
+ "Successful responses echo `limit`/`offset`."
474
494
  ),
475
495
  )
476
496
  async def find(
@@ -481,11 +501,10 @@ def create_mcp_server() -> FastMCP:
481
501
  "'producer' = outbound async producers."
482
502
  )
483
503
  ),
484
- filter: dict[str, Any] | str = Field(
504
+ filter: mcp_v2.NodeFilter = Field(
485
505
  ...,
486
506
  description=(
487
- "Required NodeFilter dict (extra keys forbidden). Fields must be applicable to `kind`. "
488
- "Prefer a JSON object; a JSON-encoded string is accepted."
507
+ "Required NodeFilter object (extra keys forbidden). Fields must be applicable to `kind`."
489
508
  ),
490
509
  ),
491
510
  limit: int = Field(default=25, ge=1, le=500, description="Max nodes to return"),
@@ -497,17 +516,14 @@ def create_mcp_server() -> FastMCP:
497
516
  @mcp.tool(
498
517
  name="describe",
499
518
  description=(
500
- "Full node record plus `edge_summary` (in/out counts per stored edge label, plus optional describe-time keys). Type Symbols may add "
501
- "composed keys DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, and DECLARES.EXPOSES (navigable on type Symbols via neighbors, out only); "
502
- "method Symbols may add override-axis virtual keys (OVERRIDDEN_BY, OVERRIDDEN_BY.DECLARES_CLIENT, OVERRIDDEN_BY.DECLARES_PRODUCER, "
503
- "OVERRIDDEN_BY.EXPOSES, plus an `OVERRIDES` map entry that merges stored `[:OVERRIDES]` counts with the dispatch-up rollup per direction). "
504
- "Override-axis virtual keys are navigable via neighbors on non-static method Symbol origins "
505
- "(out only; composed keys include via_id in attrs). The stored `OVERRIDES` relationship "
506
- "is also a normal edge label (e.g. direction in from declaration toward overriders). "
519
+ "Full node record plus `edge_summary` (in/out counts per stored edge label). For type Symbols, `edge_summary` "
520
+ "also exposes composed keys (DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES); for "
521
+ "non-static method Symbols it adds override-axis virtual keys (OVERRIDDEN_BY and its composed forms, plus an "
522
+ "`OVERRIDES` map merging stored `[:OVERRIDES]` counts with the dispatch-up rollup). These composed/override keys "
523
+ "are out-only and navigable via `neighbors`; the stored `OVERRIDES` is also a normal edge label (in toward declaration). "
507
524
  "Pass `id` for any kind, or exact `fqn` for Symbol lookup (`id` wins when both are set). "
508
525
  "`describe(fqn=…)` keeps the first graph row when multiple symbols share that FQN; when an FQN may collide, "
509
- "prefer `resolve(identifier=…, hint_kind='symbol')` first, then `describe(id=…)` on the chosen node. "
510
- "Successful responses may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text)."
526
+ "prefer `resolve(identifier=…, hint_kind='symbol')` first, then `describe(id=…)` on the chosen node."
511
527
  ),
512
528
  )
513
529
  async def describe(
@@ -531,18 +547,19 @@ def create_mcp_server() -> FastMCP:
531
547
  @mcp.tool(
532
548
  name="neighbors",
533
549
  description=(
534
- "Graph walk: **direction** (`in` | `out`) and non-empty **edge_types** are required (stored labels for one hop; "
535
- "type Symbol origins may also pass composed DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, or DECLARES.EXPOSES "
536
- "for 2-hop member rollups; method Symbol origins may pass OVERRIDDEN_BY, OVERRIDDEN_BY.DECLARES_CLIENT, "
537
- "OVERRIDDEN_BY.DECLARES_PRODUCER, OVERRIDDEN_BY.EXPOSES for override-axis rollups — out only, via_id in "
538
- "attrs on composed keys). "
550
+ "Graph walk: **direction** (`in` | `out`) and non-empty **edge_types** are required (one hop over stored edge "
551
+ "labels; type/method Symbol origins may also pass composed or override-axis keys — see `edge_types`). From a "
552
+ "type Symbol, `direction='out'` with EXPOSES yields route nodes and HTTP_CALLS/ASYNC_CALLS yield client/producer "
553
+ "nodes; `direction='in'` reverses each relationship. "
554
+ "`direction` and `edge_types` have no defaults; an empty `edge_types` fails. The CALLS-only features — "
555
+ "`edge_filter`, `include_unresolved`, `dedup_calls` — each require `edge_types=['CALLS']`; `edge_filter` and "
556
+ "`include_unresolved` are mutually exclusive. Violating a precondition (wrong CALLS context, composed/override "
557
+ "keys on an ineligible origin or with `direction='in'`, wildcards in prefix fields, unknown filter keys) returns "
558
+ "success=false with a message; `dedup_calls` with other edge_types is a silent no-op. "
539
559
  "Optional `filter` applies to each neighbor endpoint row; populated fields must be applicable to that "
540
- "neighbor's kind—mixed-kind result sets fail on the first inapplicable neighbor (strict frame). "
541
- "Optional `edge_filter` requires edge_types=['CALLS'] only (no composed dot-keys or extra stored "
542
- "labels); projects the ordered CALLS stream by edge attributes (min_confidence, strategies, "
543
- "callee_declaring_role). Wildcards in prefix fields are rejected. Unknown filter keys return success=false. "
544
- "Successful responses echo `requested_edge_types` and may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text). "
545
- "Each edge's `attrs.strategy` indicates resolution quality (brownfield/fallback vs primary paths)."
560
+ "neighbor's kind—mixed-kind result sets fail on the first inapplicable neighbor (per-neighbor strict frame). "
561
+ "Each edge's `attrs.strategy` indicates resolution quality (brownfield/fallback vs primary paths). "
562
+ "Successful responses echo `requested_edge_types`."
546
563
  ),
547
564
  )
548
565
  async def neighbors(
@@ -573,19 +590,19 @@ def create_mcp_server() -> FastMCP:
573
590
  le=1000,
574
591
  description="Skip this many edges after merge (pagination)",
575
592
  ),
576
- filter: dict[str, Any] | str | None = Field(
593
+ filter: mcp_v2.NodeFilter | None = Field(
577
594
  default=None,
578
595
  description=(
579
- "Optional NodeFilter on the neighbor node. Same applicability rules as `find` for that node's kind. "
580
- "Prefer a JSON object; a JSON-encoded string is accepted."
596
+ "Optional NodeFilter on the neighbor node. An empty object or omitted means no predicate. "
597
+ "Same applicability rules as `find` for that node's kind."
581
598
  ),
582
599
  ),
583
- edge_filter: dict[str, Any] | str | None = Field(
600
+ edge_filter: mcp_v2.EdgeFilter | None = Field(
584
601
  default=None,
585
602
  description=(
586
603
  "Optional EdgeFilter on CALLS edge attributes (edge_types=['CALLS'] only). Use "
587
604
  "callee_declaring_role for callee stereotype projection — not NodeFilter.role on method neighbors. "
588
- "Mutually exclusive with include_unresolved. Prefer a JSON object; a JSON-encoded string is accepted."
605
+ "Mutually exclusive with include_unresolved."
589
606
  ),
590
607
  ),
591
608
  include_unresolved: bool = Field(
@@ -627,10 +644,11 @@ def create_mcp_server() -> FastMCP:
627
644
  "status=one (single node), many (≥2 ranked candidates with reason), or none "
628
645
  "(no match — fall back to search(query=...) for natural language or fuzzy text). "
629
646
  "Optional hint_kind narrows to symbol, route, client, or producer. "
630
- "Successful responses may include hints_structured (tool call suggestions with `reason` field) and advisories (pure informational text) — same contract as other v2 tools. "
631
647
  "Malformed empty/whitespace identifier returns success=false. "
632
648
  "Examples: resolve('com.foo.Bar', hint_kind='symbol'); "
633
649
  "resolve('GET /api/v1/customers', hint_kind='route'); "
650
+ "resolve('PaymentClient', hint_kind='client'); "
651
+ "resolve('order.created', hint_kind='producer'); "
634
652
  "resolve('the client that handles assignments') → none (use search instead)."
635
653
  ),
636
654
  )
@@ -657,7 +675,7 @@ def main() -> None:
657
675
  # Load YAML config and apply embedding settings to environment
658
676
  # This ensures SBERT_MODEL and SBERT_DEVICE from .java-codebase-rag.yml are available
659
677
  # before any tool handler runs (same behavior as CLI path)
660
- cfg = resolve_operator_config(source_root=_project_root())
678
+ cfg = resolve_operator_config(source_root=_source_root_for_operator_config())
661
679
  cfg.apply_to_os_environ()
662
680
  mcp_v2.set_hints_enabled(cfg.hints_enabled)
663
681
 
@@ -1,3 +0,0 @@
1
- [console_scripts]
2
- java-codebase-rag = java_codebase_rag.cli:main
3
- java-codebase-rag-mcp = server:main