java-codebase-rag 0.6.0.tar.gz → 0.6.1.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (89)
  1. {java_codebase_rag-0.6.0/java_codebase_rag.egg-info → java_codebase_rag-0.6.1}/PKG-INFO +2 -1
  2. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/ast_java.py +23 -6
  3. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/build_ast_graph.py +11 -4
  4. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag/config.py +12 -2
  5. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1/java_codebase_rag.egg-info}/PKG-INFO +2 -1
  6. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag.egg-info/requires.txt +1 -0
  7. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_ontology.py +4 -1
  8. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/mcp_v2.py +35 -10
  9. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/pyproject.toml +2 -1
  10. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/server.py +71 -53
  11. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_brownfield_clients.py +57 -0
  12. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_config.py +51 -0
  13. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_incremental_graph.py +140 -0
  14. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_java_codebase_rag_cli.py +55 -0
  15. java_codebase_rag-0.6.1/tests/test_mcp_server_project_root.py +81 -0
  16. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_microservice_scope.py +20 -13
  17. java_codebase_rag-0.6.0/tests/test_mcp_server_project_root.py +0 -25
  18. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/LICENSE +0 -0
  19. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/README.md +0 -0
  20. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/brownfield_events.py +0 -0
  21. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/chunk_heuristics.py +0 -0
  22. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/graph_enrich.py +0 -0
  23. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/index_common.py +0 -0
  24. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag/__init__.py +0 -0
  25. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag/_fdlimit.py +0 -0
  26. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag/cli.py +0 -0
  27. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag/cli_format.py +0 -0
  28. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag/cli_progress.py +0 -0
  29. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag/install_data/agents/explorer-rag-enhanced.md +0 -0
  30. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag/install_data/skills/explore-codebase/SKILL.md +0 -0
  31. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag/installer.py +0 -0
  32. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag/lance_optimize.py +0 -0
  33. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag/pipeline.py +0 -0
  34. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag.egg-info/SOURCES.txt +0 -0
  35. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag.egg-info/dependency_links.txt +0 -0
  36. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag.egg-info/entry_points.txt +0 -0
  37. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_codebase_rag.egg-info/top_level.txt +0 -0
  38. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_index_flow_lancedb.py +0 -0
  39. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/java_index_v1_common.py +0 -0
  40. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/ladybug_queries.py +0 -0
  41. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/mcp_hints.py +0 -0
  42. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/path_filtering.py +0 -0
  43. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/pr_analysis.py +0 -0
  44. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/search_lancedb.py +0 -0
  45. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/setup.cfg +0 -0
  46. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_agent_skills_static.py +0 -0
  47. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_assign_endpoint_client_extraction.py +0 -0
  48. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_ast_graph_build.py +0 -0
  49. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_ast_java_calls.py +0 -0
  50. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_ast_java_capabilities.py +0 -0
  51. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_bank_chat_brownfield_integration.py +0 -0
  52. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_brownfield_events.py +0 -0
  53. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_brownfield_overrides.py +0 -0
  54. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_brownfield_routes.py +0 -0
  55. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_call_edge_matching.py +0 -0
  56. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_call_edges_e2e.py +0 -0
  57. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_call_graph_receiver_resolution.py +0 -0
  58. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_call_graph_smoke_roundtrip.py +0 -0
  59. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_call_invariant.py +0 -0
  60. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_cli_progress_stdout_invariant.py +0 -0
  61. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_cli_quiet_parity.py +0 -0
  62. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_client_hint_recovery.py +0 -0
  63. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_client_node_extraction.py +0 -0
  64. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_client_role_rename.py +0 -0
  65. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_cross_service_resolution_flag.py +0 -0
  66. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_edge_navigation_doc.py +0 -0
  67. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_fd_limit.py +0 -0
  68. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_feign_not_exposer.py +0 -0
  69. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_graph_enrich.py +0 -0
  70. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_installer.py +0 -0
  71. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_installer_integration.py +0 -0
  72. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_ladybug_queries.py +0 -0
  73. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_lance_optimize.py +0 -0
  74. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_lancedb_e2e.py +0 -0
  75. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_mcp_hints.py +0 -0
  76. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_mcp_tools.py +0 -0
  77. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_mcp_v2.py +0 -0
  78. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_mcp_v2_compose.py +0 -0
  79. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_meta_chain_core.py +0 -0
  80. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_outgoing_call_extraction.py +0 -0
  81. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_packaging_metadata.py +0 -0
  82. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_path_filtering.py +0 -0
  83. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_pr_analysis.py +0 -0
  84. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_resolve_routes_messaging_layer_c.py +0 -0
  85. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_route_extraction.py +0 -0
  86. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_schema_consistency.py +0 -0
  87. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_search_lancedb.py +0 -0
  88. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_search_lancedb_capability.py +0 -0
  89. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.1}/tests/test_string_value_atoms.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
@@ -35,6 +35,7 @@ Requires-Dist: unidiff<1,>=0.7.3
35
35
  Provides-Extra: dev
36
36
  Requires-Dist: pytest>=7; extra == "dev"
37
37
  Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
38
+ Requires-Dist: pytest-xdist>=3; extra == "dev"
38
39
  Requires-Dist: ruff>=0.4; extra == "dev"
39
40
  Dynamic: license-file
40
41
 
@@ -13,6 +13,7 @@ Python with no tree-sitter dependency.
13
13
  from __future__ import annotations
14
14
 
15
15
  import posixpath
16
+ import sys
16
17
  from dataclasses import dataclass, field
17
18
  from functools import lru_cache
18
19
  from typing import Iterable
@@ -1642,9 +1643,17 @@ def _parse_codebase_http_client_annotation(
1642
1643
  pairs, _ = _annotation_kv_nodes(ann, src)
1643
1644
  client_kind = ""
1644
1645
  if "clientKind" in pairs:
1645
- val, _kind = _annotation_value(pairs["clientKind"], src)
1646
- if val and _kind == "enum":
1647
- client_kind = str(val)
1646
+ val, vkind = _annotation_value(pairs["clientKind"], src)
1647
+ if val and vkind == "enum":
1648
+ kind_val = str(val)
1649
+ from java_ontology import VALID_CLIENT_KINDS # deferred: java_ontology imports ast_java
1650
+ if kind_val in VALID_CLIENT_KINDS:
1651
+ client_kind = kind_val
1652
+ else:
1653
+ print(
1654
+ f"[lancedb-mcp] CodebaseHttpClient: invalid clientKind {kind_val!r} — ignored",
1655
+ file=sys.stderr,
1656
+ )
1648
1657
  target_service = ""
1649
1658
  if "targetService" in pairs:
1650
1659
  atoms = _string_value_atoms(pairs["targetService"], src, ctx)
@@ -1714,9 +1723,17 @@ def _parse_codebase_producer_annotation(
1714
1723
  client_kind = "kafka_send"
1715
1724
  kind_node = pairs.get("producerKind") or pairs.get("clientKind")
1716
1725
  if kind_node is not None:
1717
- val, _kind = _annotation_value(kind_node, src)
1718
- if val and _kind == "enum":
1719
- client_kind = str(val)
1726
+ val, vkind = _annotation_value(kind_node, src)
1727
+ if val and vkind == "enum":
1728
+ kind_val = str(val)
1729
+ from java_ontology import VALID_PRODUCER_KINDS # deferred: java_ontology imports ast_java
1730
+ if kind_val in VALID_PRODUCER_KINDS:
1731
+ client_kind = kind_val
1732
+ else:
1733
+ print(
1734
+ f"[lancedb-mcp] CodebaseProducer: invalid producerKind {kind_val!r} — ignored",
1735
+ file=sys.stderr,
1736
+ )
1720
1737
  topic = ""
1721
1738
  if "topic" in pairs:
1722
1739
  atoms = _string_value_atoms(pairs["topic"], src, ctx)
@@ -3668,10 +3668,17 @@ def incremental_rebuild(
3668
3668
 
3669
3669
 
3670
3670
  def _init_hash_tracker(source_root: Path, ladybug_path: Path) -> int:
3671
- """Initialize hash tracker for all Java files. Returns number of files hashed."""
3671
+ """Initialize hash tracker for all Java files. Returns number of files hashed.
3672
+
3673
+ Called right after a full graph rebuild (``write_ladybug``), so the store must
3674
+ mirror exactly the files that were just indexed. We deliberately do NOT
3675
+ ``load()`` the existing store: ``update`` re-hashes every current file anyway,
3676
+ and preserving old entries would leave stale hashes for files that no longer
3677
+ exist (deleted or now-ignored). Those ghosts would be re-detected as "removed"
3678
+ on every subsequent ``increment``, sustaining an endless full-rebuild loop.
3679
+ """
3672
3680
  index_dir = ladybug_path.parent
3673
3681
  tracker = FileHashTracker(index_dir)
3674
- tracker.load()
3675
3682
  ignore = LayeredIgnore(source_root)
3676
3683
  all_files: set[str] = set()
3677
3684
  source_root_resolved = source_root.resolve()
@@ -3742,7 +3749,7 @@ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTa
3742
3749
 
3743
3750
  # Write declares_client edges
3744
3751
  for row in tables.declares_client_rows:
3745
- source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="")).file_path
3752
+ source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="", node_id="")).file_path
3746
3753
  conn.execute(_CREATE_DECLARES_CLIENT, {
3747
3754
  "sid": row.symbol_id,
3748
3755
  "cid": row.client_id,
@@ -3753,7 +3760,7 @@ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTa
3753
3760
 
3754
3761
  # Write declares_producer edges
3755
3762
  for row in tables.declares_producer_rows:
3756
- source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="")).file_path
3763
+ source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="", node_id="")).file_path
3757
3764
  conn.execute(_CREATE_DECLARES_PRODUCER, {
3758
3765
  "sid": row.symbol_id,
3759
3766
  "pid": row.producer_id,
@@ -306,9 +306,19 @@ def _pick_bool(
306
306
  def _resolve_index_dir_path(
307
307
  *,
308
308
  source_root: Path,
309
+ config_dir: Path,
309
310
  cli_index_dir: str | None,
310
311
  yaml_dict: dict[str, Any],
311
312
  ) -> tuple[Path, SettingSource]:
313
+ # Bases for relative paths:
314
+ # - YAML ``index_dir`` -> the config file's directory (``config_dir``),
315
+ # the SAME base used for YAML ``source_root``. Paths written in the
316
+ # config file are relative to the file, so both keys stay consistent.
317
+ # - CLI / env ``index_dir`` -> ``source_root`` (unchanged). These are not
318
+ # "in the config file"; preserving the existing base avoids a semantics
319
+ # change for operators who pass ``--index-dir`` on the command line.
320
+ # - Default ``./.java-codebase-rag`` -> ``source_root`` so the index sits
321
+ # beside the Java tree (the layout ``discover_project_root`` anchors on).
312
322
  raw_cli = cli_index_dir.strip() if isinstance(cli_index_dir, str) else None
313
323
  if raw_cli:
314
324
  p = Path(raw_cli).expanduser()
@@ -324,7 +334,7 @@ def _resolve_index_dir_path(
324
334
  idx = yaml_dict.get("index_dir")
325
335
  if isinstance(idx, str) and idx.strip():
326
336
  p = Path(idx.strip()).expanduser()
327
- out = p.resolve() if p.is_absolute() else (source_root / p).resolve()
337
+ out = p.resolve() if p.is_absolute() else (config_dir / p).resolve()
328
338
  return out, "yaml"
329
339
 
330
340
  return (source_root / ".java-codebase-rag").resolve(), "default"
@@ -368,7 +378,7 @@ def resolve_operator_config(
368
378
  root = config_dir
369
379
 
370
380
  index_dir, index_src = _resolve_index_dir_path(
371
- source_root=root, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
381
+ source_root=root, config_dir=config_dir, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
372
382
  )
373
383
  model, model_src = _pick_str(
374
384
  cli_val=cli_embedding_model,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.6.0
3
+ Version: 0.6.1
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
@@ -35,6 +35,7 @@ Requires-Dist: unidiff<1,>=0.7.3
35
35
  Provides-Extra: dev
36
36
  Requires-Dist: pytest>=7; extra == "dev"
37
37
  Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
38
+ Requires-Dist: pytest-xdist>=3; extra == "dev"
38
39
  Requires-Dist: ruff>=0.4; extra == "dev"
39
40
  Dynamic: license-file
40
41
 
@@ -16,4 +16,5 @@ unidiff<1,>=0.7.3
16
16
  [dev]
17
17
  pytest>=7
18
18
  pytest-asyncio>=0.21
19
+ pytest-xdist>=3
19
20
  ruff>=0.4
@@ -15,7 +15,10 @@ from ast_java import (
15
15
  _TYPE_ANN_TO_CAPABILITY,
16
16
  )
17
17
 
18
- # Roles: Spring stereotype values plus DTO from `infer_role_for_type`.
18
+ # Roles assignable by indexing: Spring stereotype values plus DTO. ``OTHER`` is the
19
+ # built-in inference fallback (ast_java.infer_role when nothing matches) and is
20
+ # deliberately excluded here — it is a read-side value (the mcp_v2 ``Role`` enum
21
+ # includes it) but not a role a user may set via @CodebaseRole / role_overrides.
19
22
  VALID_ROLES: frozenset[str] = frozenset((*ROLE_ANNOTATIONS.values(), "DTO"))
20
23
 
21
24
  VALID_CAPABILITIES: frozenset[str] = frozenset(
@@ -48,6 +48,22 @@ def _hints_or_skip(tool: str, payload: dict) -> tuple[list, list]:
48
48
 
49
49
  DeclarationSymbolKind = Literal["class", "interface", "enum", "record", "annotation", "method", "constructor"]
50
50
 
51
+ # Closed value taxonomies surfaced to MCP consumers as enums. Sources of truth:
52
+ # Role — VALID_ROLES in java_ontology.py + the "OTHER" inference fallback (ast_java.infer_role)
53
+ # Framework — hardcoded literals across ast_java.py / build_ast_graph.py
54
+ # SourceLayer — exhaustive classifier build_ast_graph._client_source_layer / _producer_source_layer
55
+ # ClientKind — VALID_CLIENT_KINDS in java_ontology.py (every producer validated at index time)
56
+ # ProducerKind — VALID_PRODUCER_KINDS in java_ontology.py (every producer validated at index time)
57
+ # Keep these in sync with the indexing-side taxonomies if they change.
58
+ Role = Literal[
59
+ "CONTROLLER", "SERVICE", "REPOSITORY", "COMPONENT", "CONFIG",
60
+ "ENTITY", "CLIENT", "MAPPER", "DTO", "OTHER",
61
+ ]
62
+ Framework = Literal["spring_mvc", "webflux", "kafka", "rabbitmq", "jms", "stream", "feign", ""]
63
+ SourceLayer = Literal["builtin", "layer_a_meta", "layer_b_ann", "layer_b_fqn", "layer_c_source"]
64
+ ClientKind = Literal["feign_method", "rest_template", "web_client"]
65
+ ProducerKind = Literal["kafka_send", "stream_bridge_send"]
66
+
51
67
  # Stored graph edge labels for one-hop neighbors. Composed DECLARES.* and OVERRIDDEN_BY.*
52
68
  # dot-keys are separate ComposedEdgeType literals (2-hop traversal). Stored OVERRIDES is an EdgeType.
53
69
  EdgeType = Literal[
@@ -133,21 +149,30 @@ class NodeFilter(BaseModel):
133
149
 
134
150
  microservice: str | None = None
135
151
  module: str | None = None
136
- source_layer: str | None = None
137
- role: str | None = None
138
- exclude_roles: list[str] | None = None
152
+ source_layer: SourceLayer | None = None
153
+ role: Role | None = None
154
+ exclude_roles: list[Role] | None = None
139
155
  annotation: str | None = None
140
156
  capability: str | None = None
141
157
  fqn_prefix: str | None = None
142
158
  symbol_kind: DeclarationSymbolKind | None = None
143
159
  symbol_kinds: list[DeclarationSymbolKind] | None = None
144
- http_method: str | None = None
160
+ http_method: str | None = Field(
161
+ default=None,
162
+ description="HTTP verb (commonly GET/POST/PUT/DELETE/PATCH; user route annotations may yield others).",
163
+ )
145
164
  path_prefix: str | None = None
146
- framework: str | None = None
147
- client_kind: str | None = None
165
+ framework: Framework | None = None
166
+ client_kind: ClientKind | None = Field(
167
+ default=None,
168
+ description="Outbound HTTP client kind: feign_method, rest_template, or web_client.",
169
+ )
148
170
  target_service: str | None = None
149
171
  target_path_prefix: str | None = None
150
- producer_kind: str | None = None
172
+ producer_kind: ProducerKind | None = Field(
173
+ default=None,
174
+ description="Outbound async producer kind: kafka_send or stream_bridge_send.",
175
+ )
151
176
  topic_prefix: str | None = None
152
177
 
153
178
 
@@ -157,9 +182,9 @@ class EdgeFilter(BaseModel):
157
182
  min_confidence: float | None = None
158
183
  exclude_strategies: list[str] | None = None
159
184
  include_strategies: list[str] | None = None
160
- callee_declaring_role: str | None = None
161
- callee_declaring_roles: list[str] | None = None
162
- exclude_callee_declaring_roles: list[str] | None = None
185
+ callee_declaring_role: Role | None = None
186
+ callee_declaring_roles: list[Role] | None = None
187
+ exclude_callee_declaring_roles: list[Role] | None = None
163
188
 
164
189
  @model_validator(mode="after")
165
190
  def _strategy_axes_mutually_exclusive(self) -> EdgeFilter:
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "java-codebase-rag"
7
- version = "0.6.0"
7
+ version = "0.6.1"
8
8
  description = "MCP server for semantic + structural search over Java codebases"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -43,6 +43,7 @@ dependencies = [
43
43
  dev = [
44
44
  "pytest>=7",
45
45
  "pytest-asyncio>=0.21",
46
+ "pytest-xdist>=3",
46
47
  "ruff>=0.4",
47
48
  ]
48
49
 
@@ -7,7 +7,7 @@ import os
7
7
  import sys
8
8
  import time
9
9
  from pathlib import Path
10
- from typing import Any, Literal
10
+ from typing import Literal
11
11
 
12
12
  import mcp_v2
13
13
  from index_common import SBERT_MODEL
@@ -31,14 +31,14 @@ from search_lancedb import TABLES
31
31
 
32
32
  _COCOINDEX_TARGET = "java_index_flow_lancedb.py:JavaCodeIndexLance"
33
33
  _INSTRUCTIONS = (
34
- "Java codebase graph navigator (LanceDB + Ladybug). "
34
+ "Java codebase graph navigator over an indexed Java codebase. "
35
35
  "Tools: search (NL/code locate), find (structured NodeFilter), describe (one node + edge_summary: stored edge-label counts and optional composed keys for type Symbols and override-axis virtual keys for method Symbols), "
36
36
  "neighbors (one hop; you MUST pass direction in|out AND edge_types list — no defaults), "
37
- "resolve (identifier-shaped lookup for symbol/route/client/producer — three statuses one|many/none). "
38
- "NodeFilter `filter` is a JSON object (preferred); a JSON-encoded string is also accepted as a fallback. "
37
+ "resolve (identifier-shaped lookup for symbol/route/client/producer — three statuses: one | many | none). "
39
38
  "Unknown filter keys and populated fields not applicable to the effective node kind fail with success=false and message. "
39
+ "Successful responses from any tool may include `hints_structured` (tool call suggestions with a `reason` field) and `advisories` (pure informational text) when hints are enabled. "
40
40
  "Edge labels: EXTENDS, IMPLEMENTS, INJECTS, OVERRIDES, DECLARES, DECLARES_CLIENT, DECLARES_PRODUCER, CALLS, EXPOSES, HTTP_CALLS, ASYNC_CALLS; "
41
- "type Symbols may also use composed neighbors edge_types DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES (out only). "
41
+ "type Symbols may also use composed neighbors edge_types DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES (out only, type Symbol origin). "
42
42
  "Reprocess/init, meta, tables, diagnose-ignore, analyze-pr: use java-codebase-rag CLI — not MCP."
43
43
  )
44
44
 
@@ -123,19 +123,15 @@ class ScopeManager:
123
123
  print("[scope] No microservice detected (at project root)", file=sys.stderr)
124
124
  print("[scope] Queries will span all microservices", file=sys.stderr)
125
125
 
126
- def apply_auto_scope(self, node_filter: dict[str, Any] | None) -> dict[str, Any] | None:
126
+ def apply_auto_scope(self, node_filter: mcp_v2.NodeFilter | None) -> mcp_v2.NodeFilter | None:
127
127
  """Apply auto-detected scope to filter if no explicit microservice is set."""
128
128
  if self.default_scope is None:
129
129
  return node_filter
130
- # Convert to dict for manipulation
131
130
  if node_filter is None:
132
- filter_dict = {}
133
- else:
134
- filter_dict = dict(node_filter)
135
- # Only inject if user didn't specify microservice
136
- if "microservice" not in filter_dict:
137
- filter_dict["microservice"] = self.default_scope
138
- return filter_dict
131
+ return mcp_v2.NodeFilter(microservice=self.default_scope)
132
+ if node_filter.microservice is None:
133
+ return node_filter.model_copy(update={"microservice": self.default_scope})
134
+ return node_filter
139
135
 
140
136
 
141
137
  def _resolve_lancedb_uri() -> str:
@@ -159,6 +155,27 @@ def _project_root() -> Path:
159
155
  return discovered if discovered is not None else Path.cwd().resolve()
160
156
 
161
157
 
158
+ def _source_root_for_operator_config() -> Path | None:
159
+ """``source_root`` arg to hand ``resolve_operator_config`` from the MCP server.
160
+
161
+ Returns ``JAVA_CODEBASE_RAG_SOURCE_ROOT`` when set (an explicit operator
162
+ override that wins and suppresses the YAML ``source_root`` field, exactly
163
+ like CLI ``--source-root``), otherwise ``None`` — so
164
+ ``resolve_operator_config`` runs its OWN walk-up discovery and HONORS the
165
+ YAML ``source_root`` field, matching the CLI (``init`` / ``increment`` /
166
+ ``reprocess``) path.
167
+
168
+ Do NOT pass ``_project_root()`` (the walk-up-discovered dir) here: a
169
+ non-``None`` value routes into the "explicit source root" branch that
170
+ skips the YAML ``source_root`` field, which made the MCP server and the
171
+ CLI resolve different ``source_root`` / ``index_dir`` from the same config
172
+ file (the init-vs-MCP index_dir divergence). ``_project_root()`` is kept
173
+ only for the ``_resolve_lancedb_uri()`` fallback below.
174
+ """
175
+ env = os.environ.get("JAVA_CODEBASE_RAG_SOURCE_ROOT", "").strip()
176
+ return Path(env).expanduser().resolve() if env else None
177
+
178
+
162
179
  def _cocoindex_subprocess_env(project_root: Path) -> dict[str, str]:
163
180
  sub_env = os.environ.copy()
164
181
  sub_env["JAVA_CODEBASE_RAG_SOURCE_ROOT"] = str(project_root)
@@ -413,14 +430,15 @@ def create_mcp_server() -> FastMCP:
413
430
  @mcp.tool(
414
431
  name="search",
415
432
  description=(
416
- "Ranked chunk retrieval: `query` is opaque text (natural language or code fragments); "
417
- "results are score-ranked, not boolean-matched. Optional `filter` uses the same NodeFilter "
418
- "schema as `find` but only **symbol-applicable** fields apply (strict frame). Wildcards "
433
+ "Ranked chunk retrieval over content tables (java/sql/yaml); `query` is opaque text (natural language or code "
434
+ "fragments) and results are score-ranked, not boolean-matched. For graph-structured listing "
435
+ "(symbols/routes/clients/producers) use `find`, not `search`. Optional `filter` uses the same NodeFilter "
436
+ "schema as `find` but only **symbol-applicable** fields apply — others return success=false. Wildcards "
419
437
  "(`*`, `?`) in prefix fields are rejected—use ranked `query` text instead. There is **no** "
420
438
  "structured DSL inside `query`; structured predicates belong in `find`. "
421
439
  "For identifier-shaped lookups (FQN, id prefix, route/client identifiers, …), use `resolve` first; "
422
440
  "use `search` for natural-language or ranked fuzzy discovery. "
423
- "Successful responses echo `limit`/`offset` and may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text)."
441
+ "Successful responses echo `limit`/`offset`."
424
442
  ),
425
443
  )
426
444
  async def search(
@@ -431,7 +449,7 @@ def create_mcp_server() -> FastMCP:
431
449
  ),
432
450
  hybrid: bool = Field(
433
451
  default=False,
434
- description="If true, fuse FTS + vector (single-table java/sql/yaml only)",
452
+ description="If true, fuse FTS + vector. Requires a single table (java/sql/yaml); hybrid with table='all' returns success=false.",
435
453
  ),
436
454
  limit: int = Field(default=5, ge=1, le=50, description="Max hits to return"),
437
455
  offset: int = Field(default=0, ge=0, le=500, description="Skip this many hits (pagination)"),
@@ -439,11 +457,11 @@ def create_mcp_server() -> FastMCP:
439
457
  default=None,
440
458
  description="Substring match on file path (pre-filter from index)",
441
459
  ),
442
- filter: dict[str, Any] | str | None = Field(
460
+ filter: mcp_v2.NodeFilter | None = Field(
443
461
  default=None,
444
462
  description=(
445
- "Optional NodeFilter post-filter on symbol-oriented hit rows. Unknown keys or populated fields not "
446
- "applicable to symbols return success=false. Prefer a JSON object; a JSON-encoded string is accepted."
463
+ "Optional NodeFilter post-filter on symbol-oriented hit rows. An empty object or omitted means no "
464
+ "predicate. Unknown keys or populated fields not applicable to symbols return success=false."
447
465
  ),
448
466
  ),
449
467
  ) -> mcp_v2.SearchOutput:
@@ -468,9 +486,11 @@ def create_mcp_server() -> FastMCP:
468
486
  "**route** — microservice, module, http_method, path_prefix, framework; **client** — microservice, module, "
469
487
  "source_layer, client_kind, target_service, target_path_prefix, http_method; **producer** — microservice, "
470
488
  "module, source_layer, producer_kind, topic_prefix. "
489
+ "`role` is singular and `exclude_roles` plural; `capability` is a functional tag assigned during indexing. "
490
+ "`fqn_prefix` is a prefix predicate — for exact FQN or id lookup use `resolve`/`describe`. "
471
491
  "Wildcards in prefix fields are rejected. An empty filter (`{}`) or `filter=None` means no predicate (all nodes of "
472
492
  "that kind; use pagination). Unknown keys or inapplicable populated fields return success=false. "
473
- "Successful responses echo `limit`/`offset` and may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text)."
493
+ "Successful responses echo `limit`/`offset`."
474
494
  ),
475
495
  )
476
496
  async def find(
@@ -481,11 +501,10 @@ def create_mcp_server() -> FastMCP:
481
501
  "'producer' = outbound async producers."
482
502
  )
483
503
  ),
484
- filter: dict[str, Any] | str = Field(
504
+ filter: mcp_v2.NodeFilter = Field(
485
505
  ...,
486
506
  description=(
487
- "Required NodeFilter dict (extra keys forbidden). Fields must be applicable to `kind`. "
488
- "Prefer a JSON object; a JSON-encoded string is accepted."
507
+ "Required NodeFilter object (extra keys forbidden). Fields must be applicable to `kind`."
489
508
  ),
490
509
  ),
491
510
  limit: int = Field(default=25, ge=1, le=500, description="Max nodes to return"),
@@ -497,17 +516,14 @@ def create_mcp_server() -> FastMCP:
497
516
  @mcp.tool(
498
517
  name="describe",
499
518
  description=(
500
- "Full node record plus `edge_summary` (in/out counts per stored edge label, plus optional describe-time keys). Type Symbols may add "
501
- "composed keys DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, and DECLARES.EXPOSES (navigable on type Symbols via neighbors, out only); "
502
- "method Symbols may add override-axis virtual keys (OVERRIDDEN_BY, OVERRIDDEN_BY.DECLARES_CLIENT, OVERRIDDEN_BY.DECLARES_PRODUCER, "
503
- "OVERRIDDEN_BY.EXPOSES, plus an `OVERRIDES` map entry that merges stored `[:OVERRIDES]` counts with the dispatch-up rollup per direction). "
504
- "Override-axis virtual keys are navigable via neighbors on non-static method Symbol origins "
505
- "(out only; composed keys include via_id in attrs). The stored `OVERRIDES` relationship "
506
- "is also a normal edge label (e.g. direction in from declaration toward overriders). "
519
+ "Full node record plus `edge_summary` (in/out counts per stored edge label). For type Symbols, `edge_summary` "
520
+ "also exposes composed keys (DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, DECLARES.EXPOSES); for "
521
+ "non-static method Symbols it adds override-axis virtual keys (OVERRIDDEN_BY and its composed forms, plus an "
522
+ "`OVERRIDES` map merging stored `[:OVERRIDES]` counts with the dispatch-up rollup). These composed/override keys "
523
+ "are out-only and navigable via `neighbors`; the stored `OVERRIDES` is also a normal edge label (in toward declaration). "
507
524
  "Pass `id` for any kind, or exact `fqn` for Symbol lookup (`id` wins when both are set). "
508
525
  "`describe(fqn=…)` keeps the first graph row when multiple symbols share that FQN; when an FQN may collide, "
509
- "prefer `resolve(identifier=…, hint_kind='symbol')` first, then `describe(id=…)` on the chosen node. "
510
- "Successful responses may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text)."
526
+ "prefer `resolve(identifier=…, hint_kind='symbol')` first, then `describe(id=…)` on the chosen node."
511
527
  ),
512
528
  )
513
529
  async def describe(
@@ -531,18 +547,19 @@ def create_mcp_server() -> FastMCP:
531
547
  @mcp.tool(
532
548
  name="neighbors",
533
549
  description=(
534
- "Graph walk: **direction** (`in` | `out`) and non-empty **edge_types** are required (stored labels for one hop; "
535
- "type Symbol origins may also pass composed DECLARES.DECLARES_CLIENT, DECLARES.DECLARES_PRODUCER, or DECLARES.EXPOSES "
536
- "for 2-hop member rollups; method Symbol origins may pass OVERRIDDEN_BY, OVERRIDDEN_BY.DECLARES_CLIENT, "
537
- "OVERRIDDEN_BY.DECLARES_PRODUCER, OVERRIDDEN_BY.EXPOSES for override-axis rollups — out only, via_id in "
538
- "attrs on composed keys). "
550
+ "Graph walk: **direction** (`in` | `out`) and non-empty **edge_types** are required (one hop over stored edge "
551
+ "labels; type/method Symbol origins may also pass composed or override-axis keys — see `edge_types`). From a "
552
+ "type Symbol, `direction='out'` with EXPOSES yields route nodes and HTTP_CALLS/ASYNC_CALLS yield client/producer "
553
+ "nodes; `direction='in'` reverses each relationship. "
554
+ "`direction` and `edge_types` have no defaults; an empty `edge_types` fails. The CALLS-only features — "
555
+ "`edge_filter`, `include_unresolved`, `dedup_calls` — each require `edge_types=['CALLS']`; `edge_filter` and "
556
+ "`include_unresolved` are mutually exclusive. Violating a precondition (wrong CALLS context, composed/override "
557
+ "keys on an ineligible origin or with `direction='in'`, wildcards in prefix fields, unknown filter keys) returns "
558
+ "success=false with a message; `dedup_calls` with other edge_types is a silent no-op. "
539
559
  "Optional `filter` applies to each neighbor endpoint row; populated fields must be applicable to that "
540
- "neighbor's kind—mixed-kind result sets fail on the first inapplicable neighbor (strict frame). "
541
- "Optional `edge_filter` requires edge_types=['CALLS'] only (no composed dot-keys or extra stored "
542
- "labels); projects the ordered CALLS stream by edge attributes (min_confidence, strategies, "
543
- "callee_declaring_role). Wildcards in prefix fields are rejected. Unknown filter keys return success=false. "
544
- "Successful responses echo `requested_edge_types` and may include `hints_structured` (tool call suggestions with `reason` field) and `advisories` (pure informational text). "
545
- "Each edge's `attrs.strategy` indicates resolution quality (brownfield/fallback vs primary paths)."
560
+ "neighbor's kind—mixed-kind result sets fail on the first inapplicable neighbor (per-neighbor strict frame). "
561
+ "Each edge's `attrs.strategy` indicates resolution quality (brownfield/fallback vs primary paths). "
562
+ "Successful responses echo `requested_edge_types`."
546
563
  ),
547
564
  )
548
565
  async def neighbors(
@@ -573,19 +590,19 @@ def create_mcp_server() -> FastMCP:
573
590
  le=1000,
574
591
  description="Skip this many edges after merge (pagination)",
575
592
  ),
576
- filter: dict[str, Any] | str | None = Field(
593
+ filter: mcp_v2.NodeFilter | None = Field(
577
594
  default=None,
578
595
  description=(
579
- "Optional NodeFilter on the neighbor node. Same applicability rules as `find` for that node's kind. "
580
- "Prefer a JSON object; a JSON-encoded string is accepted."
596
+ "Optional NodeFilter on the neighbor node. An empty object or omitted means no predicate. "
597
+ "Same applicability rules as `find` for that node's kind."
581
598
  ),
582
599
  ),
583
- edge_filter: dict[str, Any] | str | None = Field(
600
+ edge_filter: mcp_v2.EdgeFilter | None = Field(
584
601
  default=None,
585
602
  description=(
586
603
  "Optional EdgeFilter on CALLS edge attributes (edge_types=['CALLS'] only). Use "
587
604
  "callee_declaring_role for callee stereotype projection — not NodeFilter.role on method neighbors. "
588
- "Mutually exclusive with include_unresolved. Prefer a JSON object; a JSON-encoded string is accepted."
605
+ "Mutually exclusive with include_unresolved."
589
606
  ),
590
607
  ),
591
608
  include_unresolved: bool = Field(
@@ -627,10 +644,11 @@ def create_mcp_server() -> FastMCP:
627
644
  "status=one (single node), many (≥2 ranked candidates with reason), or none "
628
645
  "(no match — fall back to search(query=...) for natural language or fuzzy text). "
629
646
  "Optional hint_kind narrows to symbol, route, client, or producer. "
630
- "Successful responses may include hints_structured (tool call suggestions with `reason` field) and advisories (pure informational text) — same contract as other v2 tools. "
631
647
  "Malformed empty/whitespace identifier returns success=false. "
632
648
  "Examples: resolve('com.foo.Bar', hint_kind='symbol'); "
633
649
  "resolve('GET /api/v1/customers', hint_kind='route'); "
650
+ "resolve('PaymentClient', hint_kind='client'); "
651
+ "resolve('order.created', hint_kind='producer'); "
634
652
  "resolve('the client that handles assignments') → none (use search instead)."
635
653
  ),
636
654
  )
@@ -657,7 +675,7 @@ def main() -> None:
657
675
  # Load YAML config and apply embedding settings to environment
658
676
  # This ensures SBERT_MODEL and SBERT_DEVICE from .java-codebase-rag.yml are available
659
677
  # before any tool handler runs (same behavior as CLI path)
660
- cfg = resolve_operator_config(source_root=_project_root())
678
+ cfg = resolve_operator_config(source_root=_source_root_for_operator_config())
661
679
  cfg.apply_to_os_environ()
662
680
  mcp_v2.set_hints_enabled(cfg.hints_enabled)
663
681
 
@@ -254,6 +254,63 @@ http_client_overrides:
254
254
  assert _http_calls(db) == []
255
255
 
256
256
 
257
def _client_kinds(db_path: Path) -> list[str]:
    """Collect the ``client_kind`` value of every ``Client`` node.

    Opens the graph database at *db_path* read-only, runs a single MATCH
    over ``Client`` nodes and returns the first column of each row as a
    string. A falsy column value (e.g. a missing kind) is reported as ``""``.
    """
    database = ladybug.Database(str(db_path), read_only=True)
    connection = ladybug.Connection(database)
    result = connection.execute("MATCH (c:Client) RETURN c.client_kind AS client_kind")
    kinds: list[str] = []
    while True:
        # Drain the cursor row by row; stop as soon as it reports no more rows.
        if not result.has_next():
            return kinds
        row = result.get_next()
        kinds.append(str(row[0] or ""))
265
+
266
+
267
def _producer_kinds(db_path: Path) -> list[str]:
    """Collect the ``producer_kind`` value of every ``Producer`` node.

    Opens the graph database at *db_path* read-only, runs a single MATCH
    over ``Producer`` nodes and returns the first column of each row as a
    string. A falsy column value (e.g. a missing kind) is reported as ``""``.
    """
    database = ladybug.Database(str(db_path), read_only=True)
    connection = ladybug.Connection(database)
    result = connection.execute("MATCH (p:Producer) RETURN p.producer_kind AS producer_kind")
    kinds: list[str] = []
    while True:
        # Drain the cursor row by row; stop as soon as it reports no more rows.
        if not result.has_next():
            return kinds
        row = result.get_next()
        kinds.append(str(row[0] or ""))
275
+
276
+
277
def test_29a_unknown_source_client_kind_warns_and_ignored(tmp_path: Path) -> None:
    """An invalid in-source ``clientKind`` enum value is warned about and dropped.

    Builds a graph from one Java file whose ``@CodebaseHttpClient`` annotation
    uses ``CodebaseClientKind.bogus``, capturing stderr during the build.
    Expects the captured text to mention "invalid clientkind" (compared
    case-insensitively) and expects no Client node to carry ``bogus`` as its
    ``client_kind``.
    """
    source_text = (
        "package p; import com.example.rag.*; class X { "
        "@CodebaseHttpClient(clientKind=CodebaseClientKind.bogus, path=\"/bad\", method=CodebaseHttpMethod.GET) "
        "void m() {} }"
    )
    java = {"p/X.java": source_text}

    # The build reports problems on stderr, so capture it for inspection.
    captured = io.StringIO()
    with redirect_stderr(captured):
        db = _build(tmp_path, None, java)

    stderr_text = captured.getvalue().lower()
    assert "invalid clientkind" in stderr_text
    assert "bogus" not in _client_kinds(db)
293
+
294
+
295
def test_29b_unknown_source_producer_kind_warns_and_falls_back(tmp_path: Path) -> None:
    """An invalid in-source ``producerKind`` enum value warns and falls back.

    Builds a graph from one Java file whose ``@CodebaseProducer`` annotation
    uses ``CodebaseProducerKind.bogus``, capturing stderr during the build.
    Expects the captured text to mention "invalid producerkind" (compared
    case-insensitively), expects no Producer node to carry ``bogus`` as its
    ``producer_kind``, and expects ``kafka_send`` to be present instead.
    """
    source_text = (
        "package p; import com.example.rag.*; class X { "
        "@CodebaseProducer(topic=\"t\", producerKind=CodebaseProducerKind.bogus) void m() {} }"
    )
    java = {"p/X.java": source_text}

    # The build reports problems on stderr, so capture it for inspection.
    captured = io.StringIO()
    with redirect_stderr(captured):
        db = _build(tmp_path, None, java)

    stderr_text = captured.getvalue().lower()
    assert "invalid producerkind" in stderr_text

    producer_kinds = _producer_kinds(db)
    assert "bogus" not in producer_kinds
    assert "kafka_send" in producer_kinds
312
+
313
+
257
314
  def test_30_brownfield_percentage_counter(tmp_path: Path) -> None:
258
315
  java = {
259
316
  "p/X.java": (