java-codebase-rag 0.6.0__tar.gz → 0.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. {java_codebase_rag-0.6.0/java_codebase_rag.egg-info → java_codebase_rag-0.6.2}/PKG-INFO +2 -1
  2. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/ast_java.py +23 -6
  3. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/build_ast_graph.py +11 -4
  4. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/cli.py +18 -1
  5. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/config.py +66 -7
  6. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/installer.py +12 -2
  7. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2/java_codebase_rag.egg-info}/PKG-INFO +2 -1
  8. java_codebase_rag-0.6.2/java_codebase_rag.egg-info/entry_points.txt +3 -0
  9. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag.egg-info/requires.txt +1 -0
  10. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_ontology.py +4 -1
  11. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/mcp_v2.py +35 -10
  12. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/pyproject.toml +3 -2
  13. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/server.py +71 -53
  14. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_brownfield_clients.py +57 -0
  15. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_config.py +200 -0
  16. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_incremental_graph.py +140 -0
  17. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_installer.py +135 -0
  18. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_java_codebase_rag_cli.py +111 -0
  19. java_codebase_rag-0.6.2/tests/test_mcp_server_project_root.py +81 -0
  20. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_microservice_scope.py +20 -13
  21. java_codebase_rag-0.6.0/java_codebase_rag.egg-info/entry_points.txt +0 -3
  22. java_codebase_rag-0.6.0/tests/test_mcp_server_project_root.py +0 -25
  23. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/LICENSE +0 -0
  24. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/README.md +0 -0
  25. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/brownfield_events.py +0 -0
  26. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/chunk_heuristics.py +0 -0
  27. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/graph_enrich.py +0 -0
  28. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/index_common.py +0 -0
  29. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/__init__.py +0 -0
  30. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/_fdlimit.py +0 -0
  31. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/cli_format.py +0 -0
  32. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/cli_progress.py +0 -0
  33. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/install_data/agents/explorer-rag-enhanced.md +0 -0
  34. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/install_data/skills/explore-codebase/SKILL.md +0 -0
  35. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/lance_optimize.py +0 -0
  36. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/pipeline.py +0 -0
  37. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag.egg-info/SOURCES.txt +0 -0
  38. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag.egg-info/dependency_links.txt +0 -0
  39. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag.egg-info/top_level.txt +0 -0
  40. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_index_flow_lancedb.py +0 -0
  41. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_index_v1_common.py +0 -0
  42. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/ladybug_queries.py +0 -0
  43. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/mcp_hints.py +0 -0
  44. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/path_filtering.py +0 -0
  45. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/pr_analysis.py +0 -0
  46. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/search_lancedb.py +0 -0
  47. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/setup.cfg +0 -0
  48. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_agent_skills_static.py +0 -0
  49. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_assign_endpoint_client_extraction.py +0 -0
  50. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_ast_graph_build.py +0 -0
  51. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_ast_java_calls.py +0 -0
  52. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_ast_java_capabilities.py +0 -0
  53. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_bank_chat_brownfield_integration.py +0 -0
  54. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_brownfield_events.py +0 -0
  55. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_brownfield_overrides.py +0 -0
  56. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_brownfield_routes.py +0 -0
  57. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_call_edge_matching.py +0 -0
  58. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_call_edges_e2e.py +0 -0
  59. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_call_graph_receiver_resolution.py +0 -0
  60. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_call_graph_smoke_roundtrip.py +0 -0
  61. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_call_invariant.py +0 -0
  62. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_cli_progress_stdout_invariant.py +0 -0
  63. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_cli_quiet_parity.py +0 -0
  64. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_client_hint_recovery.py +0 -0
  65. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_client_node_extraction.py +0 -0
  66. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_client_role_rename.py +0 -0
  67. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_cross_service_resolution_flag.py +0 -0
  68. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_edge_navigation_doc.py +0 -0
  69. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_fd_limit.py +0 -0
  70. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_feign_not_exposer.py +0 -0
  71. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_graph_enrich.py +0 -0
  72. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_installer_integration.py +0 -0
  73. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_ladybug_queries.py +0 -0
  74. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_lance_optimize.py +0 -0
  75. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_lancedb_e2e.py +0 -0
  76. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_mcp_hints.py +0 -0
  77. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_mcp_tools.py +0 -0
  78. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_mcp_v2.py +0 -0
  79. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_mcp_v2_compose.py +0 -0
  80. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_meta_chain_core.py +0 -0
  81. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_outgoing_call_extraction.py +0 -0
  82. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_packaging_metadata.py +0 -0
  83. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_path_filtering.py +0 -0
  84. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_pr_analysis.py +0 -0
  85. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_resolve_routes_messaging_layer_c.py +0 -0
  86. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_route_extraction.py +0 -0
  87. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_schema_consistency.py +0 -0
  88. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_search_lancedb.py +0 -0
  89. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_search_lancedb_capability.py +0 -0
  90. {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_string_value_atoms.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.6.0
3
+ Version: 0.6.2
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
@@ -35,6 +35,7 @@ Requires-Dist: unidiff<1,>=0.7.3
35
35
  Provides-Extra: dev
36
36
  Requires-Dist: pytest>=7; extra == "dev"
37
37
  Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
38
+ Requires-Dist: pytest-xdist>=3; extra == "dev"
38
39
  Requires-Dist: ruff>=0.4; extra == "dev"
39
40
  Dynamic: license-file
40
41
 
@@ -13,6 +13,7 @@ Python with no tree-sitter dependency.
13
13
  from __future__ import annotations
14
14
 
15
15
  import posixpath
16
+ import sys
16
17
  from dataclasses import dataclass, field
17
18
  from functools import lru_cache
18
19
  from typing import Iterable
@@ -1642,9 +1643,17 @@ def _parse_codebase_http_client_annotation(
1642
1643
  pairs, _ = _annotation_kv_nodes(ann, src)
1643
1644
  client_kind = ""
1644
1645
  if "clientKind" in pairs:
1645
- val, _kind = _annotation_value(pairs["clientKind"], src)
1646
- if val and _kind == "enum":
1647
- client_kind = str(val)
1646
+ val, vkind = _annotation_value(pairs["clientKind"], src)
1647
+ if val and vkind == "enum":
1648
+ kind_val = str(val)
1649
+ from java_ontology import VALID_CLIENT_KINDS # deferred: java_ontology imports ast_java
1650
+ if kind_val in VALID_CLIENT_KINDS:
1651
+ client_kind = kind_val
1652
+ else:
1653
+ print(
1654
+ f"[lancedb-mcp] CodebaseHttpClient: invalid clientKind {kind_val!r} — ignored",
1655
+ file=sys.stderr,
1656
+ )
1648
1657
  target_service = ""
1649
1658
  if "targetService" in pairs:
1650
1659
  atoms = _string_value_atoms(pairs["targetService"], src, ctx)
@@ -1714,9 +1723,17 @@ def _parse_codebase_producer_annotation(
1714
1723
  client_kind = "kafka_send"
1715
1724
  kind_node = pairs.get("producerKind") or pairs.get("clientKind")
1716
1725
  if kind_node is not None:
1717
- val, _kind = _annotation_value(kind_node, src)
1718
- if val and _kind == "enum":
1719
- client_kind = str(val)
1726
+ val, vkind = _annotation_value(kind_node, src)
1727
+ if val and vkind == "enum":
1728
+ kind_val = str(val)
1729
+ from java_ontology import VALID_PRODUCER_KINDS # deferred: java_ontology imports ast_java
1730
+ if kind_val in VALID_PRODUCER_KINDS:
1731
+ client_kind = kind_val
1732
+ else:
1733
+ print(
1734
+ f"[lancedb-mcp] CodebaseProducer: invalid producerKind {kind_val!r} — ignored",
1735
+ file=sys.stderr,
1736
+ )
1720
1737
  topic = ""
1721
1738
  if "topic" in pairs:
1722
1739
  atoms = _string_value_atoms(pairs["topic"], src, ctx)
@@ -3668,10 +3668,17 @@ def incremental_rebuild(
3668
3668
 
3669
3669
 
3670
3670
  def _init_hash_tracker(source_root: Path, ladybug_path: Path) -> int:
3671
- """Initialize hash tracker for all Java files. Returns number of files hashed."""
3671
+ """Initialize hash tracker for all Java files. Returns number of files hashed.
3672
+
3673
+ Called right after a full graph rebuild (``write_ladybug``), so the store must
3674
+ mirror exactly the files that were just indexed. We deliberately do NOT
3675
+ ``load()`` the existing store: ``update`` re-hashes every current file anyway,
3676
+ and preserving old entries would leave stale hashes for files that no longer
3677
+ exist (deleted or now-ignored). Those ghosts would be re-detected as "removed"
3678
+ on every subsequent ``increment``, sustaining an endless full-rebuild loop.
3679
+ """
3672
3680
  index_dir = ladybug_path.parent
3673
3681
  tracker = FileHashTracker(index_dir)
3674
- tracker.load()
3675
3682
  ignore = LayeredIgnore(source_root)
3676
3683
  all_files: set[str] = set()
3677
3684
  source_root_resolved = source_root.resolve()
@@ -3742,7 +3749,7 @@ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTa
3742
3749
 
3743
3750
  # Write declares_client edges
3744
3751
  for row in tables.declares_client_rows:
3745
- source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="")).file_path
3752
+ source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="", node_id="")).file_path
3746
3753
  conn.execute(_CREATE_DECLARES_CLIENT, {
3747
3754
  "sid": row.symbol_id,
3748
3755
  "cid": row.client_id,
@@ -3753,7 +3760,7 @@ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTa
3753
3760
 
3754
3761
  # Write declares_producer edges
3755
3762
  for row in tables.declares_producer_rows:
3756
- source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="")).file_path
3763
+ source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="", node_id="")).file_path
3757
3764
  conn.execute(_CREATE_DECLARES_PRODUCER, {
3758
3765
  "sid": row.symbol_id,
3759
3766
  "pid": row.producer_id,
@@ -6,6 +6,7 @@ from __future__ import annotations
6
6
  import argparse
7
7
  import asyncio
8
8
  import json
9
+ import os
9
10
  import pprint
10
11
  import shutil
11
12
  import sys
@@ -930,5 +931,21 @@ def main(argv: list[str] | None = None) -> int:
930
931
  return 2
931
932
 
932
933
 
934
+ def _console_script_main() -> None:
935
+ """Real CLI entry: terminate without interpreter finalization.
936
+
937
+ A pyarrow/lance worker thread (loaded via lancedb in lifecycle commands) can
938
+ outlive CPython finalization in a one-shot CLI subprocess and trip
939
+ ``PyGILState_Release`` (SIGABRT, exit -6). Flushing + ``os._exit`` skips that
940
+ racy teardown — the command has already done its work and emitted its result.
941
+ ``main()`` stays return-based so in-process test callers (``cli.main(...)``)
942
+ keep working.
943
+ """
944
+ rc = main()
945
+ sys.stdout.flush()
946
+ sys.stderr.flush()
947
+ os._exit(rc)
948
+
949
+
933
950
  if __name__ == "__main__":
934
- raise SystemExit(main())
951
+ _console_script_main()
@@ -52,14 +52,36 @@ _DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
52
52
  _UNRESOLVED_VAR_RE = re.compile(r"\$(\w+|\{[^}]+\})")
53
53
 
54
54
 
55
- def maybe_expand_embedding_model_path(value: str) -> str:
56
- """Expand ``~`` and ``$VAR`` when *value* is path-shaped.
55
+ def maybe_expand_embedding_model_path(
56
+ value: str,
57
+ *,
58
+ config_dir: Path | None = None,
59
+ source_root: Path | None = None,
60
+ source: SettingSource | None = None,
61
+ ) -> str:
62
+ """Expand ``~`` / ``$VAR`` for path-shaped values and resolve relatives to absolute.
57
63
 
58
64
  Path-shape: starts with ``/``, ``./``, ``../``, ``~``, or contains ``$``.
59
65
  Plain ``org/name`` (hub id) does not match and is passed through unchanged.
60
66
 
61
- Used for ``embedding.model`` after precedence resolution and for runtime
62
- ``SBERT_MODEL`` reads (e.g. MCP) so the string matches ``ResolvedOperatorConfig``.
67
+ Relative resolution mirrors :func:`_resolve_index_dir_path` so a committed
68
+ config is portable regardless of process CWD:
69
+
70
+ * YAML values (``source == "yaml"``) resolve against ``config_dir`` (the
71
+ directory holding ``.java-codebase-rag.yml``).
72
+ * CLI / env values resolve against ``source_root``.
73
+
74
+ Only a result that still starts with ``./`` or ``../`` *after* ``~`` /
75
+ ``$VAR`` expansion is re-based — so hub ids (``org/name``), absolute paths,
76
+ ``~/``-expanded paths, and an env var that already yielded an absolute path
77
+ are all left untouched.
78
+
79
+ When no base is supplied (the runtime ``SBERT_MODEL`` read via
80
+ :func:`resolved_sbert_model_for_process_env`), relative resolution is
81
+ skipped: the value is returned ``expandvars`` / ``expanduser``-expanded but
82
+ not re-based, matching the prior best-effort behavior. The main resolution
83
+ path (:func:`resolve_operator_config`) supplies a base, so the absolute path
84
+ it stores is what downstream loaders receive.
63
85
  """
64
86
  needs_expand = value.startswith(("/", "./", "../", "~")) or "$" in value
65
87
  if not needs_expand:
@@ -70,9 +92,31 @@ def maybe_expand_embedding_model_path(value: str) -> str:
70
92
  f"java-codebase-rag: path-shaped model string contains unresolved variable: {expanded}",
71
93
  file=sys.stderr,
72
94
  )
95
+ if expanded.startswith(("./", "../")):
96
+ base = _embedding_model_base(
97
+ source=source, config_dir=config_dir, source_root=source_root
98
+ )
99
+ if base is not None:
100
+ return str((base / expanded).resolve())
73
101
  return expanded
74
102
 
75
103
 
104
+ def _embedding_model_base(
105
+ *,
106
+ source: SettingSource | None,
107
+ config_dir: Path | None,
108
+ source_root: Path | None,
109
+ ) -> Path | None:
110
+ """Base directory for a relative ``embedding.model``.
111
+
112
+ Mirrors :func:`_resolve_index_dir_path`: YAML values anchor on the config
113
+ file's directory; CLI / env values anchor on the resolved ``source_root``.
114
+ """
115
+ if source == "yaml":
116
+ return config_dir
117
+ return source_root
118
+
119
+
76
120
  def resolved_sbert_model_for_process_env(import_time_default: str) -> str:
77
121
  """``SBERT_MODEL`` from the process environment, with the same expansion as YAML/CLI resolution.
78
122
 
@@ -306,9 +350,19 @@ def _pick_bool(
306
350
  def _resolve_index_dir_path(
307
351
  *,
308
352
  source_root: Path,
353
+ config_dir: Path,
309
354
  cli_index_dir: str | None,
310
355
  yaml_dict: dict[str, Any],
311
356
  ) -> tuple[Path, SettingSource]:
357
+ # Bases for relative paths:
358
+ # - YAML ``index_dir`` -> the config file's directory (``config_dir``),
359
+ # the SAME base used for YAML ``source_root``. Paths written in the
360
+ # config file are relative to the file, so both keys stay consistent.
361
+ # - CLI / env ``index_dir`` -> ``source_root`` (unchanged). These are not
362
+ # "in the config file"; preserving the existing base avoids a semantics
363
+ # change for operators who pass ``--index-dir`` on the command line.
364
+ # - Default ``./.java-codebase-rag`` -> ``source_root`` so the index sits
365
+ # beside the Java tree (the layout ``discover_project_root`` anchors on).
312
366
  raw_cli = cli_index_dir.strip() if isinstance(cli_index_dir, str) else None
313
367
  if raw_cli:
314
368
  p = Path(raw_cli).expanduser()
@@ -324,7 +378,7 @@ def _resolve_index_dir_path(
324
378
  idx = yaml_dict.get("index_dir")
325
379
  if isinstance(idx, str) and idx.strip():
326
380
  p = Path(idx.strip()).expanduser()
327
- out = p.resolve() if p.is_absolute() else (source_root / p).resolve()
381
+ out = p.resolve() if p.is_absolute() else (config_dir / p).resolve()
328
382
  return out, "yaml"
329
383
 
330
384
  return (source_root / ".java-codebase-rag").resolve(), "default"
@@ -368,7 +422,7 @@ def resolve_operator_config(
368
422
  root = config_dir
369
423
 
370
424
  index_dir, index_src = _resolve_index_dir_path(
371
- source_root=root, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
425
+ source_root=root, config_dir=config_dir, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
372
426
  )
373
427
  model, model_src = _pick_str(
374
428
  cli_val=cli_embedding_model,
@@ -377,7 +431,12 @@ def resolve_operator_config(
377
431
  yaml_path=("embedding", "model"),
378
432
  default=_DEFAULT_EMBEDDING_MODEL,
379
433
  )
380
- model = maybe_expand_embedding_model_path(model)
434
+ model = maybe_expand_embedding_model_path(
435
+ model,
436
+ config_dir=config_dir,
437
+ source_root=root,
438
+ source=model_src,
439
+ )
381
440
  device, device_src = _pick_optional_device(
382
441
  cli_val=cli_embedding_device,
383
442
  env_key="SBERT_DEVICE",
@@ -759,6 +759,11 @@ def generate_yaml_config(
759
759
  else:
760
760
  config["embedding"].pop("model", None)
761
761
 
762
+ # Seed cross-service resolution safe-by-default: only evidence-backed cross-service
763
+ # edges survive (see _is_brownfield_sourced in build_ast_graph). setdefault preserves
764
+ # an explicit user choice (e.g. `auto`) on re-run update.
765
+ config.setdefault("cross_service_resolution", "brownfield_only")
766
+
762
767
  # Keys NOT written by installer (preserved if present):
763
768
  # - source_root (config.py resolves from walk-up discovery)
764
769
  # - index_dir (config.py defaults to <source_root>/.java-codebase-rag)
@@ -1250,9 +1255,14 @@ def run_update(
1250
1255
  print("Skipping index update.")
1251
1256
  return EXIT_PARTIAL if has_artifact_failures else EXIT_SUCCESS
1252
1257
 
1253
- # Resolve configuration
1258
+ # Resolve configuration. Pass source_root=None so the YAML ``source_root``
1259
+ # field is honored exactly like increment/init/reprocess — passing the
1260
+ # discovered config dir here routes resolve_operator_config into the
1261
+ # explicit-override branch that SKIPS the YAML field, which made `update`
1262
+ # point cocoindex at the config dir (no Java) against the real index and
1263
+ # mass-delete it. Discovery still runs against the CLI's cwd.
1254
1264
  try:
1255
- cfg = resolve_operator_config(source_root=project_root, cli_index_dir=None)
1265
+ cfg = resolve_operator_config(source_root=None, cli_index_dir=None)
1256
1266
  index_dir = cfg.index_dir
1257
1267
  except Exception as e:
1258
1268
  print(f"\nWarning: Failed to resolve configuration: {e}")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: java-codebase-rag
3
- Version: 0.6.0
3
+ Version: 0.6.2
4
4
  Summary: MCP server for semantic + structural search over Java codebases
5
5
  Author: HumanBean17
6
6
  License-Expression: MIT
@@ -35,6 +35,7 @@ Requires-Dist: unidiff<1,>=0.7.3
35
35
  Provides-Extra: dev
36
36
  Requires-Dist: pytest>=7; extra == "dev"
37
37
  Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
38
+ Requires-Dist: pytest-xdist>=3; extra == "dev"
38
39
  Requires-Dist: ruff>=0.4; extra == "dev"
39
40
  Dynamic: license-file
40
41
 
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ java-codebase-rag = java_codebase_rag.cli:_console_script_main
3
+ java-codebase-rag-mcp = server:main
@@ -16,4 +16,5 @@ unidiff<1,>=0.7.3
16
16
  [dev]
17
17
  pytest>=7
18
18
  pytest-asyncio>=0.21
19
+ pytest-xdist>=3
19
20
  ruff>=0.4
@@ -15,7 +15,10 @@ from ast_java import (
15
15
  _TYPE_ANN_TO_CAPABILITY,
16
16
  )
17
17
 
18
- # Roles: Spring stereotype values plus DTO from `infer_role_for_type`.
18
+ # Roles assignable by indexing: Spring stereotype values plus DTO. ``OTHER`` is the
19
+ # built-in inference fallback (ast_java.infer_role when nothing matches) and is
20
+ # deliberately excluded here — it is a read-side value (the mcp_v2 ``Role`` enum
21
+ # includes it) but not a role a user may set via @CodebaseRole / role_overrides.
19
22
  VALID_ROLES: frozenset[str] = frozenset((*ROLE_ANNOTATIONS.values(), "DTO"))
20
23
 
21
24
  VALID_CAPABILITIES: frozenset[str] = frozenset(
@@ -48,6 +48,22 @@ def _hints_or_skip(tool: str, payload: dict) -> tuple[list, list]:
48
48
 
49
49
  DeclarationSymbolKind = Literal["class", "interface", "enum", "record", "annotation", "method", "constructor"]
50
50
 
51
+ # Closed value taxonomies surfaced to MCP consumers as enums. Sources of truth:
52
+ # Role — VALID_ROLES in java_ontology.py + the "OTHER" inference fallback (ast_java.infer_role)
53
+ # Framework — hardcoded literals across ast_java.py / build_ast_graph.py
54
+ # SourceLayer — exhaustive classifier build_ast_graph._client_source_layer / _producer_source_layer
55
+ # ClientKind — VALID_CLIENT_KINDS in java_ontology.py (every value validated at index time)
56
+ # ProducerKind — VALID_PRODUCER_KINDS in java_ontology.py (every producer validated at index time)
57
+ # Keep these in sync with the indexing-side taxonomies if they change.
58
+ Role = Literal[
59
+ "CONTROLLER", "SERVICE", "REPOSITORY", "COMPONENT", "CONFIG",
60
+ "ENTITY", "CLIENT", "MAPPER", "DTO", "OTHER",
61
+ ]
62
+ Framework = Literal["spring_mvc", "webflux", "kafka", "rabbitmq", "jms", "stream", "feign", ""]
63
+ SourceLayer = Literal["builtin", "layer_a_meta", "layer_b_ann", "layer_b_fqn", "layer_c_source"]
64
+ ClientKind = Literal["feign_method", "rest_template", "web_client"]
65
+ ProducerKind = Literal["kafka_send", "stream_bridge_send"]
66
+
51
67
  # Stored graph edge labels for one-hop neighbors. Composed DECLARES.* and OVERRIDDEN_BY.*
52
68
  # dot-keys are separate ComposedEdgeType literals (2-hop traversal). Stored OVERRIDES is an EdgeType.
53
69
  EdgeType = Literal[
@@ -133,21 +149,30 @@ class NodeFilter(BaseModel):
133
149
 
134
150
  microservice: str | None = None
135
151
  module: str | None = None
136
- source_layer: str | None = None
137
- role: str | None = None
138
- exclude_roles: list[str] | None = None
152
+ source_layer: SourceLayer | None = None
153
+ role: Role | None = None
154
+ exclude_roles: list[Role] | None = None
139
155
  annotation: str | None = None
140
156
  capability: str | None = None
141
157
  fqn_prefix: str | None = None
142
158
  symbol_kind: DeclarationSymbolKind | None = None
143
159
  symbol_kinds: list[DeclarationSymbolKind] | None = None
144
- http_method: str | None = None
160
+ http_method: str | None = Field(
161
+ default=None,
162
+ description="HTTP verb (commonly GET/POST/PUT/DELETE/PATCH; user route annotations may yield others).",
163
+ )
145
164
  path_prefix: str | None = None
146
- framework: str | None = None
147
- client_kind: str | None = None
165
+ framework: Framework | None = None
166
+ client_kind: ClientKind | None = Field(
167
+ default=None,
168
+ description="Outbound HTTP client kind: feign_method, rest_template, or web_client.",
169
+ )
148
170
  target_service: str | None = None
149
171
  target_path_prefix: str | None = None
150
- producer_kind: str | None = None
172
+ producer_kind: ProducerKind | None = Field(
173
+ default=None,
174
+ description="Outbound async producer kind: kafka_send or stream_bridge_send.",
175
+ )
151
176
  topic_prefix: str | None = None
152
177
 
153
178
 
@@ -157,9 +182,9 @@ class EdgeFilter(BaseModel):
157
182
  min_confidence: float | None = None
158
183
  exclude_strategies: list[str] | None = None
159
184
  include_strategies: list[str] | None = None
160
- callee_declaring_role: str | None = None
161
- callee_declaring_roles: list[str] | None = None
162
- exclude_callee_declaring_roles: list[str] | None = None
185
+ callee_declaring_role: Role | None = None
186
+ callee_declaring_roles: list[Role] | None = None
187
+ exclude_callee_declaring_roles: list[Role] | None = None
163
188
 
164
189
  @model_validator(mode="after")
165
190
  def _strategy_axes_mutually_exclusive(self) -> EdgeFilter:
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "java-codebase-rag"
7
- version = "0.6.0"
7
+ version = "0.6.2"
8
8
  description = "MCP server for semantic + structural search over Java codebases"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.11"
@@ -43,6 +43,7 @@ dependencies = [
43
43
  dev = [
44
44
  "pytest>=7",
45
45
  "pytest-asyncio>=0.21",
46
+ "pytest-xdist>=3",
46
47
  "ruff>=0.4",
47
48
  ]
48
49
 
@@ -52,7 +53,7 @@ Repository = "https://github.com/HumanBean17/java-codebase-rag"
52
53
  Issues = "https://github.com/HumanBean17/java-codebase-rag/issues"
53
54
 
54
55
  [project.scripts]
55
- java-codebase-rag = "java_codebase_rag.cli:main"
56
+ java-codebase-rag = "java_codebase_rag.cli:_console_script_main"
56
57
  java-codebase-rag-mcp = "server:main"
57
58
 
58
59
  [tool.setuptools]