java-codebase-rag 0.6.0__tar.gz → 0.6.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {java_codebase_rag-0.6.0/java_codebase_rag.egg-info → java_codebase_rag-0.6.2}/PKG-INFO +2 -1
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/ast_java.py +23 -6
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/build_ast_graph.py +11 -4
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/cli.py +18 -1
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/config.py +66 -7
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/installer.py +12 -2
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2/java_codebase_rag.egg-info}/PKG-INFO +2 -1
- java_codebase_rag-0.6.2/java_codebase_rag.egg-info/entry_points.txt +3 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag.egg-info/requires.txt +1 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_ontology.py +4 -1
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/mcp_v2.py +35 -10
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/pyproject.toml +3 -2
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/server.py +71 -53
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_brownfield_clients.py +57 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_config.py +200 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_incremental_graph.py +140 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_installer.py +135 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_java_codebase_rag_cli.py +111 -0
- java_codebase_rag-0.6.2/tests/test_mcp_server_project_root.py +81 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_microservice_scope.py +20 -13
- java_codebase_rag-0.6.0/java_codebase_rag.egg-info/entry_points.txt +0 -3
- java_codebase_rag-0.6.0/tests/test_mcp_server_project_root.py +0 -25
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/LICENSE +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/README.md +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/brownfield_events.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/chunk_heuristics.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/graph_enrich.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/index_common.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/__init__.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/_fdlimit.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/cli_format.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/cli_progress.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/install_data/agents/explorer-rag-enhanced.md +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/install_data/skills/explore-codebase/SKILL.md +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/lance_optimize.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag/pipeline.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag.egg-info/SOURCES.txt +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag.egg-info/dependency_links.txt +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_codebase_rag.egg-info/top_level.txt +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_index_flow_lancedb.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/java_index_v1_common.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/ladybug_queries.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/mcp_hints.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/path_filtering.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/pr_analysis.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/search_lancedb.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/setup.cfg +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_agent_skills_static.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_assign_endpoint_client_extraction.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_ast_graph_build.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_ast_java_calls.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_ast_java_capabilities.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_bank_chat_brownfield_integration.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_brownfield_events.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_brownfield_overrides.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_brownfield_routes.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_call_edge_matching.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_call_edges_e2e.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_call_graph_receiver_resolution.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_call_graph_smoke_roundtrip.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_call_invariant.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_cli_progress_stdout_invariant.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_cli_quiet_parity.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_client_hint_recovery.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_client_node_extraction.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_client_role_rename.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_cross_service_resolution_flag.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_edge_navigation_doc.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_fd_limit.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_feign_not_exposer.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_graph_enrich.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_installer_integration.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_ladybug_queries.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_lance_optimize.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_lancedb_e2e.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_mcp_hints.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_mcp_tools.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_mcp_v2.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_mcp_v2_compose.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_meta_chain_core.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_outgoing_call_extraction.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_packaging_metadata.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_path_filtering.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_pr_analysis.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_resolve_routes_messaging_layer_c.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_route_extraction.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_schema_consistency.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_search_lancedb.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_search_lancedb_capability.py +0 -0
- {java_codebase_rag-0.6.0 → java_codebase_rag-0.6.2}/tests/test_string_value_atoms.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: java-codebase-rag
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.6.2
|
|
4
4
|
Summary: MCP server for semantic + structural search over Java codebases
|
|
5
5
|
Author: HumanBean17
|
|
6
6
|
License-Expression: MIT
|
|
@@ -35,6 +35,7 @@ Requires-Dist: unidiff<1,>=0.7.3
|
|
|
35
35
|
Provides-Extra: dev
|
|
36
36
|
Requires-Dist: pytest>=7; extra == "dev"
|
|
37
37
|
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
38
|
+
Requires-Dist: pytest-xdist>=3; extra == "dev"
|
|
38
39
|
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
39
40
|
Dynamic: license-file
|
|
40
41
|
|
|
@@ -13,6 +13,7 @@ Python with no tree-sitter dependency.
|
|
|
13
13
|
from __future__ import annotations
|
|
14
14
|
|
|
15
15
|
import posixpath
|
|
16
|
+
import sys
|
|
16
17
|
from dataclasses import dataclass, field
|
|
17
18
|
from functools import lru_cache
|
|
18
19
|
from typing import Iterable
|
|
@@ -1642,9 +1643,17 @@ def _parse_codebase_http_client_annotation(
|
|
|
1642
1643
|
pairs, _ = _annotation_kv_nodes(ann, src)
|
|
1643
1644
|
client_kind = ""
|
|
1644
1645
|
if "clientKind" in pairs:
|
|
1645
|
-
val,
|
|
1646
|
-
if val and
|
|
1647
|
-
|
|
1646
|
+
val, vkind = _annotation_value(pairs["clientKind"], src)
|
|
1647
|
+
if val and vkind == "enum":
|
|
1648
|
+
kind_val = str(val)
|
|
1649
|
+
from java_ontology import VALID_CLIENT_KINDS # deferred: java_ontology imports ast_java
|
|
1650
|
+
if kind_val in VALID_CLIENT_KINDS:
|
|
1651
|
+
client_kind = kind_val
|
|
1652
|
+
else:
|
|
1653
|
+
print(
|
|
1654
|
+
f"[lancedb-mcp] CodebaseHttpClient: invalid clientKind {kind_val!r} — ignored",
|
|
1655
|
+
file=sys.stderr,
|
|
1656
|
+
)
|
|
1648
1657
|
target_service = ""
|
|
1649
1658
|
if "targetService" in pairs:
|
|
1650
1659
|
atoms = _string_value_atoms(pairs["targetService"], src, ctx)
|
|
@@ -1714,9 +1723,17 @@ def _parse_codebase_producer_annotation(
|
|
|
1714
1723
|
client_kind = "kafka_send"
|
|
1715
1724
|
kind_node = pairs.get("producerKind") or pairs.get("clientKind")
|
|
1716
1725
|
if kind_node is not None:
|
|
1717
|
-
val,
|
|
1718
|
-
if val and
|
|
1719
|
-
|
|
1726
|
+
val, vkind = _annotation_value(kind_node, src)
|
|
1727
|
+
if val and vkind == "enum":
|
|
1728
|
+
kind_val = str(val)
|
|
1729
|
+
from java_ontology import VALID_PRODUCER_KINDS # deferred: java_ontology imports ast_java
|
|
1730
|
+
if kind_val in VALID_PRODUCER_KINDS:
|
|
1731
|
+
client_kind = kind_val
|
|
1732
|
+
else:
|
|
1733
|
+
print(
|
|
1734
|
+
f"[lancedb-mcp] CodebaseProducer: invalid producerKind {kind_val!r} — ignored",
|
|
1735
|
+
file=sys.stderr,
|
|
1736
|
+
)
|
|
1720
1737
|
topic = ""
|
|
1721
1738
|
if "topic" in pairs:
|
|
1722
1739
|
atoms = _string_value_atoms(pairs["topic"], src, ctx)
|
|
@@ -3668,10 +3668,17 @@ def incremental_rebuild(
|
|
|
3668
3668
|
|
|
3669
3669
|
|
|
3670
3670
|
def _init_hash_tracker(source_root: Path, ladybug_path: Path) -> int:
|
|
3671
|
-
"""Initialize hash tracker for all Java files. Returns number of files hashed.
|
|
3671
|
+
"""Initialize hash tracker for all Java files. Returns number of files hashed.
|
|
3672
|
+
|
|
3673
|
+
Called right after a full graph rebuild (``write_ladybug``), so the store must
|
|
3674
|
+
mirror exactly the files that were just indexed. We deliberately do NOT
|
|
3675
|
+
``load()`` the existing store: ``update`` re-hashes every current file anyway,
|
|
3676
|
+
and preserving old entries would leave stale hashes for files that no longer
|
|
3677
|
+
exist (deleted or now-ignored). Those ghosts would be re-detected as "removed"
|
|
3678
|
+
on every subsequent ``increment``, sustaining an endless full-rebuild loop.
|
|
3679
|
+
"""
|
|
3672
3680
|
index_dir = ladybug_path.parent
|
|
3673
3681
|
tracker = FileHashTracker(index_dir)
|
|
3674
|
-
tracker.load()
|
|
3675
3682
|
ignore = LayeredIgnore(source_root)
|
|
3676
3683
|
all_files: set[str] = set()
|
|
3677
3684
|
source_root_resolved = source_root.resolve()
|
|
@@ -3742,7 +3749,7 @@ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTa
|
|
|
3742
3749
|
|
|
3743
3750
|
# Write declares_client edges
|
|
3744
3751
|
for row in tables.declares_client_rows:
|
|
3745
|
-
source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="")).file_path
|
|
3752
|
+
source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="", node_id="")).file_path
|
|
3746
3753
|
conn.execute(_CREATE_DECLARES_CLIENT, {
|
|
3747
3754
|
"sid": row.symbol_id,
|
|
3748
3755
|
"cid": row.client_id,
|
|
@@ -3753,7 +3760,7 @@ def _write_clients_producers_and_calls(conn: ladybug.Connection, tables: GraphTa
|
|
|
3753
3760
|
|
|
3754
3761
|
# Write declares_producer edges
|
|
3755
3762
|
for row in tables.declares_producer_rows:
|
|
3756
|
-
source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="")).file_path
|
|
3763
|
+
source_file = member_by_id.get(row.symbol_id, MemberEntry(kind="", decl=None, parent_id="", parent_fqn="", file_path="", module="", microservice="", node_id="")).file_path
|
|
3757
3764
|
conn.execute(_CREATE_DECLARES_PRODUCER, {
|
|
3758
3765
|
"sid": row.symbol_id,
|
|
3759
3766
|
"pid": row.producer_id,
|
|
@@ -6,6 +6,7 @@ from __future__ import annotations
|
|
|
6
6
|
import argparse
|
|
7
7
|
import asyncio
|
|
8
8
|
import json
|
|
9
|
+
import os
|
|
9
10
|
import pprint
|
|
10
11
|
import shutil
|
|
11
12
|
import sys
|
|
@@ -930,5 +931,21 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
930
931
|
return 2
|
|
931
932
|
|
|
932
933
|
|
|
934
|
+
def _console_script_main() -> None:
|
|
935
|
+
"""Real CLI entry: terminate without interpreter finalization.
|
|
936
|
+
|
|
937
|
+
A pyarrow/lance worker thread (loaded via lancedb in lifecycle commands) can
|
|
938
|
+
outlive CPython finalization in a one-shot CLI subprocess and trip
|
|
939
|
+
``PyGILState_Release`` (SIGABRT, exit -6). Flushing + ``os._exit`` skips that
|
|
940
|
+
racy teardown — the command has already done its work and emitted its result.
|
|
941
|
+
``main()`` stays return-based so in-process test callers (``cli.main(...)``)
|
|
942
|
+
keep working.
|
|
943
|
+
"""
|
|
944
|
+
rc = main()
|
|
945
|
+
sys.stdout.flush()
|
|
946
|
+
sys.stderr.flush()
|
|
947
|
+
os._exit(rc)
|
|
948
|
+
|
|
949
|
+
|
|
933
950
|
if __name__ == "__main__":
|
|
934
|
-
|
|
951
|
+
_console_script_main()
|
|
@@ -52,14 +52,36 @@ _DEFAULT_EMBEDDING_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
|
|
|
52
52
|
_UNRESOLVED_VAR_RE = re.compile(r"\$(\w+|\{[^}]+\})")
|
|
53
53
|
|
|
54
54
|
|
|
55
|
-
def maybe_expand_embedding_model_path(value: str) -> str:
|
|
56
|
-
|
|
55
|
+
def maybe_expand_embedding_model_path(
|
|
56
|
+
value: str,
|
|
57
|
+
*,
|
|
58
|
+
config_dir: Path | None = None,
|
|
59
|
+
source_root: Path | None = None,
|
|
60
|
+
source: SettingSource | None = None,
|
|
61
|
+
) -> str:
|
|
62
|
+
"""Expand ``~`` / ``$VAR`` for path-shaped values and resolve relatives to absolute.
|
|
57
63
|
|
|
58
64
|
Path-shape: starts with ``/``, ``./``, ``../``, ``~``, or contains ``$``.
|
|
59
65
|
Plain ``org/name`` (hub id) does not match and is passed through unchanged.
|
|
60
66
|
|
|
61
|
-
|
|
62
|
-
|
|
67
|
+
Relative resolution mirrors :func:`_resolve_index_dir_path` so a committed
|
|
68
|
+
config is portable regardless of process CWD:
|
|
69
|
+
|
|
70
|
+
* YAML values (``source == "yaml"``) resolve against ``config_dir`` (the
|
|
71
|
+
directory holding ``.java-codebase-rag.yml``).
|
|
72
|
+
* CLI / env values resolve against ``source_root``.
|
|
73
|
+
|
|
74
|
+
Only a result that still starts with ``./`` or ``../`` *after* ``~`` /
|
|
75
|
+
``$VAR`` expansion is re-based — so hub ids (``org/name``), absolute paths,
|
|
76
|
+
``~/``-expanded paths, and an env var that already yielded an absolute path
|
|
77
|
+
are all left untouched.
|
|
78
|
+
|
|
79
|
+
When no base is supplied (the runtime ``SBERT_MODEL`` read via
|
|
80
|
+
:func:`resolved_sbert_model_for_process_env`), relative resolution is
|
|
81
|
+
skipped: the value is returned ``expandvars`` / ``expanduser``-expanded but
|
|
82
|
+
not re-based, matching the prior best-effort behavior. The main resolution
|
|
83
|
+
path (:func:`resolve_operator_config`) supplies a base, so the absolute path
|
|
84
|
+
it stores is what downstream loaders receive.
|
|
63
85
|
"""
|
|
64
86
|
needs_expand = value.startswith(("/", "./", "../", "~")) or "$" in value
|
|
65
87
|
if not needs_expand:
|
|
@@ -70,9 +92,31 @@ def maybe_expand_embedding_model_path(value: str) -> str:
|
|
|
70
92
|
f"java-codebase-rag: path-shaped model string contains unresolved variable: {expanded}",
|
|
71
93
|
file=sys.stderr,
|
|
72
94
|
)
|
|
95
|
+
if expanded.startswith(("./", "../")):
|
|
96
|
+
base = _embedding_model_base(
|
|
97
|
+
source=source, config_dir=config_dir, source_root=source_root
|
|
98
|
+
)
|
|
99
|
+
if base is not None:
|
|
100
|
+
return str((base / expanded).resolve())
|
|
73
101
|
return expanded
|
|
74
102
|
|
|
75
103
|
|
|
104
|
+
def _embedding_model_base(
|
|
105
|
+
*,
|
|
106
|
+
source: SettingSource | None,
|
|
107
|
+
config_dir: Path | None,
|
|
108
|
+
source_root: Path | None,
|
|
109
|
+
) -> Path | None:
|
|
110
|
+
"""Base directory for a relative ``embedding.model``.
|
|
111
|
+
|
|
112
|
+
Mirrors :func:`_resolve_index_dir_path`: YAML values anchor on the config
|
|
113
|
+
file's directory; CLI / env values anchor on the resolved ``source_root``.
|
|
114
|
+
"""
|
|
115
|
+
if source == "yaml":
|
|
116
|
+
return config_dir
|
|
117
|
+
return source_root
|
|
118
|
+
|
|
119
|
+
|
|
76
120
|
def resolved_sbert_model_for_process_env(import_time_default: str) -> str:
|
|
77
121
|
"""``SBERT_MODEL`` from the process environment, with the same expansion as YAML/CLI resolution.
|
|
78
122
|
|
|
@@ -306,9 +350,19 @@ def _pick_bool(
|
|
|
306
350
|
def _resolve_index_dir_path(
|
|
307
351
|
*,
|
|
308
352
|
source_root: Path,
|
|
353
|
+
config_dir: Path,
|
|
309
354
|
cli_index_dir: str | None,
|
|
310
355
|
yaml_dict: dict[str, Any],
|
|
311
356
|
) -> tuple[Path, SettingSource]:
|
|
357
|
+
# Bases for relative paths:
|
|
358
|
+
# - YAML ``index_dir`` -> the config file's directory (``config_dir``),
|
|
359
|
+
# the SAME base used for YAML ``source_root``. Paths written in the
|
|
360
|
+
# config file are relative to the file, so both keys stay consistent.
|
|
361
|
+
# - CLI / env ``index_dir`` -> ``source_root`` (unchanged). These are not
|
|
362
|
+
# "in the config file"; preserving the existing base avoids a semantics
|
|
363
|
+
# change for operators who pass ``--index-dir`` on the command line.
|
|
364
|
+
# - Default ``./.java-codebase-rag`` -> ``source_root`` so the index sits
|
|
365
|
+
# beside the Java tree (the layout ``discover_project_root`` anchors on).
|
|
312
366
|
raw_cli = cli_index_dir.strip() if isinstance(cli_index_dir, str) else None
|
|
313
367
|
if raw_cli:
|
|
314
368
|
p = Path(raw_cli).expanduser()
|
|
@@ -324,7 +378,7 @@ def _resolve_index_dir_path(
|
|
|
324
378
|
idx = yaml_dict.get("index_dir")
|
|
325
379
|
if isinstance(idx, str) and idx.strip():
|
|
326
380
|
p = Path(idx.strip()).expanduser()
|
|
327
|
-
out = p.resolve() if p.is_absolute() else (
|
|
381
|
+
out = p.resolve() if p.is_absolute() else (config_dir / p).resolve()
|
|
328
382
|
return out, "yaml"
|
|
329
383
|
|
|
330
384
|
return (source_root / ".java-codebase-rag").resolve(), "default"
|
|
@@ -368,7 +422,7 @@ def resolve_operator_config(
|
|
|
368
422
|
root = config_dir
|
|
369
423
|
|
|
370
424
|
index_dir, index_src = _resolve_index_dir_path(
|
|
371
|
-
source_root=root, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
|
|
425
|
+
source_root=root, config_dir=config_dir, cli_index_dir=cli_index_dir, yaml_dict=yaml_dict
|
|
372
426
|
)
|
|
373
427
|
model, model_src = _pick_str(
|
|
374
428
|
cli_val=cli_embedding_model,
|
|
@@ -377,7 +431,12 @@ def resolve_operator_config(
|
|
|
377
431
|
yaml_path=("embedding", "model"),
|
|
378
432
|
default=_DEFAULT_EMBEDDING_MODEL,
|
|
379
433
|
)
|
|
380
|
-
model = maybe_expand_embedding_model_path(model)
|
|
434
|
+
model = maybe_expand_embedding_model_path(
|
|
435
|
+
model,
|
|
436
|
+
config_dir=config_dir,
|
|
437
|
+
source_root=root,
|
|
438
|
+
source=model_src,
|
|
439
|
+
)
|
|
381
440
|
device, device_src = _pick_optional_device(
|
|
382
441
|
cli_val=cli_embedding_device,
|
|
383
442
|
env_key="SBERT_DEVICE",
|
|
@@ -759,6 +759,11 @@ def generate_yaml_config(
|
|
|
759
759
|
else:
|
|
760
760
|
config["embedding"].pop("model", None)
|
|
761
761
|
|
|
762
|
+
# Seed cross-service resolution safe-by-default: only evidence-backed cross-service
|
|
763
|
+
# edges survive (see _is_brownfield_sourced in build_ast_graph). setdefault preserves
|
|
764
|
+
# an explicit user choice (e.g. `auto`) on re-run update.
|
|
765
|
+
config.setdefault("cross_service_resolution", "brownfield_only")
|
|
766
|
+
|
|
762
767
|
# Keys NOT written by installer (preserved if present):
|
|
763
768
|
# - source_root (config.py resolves from walk-up discovery)
|
|
764
769
|
# - index_dir (config.py defaults to <source_root>/.java-codebase-rag)
|
|
@@ -1250,9 +1255,14 @@ def run_update(
|
|
|
1250
1255
|
print("Skipping index update.")
|
|
1251
1256
|
return EXIT_PARTIAL if has_artifact_failures else EXIT_SUCCESS
|
|
1252
1257
|
|
|
1253
|
-
# Resolve configuration
|
|
1258
|
+
# Resolve configuration. Pass source_root=None so the YAML ``source_root``
|
|
1259
|
+
# field is honored exactly like increment/init/reprocess — passing the
|
|
1260
|
+
# discovered config dir here routes resolve_operator_config into the
|
|
1261
|
+
# explicit-override branch that SKIPS the YAML field, which made `update`
|
|
1262
|
+
# point cocoindex at the config dir (no Java) against the real index and
|
|
1263
|
+
# mass-delete it. Discovery still runs against the CLI's cwd.
|
|
1254
1264
|
try:
|
|
1255
|
-
cfg = resolve_operator_config(source_root=
|
|
1265
|
+
cfg = resolve_operator_config(source_root=None, cli_index_dir=None)
|
|
1256
1266
|
index_dir = cfg.index_dir
|
|
1257
1267
|
except Exception as e:
|
|
1258
1268
|
print(f"\nWarning: Failed to resolve configuration: {e}")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: java-codebase-rag
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.6.2
|
|
4
4
|
Summary: MCP server for semantic + structural search over Java codebases
|
|
5
5
|
Author: HumanBean17
|
|
6
6
|
License-Expression: MIT
|
|
@@ -35,6 +35,7 @@ Requires-Dist: unidiff<1,>=0.7.3
|
|
|
35
35
|
Provides-Extra: dev
|
|
36
36
|
Requires-Dist: pytest>=7; extra == "dev"
|
|
37
37
|
Requires-Dist: pytest-asyncio>=0.21; extra == "dev"
|
|
38
|
+
Requires-Dist: pytest-xdist>=3; extra == "dev"
|
|
38
39
|
Requires-Dist: ruff>=0.4; extra == "dev"
|
|
39
40
|
Dynamic: license-file
|
|
40
41
|
|
|
@@ -15,7 +15,10 @@ from ast_java import (
|
|
|
15
15
|
_TYPE_ANN_TO_CAPABILITY,
|
|
16
16
|
)
|
|
17
17
|
|
|
18
|
-
# Roles: Spring stereotype values plus DTO
|
|
18
|
+
# Roles assignable by indexing: Spring stereotype values plus DTO. ``OTHER`` is the
|
|
19
|
+
# built-in inference fallback (ast_java.infer_role when nothing matches) and is
|
|
20
|
+
# deliberately excluded here — it is a read-side value (the mcp_v2 ``Role`` enum
|
|
21
|
+
# includes it) but not a role a user may set via @CodebaseRole / role_overrides.
|
|
19
22
|
VALID_ROLES: frozenset[str] = frozenset((*ROLE_ANNOTATIONS.values(), "DTO"))
|
|
20
23
|
|
|
21
24
|
VALID_CAPABILITIES: frozenset[str] = frozenset(
|
|
@@ -48,6 +48,22 @@ def _hints_or_skip(tool: str, payload: dict) -> tuple[list, list]:
|
|
|
48
48
|
|
|
49
49
|
DeclarationSymbolKind = Literal["class", "interface", "enum", "record", "annotation", "method", "constructor"]
|
|
50
50
|
|
|
51
|
+
# Closed value taxonomies surfaced to MCP consumers as enums. Sources of truth:
|
|
52
|
+
# Role — VALID_ROLES in java_ontology.py + the "OTHER" inference fallback (ast_java.infer_role)
|
|
53
|
+
# Framework — hardcoded literals across ast_java.py / build_ast_graph.py
|
|
54
|
+
# SourceLayer — exhaustive classifier build_ast_graph._client_source_layer / _producer_source_layer
|
|
55
|
+
# ClientKind — VALID_CLIENT_KINDS in java_ontology.py (every producer validated at index time)
|
|
56
|
+
# ProducerKind — VALID_PRODUCER_KINDS in java_ontology.py (every producer validated at index time)
|
|
57
|
+
# Keep these in sync with the indexing-side taxonomies if they change.
|
|
58
|
+
Role = Literal[
|
|
59
|
+
"CONTROLLER", "SERVICE", "REPOSITORY", "COMPONENT", "CONFIG",
|
|
60
|
+
"ENTITY", "CLIENT", "MAPPER", "DTO", "OTHER",
|
|
61
|
+
]
|
|
62
|
+
Framework = Literal["spring_mvc", "webflux", "kafka", "rabbitmq", "jms", "stream", "feign", ""]
|
|
63
|
+
SourceLayer = Literal["builtin", "layer_a_meta", "layer_b_ann", "layer_b_fqn", "layer_c_source"]
|
|
64
|
+
ClientKind = Literal["feign_method", "rest_template", "web_client"]
|
|
65
|
+
ProducerKind = Literal["kafka_send", "stream_bridge_send"]
|
|
66
|
+
|
|
51
67
|
# Stored graph edge labels for one-hop neighbors. Composed DECLARES.* and OVERRIDDEN_BY.*
|
|
52
68
|
# dot-keys are separate ComposedEdgeType literals (2-hop traversal). Stored OVERRIDES is an EdgeType.
|
|
53
69
|
EdgeType = Literal[
|
|
@@ -133,21 +149,30 @@ class NodeFilter(BaseModel):
|
|
|
133
149
|
|
|
134
150
|
microservice: str | None = None
|
|
135
151
|
module: str | None = None
|
|
136
|
-
source_layer:
|
|
137
|
-
role:
|
|
138
|
-
exclude_roles: list[
|
|
152
|
+
source_layer: SourceLayer | None = None
|
|
153
|
+
role: Role | None = None
|
|
154
|
+
exclude_roles: list[Role] | None = None
|
|
139
155
|
annotation: str | None = None
|
|
140
156
|
capability: str | None = None
|
|
141
157
|
fqn_prefix: str | None = None
|
|
142
158
|
symbol_kind: DeclarationSymbolKind | None = None
|
|
143
159
|
symbol_kinds: list[DeclarationSymbolKind] | None = None
|
|
144
|
-
http_method: str | None =
|
|
160
|
+
http_method: str | None = Field(
|
|
161
|
+
default=None,
|
|
162
|
+
description="HTTP verb (commonly GET/POST/PUT/DELETE/PATCH; user route annotations may yield others).",
|
|
163
|
+
)
|
|
145
164
|
path_prefix: str | None = None
|
|
146
|
-
framework:
|
|
147
|
-
client_kind:
|
|
165
|
+
framework: Framework | None = None
|
|
166
|
+
client_kind: ClientKind | None = Field(
|
|
167
|
+
default=None,
|
|
168
|
+
description="Outbound HTTP client kind: feign_method, rest_template, or web_client.",
|
|
169
|
+
)
|
|
148
170
|
target_service: str | None = None
|
|
149
171
|
target_path_prefix: str | None = None
|
|
150
|
-
producer_kind:
|
|
172
|
+
producer_kind: ProducerKind | None = Field(
|
|
173
|
+
default=None,
|
|
174
|
+
description="Outbound async producer kind: kafka_send or stream_bridge_send.",
|
|
175
|
+
)
|
|
151
176
|
topic_prefix: str | None = None
|
|
152
177
|
|
|
153
178
|
|
|
@@ -157,9 +182,9 @@ class EdgeFilter(BaseModel):
|
|
|
157
182
|
min_confidence: float | None = None
|
|
158
183
|
exclude_strategies: list[str] | None = None
|
|
159
184
|
include_strategies: list[str] | None = None
|
|
160
|
-
callee_declaring_role:
|
|
161
|
-
callee_declaring_roles: list[
|
|
162
|
-
exclude_callee_declaring_roles: list[
|
|
185
|
+
callee_declaring_role: Role | None = None
|
|
186
|
+
callee_declaring_roles: list[Role] | None = None
|
|
187
|
+
exclude_callee_declaring_roles: list[Role] | None = None
|
|
163
188
|
|
|
164
189
|
@model_validator(mode="after")
|
|
165
190
|
def _strategy_axes_mutually_exclusive(self) -> EdgeFilter:
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "java-codebase-rag"
|
|
7
|
-
version = "0.6.0"
|
|
7
|
+
version = "0.6.2"
|
|
8
8
|
description = "MCP server for semantic + structural search over Java codebases"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -43,6 +43,7 @@ dependencies = [
|
|
|
43
43
|
dev = [
|
|
44
44
|
"pytest>=7",
|
|
45
45
|
"pytest-asyncio>=0.21",
|
|
46
|
+
"pytest-xdist>=3",
|
|
46
47
|
"ruff>=0.4",
|
|
47
48
|
]
|
|
48
49
|
|
|
@@ -52,7 +53,7 @@ Repository = "https://github.com/HumanBean17/java-codebase-rag"
|
|
|
52
53
|
Issues = "https://github.com/HumanBean17/java-codebase-rag/issues"
|
|
53
54
|
|
|
54
55
|
[project.scripts]
|
|
55
|
-
java-codebase-rag = "java_codebase_rag.cli:main"
|
|
56
|
+
java-codebase-rag = "java_codebase_rag.cli:_console_script_main"
|
|
56
57
|
java-codebase-rag-mcp = "server:main"
|
|
57
58
|
|
|
58
59
|
[tool.setuptools]
|