java-codebase-rag 0.2.0__tar.gz → 0.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {java_codebase_rag-0.2.0/java_codebase_rag.egg-info → java_codebase_rag-0.2.2}/PKG-INFO +27 -6
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/README.md +24 -5
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2/java_codebase_rag.egg-info}/PKG-INFO +27 -6
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag.egg-info/SOURCES.txt +1 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag.egg-info/requires.txt +2 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/pyproject.toml +3 -1
- java_codebase_rag-0.2.2/tests/test_agent_skills_static.py +250 -0
- java_codebase_rag-0.2.2/tests/test_packaging_metadata.py +14 -0
- java_codebase_rag-0.2.0/tests/test_agent_skills_static.py +0 -318
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/LICENSE +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/ast_java.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/brownfield_events.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/build_ast_graph.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/chunk_heuristics.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/graph_enrich.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/index_common.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag/__init__.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag/cli.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag/cli_progress.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag/config.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag/pipeline.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag.egg-info/dependency_links.txt +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag.egg-info/entry_points.txt +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag.egg-info/top_level.txt +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_index_flow_lancedb.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_index_v1_common.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_ontology.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/kuzu_queries.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/mcp_hints.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/mcp_v2.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/path_filtering.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/pr_analysis.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/search_lancedb.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/server.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/setup.cfg +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_assign_endpoint_client_extraction.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_ast_graph_build.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_ast_java_calls.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_ast_java_capabilities.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_bank_chat_brownfield_integration.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_brownfield_clients.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_brownfield_events.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_brownfield_overrides.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_brownfield_routes.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_call_edge_matching.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_call_edges_e2e.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_call_graph_receiver_resolution.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_call_graph_smoke_roundtrip.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_call_invariant.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_cli_progress_stdout_invariant.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_cli_quiet_parity.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_client_hint_recovery.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_client_node_extraction.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_client_role_rename.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_cross_service_resolution_flag.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_edge_navigation_doc.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_feign_not_exposer.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_graph_enrich.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_java_codebase_rag_cli.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_kuzu_queries.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_lancedb_e2e.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_mcp_hints.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_mcp_tools.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_mcp_v2.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_mcp_v2_compose.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_meta_chain_core.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_outgoing_call_extraction.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_path_filtering.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_pr_analysis.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_resolve_routes_messaging_layer_c.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_route_extraction.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_schema_consistency.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_search_lancedb.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_search_lancedb_capability.py +0 -0
- {java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_string_value_atoms.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: java-codebase-rag
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: MCP server for semantic + structural search over Java codebases
|
|
5
5
|
Author: HumanBean17
|
|
6
6
|
License-Expression: MIT
|
|
@@ -18,6 +18,7 @@ Classifier: Topic :: Software Development :: Libraries
|
|
|
18
18
|
Requires-Python: >=3.11
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
20
|
License-File: LICENSE
|
|
21
|
+
Requires-Dist: cocoindex[lancedb]<2,>=1.0.0a43
|
|
21
22
|
Requires-Dist: kuzu<0.12,>=0.11.3
|
|
22
23
|
Requires-Dist: lancedb<0.31,>=0.25.3
|
|
23
24
|
Requires-Dist: mcp<2,>=1.27.0
|
|
@@ -26,6 +27,7 @@ Requires-Dist: pathspec<2,>=1.0.4
|
|
|
26
27
|
Requires-Dist: pyarrow<24,>=23.0.1
|
|
27
28
|
Requires-Dist: PyYAML<7,>=6.0.3
|
|
28
29
|
Requires-Dist: sentence-transformers<6,>=5.4.0
|
|
30
|
+
Requires-Dist: transformers<=5.5.3,>=4.48.3
|
|
29
31
|
Requires-Dist: tree-sitter<0.26,>=0.25.2
|
|
30
32
|
Requires-Dist: tree-sitter-java<0.24,>=0.23.5
|
|
31
33
|
Requires-Dist: unidiff<1,>=0.7.3
|
|
@@ -50,6 +52,24 @@ For the design rationale, the GPS metaphor, and the full ontology, see [`docs/pa
|
|
|
50
52
|
|
|
51
53
|
---
|
|
52
54
|
|
|
55
|
+
## Why this exists
|
|
56
|
+
|
|
57
|
+
Generic code-search tools (grep, ctags, vector-only RAG) hit a ceiling on real Java microservice estates: they find files but lose the structure that makes a Spring/JAX-RS system navigable. This project is built around five choices that target that gap.
|
|
58
|
+
|
|
59
|
+
- **Hybrid RAG + GraphRAG, not either-or.** Semantic recall (LanceDB chunk vectors) and structural navigation (Kuzu property graph) are composed in one surface. `search` finds candidate nodes by meaning; `neighbors` walks the exact edge you care about (`CALLS`, `IMPLEMENTS`, `INJECTS`, `DECLARES_ROUTE`, …). The agent picks the right primitive per step instead of being forced into pure-vector or pure-symbol search.
|
|
60
|
+
|
|
61
|
+
- **A Java-tuned role model.** Symbols are labelled with stereotypes inferred from Spring and JAX-RS conventions — `CONTROLLER`, `SERVICE`, `REPOSITORY`, `CLIENT`, `PRODUCER`, `MAPPER`, `DTO`. Agents can ask "list controllers" or "who injects this repository" directly, instead of grep-ing for `@RestController` and hoping for the best. Roles drive both filtering (`find` with a `NodeFilter`) and ranking.
|
|
62
|
+
|
|
63
|
+
- **Ranking specialized for Java codebases.** The composite ranker is aware of role, microservice, and FQN structure — not a generic BM25. A search for `"chat ingress"` surfaces controllers before utility classes; a search scoped to one microservice doesn't drown in matches from the other 19. Defaults are tuned on the bank-chat fixture and exposed in `docs/CONFIGURATION.md` for per-repo overrides.
|
|
64
|
+
|
|
65
|
+
- **Cross-service resolution + system-level navigation.** `HTTP_CALLS` and `ASYNC_CALLS` edges connect Clients and Producers in one microservice to Routes and Handlers in another, resolved at index time from URL/topic strings + Spring `@FeignClient` / `RestTemplate` conventions. `/who-hits-route`, `/trace-request-flow`, and `/impact-of` use these to answer questions a single-service tool fundamentally can't — "who calls this REST endpoint from outside this service", "trace this Kafka message end-to-end", "if I change this DTO, which services break".
|
|
66
|
+
|
|
67
|
+
- **Brownfield annotations as a first-class override.** Real Java estates have hand-rolled HTTP clients, dynamic topic names, reflection-heavy routing. `@CodebaseHttpRoute`, `@CodebaseAsyncRoute`, `@CodebaseHttpClient`, and `@CodebaseProducer` let you pin the truth in source. They have **exclusive priority** — when a symbol is annotated, framework-convention inference is skipped entirely. You get a correct graph on legacy code without rewriting it.
|
|
68
|
+
|
|
69
|
+
The rest of this README is the install, walkthrough, and tool cheat sheet for putting that to work.
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
53
73
|
## Install
|
|
54
74
|
|
|
55
75
|
```bash
|
|
@@ -57,6 +77,7 @@ pip install java-codebase-rag
|
|
|
57
77
|
```
|
|
58
78
|
|
|
59
79
|
Python **3.11+** required. After install, `java-codebase-rag --help` should print the CLI groups.
|
|
80
|
+
The package includes the CocoIndex lifecycle dependency used by `init`, `increment`, `reprocess`, and `erase`.
|
|
60
81
|
|
|
61
82
|
> **Stability disclaimer.** This package does **not** promise backward compatibility. MCP tool contracts, env vars, Lance/Kuzu schemas, config files, and Python APIs may change without a deprecation period. Track `main` and rebuild indexes when ontology or embedding settings change.
|
|
62
83
|
|
|
@@ -132,9 +153,9 @@ See [`mcp.json.example`](./mcp.json.example) for the same shape in `.mcp.json` (
|
|
|
132
153
|
|
|
133
154
|
Pick **one** of two options (not both — they cover the same navigation intents):
|
|
134
155
|
|
|
135
|
-
1. **[`docs/AGENT-GUIDE.md`](./docs/AGENT-GUIDE.md)** (recommended for most) — standalone MCP operating manual. Copy-paste the `BEGIN`/`END` block into your project's `QWEN.md`, `CLAUDE.md`, or `AGENTS.md`. Contains: five-tool reference, `NodeFilter` / edge taxonomy, ontology glossary, recovery playbook, and
|
|
156
|
+
1. **[`docs/AGENT-GUIDE.md`](./docs/AGENT-GUIDE.md)** (recommended for most) — standalone MCP operating manual. Copy-paste the `BEGIN`/`END` block into your project's `QWEN.md`, `CLAUDE.md`, or `AGENTS.md`. Contains: five-tool reference, `NodeFilter` / edge taxonomy, ontology glossary, recovery playbook, and navigation patterns. Self-contained — no external file dependencies.
|
|
136
157
|
|
|
137
|
-
2. **[`
|
|
158
|
+
2. **[`/explore-codebase`](./skills/explore-codebase/SKILL.md)** (for hosts with skill discovery) — single self-contained skill with the complete operating manual. If your MCP host supports skill discovery (Claude Code, Qwen Code, Cursor), load `/explore-codebase` to get the full tool reference, edge taxonomy, decision tree, and recovery playbook in one shot.
|
|
138
159
|
|
|
139
160
|
Also: **[`docs/MANUAL-VERIFICATION-CHECKLIST.md`](./docs/MANUAL-VERIFICATION-CHECKLIST.md)** — 7-phase agent-driven verification you run after indexing your real project.
|
|
140
161
|
|
|
@@ -154,7 +175,7 @@ Full schemas, `NodeFilter` / `EdgeFilter` semantics, and the hints contract live
|
|
|
154
175
|
|
|
155
176
|
### Three-layer architecture
|
|
156
177
|
|
|
157
|
-
Layer 1 (storage) → Layer 2 (5 MCP tools) → Layer 3 (
|
|
178
|
+
Layer 1 (storage) → Layer 2 (5 MCP tools) → Layer 3 (skill). The [`/explore-codebase`](./skills/explore-codebase/SKILL.md) skill provides the full operating manual for Layer 2. See the [architecture diagram in `skills/README.md`](./skills/README.md#three-layer-architecture).
|
|
158
179
|
|
|
159
180
|
---
|
|
160
181
|
|
|
@@ -197,7 +218,7 @@ Run `java-codebase-rag --help` to list grouped subcommands. Operator playbook wi
|
|
|
197
218
|
| [`docs/CONFIGURATION.md`](./docs/CONFIGURATION.md) | Environment variables, project YAML, graph ontology, brownfield overrides, ignore patterns. |
|
|
198
219
|
| [`docs/JAVA-CODEBASE-RAG-CLI.md`](./docs/JAVA-CODEBASE-RAG-CLI.md) | CLI operator playbook: workflows, exit codes, env alignment. |
|
|
199
220
|
| [`docs/EDGE-NAVIGATION.md`](./docs/EDGE-NAVIGATION.md) | MCP-traversable edges, directions, dot-key composition. |
|
|
200
|
-
| [`skills/`](./skills/) |
|
|
221
|
+
| [`skills/`](./skills/) | Single `/explore-codebase` skill — complete MCP operating manual for hosts with skill discovery (alternative to copy-pasting AGENT-GUIDE). See [`skills/README.md`](./skills/README.md). |
|
|
201
222
|
| [`docs/MANUAL-VERIFICATION-CHECKLIST.md`](./docs/MANUAL-VERIFICATION-CHECKLIST.md) | 7-phase agent-driven verification after indexing your project. |
|
|
202
223
|
| [`docs/CODEBASE_REQUIREMENTS.md`](./docs/CODEBASE_REQUIREMENTS.md) | Assumptions about your Java repo + per-file edit map for non-conforming codebases. |
|
|
203
224
|
| [`automation/cursor_propose_only/README.md`](./automation/cursor_propose_only/README.md) | Optional proposal orchestration workflow (single-command autopilot, planning bundles, automated execution/review loops). |
|
|
@@ -214,7 +235,7 @@ python3 -m venv .venv
|
|
|
214
235
|
.venv/bin/pip install -r requirements.txt
|
|
215
236
|
```
|
|
216
237
|
|
|
217
|
-
The `cocoindex` package
|
|
238
|
+
The `cocoindex` package powers lifecycle commands that run the indexer (`init`, `increment`, `reprocess`, `erase`). Search and MCP navigation do not invoke it directly.
|
|
218
239
|
|
|
219
240
|
The default embedding model is `sentence-transformers/all-MiniLM-L6-v2` (downloaded on first `init`). Override via the `EMBEDDING_MODEL` env var — see [`docs/CONFIGURATION.md` §1](./docs/CONFIGURATION.md#1-environment-variables).
|
|
220
241
|
|
|
@@ -17,6 +17,24 @@ For the design rationale, the GPS metaphor, and the full ontology, see [`docs/pa
|
|
|
17
17
|
|
|
18
18
|
---
|
|
19
19
|
|
|
20
|
+
## Why this exists
|
|
21
|
+
|
|
22
|
+
Generic code-search tools (grep, ctags, vector-only RAG) hit a ceiling on real Java microservice estates: they find files but lose the structure that makes a Spring/JAX-RS system navigable. This project is built around five choices that target that gap.
|
|
23
|
+
|
|
24
|
+
- **Hybrid RAG + GraphRAG, not either-or.** Semantic recall (LanceDB chunk vectors) and structural navigation (Kuzu property graph) are composed in one surface. `search` finds candidate nodes by meaning; `neighbors` walks the exact edge you care about (`CALLS`, `IMPLEMENTS`, `INJECTS`, `DECLARES_ROUTE`, …). The agent picks the right primitive per step instead of being forced into pure-vector or pure-symbol search.
|
|
25
|
+
|
|
26
|
+
- **A Java-tuned role model.** Symbols are labelled with stereotypes inferred from Spring and JAX-RS conventions — `CONTROLLER`, `SERVICE`, `REPOSITORY`, `CLIENT`, `PRODUCER`, `MAPPER`, `DTO`. Agents can ask "list controllers" or "who injects this repository" directly, instead of grep-ing for `@RestController` and hoping for the best. Roles drive both filtering (`find` with a `NodeFilter`) and ranking.
|
|
27
|
+
|
|
28
|
+
- **Ranking specialized for Java codebases.** The composite ranker is aware of role, microservice, and FQN structure — not a generic BM25. A search for `"chat ingress"` surfaces controllers before utility classes; a search scoped to one microservice doesn't drown in matches from the other 19. Defaults are tuned on the bank-chat fixture and exposed in `docs/CONFIGURATION.md` for per-repo overrides.
|
|
29
|
+
|
|
30
|
+
- **Cross-service resolution + system-level navigation.** `HTTP_CALLS` and `ASYNC_CALLS` edges connect Clients and Producers in one microservice to Routes and Handlers in another, resolved at index time from URL/topic strings + Spring `@FeignClient` / `RestTemplate` conventions. `/who-hits-route`, `/trace-request-flow`, and `/impact-of` use these to answer questions a single-service tool fundamentally can't — "who calls this REST endpoint from outside this service", "trace this Kafka message end-to-end", "if I change this DTO, which services break".
|
|
31
|
+
|
|
32
|
+
- **Brownfield annotations as a first-class override.** Real Java estates have hand-rolled HTTP clients, dynamic topic names, reflection-heavy routing. `@CodebaseHttpRoute`, `@CodebaseAsyncRoute`, `@CodebaseHttpClient`, and `@CodebaseProducer` let you pin the truth in source. They have **exclusive priority** — when a symbol is annotated, framework-convention inference is skipped entirely. You get a correct graph on legacy code without rewriting it.
|
|
33
|
+
|
|
34
|
+
The rest of this README is the install, walkthrough, and tool cheat sheet for putting that to work.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
20
38
|
## Install
|
|
21
39
|
|
|
22
40
|
```bash
|
|
@@ -24,6 +42,7 @@ pip install java-codebase-rag
|
|
|
24
42
|
```
|
|
25
43
|
|
|
26
44
|
Python **3.11+** required. After install, `java-codebase-rag --help` should print the CLI groups.
|
|
45
|
+
The package includes the CocoIndex lifecycle dependency used by `init`, `increment`, `reprocess`, and `erase`.
|
|
27
46
|
|
|
28
47
|
> **Stability disclaimer.** This package does **not** promise backward compatibility. MCP tool contracts, env vars, Lance/Kuzu schemas, config files, and Python APIs may change without a deprecation period. Track `main` and rebuild indexes when ontology or embedding settings change.
|
|
29
48
|
|
|
@@ -99,9 +118,9 @@ See [`mcp.json.example`](./mcp.json.example) for the same shape in `.mcp.json` (
|
|
|
99
118
|
|
|
100
119
|
Pick **one** of two options (not both — they cover the same navigation intents):
|
|
101
120
|
|
|
102
|
-
1. **[`docs/AGENT-GUIDE.md`](./docs/AGENT-GUIDE.md)** (recommended for most) — standalone MCP operating manual. Copy-paste the `BEGIN`/`END` block into your project's `QWEN.md`, `CLAUDE.md`, or `AGENTS.md`. Contains: five-tool reference, `NodeFilter` / edge taxonomy, ontology glossary, recovery playbook, and
|
|
121
|
+
1. **[`docs/AGENT-GUIDE.md`](./docs/AGENT-GUIDE.md)** (recommended for most) — standalone MCP operating manual. Copy-paste the `BEGIN`/`END` block into your project's `QWEN.md`, `CLAUDE.md`, or `AGENTS.md`. Contains: five-tool reference, `NodeFilter` / edge taxonomy, ontology glossary, recovery playbook, and navigation patterns. Self-contained — no external file dependencies.
|
|
103
122
|
|
|
104
|
-
2. **[`
|
|
123
|
+
2. **[`/explore-codebase`](./skills/explore-codebase/SKILL.md)** (for hosts with skill discovery) — single self-contained skill with the complete operating manual. If your MCP host supports skill discovery (Claude Code, Qwen Code, Cursor), load `/explore-codebase` to get the full tool reference, edge taxonomy, decision tree, and recovery playbook in one shot.
|
|
105
124
|
|
|
106
125
|
Also: **[`docs/MANUAL-VERIFICATION-CHECKLIST.md`](./docs/MANUAL-VERIFICATION-CHECKLIST.md)** — 7-phase agent-driven verification you run after indexing your real project.
|
|
107
126
|
|
|
@@ -121,7 +140,7 @@ Full schemas, `NodeFilter` / `EdgeFilter` semantics, and the hints contract live
|
|
|
121
140
|
|
|
122
141
|
### Three-layer architecture
|
|
123
142
|
|
|
124
|
-
Layer 1 (storage) → Layer 2 (5 MCP tools) → Layer 3 (
|
|
143
|
+
Layer 1 (storage) → Layer 2 (5 MCP tools) → Layer 3 (skill). The [`/explore-codebase`](./skills/explore-codebase/SKILL.md) skill provides the full operating manual for Layer 2. See the [architecture diagram in `skills/README.md`](./skills/README.md#three-layer-architecture).
|
|
125
144
|
|
|
126
145
|
---
|
|
127
146
|
|
|
@@ -164,7 +183,7 @@ Run `java-codebase-rag --help` to list grouped subcommands. Operator playbook wi
|
|
|
164
183
|
| [`docs/CONFIGURATION.md`](./docs/CONFIGURATION.md) | Environment variables, project YAML, graph ontology, brownfield overrides, ignore patterns. |
|
|
165
184
|
| [`docs/JAVA-CODEBASE-RAG-CLI.md`](./docs/JAVA-CODEBASE-RAG-CLI.md) | CLI operator playbook: workflows, exit codes, env alignment. |
|
|
166
185
|
| [`docs/EDGE-NAVIGATION.md`](./docs/EDGE-NAVIGATION.md) | MCP-traversable edges, directions, dot-key composition. |
|
|
167
|
-
| [`skills/`](./skills/) |
|
|
186
|
+
| [`skills/`](./skills/) | Single `/explore-codebase` skill — complete MCP operating manual for hosts with skill discovery (alternative to copy-pasting AGENT-GUIDE). See [`skills/README.md`](./skills/README.md). |
|
|
168
187
|
| [`docs/MANUAL-VERIFICATION-CHECKLIST.md`](./docs/MANUAL-VERIFICATION-CHECKLIST.md) | 7-phase agent-driven verification after indexing your project. |
|
|
169
188
|
| [`docs/CODEBASE_REQUIREMENTS.md`](./docs/CODEBASE_REQUIREMENTS.md) | Assumptions about your Java repo + per-file edit map for non-conforming codebases. |
|
|
170
189
|
| [`automation/cursor_propose_only/README.md`](./automation/cursor_propose_only/README.md) | Optional proposal orchestration workflow (single-command autopilot, planning bundles, automated execution/review loops). |
|
|
@@ -181,7 +200,7 @@ python3 -m venv .venv
|
|
|
181
200
|
.venv/bin/pip install -r requirements.txt
|
|
182
201
|
```
|
|
183
202
|
|
|
184
|
-
The `cocoindex` package
|
|
203
|
+
The `cocoindex` package powers lifecycle commands that run the indexer (`init`, `increment`, `reprocess`, `erase`). Search and MCP navigation do not invoke it directly.
|
|
185
204
|
|
|
186
205
|
The default embedding model is `sentence-transformers/all-MiniLM-L6-v2` (downloaded on first `init`). Override via the `EMBEDDING_MODEL` env var — see [`docs/CONFIGURATION.md` §1](./docs/CONFIGURATION.md#1-environment-variables).
|
|
187
206
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: java-codebase-rag
|
|
3
|
-
Version: 0.2.0
|
|
3
|
+
Version: 0.2.2
|
|
4
4
|
Summary: MCP server for semantic + structural search over Java codebases
|
|
5
5
|
Author: HumanBean17
|
|
6
6
|
License-Expression: MIT
|
|
@@ -18,6 +18,7 @@ Classifier: Topic :: Software Development :: Libraries
|
|
|
18
18
|
Requires-Python: >=3.11
|
|
19
19
|
Description-Content-Type: text/markdown
|
|
20
20
|
License-File: LICENSE
|
|
21
|
+
Requires-Dist: cocoindex[lancedb]<2,>=1.0.0a43
|
|
21
22
|
Requires-Dist: kuzu<0.12,>=0.11.3
|
|
22
23
|
Requires-Dist: lancedb<0.31,>=0.25.3
|
|
23
24
|
Requires-Dist: mcp<2,>=1.27.0
|
|
@@ -26,6 +27,7 @@ Requires-Dist: pathspec<2,>=1.0.4
|
|
|
26
27
|
Requires-Dist: pyarrow<24,>=23.0.1
|
|
27
28
|
Requires-Dist: PyYAML<7,>=6.0.3
|
|
28
29
|
Requires-Dist: sentence-transformers<6,>=5.4.0
|
|
30
|
+
Requires-Dist: transformers<=5.5.3,>=4.48.3
|
|
29
31
|
Requires-Dist: tree-sitter<0.26,>=0.25.2
|
|
30
32
|
Requires-Dist: tree-sitter-java<0.24,>=0.23.5
|
|
31
33
|
Requires-Dist: unidiff<1,>=0.7.3
|
|
@@ -50,6 +52,24 @@ For the design rationale, the GPS metaphor, and the full ontology, see [`docs/pa
|
|
|
50
52
|
|
|
51
53
|
---
|
|
52
54
|
|
|
55
|
+
## Why this exists
|
|
56
|
+
|
|
57
|
+
Generic code-search tools (grep, ctags, vector-only RAG) hit a ceiling on real Java microservice estates: they find files but lose the structure that makes a Spring/JAX-RS system navigable. This project is built around five choices that target that gap.
|
|
58
|
+
|
|
59
|
+
- **Hybrid RAG + GraphRAG, not either-or.** Semantic recall (LanceDB chunk vectors) and structural navigation (Kuzu property graph) are composed in one surface. `search` finds candidate nodes by meaning; `neighbors` walks the exact edge you care about (`CALLS`, `IMPLEMENTS`, `INJECTS`, `DECLARES_ROUTE`, …). The agent picks the right primitive per step instead of being forced into pure-vector or pure-symbol search.
|
|
60
|
+
|
|
61
|
+
- **A Java-tuned role model.** Symbols are labelled with stereotypes inferred from Spring and JAX-RS conventions — `CONTROLLER`, `SERVICE`, `REPOSITORY`, `CLIENT`, `PRODUCER`, `MAPPER`, `DTO`. Agents can ask "list controllers" or "who injects this repository" directly, instead of grep-ing for `@RestController` and hoping for the best. Roles drive both filtering (`find` with a `NodeFilter`) and ranking.
|
|
62
|
+
|
|
63
|
+
- **Ranking specialized for Java codebases.** The composite ranker is aware of role, microservice, and FQN structure — not a generic BM25. A search for `"chat ingress"` surfaces controllers before utility classes; a search scoped to one microservice doesn't drown in matches from the other 19. Defaults are tuned on the bank-chat fixture and exposed in `docs/CONFIGURATION.md` for per-repo overrides.
|
|
64
|
+
|
|
65
|
+
- **Cross-service resolution + system-level navigation.** `HTTP_CALLS` and `ASYNC_CALLS` edges connect Clients and Producers in one microservice to Routes and Handlers in another, resolved at index time from URL/topic strings + Spring `@FeignClient` / `RestTemplate` conventions. `/who-hits-route`, `/trace-request-flow`, and `/impact-of` use these to answer questions a single-service tool fundamentally can't — "who calls this REST endpoint from outside this service", "trace this Kafka message end-to-end", "if I change this DTO, which services break".
|
|
66
|
+
|
|
67
|
+
- **Brownfield annotations as a first-class override.** Real Java estates have hand-rolled HTTP clients, dynamic topic names, reflection-heavy routing. `@CodebaseHttpRoute`, `@CodebaseAsyncRoute`, `@CodebaseHttpClient`, and `@CodebaseProducer` let you pin the truth in source. They have **exclusive priority** — when a symbol is annotated, framework-convention inference is skipped entirely. You get a correct graph on legacy code without rewriting it.
|
|
68
|
+
|
|
69
|
+
The rest of this README is the install, walkthrough, and tool cheat sheet for putting that to work.
|
|
70
|
+
|
|
71
|
+
---
|
|
72
|
+
|
|
53
73
|
## Install
|
|
54
74
|
|
|
55
75
|
```bash
|
|
@@ -57,6 +77,7 @@ pip install java-codebase-rag
|
|
|
57
77
|
```
|
|
58
78
|
|
|
59
79
|
Python **3.11+** required. After install, `java-codebase-rag --help` should print the CLI groups.
|
|
80
|
+
The package includes the CocoIndex lifecycle dependency used by `init`, `increment`, `reprocess`, and `erase`.
|
|
60
81
|
|
|
61
82
|
> **Stability disclaimer.** This package does **not** promise backward compatibility. MCP tool contracts, env vars, Lance/Kuzu schemas, config files, and Python APIs may change without a deprecation period. Track `main` and rebuild indexes when ontology or embedding settings change.
|
|
62
83
|
|
|
@@ -132,9 +153,9 @@ See [`mcp.json.example`](./mcp.json.example) for the same shape in `.mcp.json` (
|
|
|
132
153
|
|
|
133
154
|
Pick **one** of two options (not both — they cover the same navigation intents):
|
|
134
155
|
|
|
135
|
-
1. **[`docs/AGENT-GUIDE.md`](./docs/AGENT-GUIDE.md)** (recommended for most) — standalone MCP operating manual. Copy-paste the `BEGIN`/`END` block into your project's `QWEN.md`, `CLAUDE.md`, or `AGENTS.md`. Contains: five-tool reference, `NodeFilter` / edge taxonomy, ontology glossary, recovery playbook, and
|
|
156
|
+
1. **[`docs/AGENT-GUIDE.md`](./docs/AGENT-GUIDE.md)** (recommended for most) — standalone MCP operating manual. Copy-paste the `BEGIN`/`END` block into your project's `QWEN.md`, `CLAUDE.md`, or `AGENTS.md`. Contains: five-tool reference, `NodeFilter` / edge taxonomy, ontology glossary, recovery playbook, and navigation patterns. Self-contained — no external file dependencies.
|
|
136
157
|
|
|
137
|
-
2. **[`
|
|
158
|
+
2. **[`/explore-codebase`](./skills/explore-codebase/SKILL.md)** (for hosts with skill discovery) — single self-contained skill with the complete operating manual. If your MCP host supports skill discovery (Claude Code, Qwen Code, Cursor), load `/explore-codebase` to get the full tool reference, edge taxonomy, decision tree, and recovery playbook in one shot.
|
|
138
159
|
|
|
139
160
|
Also: **[`docs/MANUAL-VERIFICATION-CHECKLIST.md`](./docs/MANUAL-VERIFICATION-CHECKLIST.md)** — 7-phase agent-driven verification you run after indexing your real project.
|
|
140
161
|
|
|
@@ -154,7 +175,7 @@ Full schemas, `NodeFilter` / `EdgeFilter` semantics, and the hints contract live
|
|
|
154
175
|
|
|
155
176
|
### Three-layer architecture
|
|
156
177
|
|
|
157
|
-
Layer 1 (storage) → Layer 2 (5 MCP tools) → Layer 3 (
|
|
178
|
+
Layer 1 (storage) → Layer 2 (5 MCP tools) → Layer 3 (skill). The [`/explore-codebase`](./skills/explore-codebase/SKILL.md) skill provides the full operating manual for Layer 2. See the [architecture diagram in `skills/README.md`](./skills/README.md#three-layer-architecture).
|
|
158
179
|
|
|
159
180
|
---
|
|
160
181
|
|
|
@@ -197,7 +218,7 @@ Run `java-codebase-rag --help` to list grouped subcommands. Operator playbook wi
|
|
|
197
218
|
| [`docs/CONFIGURATION.md`](./docs/CONFIGURATION.md) | Environment variables, project YAML, graph ontology, brownfield overrides, ignore patterns. |
|
|
198
219
|
| [`docs/JAVA-CODEBASE-RAG-CLI.md`](./docs/JAVA-CODEBASE-RAG-CLI.md) | CLI operator playbook: workflows, exit codes, env alignment. |
|
|
199
220
|
| [`docs/EDGE-NAVIGATION.md`](./docs/EDGE-NAVIGATION.md) | MCP-traversable edges, directions, dot-key composition. |
|
|
200
|
-
| [`skills/`](./skills/) |
|
|
221
|
+
| [`skills/`](./skills/) | Single `/explore-codebase` skill — complete MCP operating manual for hosts with skill discovery (alternative to copy-pasting AGENT-GUIDE). See [`skills/README.md`](./skills/README.md). |
|
|
201
222
|
| [`docs/MANUAL-VERIFICATION-CHECKLIST.md`](./docs/MANUAL-VERIFICATION-CHECKLIST.md) | 7-phase agent-driven verification after indexing your project. |
|
|
202
223
|
| [`docs/CODEBASE_REQUIREMENTS.md`](./docs/CODEBASE_REQUIREMENTS.md) | Assumptions about your Java repo + per-file edit map for non-conforming codebases. |
|
|
203
224
|
| [`automation/cursor_propose_only/README.md`](./automation/cursor_propose_only/README.md) | Optional proposal orchestration workflow (single-command autopilot, planning bundles, automated execution/review loops). |
|
|
@@ -214,7 +235,7 @@ python3 -m venv .venv
|
|
|
214
235
|
.venv/bin/pip install -r requirements.txt
|
|
215
236
|
```
|
|
216
237
|
|
|
217
|
-
The `cocoindex` package
|
|
238
|
+
The `cocoindex` package powers lifecycle commands that run the indexer (`init`, `increment`, `reprocess`, `erase`). Search and MCP navigation do not invoke it directly.
|
|
218
239
|
|
|
219
240
|
The default embedding model is `sentence-transformers/all-MiniLM-L6-v2` (downloaded on first `init`). Override via the `EMBEDDING_MODEL` env var — see [`docs/CONFIGURATION.md` §1](./docs/CONFIGURATION.md#1-environment-variables).
|
|
220
241
|
|
|
@@ -61,6 +61,7 @@ tests/test_mcp_v2.py
|
|
|
61
61
|
tests/test_mcp_v2_compose.py
|
|
62
62
|
tests/test_meta_chain_core.py
|
|
63
63
|
tests/test_outgoing_call_extraction.py
|
|
64
|
+
tests/test_packaging_metadata.py
|
|
64
65
|
tests/test_path_filtering.py
|
|
65
66
|
tests/test_pr_analysis.py
|
|
66
67
|
tests/test_resolve_routes_messaging_layer_c.py
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
cocoindex[lancedb]<2,>=1.0.0a43
|
|
1
2
|
kuzu<0.12,>=0.11.3
|
|
2
3
|
lancedb<0.31,>=0.25.3
|
|
3
4
|
mcp<2,>=1.27.0
|
|
@@ -6,6 +7,7 @@ pathspec<2,>=1.0.4
|
|
|
6
7
|
pyarrow<24,>=23.0.1
|
|
7
8
|
PyYAML<7,>=6.0.3
|
|
8
9
|
sentence-transformers<6,>=5.4.0
|
|
10
|
+
transformers<=5.5.3,>=4.48.3
|
|
9
11
|
tree-sitter<0.26,>=0.25.2
|
|
10
12
|
tree-sitter-java<0.24,>=0.23.5
|
|
11
13
|
unidiff<1,>=0.7.3
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "java-codebase-rag"
|
|
7
|
-
version = "0.2.0"
|
|
7
|
+
version = "0.2.2"
|
|
8
8
|
description = "MCP server for semantic + structural search over Java codebases"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
requires-python = ">=3.11"
|
|
@@ -23,6 +23,7 @@ classifiers = [
|
|
|
23
23
|
"Topic :: Software Development :: Libraries",
|
|
24
24
|
]
|
|
25
25
|
dependencies = [
|
|
26
|
+
"cocoindex[lancedb]>=1.0.0a43,<2",
|
|
26
27
|
"kuzu>=0.11.3,<0.12",
|
|
27
28
|
"lancedb>=0.25.3,<0.31",
|
|
28
29
|
"mcp>=1.27.0,<2",
|
|
@@ -31,6 +32,7 @@ dependencies = [
|
|
|
31
32
|
"pyarrow>=23.0.1,<24",
|
|
32
33
|
"PyYAML>=6.0.3,<7",
|
|
33
34
|
"sentence-transformers>=5.4.0,<6",
|
|
35
|
+
"transformers>=4.48.3,<=5.5.3",
|
|
34
36
|
"tree-sitter>=0.25.2,<0.26",
|
|
35
37
|
"tree-sitter-java>=0.23.5,<0.24",
|
|
36
38
|
"unidiff>=0.7.3,<1",
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
"""Static validation for skills/ directory SKILL.md files.
|
|
2
|
+
|
|
3
|
+
Imports allowlists from production code (mcp_v2, java_ontology) — not
|
|
4
|
+
hand-maintained lists. Validates:
|
|
5
|
+
- frontmatter (name + description present)
|
|
6
|
+
- MCP tool names referenced in skill body
|
|
7
|
+
- find kind values
|
|
8
|
+
- direction values
|
|
9
|
+
- edge_types values
|
|
10
|
+
- worked example section present
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import re
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import get_args
|
|
18
|
+
|
|
19
|
+
from java_ontology import NodeKind
|
|
20
|
+
from mcp_v2 import ComposedEdgeType, EdgeType
|
|
21
|
+
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
# Allowlists sourced from production code
|
|
24
|
+
# ---------------------------------------------------------------------------
|
|
25
|
+
|
|
26
|
+
_VALID_TOOLS: frozenset[str] = frozenset(["search", "find", "describe", "neighbors", "resolve"])
|
|
27
|
+
|
|
28
|
+
_VALID_KINDS: frozenset[str] = frozenset(k.lower() for k in get_args(NodeKind))
|
|
29
|
+
|
|
30
|
+
_VALID_DIRECTIONS: frozenset[str] = frozenset(["in", "out"])
|
|
31
|
+
|
|
32
|
+
_ALL_EDGE_TYPES: frozenset[str] = frozenset(get_args(EdgeType)) | frozenset(get_args(ComposedEdgeType))
|
|
33
|
+
|
|
34
|
+
# ---------------------------------------------------------------------------
|
|
35
|
+
# Helpers
|
|
36
|
+
# ---------------------------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
SKILLS_DIR = Path(__file__).resolve().parent.parent / "skills"
|
|
39
|
+
SKILL_NAME = "explore-codebase"
|
|
40
|
+
EXPECTED_SKILL_DIRS = {"explore-codebase", "navigate-codebase"}
|
|
41
|
+
SKILL_PATH = SKILLS_DIR / SKILL_NAME / "SKILL.md"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def _parse_frontmatter(text: str) -> dict[str, str]:
    """Parse simple YAML frontmatter (flat ``key: value`` pairs only).

    The frontmatter is the block fenced by ``---`` lines at the very start of
    *text*. This is deliberately not a YAML parser: nested mappings, lists and
    multi-line scalars are not interpreted.

    Args:
        text: Full file contents, starting with the opening ``---`` line.

    Returns:
        Mapping of stripped keys to stripped values, or an empty dict when
        *text* does not start with a frontmatter block.
    """
    # ``\r?`` keeps the match working when the text was read without newline
    # translation (CRLF files).
    m = re.match(r"^---\r?\n(.*?)\r?\n---", text, re.DOTALL)
    if not m:
        return {}
    result: dict[str, str] = {}
    for line in m.group(1).splitlines():
        stripped = line.strip()
        # YAML comments may contain a colon; they must not become keys.
        if not stripped or stripped.startswith("#"):
            continue
        if ":" not in stripped:
            continue
        # Split on the first colon only so values may themselves contain ':'.
        key, _, value = stripped.partition(":")
        value = value.strip()
        # A quoted scalar (`name: "x"`) denotes the string x, not '"x"'.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            value = value[1:-1]
        result[key.strip()] = value
    return result
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def _read_skill() -> tuple[dict[str, str], str]:
    """Load the explore-codebase SKILL.md and split it into (frontmatter, body).

    Returns:
        A pair of the parsed frontmatter mapping and the text that follows
        the leading ``---`` fenced block.
    """
    raw = SKILL_PATH.read_text(encoding="utf-8")
    # Remove only the first fenced block, plus any blank lines right after it.
    remainder = re.sub(r"^---\n.*?\n---\n*", "", raw, count=1, flags=re.DOTALL)
    return _parse_frontmatter(raw), remainder
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _extract_tool_refs(body: str) -> set[str]:
    """Collect the MCP tool names mentioned in *body*.

    Two spellings are recognised: a tool name that opens a backtick span, and
    a bare tool name followed by ``(`` or ``{``. Only the five names listed in
    the patterns can ever be returned.
    """
    patterns = (
        r"`(search|find|describe|neighbors|resolve)\b",
        r"\b(search|find|describe|neighbors|resolve)\s*[\(\{]",
    )
    return {
        hit.group(1)
        for pattern in patterns
        for hit in re.finditer(pattern, body)
    }
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _extract_kind_refs(body: str) -> set[str]:
    """Collect lower-cased ``kind=<value>`` values that are known node kinds.

    Values absent from ``_VALID_KINDS`` are dropped here, so the result is
    always a subset of that allowlist.
    """
    candidates = (
        hit.group(1).lower()
        for hit in re.finditer(r'kind\s*=\s*["\']?(\w+)["\']?', body)
    )
    return {kind for kind in candidates if kind in _VALID_KINDS}
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def _extract_direction_refs(body: str) -> set[str]:
    """Collect the ``direction: in`` / ``direction: out`` values found in *body*.

    The pattern captures only ``in`` or ``out`` (optionally quoted), so no
    other value can appear in the result.
    """
    # With a single capture group, findall yields the captured strings directly.
    return set(re.findall(r'direction\s*:\s*["\']?(in|out)["\']?', body))
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _extract_edge_type_refs(body: str) -> set[str]:
    """Collect known edge-type names referenced in *body*.

    Candidates come from two places: every double-quoted name inside an
    ``edge_types: [...]`` list, and the first double-quoted name directly
    after any ``[``. Names absent from ``_ALL_EDGE_TYPES`` are dropped, so the
    result is always a subset of that allowlist.
    """
    candidates: list[str] = []
    for listing in re.findall(r'edge_types\s*:\s*\[([^\]]+)\]', body):
        candidates.extend(re.findall(r'"(\w[\w.]*)"', listing))
    candidates.extend(re.findall(r'\["(\w[\w.]*)"', body))
    return {name for name in candidates if name in _ALL_EDGE_TYPES}
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
# ---------------------------------------------------------------------------
|
|
109
|
+
# Tests
|
|
110
|
+
# ---------------------------------------------------------------------------
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class TestSkillFrontmatter:
    """The explore-codebase SKILL.md must exist and carry usable frontmatter."""

    def test_skill_file_exists(self):
        """The skill file is present at ``SKILL_PATH``."""
        assert SKILL_PATH.is_file(), f"Missing {SKILL_PATH}"

    def test_frontmatter_has_name_and_description(self):
        """Frontmatter names the skill correctly and describes it in 20+ chars."""
        frontmatter = _read_skill()[0]
        assert "name" in frontmatter, "SKILL.md missing frontmatter 'name'"
        declared = frontmatter["name"]
        assert declared == SKILL_NAME, f"name={declared!r}, expected {SKILL_NAME!r}"
        assert "description" in frontmatter, "SKILL.md missing frontmatter 'description'"
        description_length = len(frontmatter["description"])
        assert description_length >= 20, (
            f"description too short ({description_length} chars)"
        )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class TestMCPToolReferences:
    """Tool names mentioned in the skill body are checked against ``_VALID_TOOLS``."""

    def test_tool_refs_are_valid(self):
        """No referenced tool falls outside the allowlist."""
        body = _read_skill()[1]
        unknown = _extract_tool_refs(body) - _VALID_TOOLS
        assert not unknown, f"SKILL.md references invalid tools: {unknown}"

    def test_skill_references_all_five_tools(self):
        """Every allowlisted tool is mentioned at least once."""
        body = _read_skill()[1]
        unmentioned = _VALID_TOOLS - _extract_tool_refs(body)
        assert not unmentioned, f"SKILL.md does not reference all 5 tools, missing: {unmentioned}"
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
class TestKindAndEdgeReferences:
    """Kind, direction, and edge_type values are checked against the allowlists.

    NOTE(review): the ``_extract_*`` helpers used here only return values that
    are already in the corresponding allowlist (or, for directions, only
    ``in``/``out``), so as written these checks cannot fail.
    """

    def test_kind_refs_are_valid(self):
        """Referenced find kinds are all in ``_VALID_KINDS``."""
        body = _read_skill()[1]
        unknown = _extract_kind_refs(body) - _VALID_KINDS
        assert not unknown, f"SKILL.md references invalid find kinds: {unknown}"

    def test_direction_refs_are_valid(self):
        """Referenced directions are all in ``_VALID_DIRECTIONS``."""
        body = _read_skill()[1]
        unknown = _extract_direction_refs(body) - _VALID_DIRECTIONS
        assert not unknown, f"SKILL.md references invalid directions: {unknown}"

    def test_edge_type_refs_are_valid(self):
        """Referenced edge types are all in ``_ALL_EDGE_TYPES``."""
        body = _read_skill()[1]
        unknown = _extract_edge_type_refs(body) - _ALL_EDGE_TYPES
        assert not unknown, f"SKILL.md references invalid edge_types: {unknown}"
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
class TestBodyStructure:
    """The skill body must contain each of the required section headings."""

    @staticmethod
    def _require_heading(heading: str) -> None:
        """Fail unless *heading* occurs verbatim somewhere in the skill body."""
        body = _read_skill()[1]
        assert heading in body, f"SKILL.md missing '{heading}'"

    def test_has_worked_example(self):
        self._require_heading("## Worked example")

    def test_has_decision_tree(self):
        self._require_heading("## Decision tree")

    def test_has_recovery_playbook(self):
        self._require_heading("## Recovery playbook")

    def test_has_edge_taxonomy(self):
        self._require_heading("## Edge taxonomy")

    def test_has_navigation_patterns(self):
        self._require_heading("## Common navigation patterns")

    def test_has_reasoning_preamble(self):
        self._require_heading("## Forced reasoning preamble")
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
class TestDirectoryIntegrity:
    """The skills/ directory must have the expected layout."""

    def test_skill_dir_exists(self):
        """The directory for ``SKILL_NAME`` is present."""
        skill_dir = SKILLS_DIR / SKILL_NAME
        assert skill_dir.is_dir(), f"skills/{SKILL_NAME}/ missing"

    def test_no_tier_dirs(self):
        """Old tier-1/ and tier-2/ directories must not exist."""
        for tier in ("tier-1", "tier-2"):
            leftover = SKILLS_DIR / tier
            assert not leftover.is_dir(), f"Old skills/{tier}/ still exists — remove it"

    def test_readme_exists(self):
        """skills/README.md is present."""
        readme = SKILLS_DIR / "README.md"
        assert readme.is_file(), "skills/README.md missing"

    def test_no_other_skill_dirs(self):
        """Only documented consumer skill directories should exist."""
        # A "skill directory" is any direct child folder that holds a SKILL.md.
        skill_dirs = set()
        for entry in SKILLS_DIR.iterdir():
            if entry.is_dir() and (entry / "SKILL.md").exists():
                skill_dirs.add(entry.name)
        assert skill_dirs == EXPECTED_SKILL_DIRS, (
            f"Expected only documented skills {EXPECTED_SKILL_DIRS}, found: {skill_dirs}"
        )
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class TestAgentGuideConsistency:
    """AGENT-GUIDE.md copy-paste block must be self-contained."""

    # HTML comment markers that fence the copy-paste block in the guide.
    _BEGIN_MARKER = "<!-- BEGIN java-codebase-rag MCP guide -->"
    _END_MARKER = "<!-- END java-codebase-rag MCP guide -->"

    @classmethod
    def _copy_paste_block(cls) -> str:
        """Return the guide text from the BEGIN marker up to the END marker.

        Fails the calling test when either marker is missing or when END
        comes before BEGIN. In that second case the slice would be empty and
        any "must not contain" check on it would pass vacuously.
        """
        guide = Path(__file__).resolve().parent.parent / "docs" / "AGENT-GUIDE.md"
        text = guide.read_text(encoding="utf-8")
        begin = text.find(cls._BEGIN_MARKER)
        end = text.find(cls._END_MARKER)
        assert begin != -1 and end != -1, "AGENT-GUIDE.md missing BEGIN/END markers"
        assert begin < end, "AGENT-GUIDE.md END marker appears before BEGIN marker"
        return text[begin:end]

    def test_guide_has_navigation_patterns_table(self):
        """The copy-paste block must include a navigation patterns section."""
        block = self._copy_paste_block()
        assert "### Common navigation patterns" in block, (
            "AGENT-GUIDE.md copy-paste block missing '### Common navigation patterns'"
        )
        for pattern in ["CALLS", "EXPOSES", "IMPLEMENTS", "INJECTS"]:
            assert pattern in block, f"AGENT-GUIDE.md copy-paste block missing {pattern} pattern"

    def test_guide_copy_block_does_not_reference_skills_dir(self):
        """The copy-paste block must not reference skills/ — it won't exist
        in the consumer's project."""
        block = self._copy_paste_block()
        assert "skills/" not in block, (
            "AGENT-GUIDE.md copy-paste block references skills/ — "
            "this path won't resolve in a consumer project. "
            "Keep skills/ references outside the copy-paste block."
        )
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import tomllib
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_published_package_installs_cocoindex_for_lifecycle_commands() -> None:
    """pyproject.toml must declare ``cocoindex`` with the ``lancedb`` extra.

    Reads the ``[project].dependencies`` list from the pyproject.toml one
    directory above this test file and checks that at least one entry starts
    with ``cocoindex`` and that one of those carries ``[lancedb]``.
    """
    pyproject = Path(__file__).resolve().parents[1] / "pyproject.toml"
    # TOML is UTF-8 by specification; a binary handle lets tomllib decode it
    # itself instead of depending on the platform's locale encoding.
    with pyproject.open("rb") as fh:
        data = tomllib.load(fh)
    deps = data["project"]["dependencies"]

    cocoindex_deps = [dep for dep in deps if dep.startswith("cocoindex")]

    assert cocoindex_deps, f"pyproject.toml declares no cocoindex dependency: {deps}"
    assert any("[lancedb]" in dep for dep in cocoindex_deps), (
        f"cocoindex dependency lacks the [lancedb] extra: {cocoindex_deps}"
    )
|
|
@@ -1,318 +0,0 @@
|
|
|
1
|
-
"""Static validation for skills/ directory SKILL.md files.
|
|
2
|
-
|
|
3
|
-
Imports allowlists from production code (mcp_v2, java_ontology) — not
|
|
4
|
-
hand-maintained lists. Validates:
|
|
5
|
-
- frontmatter (name + description present)
|
|
6
|
-
- MCP tool names referenced in skill bodies
|
|
7
|
-
- find kind values
|
|
8
|
-
- direction values
|
|
9
|
-
- edge_types values
|
|
10
|
-
- Tier 2 body structure (stop conditions, recursion limit)
|
|
11
|
-
|
|
12
|
-
Known gap (intentional — see AGENT-SKILLS-AND-COMMANDS-PROPOSE §11):
|
|
13
|
-
- edge_filter parameters (callee_declaring_role, min_confidence,
|
|
14
|
-
exclude_callee_declaring_roles, dedup_calls, include_unresolved)
|
|
15
|
-
referenced in /mini-map are NOT validated against mcp_v2 parameter
|
|
16
|
-
definitions. The static validator does not parse edge_filter dicts.
|
|
17
|
-
On re-index, manually verify /mini-map against the MCP surface.
|
|
18
|
-
"""
|
|
19
|
-
|
|
20
|
-
from __future__ import annotations
|
|
21
|
-
|
|
22
|
-
import re
|
|
23
|
-
from pathlib import Path
|
|
24
|
-
from typing import get_args
|
|
25
|
-
|
|
26
|
-
import pytest
|
|
27
|
-
|
|
28
|
-
from java_ontology import NodeKind
|
|
29
|
-
from mcp_v2 import ComposedEdgeType, EdgeType
|
|
30
|
-
|
|
31
|
-
# ---------------------------------------------------------------------------
|
|
32
|
-
# Allowlists sourced from production code
|
|
33
|
-
# ---------------------------------------------------------------------------
|
|
34
|
-
|
|
35
|
-
_VALID_TOOLS: frozenset[str] = frozenset(["search", "find", "describe", "neighbors", "resolve"])
|
|
36
|
-
|
|
37
|
-
_VALID_KINDS: frozenset[str] = frozenset(k.lower() for k in get_args(NodeKind))
|
|
38
|
-
|
|
39
|
-
_VALID_DIRECTIONS: frozenset[str] = frozenset(["in", "out"])
|
|
40
|
-
|
|
41
|
-
_ALL_EDGE_TYPES: frozenset[str] = frozenset(get_args(EdgeType)) | frozenset(get_args(ComposedEdgeType))
|
|
42
|
-
|
|
43
|
-
# ---------------------------------------------------------------------------
|
|
44
|
-
# Helpers
|
|
45
|
-
# ---------------------------------------------------------------------------
|
|
46
|
-
|
|
47
|
-
SKILLS_DIR = Path(__file__).resolve().parent.parent / "skills"
|
|
48
|
-
|
|
49
|
-
TIER1_NAMES = [
|
|
50
|
-
"nl", "controllers", "routes", "clients", "producers",
|
|
51
|
-
"callers", "callees", "handlers", "who-hits-route",
|
|
52
|
-
"implements", "injects",
|
|
53
|
-
]
|
|
54
|
-
|
|
55
|
-
TIER2_NAMES = [
|
|
56
|
-
"explain-feature", "impact-of", "trace-request-flow", "mini-map",
|
|
57
|
-
]
|
|
58
|
-
|
|
59
|
-
ALL_SKILL_NAMES = TIER1_NAMES + TIER2_NAMES
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def _parse_frontmatter(text: str) -> dict[str, str]:
|
|
63
|
-
"""Parse simple YAML frontmatter (key: value pairs only)."""
|
|
64
|
-
m = re.match(r"^---\n(.*?)\n---", text, re.DOTALL)
|
|
65
|
-
if not m:
|
|
66
|
-
return {}
|
|
67
|
-
result: dict[str, str] = {}
|
|
68
|
-
for line in m.group(1).splitlines():
|
|
69
|
-
if ":" in line:
|
|
70
|
-
key, _, value = line.partition(":")
|
|
71
|
-
result[key.strip()] = value.strip()
|
|
72
|
-
return result
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def _extract_tool_refs(body: str) -> set[str]:
|
|
76
|
-
"""Extract tool names referenced in MCP call patterns."""
|
|
77
|
-
# Match patterns like `search(...)`, `find(kind=...)`, `describe(id=...)`,
|
|
78
|
-
# `neighbors({ids:`, `resolve(identifier=`, also backtick-wrapped names.
|
|
79
|
-
refs: set[str] = set()
|
|
80
|
-
for m in re.finditer(r"`(search|find|describe|neighbors|resolve)\b", body):
|
|
81
|
-
refs.add(m.group(1))
|
|
82
|
-
# Also catch patterns like search(query=...) find(kind=...) without backticks
|
|
83
|
-
for m in re.finditer(r"\b(search|find|describe|neighbors|resolve)\s*[\(\{]", body):
|
|
84
|
-
refs.add(m.group(1))
|
|
85
|
-
return refs
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
def _extract_kind_refs(body: str) -> set[str]:
|
|
89
|
-
"""Extract find kind values from skill body."""
|
|
90
|
-
refs: set[str] = set()
|
|
91
|
-
for m in re.finditer(r'kind\s*=\s*["\']?(\w+)["\']?', body):
|
|
92
|
-
val = m.group(1).lower()
|
|
93
|
-
if val in _VALID_KINDS:
|
|
94
|
-
refs.add(val)
|
|
95
|
-
return refs
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
def _extract_direction_refs(body: str) -> set[str]:
|
|
99
|
-
"""Extract direction values from skill body."""
|
|
100
|
-
refs: set[str] = set()
|
|
101
|
-
for m in re.finditer(r'direction\s*:\s*["\']?(in|out)["\']?', body):
|
|
102
|
-
refs.add(m.group(1))
|
|
103
|
-
return refs
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
def _extract_edge_type_refs(body: str) -> set[str]:
|
|
107
|
-
"""Extract edge_types values referenced in skill body."""
|
|
108
|
-
refs: set[str] = set()
|
|
109
|
-
# Match edge_types lists: ["CALLS"] or ["HTTP_CALLS","ASYNC_CALLS","EXPOSES"]
|
|
110
|
-
for m in re.finditer(r'edge_types\s*:\s*\[([^\]]+)\]', body):
|
|
111
|
-
inner = m.group(1)
|
|
112
|
-
for val in re.findall(r'"(\w[\w.]*)"', inner):
|
|
113
|
-
if val in _ALL_EDGE_TYPES:
|
|
114
|
-
refs.add(val)
|
|
115
|
-
# Also match quoted edge names in backticked patterns
|
|
116
|
-
for m in re.finditer(r'\["(\w[\w.]*)"', body):
|
|
117
|
-
val = m.group(1)
|
|
118
|
-
if val in _ALL_EDGE_TYPES:
|
|
119
|
-
refs.add(val)
|
|
120
|
-
return refs
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def _read_skill(name: str) -> tuple[dict[str, str], str]:
|
|
124
|
-
"""Read a skill's SKILL.md and return (frontmatter, body)."""
|
|
125
|
-
path = SKILLS_DIR / name / "SKILL.md"
|
|
126
|
-
text = path.read_text(encoding="utf-8")
|
|
127
|
-
fm = _parse_frontmatter(text)
|
|
128
|
-
# Body is everything after the closing ---
|
|
129
|
-
body = re.sub(r"^---\n.*?\n---\n*", "", text, count=1, flags=re.DOTALL)
|
|
130
|
-
return fm, body
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
# ---------------------------------------------------------------------------
|
|
134
|
-
# Parametrized test ids
|
|
135
|
-
# ---------------------------------------------------------------------------
|
|
136
|
-
|
|
137
|
-
@pytest.fixture(params=ALL_SKILL_NAMES, ids=lambda n: f"skill:{n}")
|
|
138
|
-
def skill_name(request):
|
|
139
|
-
return request.param
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
# ---------------------------------------------------------------------------
|
|
143
|
-
# Tests
|
|
144
|
-
# ---------------------------------------------------------------------------
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
class TestSkillFrontmatter:
|
|
148
|
-
"""Every SKILL.md must have valid frontmatter."""
|
|
149
|
-
|
|
150
|
-
@pytest.mark.parametrize("name", ALL_SKILL_NAMES)
|
|
151
|
-
def test_frontmatter_has_name_and_description(self, name: str):
|
|
152
|
-
fm, _ = _read_skill(name)
|
|
153
|
-
assert "name" in fm, f"skills/{name}/SKILL.md missing frontmatter 'name'"
|
|
154
|
-
assert fm["name"] == name, f"skills/{name}/SKILL.md: name={fm['name']!r}, expected {name!r}"
|
|
155
|
-
assert "description" in fm, f"skills/{name}/SKILL.md missing frontmatter 'description'"
|
|
156
|
-
assert len(fm["description"]) >= 20, (
|
|
157
|
-
f"skills/{name}/SKILL.md description too short ({len(fm['description'])} chars)"
|
|
158
|
-
)
|
|
159
|
-
|
|
160
|
-
@pytest.mark.parametrize("name", ALL_SKILL_NAMES)
|
|
161
|
-
def test_skill_file_exists(self, name: str):
|
|
162
|
-
path = SKILLS_DIR / name / "SKILL.md"
|
|
163
|
-
assert path.is_file(), f"Missing skills/{name}/SKILL.md"
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
class TestMCPToolReferences:
|
|
167
|
-
"""Tool names in skill bodies must be valid MCP navigation tools."""
|
|
168
|
-
|
|
169
|
-
@pytest.mark.parametrize("name", ALL_SKILL_NAMES)
|
|
170
|
-
def test_tool_refs_are_valid(self, name: str):
|
|
171
|
-
_, body = _read_skill(name)
|
|
172
|
-
refs = _extract_tool_refs(body)
|
|
173
|
-
invalid = refs - _VALID_TOOLS
|
|
174
|
-
assert not invalid, f"skills/{name}/SKILL.md references invalid tools: {invalid}"
|
|
175
|
-
|
|
176
|
-
@pytest.mark.parametrize("name", ALL_SKILL_NAMES)
|
|
177
|
-
def test_skill_references_at_least_one_tool(self, name: str):
|
|
178
|
-
_, body = _read_skill(name)
|
|
179
|
-
refs = _extract_tool_refs(body)
|
|
180
|
-
assert refs, f"skills/{name}/SKILL.md references no MCP tools"
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
class TestKindAndEdgeReferences:
|
|
184
|
-
"""Kind, direction, and edge_type values must match production allowlists."""
|
|
185
|
-
|
|
186
|
-
@pytest.mark.parametrize("name", ALL_SKILL_NAMES)
|
|
187
|
-
def test_kind_refs_are_valid(self, name: str):
|
|
188
|
-
_, body = _read_skill(name)
|
|
189
|
-
refs = _extract_kind_refs(body)
|
|
190
|
-
invalid = refs - _VALID_KINDS
|
|
191
|
-
assert not invalid, f"skills/{name}/SKILL.md references invalid find kinds: {invalid}"
|
|
192
|
-
|
|
193
|
-
@pytest.mark.parametrize("name", ALL_SKILL_NAMES)
|
|
194
|
-
def test_direction_refs_are_valid(self, name: str):
|
|
195
|
-
_, body = _read_skill(name)
|
|
196
|
-
refs = _extract_direction_refs(body)
|
|
197
|
-
invalid = refs - _VALID_DIRECTIONS
|
|
198
|
-
assert not invalid, f"skills/{name}/SKILL.md references invalid directions: {invalid}"
|
|
199
|
-
|
|
200
|
-
@pytest.mark.parametrize("name", ALL_SKILL_NAMES)
|
|
201
|
-
def test_edge_type_refs_are_valid(self, name: str):
|
|
202
|
-
_, body = _read_skill(name)
|
|
203
|
-
refs = _extract_edge_type_refs(body)
|
|
204
|
-
invalid = refs - _ALL_EDGE_TYPES
|
|
205
|
-
assert not invalid, f"skills/{name}/SKILL.md references invalid edge_types: {invalid}"
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
class TestTier2BodyStructure:
|
|
209
|
-
"""Tier 2 skills must have stop conditions and recursion limits."""
|
|
210
|
-
|
|
211
|
-
@pytest.mark.parametrize("name", TIER2_NAMES)
|
|
212
|
-
def test_has_stop_conditions(self, name: str):
|
|
213
|
-
_, body = _read_skill(name)
|
|
214
|
-
assert "## Stop conditions" in body, f"skills/{name}/SKILL.md missing '## Stop conditions'"
|
|
215
|
-
|
|
216
|
-
@pytest.mark.parametrize("name", TIER2_NAMES)
|
|
217
|
-
def test_has_recursion_limit(self, name: str):
|
|
218
|
-
_, body = _read_skill(name)
|
|
219
|
-
assert "## Recursion limit" in body, f"skills/{name}/SKILL.md missing '## Recursion limit'"
|
|
220
|
-
|
|
221
|
-
def test_mini_map_has_classification_rules(self):
|
|
222
|
-
_, body = _read_skill("mini-map")
|
|
223
|
-
assert "### Step 4 — Skill heuristics" in body or "Classification" in body, (
|
|
224
|
-
"skills/mini-map/SKILL.md missing classification rules"
|
|
225
|
-
)
|
|
226
|
-
|
|
227
|
-
def test_mini_map_has_output_shape(self):
|
|
228
|
-
_, body = _read_skill("mini-map")
|
|
229
|
-
assert "PERSISTS" in body and "DELEGATES" in body, (
|
|
230
|
-
"skills/mini-map/SKILL.md missing output shape (PERSISTS/DELEGATES labels)"
|
|
231
|
-
)
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
class TestWorkedExamples:
|
|
235
|
-
"""Every skill must have a worked example section."""
|
|
236
|
-
|
|
237
|
-
@pytest.mark.parametrize("name", ALL_SKILL_NAMES)
|
|
238
|
-
def test_has_worked_example(self, name: str):
|
|
239
|
-
_, body = _read_skill(name)
|
|
240
|
-
assert "## Worked example" in body, f"skills/{name}/SKILL.md missing '## Worked example'"
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
class TestDirectoryIntegrity:
|
|
244
|
-
"""skills/ directory must contain exactly the expected skills."""
|
|
245
|
-
|
|
246
|
-
def test_no_extra_skill_dirs(self):
|
|
247
|
-
actual = {p.name for p in SKILLS_DIR.iterdir() if p.is_dir() and (p / "SKILL.md").exists()}
|
|
248
|
-
expected = set(ALL_SKILL_NAMES)
|
|
249
|
-
extra = actual - expected
|
|
250
|
-
assert not extra, f"Unexpected skill directories: {extra}"
|
|
251
|
-
|
|
252
|
-
def test_no_missing_skill_dirs(self):
|
|
253
|
-
actual = {p.name for p in SKILLS_DIR.iterdir() if p.is_dir() and (p / "SKILL.md").exists()}
|
|
254
|
-
expected = set(ALL_SKILL_NAMES)
|
|
255
|
-
missing = expected - actual
|
|
256
|
-
assert not missing, f"Missing skill directories: {missing}"
|
|
257
|
-
|
|
258
|
-
def test_readme_exists(self):
|
|
259
|
-
assert (SKILLS_DIR / "README.md").is_file(), "skills/README.md missing"
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
class TestAgentGuideConsistency:
|
|
263
|
-
"""AGENT-GUIDE.md copy-paste block must be self-contained."""
|
|
264
|
-
|
|
265
|
-
def test_guide_has_navigation_patterns_table(self):
|
|
266
|
-
"""The copy-paste block must include a navigation patterns section
|
|
267
|
-
(it's standalone — no external file references work in a consumer project)."""
|
|
268
|
-
guide = Path(__file__).resolve().parent.parent / "docs" / "AGENT-GUIDE.md"
|
|
269
|
-
text = guide.read_text(encoding="utf-8")
|
|
270
|
-
# Extract the copy-paste block (marker on its own line)
|
|
271
|
-
begin = text.find("<!-- BEGIN java-codebase-rag MCP guide -->")
|
|
272
|
-
end = text.find("<!-- END java-codebase-rag MCP guide -->")
|
|
273
|
-
assert begin != -1 and end != -1, "AGENT-GUIDE.md missing BEGIN/END markers"
|
|
274
|
-
block = text[begin:end]
|
|
275
|
-
assert "### Common navigation patterns" in block, (
|
|
276
|
-
"AGENT-GUIDE.md copy-paste block missing '### Common navigation patterns'"
|
|
277
|
-
)
|
|
278
|
-
# Verify key patterns are present
|
|
279
|
-
for pattern in ["CALLS", "EXPOSES", "IMPLEMENTS", "INJECTS"]:
|
|
280
|
-
assert pattern in block, f"AGENT-GUIDE.md copy-paste block missing {pattern} pattern"
|
|
281
|
-
|
|
282
|
-
def test_guide_copy_block_does_not_reference_skills_dir(self):
|
|
283
|
-
"""The copy-paste block must not reference skills/ — it won't exist
|
|
284
|
-
in the consumer's project."""
|
|
285
|
-
guide = Path(__file__).resolve().parent.parent / "docs" / "AGENT-GUIDE.md"
|
|
286
|
-
text = guide.read_text(encoding="utf-8")
|
|
287
|
-
begin = text.find("<!-- BEGIN java-codebase-rag MCP guide -->")
|
|
288
|
-
end = text.find("<!-- END java-codebase-rag MCP guide -->")
|
|
289
|
-
assert begin != -1 and end != -1, "AGENT-GUIDE.md missing BEGIN/END markers"
|
|
290
|
-
block = text[begin:end]
|
|
291
|
-
assert "skills/" not in block, (
|
|
292
|
-
"AGENT-GUIDE.md copy-paste block references skills/ — "
|
|
293
|
-
"this path won't resolve in a consumer project. "
|
|
294
|
-
"Keep skills/ references outside the copy-paste block."
|
|
295
|
-
)
|
|
296
|
-
|
|
297
|
-
def test_guide_copy_block_has_no_slash_command_aliases(self):
|
|
298
|
-
"""The copy-paste block must not contain slash-command alias bullets
|
|
299
|
-
like `/nl <text>` → ... — these imply commands that don't exist
|
|
300
|
-
and will mislead the agent. Incidental mentions (e.g. cross-references
|
|
301
|
-
in prose) are fine."""
|
|
302
|
-
guide = Path(__file__).resolve().parent.parent / "docs" / "AGENT-GUIDE.md"
|
|
303
|
-
text = guide.read_text(encoding="utf-8")
|
|
304
|
-
begin = text.find("<!-- BEGIN java-codebase-rag MCP guide -->")
|
|
305
|
-
end = text.find("<!-- END java-codebase-rag MCP guide -->")
|
|
306
|
-
block = text[begin:end]
|
|
307
|
-
# Match alias definition lines: - `/skillname ...` → tool(...)
|
|
308
|
-
skill_names_pattern = "|".join(re.escape(n) for n in ALL_SKILL_NAMES)
|
|
309
|
-
alias_pattern = re.compile(
|
|
310
|
-
rf"^- `/(?:{skill_names_pattern})\s",
|
|
311
|
-
re.MULTILINE,
|
|
312
|
-
)
|
|
313
|
-
matches = alias_pattern.findall(block)
|
|
314
|
-
assert not matches, (
|
|
315
|
-
f"AGENT-GUIDE.md copy-paste block contains slash-command alias bullets: "
|
|
316
|
-
f"{alias_pattern.findall(block)}. "
|
|
317
|
-
"These are not real commands and will mislead the agent."
|
|
318
|
-
)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag.egg-info/entry_points.txt
RENAMED
|
File without changes
|
{java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/java_codebase_rag.egg-info/top_level.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_assign_endpoint_client_extraction.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_bank_chat_brownfield_integration.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_call_graph_receiver_resolution.py
RENAMED
|
File without changes
|
{java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_call_graph_smoke_roundtrip.py
RENAMED
|
File without changes
|
|
File without changes
|
{java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_cli_progress_stdout_invariant.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_cross_service_resolution_flag.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{java_codebase_rag-0.2.0 → java_codebase_rag-0.2.2}/tests/test_resolve_routes_messaging_layer_c.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|