galaxy-tool-refactor-mcp 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- galaxy_tool_refactor_mcp-0.2.0/.gitignore +24 -0
- galaxy_tool_refactor_mcp-0.2.0/CLAUDE.md +75 -0
- galaxy_tool_refactor_mcp-0.2.0/PKG-INFO +57 -0
- galaxy_tool_refactor_mcp-0.2.0/README.md +44 -0
- galaxy_tool_refactor_mcp-0.2.0/docs/decisions.md +80 -0
- galaxy_tool_refactor_mcp-0.2.0/docs/vision.md +70 -0
- galaxy_tool_refactor_mcp-0.2.0/pyproject.toml +50 -0
- galaxy_tool_refactor_mcp-0.2.0/src/galaxy_tool_refactor_mcp/__init__.py +5 -0
- galaxy_tool_refactor_mcp-0.2.0/src/galaxy_tool_refactor_mcp/server.py +119 -0
- galaxy_tool_refactor_mcp-0.2.0/src/galaxy_tool_refactor_mcp/service.py +199 -0
- galaxy_tool_refactor_mcp-0.2.0/tests/test_server.py +53 -0
- galaxy_tool_refactor_mcp-0.2.0/tests/test_service.py +156 -0
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
# Machine-local scratch, never committed: the cloned corpus (.local/corpus,
|
|
2
|
+
# seeded from corpus_sources.json) and external source clones for inspection
|
|
3
|
+
# (e.g. .local/galaxy-src = a clone of galaxyproject/galaxy used to verify
|
|
4
|
+
# Galaxy-internal behaviour locally).
|
|
5
|
+
# No trailing slash: `.local/` matches only a directory, so an accidental
|
|
6
|
+
# symlink at this path was committable (it happened once; removed in this branch).
|
|
7
|
+
.local
|
|
8
|
+
.venv/
|
|
9
|
+
__pycache__/
|
|
10
|
+
*.pyc
|
|
11
|
+
*.pyo
|
|
12
|
+
.pytest_cache/
|
|
13
|
+
.mypy_cache/
|
|
14
|
+
.ruff_cache/
|
|
15
|
+
dist/
|
|
16
|
+
*.egg-info/
|
|
17
|
+
|
|
18
|
+
# Local-only draft of the GCC poster abstract.
|
|
19
|
+
gcc2026-abstract.txt
|
|
20
|
+
|
|
21
|
+
# Claude Code session scratch (the tracked .claude/ settings + skills stay; this
|
|
22
|
+
# runtime lock for scheduled wakeups is machine-local).
|
|
23
|
+
.claude/scheduled_tasks.lock
|
|
24
|
+
.claude/worktrees/
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# CLAUDE.md
|
|
2
|
+
|
|
3
|
+
Guidance for Claude Code working in this repository.
|
|
4
|
+
|
|
5
|
+
## Project
|
|
6
|
+
|
|
7
|
+
`galaxy-tool-refactor-mcp` is the **MCP server** tier (tier 4) of the Galaxy tool
|
|
8
|
+
refactoring framework — an agent-facing front-end over the registry facade, a
|
|
9
|
+
sibling of the user-facing CLI.
|
|
10
|
+
|
|
11
|
+
| Tier | Layer | Package |
|
|
12
|
+
|---|---|---|
|
|
13
|
+
| 0.5 | rule metadata | `galaxy-tool-refactor-rules` |
|
|
14
|
+
| 1 | parsing & validation | `galaxy-tool-source` |
|
|
15
|
+
| 2 | structure | `galaxy-tool-codemod` |
|
|
16
|
+
| 3 | formatting | `galaxy-tool-fmt` |
|
|
17
|
+
| 3.5 | advisory checks | `galaxy-tool-lint` |
|
|
18
|
+
| 3.6 | rule registry / rulesets | `galaxy-tool-refactor-registry` |
|
|
19
|
+
| 4 | app / CLI | `galaxy-tool-refactor-cli` |
|
|
20
|
+
| 4 | **MCP server** | `galaxy-tool-refactor-mcp` *(this repo)* |
|
|
21
|
+
|
|
22
|
+
It depends on the tier-3.6 facade (plus tier-1 for the `ToolXmlSyntaxError`
|
|
23
|
+
boundary type, and tier-0.5 for the `Violation` type — a `TYPE_CHECKING`-only
|
|
24
|
+
import in `service.py`, declared as a direct dep because it is imported directly)
|
|
25
|
+
and `mcp` (FastMCP). The lower tiers do **not** depend on it.
|
|
26
|
+
|
|
27
|
+
## Key invariants
|
|
28
|
+
|
|
29
|
+
- **Thin adapter, split in two.** `service.py` is the protocol-agnostic core
|
|
30
|
+
(facade → JSON-able `dict`s, **no `mcp` import**, fully unit-tested);
|
|
31
|
+
`server.py` is the FastMCP binding (a handler per tool that delegates to
|
|
32
|
+
`service`). The split keeps the logic testable without a transport and the
|
|
33
|
+
protocol shell minimal. This is *why* the facade is library-first.
|
|
34
|
+
- **Never writes to disk.** Agents supply XML content as a `str` and get content
|
|
35
|
+
back; `write_path` is never passed. The XML `str` is encoded to `bytes` before
|
|
36
|
+
the facade sees it, so it is parsed as content, never mistaken for a path.
|
|
37
|
+
- **`server.py` is the error boundary.** Its handlers translate the facade's typed
|
|
38
|
+
`UnknownRuleset` / `UnknownRuleCode` and tier-1's `ToolXmlSyntaxError` into a
|
|
39
|
+
plain `ValueError` whose message FastMCP returns as a tool error (the MCP
|
|
40
|
+
analogue of the CLI's `click` boundary). `service.py` lets them propagate.
|
|
41
|
+
- **FastMCP introspects handler signatures at runtime** (`eval_str=True`), so a
|
|
42
|
+
registered handler's annotations must be evaluable at import time — use builtin
|
|
43
|
+
types (`list[str] | None`), not `TYPE_CHECKING`-only names.
|
|
44
|
+
- **Goal 1 only.** Agent-authored rules (`docs/vision.md` Goal 2) are out of
|
|
45
|
+
scope; the server exposes the fixed registry.
|
|
46
|
+
|
|
47
|
+
## Coding standards
|
|
48
|
+
|
|
49
|
+
Hand-written code follows **dignified-python** (vendored at the workspace root
|
|
50
|
+
`.claude/skills/dignified-python/`): LBYL over try/except (exceptions only at the
|
|
51
|
+
MCP error boundary in `server.py`, chained `from e`); keyword-only args after the
|
|
52
|
+
first; absolute imports, no re-exports, no `__all__`; no import-time side effects.
|
|
53
|
+
`optimized-python` is a secondary reference; dignified-python governs on conflict.
|
|
54
|
+
New code lands tests-first.
|
|
55
|
+
|
|
56
|
+
## Commands
|
|
57
|
+
|
|
58
|
+
Run from the **workspace root** (`galaxy-tool-refactor/`):
|
|
59
|
+
|
|
60
|
+
- `uv sync`
|
|
61
|
+
- `uv run --package galaxy-tool-refactor-mcp pytest galaxy-tool-refactor-mcp/tests/`
|
|
62
|
+
- `uv run ruff check galaxy-tool-refactor-mcp/src galaxy-tool-refactor-mcp/tests`
|
|
63
|
+
- `uv run mypy --config-file galaxy-tool-refactor-mcp/pyproject.toml galaxy-tool-refactor-mcp/src`
|
|
64
|
+
- `uv run galaxy-tool-refactor-mcp` — serve over stdio
|
|
65
|
+
|
|
66
|
+
## Useful references
|
|
67
|
+
|
|
68
|
+
- `galaxy-tool-refactor-registry/src/galaxy_tool_refactor_registry/facade.py` —
|
|
69
|
+
the `run` / `upgrade` / `detect` / `convert_help` / `tokenize_version` /
|
|
70
|
+
`list_rulesets` / `list_rules` entry points `service.py` wraps; `results.py` for the structured result shapes serialised.
|
|
71
|
+
- `galaxy-tool-refactor-cli/src/galaxy_tool_refactor_cli/cli.py` — the sibling
|
|
72
|
+
front-end over the same facade.
|
|
73
|
+
- `docs/decisions.md` D1–D3 — the design + the `convert_help_tool` /
|
|
74
|
+
`tokenize_version_tool` additions; `docs/vision.md` — the agent-authored-rules
|
|
75
|
+
future (Goal 2).
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: galaxy-tool-refactor-mcp
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: MCP server exposing the Galaxy tool refactoring facade to AI agents.
|
|
5
|
+
Author: Richard Burhans
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Requires-Python: >=3.10
|
|
8
|
+
Requires-Dist: galaxy-tool-refactor-registry==0.2.0
|
|
9
|
+
Requires-Dist: galaxy-tool-refactor-rules==0.2.0
|
|
10
|
+
Requires-Dist: galaxy-tool-source==0.2.0
|
|
11
|
+
Requires-Dist: mcp>=1.2
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
|
|
14
|
+
# galaxy-tool-refactor-mcp
|
|
15
|
+
|
|
16
|
+
An **MCP server** that exposes the Galaxy tool refactoring framework to AI coding
|
|
17
|
+
agents. A tier-4 sibling of `galaxy-tool-refactor-cli`: both wrap the
|
|
18
|
+
`galaxy-tool-refactor-registry` facade (tier 3.6) for a different audience — the
|
|
19
|
+
CLI for humans at a terminal, this for agents over the Model Context Protocol.
|
|
20
|
+
|
|
21
|
+
## Shape
|
|
22
|
+
|
|
23
|
+
The facade is **library-first** (structured args in, structured results out, no
|
|
24
|
+
disk writes unless asked, introspectable), so the server is a *thin adapter*:
|
|
25
|
+
|
|
26
|
+
- **`service.py`** — protocol-agnostic. Pure functions that take XML as a `str`
|
|
27
|
+
(plus `rulesets` / `select` / `ignore`) and return JSON-able `dict`s by calling the
|
|
28
|
+
facade. No `mcp` import; fully unit-tested.
|
|
29
|
+
- **`server.py`** — the FastMCP binding. A small handler per tool delegates to
|
|
30
|
+
`service`, and is the error boundary (facade `UnknownRuleset` / `UnknownRuleCode`
|
|
31
|
+
and tier-1 `ToolXmlSyntaxError` → a clean MCP tool error).
|
|
32
|
+
|
|
33
|
+
## Tools
|
|
34
|
+
|
|
35
|
+
| MCP tool | What it does |
|
|
36
|
+
|---|---|
|
|
37
|
+
| `format_tool` | Apply a ruleset's fixable rules then format; returns canonical XML + advisory notes. |
|
|
38
|
+
| `upgrade_tool` | Profile-upgrade then format; returns upgraded XML, steps applied, the behavior-preserving verdict, and notes. |
|
|
39
|
+
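| `check_tool` | Report-only detect over the selected rules; returns the findings (each flagged fixable vs advisory). |
| `convert_help_tool` | Opt-in: convert an RST `<help>` to Markdown when provably render-equivalent; returns `converted` / `formatted` / `skip_reason`. |
| `tokenize_version_tool` | Opt-in: factor a literal version into `@TOOL_VERSION@` tokens when provable; returns `tokenized` / `formatted` / `skip_reason`. |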
|
|
40
|
+
| `list_rulesets` | The baked-in rulesets (name / codes / is_default / description). |
|
|
41
|
+
| `list_rules` | The baked-in rules (code / summary / family / fixable / rulesets). |
|
|
42
|
+
|
|
43
|
+
Agents supply content and receive content — the server **never writes to disk**.
|
|
44
|
+
|
|
45
|
+
## Run
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
uv run galaxy-tool-refactor-mcp # serves over stdio
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
Point an MCP client (e.g. a coding agent) at that command. `list_rulesets` /
|
|
52
|
+
`list_rules` let the agent discover the available rulesets and rule codes at
|
|
53
|
+
runtime instead of hardcoding them.
|
|
54
|
+
|
|
55
|
+
See [`docs/decisions.md`](docs/decisions.md) D1 for the design, and
|
|
56
|
+
[`docs/vision.md`](docs/vision.md) for the longer-horizon agent-authored-rules
|
|
57
|
+
direction (Goal 2, still future).
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
# galaxy-tool-refactor-mcp
|
|
2
|
+
|
|
3
|
+
An **MCP server** that exposes the Galaxy tool refactoring framework to AI coding
|
|
4
|
+
agents. A tier-4 sibling of `galaxy-tool-refactor-cli`: both wrap the
|
|
5
|
+
`galaxy-tool-refactor-registry` facade (tier 3.6) for a different audience — the
|
|
6
|
+
CLI for humans at a terminal, this for agents over the Model Context Protocol.
|
|
7
|
+
|
|
8
|
+
## Shape
|
|
9
|
+
|
|
10
|
+
The facade is **library-first** (structured args in, structured results out, no
|
|
11
|
+
disk writes unless asked, introspectable), so the server is a *thin adapter*:
|
|
12
|
+
|
|
13
|
+
- **`service.py`** — protocol-agnostic. Pure functions that take XML as a `str`
|
|
14
|
+
(plus `rulesets` / `select` / `ignore`) and return JSON-able `dict`s by calling the
|
|
15
|
+
facade. No `mcp` import; fully unit-tested.
|
|
16
|
+
- **`server.py`** — the FastMCP binding. A small handler per tool delegates to
|
|
17
|
+
`service`, and is the error boundary (facade `UnknownRuleset` / `UnknownRuleCode`
|
|
18
|
+
and tier-1 `ToolXmlSyntaxError` → a clean MCP tool error).
|
|
19
|
+
|
|
20
|
+
## Tools
|
|
21
|
+
|
|
22
|
+
| MCP tool | What it does |
|
|
23
|
+
|---|---|
|
|
24
|
+
| `format_tool` | Apply a ruleset's fixable rules then format; returns canonical XML + advisory notes. |
|
|
25
|
+
| `upgrade_tool` | Profile-upgrade then format; returns upgraded XML, steps applied, the behavior-preserving verdict, and notes. |
|
|
26
|
+
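| `check_tool` | Report-only detect over the selected rules; returns the findings (each flagged fixable vs advisory). |
| `convert_help_tool` | Opt-in: convert an RST `<help>` to Markdown when provably render-equivalent; returns `converted` / `formatted` / `skip_reason`. |
| `tokenize_version_tool` | Opt-in: factor a literal version into `@TOOL_VERSION@` tokens when provable; returns `tokenized` / `formatted` / `skip_reason`. |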
|
|
27
|
+
| `list_rulesets` | The baked-in rulesets (name / codes / is_default / description). |
|
|
28
|
+
| `list_rules` | The baked-in rules (code / summary / family / fixable / rulesets). |
|
|
29
|
+
|
|
30
|
+
Agents supply content and receive content — the server **never writes to disk**.
|
|
31
|
+
|
|
32
|
+
## Run
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
uv run galaxy-tool-refactor-mcp # serves over stdio
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
Point an MCP client (e.g. a coding agent) at that command. `list_rulesets` /
|
|
39
|
+
`list_rules` let the agent discover the available rulesets and rule codes at
|
|
40
|
+
runtime instead of hardcoding them.
|
|
41
|
+
|
|
42
|
+
See [`docs/decisions.md`](docs/decisions.md) D1 for the design, and
|
|
43
|
+
[`docs/vision.md`](docs/vision.md) for the longer-horizon agent-authored-rules
|
|
44
|
+
direction (Goal 2, still future).
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# Decisions — galaxy-tool-refactor-mcp
|
|
2
|
+
|
|
3
|
+
Each entry records a decision once it lands: a date, the decision, and the
|
|
4
|
+
rationale. Mirrors the conventions of the sibling packages' `docs/decisions.md`.
|
|
5
|
+
|
|
6
|
+
## D1 (2026-06-03) — The MCP server: a thin FastMCP adapter over the facade (vision Goal 1)
|
|
7
|
+
|
|
8
|
+
> **Renamed since (PR #146, registry D15):** presets became **rulesets** — the tool
|
|
9
|
+
> is now `list_rulesets`, the argument `ruleset`, the typed error `UnknownRuleset`.
|
|
10
|
+
> This entry keeps the original vocabulary as a historical record; the shipped
|
|
11
|
+
> surface is `server.py` / `service.py`.
|
|
12
|
+
|
|
13
|
+
### Decision
|
|
14
|
+
|
|
15
|
+
`galaxy-tool-refactor-mcp` becomes a real tier-4 package — an MCP server exposing
|
|
16
|
+
the registry facade to AI agents (Goal 1 of `docs/vision.md`). It is a sibling of
|
|
17
|
+
the CLI: both wrap the tier-3.6 facade for a different audience. Five tools:
|
|
18
|
+
`format_tool`, `upgrade_tool`, `check_tool`, `list_presets`, `list_rules`. Goal 2
|
|
19
|
+
(agent-authored rules) stays out of scope.
|
|
20
|
+
|
|
21
|
+
### Shape — split in two
|
|
22
|
+
|
|
23
|
+
- **`service.py` (protocol-agnostic).** Pure functions taking XML as a `str`
|
|
24
|
+
(plus `preset` / `select` / `ignore`) and returning JSON-able `dict`s by calling
|
|
25
|
+
`facade.run` / `upgrade` / `detect` / `list_presets` / `list_rules` and
|
|
26
|
+
serialising the structured results. **No `mcp` import** — so the substance is
|
|
27
|
+
unit-testable without a transport. This realises the vision's "thin adapter":
|
|
28
|
+
the facade is library-first precisely so this layer is a mechanical mapping.
|
|
29
|
+
- **`server.py` (FastMCP binding).** `build_server()` registers a small handler
|
|
30
|
+
per tool, each delegating to `service`. Factored out so tests introspect the
|
|
31
|
+
registered tools (`await server.list_tools()`) without starting a transport;
|
|
32
|
+
`main()` runs it over stdio.
|
|
33
|
+
|
|
34
|
+
### Rationale / boundaries
|
|
35
|
+
|
|
36
|
+
- **Never writes to disk.** Agents pass content and get content back —
|
|
37
|
+
`write_path` is never used. The XML `str` is `encode("utf-8")`d to `bytes`
|
|
38
|
+
before the facade, so it is always parsed as *content*, never a path.
|
|
39
|
+
- **`server.py` is the error boundary** (the MCP analogue of the CLI's `click`
|
|
40
|
+
boundary): it maps the facade's typed `UnknownPreset` / `UnknownRuleCode` and
|
|
41
|
+
tier-1's `ToolXmlSyntaxError` to a plain `ValueError` whose message FastMCP
|
|
42
|
+
surfaces as a tool error, so a bad preset or malformed tool is a clean error
|
|
43
|
+
result, not a crashed server. `service.py` lets the typed errors propagate.
|
|
44
|
+
- **Handler annotations are runtime-evaluable.** FastMCP builds each tool's input
|
|
45
|
+
schema via `inspect.signature(..., eval_str=True)`, so a registered handler's
|
|
46
|
+
annotations must resolve at import time. Handlers therefore use builtin
|
|
47
|
+
`list[str] | None`, not a `TYPE_CHECKING`-only `Sequence` (which raised
|
|
48
|
+
`InvalidSignature`). `service.py`, not introspected by FastMCP, keeps its
|
|
49
|
+
`Sequence` typing.
|
|
50
|
+
- **Dependency.** `mcp>=1.2` (ships `py.typed`, so mypy-strict passes). Registered
|
|
51
|
+
in the workspace; the qa-gate (`scripts/qa_gate.sh`) now covers eight packages.
|
|
52
|
+
|
|
53
|
+
### Reproduction
|
|
54
|
+
|
|
55
|
+
```sh
|
|
56
|
+
uv run --package galaxy-tool-refactor-mcp pytest galaxy-tool-refactor-mcp/tests/
|
|
57
|
+
uv run galaxy-tool-refactor-mcp # serve over stdio
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## D2 (2026-06-10) — `convert_help_tool`: the opt-in conversion joins the surface
|
|
61
|
+
|
|
62
|
+
The sixth tool, mirroring the CLI's `convert-help` (cli D12) over the facade's
|
|
63
|
+
`convert_help` (registry D18): `convert_help_tool(xml) -> {converted, formatted,
|
|
64
|
+
skip_reason}`. The conversion's gates live below this tier (profile >= 24.2 +
|
|
65
|
+
render equivalence, codemod §38); the adapter only serialises the structured
|
|
66
|
+
outcome — `converted=False` with the codemod's own `skip_reason` is a normal
|
|
67
|
+
result, not an MCP error, so an agent can act on the reason (e.g. call
|
|
68
|
+
`upgrade_tool` first, exactly what the profile-gate message says). No
|
|
69
|
+
ruleset/select parameters: GTR092 is not selectable anywhere, by design.
|
|
70
|
+
|
|
71
|
+
## D3 (2026-06-10) — `tokenize_version_tool`: the seventh tool
|
|
72
|
+
|
|
73
|
+
The GTR094 sibling of D2, same shape: `tokenize_version_tool(xml) ->
|
|
74
|
+
{tokenized, formatted, skip_reason}` over the facade's `tokenize_version`
|
|
75
|
+
(registry D19). One MCP-specific boundary, stated rather than hidden: every
|
|
76
|
+
MCP tool is **content-based** (agents supply XML strings; nothing touches
|
|
77
|
+
disk), so a tool whose `<macros>` imports files fails closed — the
|
|
78
|
+
expansion-equality gate cannot resolve imports without a source directory —
|
|
79
|
+
and the skip reason says to use the path-based CLI `tokenize-version` instead.
|
|
80
|
+
No ruleset/select parameters: GTR094, like GTR092, is not selectable anywhere.
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
# Vision: `galaxy-tool-refactor` for AI agents (MCP server + agent-authored rules)
|
|
2
|
+
|
|
3
|
+
**Status:** **Goal 1 (the MCP server) is shipped** — see `docs/decisions.md` D1;
|
|
4
|
+
the Goal 1 section below now describes what was built. **Goal 2 (agent-authored rules)
|
|
5
|
+
remains forward-looking** — recorded so the facade does not foreclose it. The
|
|
6
|
+
current locked decision — "adding rules is a developer task; no user-defined
|
|
7
|
+
rules" — still governs; Goal 2 is the relaxation path if/when desired.
|
|
8
|
+
|
|
9
|
+
## Goal 1 — `galaxy-tool-refactor` as a tool for agents (the MCP server) — SHIPPED
|
|
10
|
+
|
|
11
|
+
The MCP server (this package) wraps the `galaxy-tool-refactor-registry`
|
|
12
|
+
facade so coding agents can:
|
|
13
|
+
|
|
14
|
+
- **Discover** what the tool can do: `list_rulesets()` → ruleset names +
|
|
15
|
+
descriptions, `list_rules()` → code / summary / family / fixable-vs-advisory /
|
|
16
|
+
which rulesets include each rule. These map directly onto MCP tool descriptions
|
|
17
|
+
and enum-valued arguments, so an agent learns the available rulesets/rules at
|
|
18
|
+
runtime instead of hardcoding them.
|
|
19
|
+
- **Run** `format` / `upgrade` / `check` over content the agent supplies (raw
|
|
20
|
+
XML, not necessarily a path), with a chosen ruleset or explicit
|
|
21
|
+
`--select`/`--ignore` code set, and receive **structured results**
|
|
22
|
+
(`FormatResult` / `UpgradeResult` / `DetectResult`: formatted bytes, the
|
|
23
|
+
`Violation`s found, upgrade steps applied, advisory notes).
|
|
24
|
+
|
|
25
|
+
This is *why* the facade is library-first and structured: the MCP server is a
|
|
26
|
+
thin adapter (structured args in, structured results out), never a subprocess
|
|
27
|
+
that scrapes CLI text. The facade already honors the needed shape — content-or-
|
|
28
|
+
path input, structured results, disk writes only on explicit request, and
|
|
29
|
+
introspection — so the server is mostly an MCP-protocol binding over it.
|
|
30
|
+
|
|
31
|
+
The server is a **tier-4 sibling of the CLI**: both depend on the registry
|
|
32
|
+
facade; orchestration stays in the facade so the CLI and the MCP server share one
|
|
33
|
+
core and cannot drift.
|
|
34
|
+
|
|
35
|
+
## Goal 2 — agents authoring their own codemod / fmt extensions
|
|
36
|
+
|
|
37
|
+
Longer-horizon: let a coding agent write a new rule — a
|
|
38
|
+
`CodemodCommand`/`Rule` subclass with its `detect`/`apply` + a `RuleMeta` — and
|
|
39
|
+
have the framework discover and run it alongside the baked-in rules.
|
|
40
|
+
|
|
41
|
+
The seam already exists: the codemod tier's **detect-primitive**
|
|
42
|
+
`CodemodCommand` and the registry's **`RuleHandle`** adapter are the natural
|
|
43
|
+
authoring/integration contract — an agent targets `detect()` (+ `apply()` for a
|
|
44
|
+
fixable rule) and a `RuleMeta` code, and the registry wraps it into a `RuleHandle`
|
|
45
|
+
exactly like a built-in.
|
|
46
|
+
|
|
47
|
+
Open questions to resolve **later** (do not solve now; just avoid foreclosing):
|
|
48
|
+
|
|
49
|
+
1. **Discovery.** Stay with the current hardcoded family registries
|
|
50
|
+
(`coded_codemods()` / `all_rules()` / `all_checks()`, developer-only) or grow
|
|
51
|
+
an entry-point/plugin mechanism so third-party rule packages register
|
|
52
|
+
themselves. The unified registry is the single place that mechanism would
|
|
53
|
+
plug into.
|
|
54
|
+
2. **Authoring contract.** Pin down the minimum an agent must supply (tag
|
|
55
|
+
dispatch method names, `RuleMeta` fields, idempotence expectations) and
|
|
56
|
+
surface it as documentation / a template the MCP server can hand back.
|
|
57
|
+
3. **QA gating.** How an agent-authored rule earns trust before it ships — the
|
|
58
|
+
corpus idempotence / post-validity sweeps (`scripts/corpus_check.py`) are the
|
|
59
|
+
existing gate; an authored rule would run the same `codemod`/`rules` sweep.
|
|
60
|
+
4. **Trust / sandboxing.** Running an agent-authored `apply` thunk executes
|
|
61
|
+
third-party code. A plugin path needs a trust boundary (vetted packages,
|
|
62
|
+
opt-in, or a sandbox) — out of scope until the plugin mechanism is real.
|
|
63
|
+
|
|
64
|
+
## Design constraints this places on the registry facade (honored today)
|
|
65
|
+
|
|
66
|
+
- Library-first: no `click` / `sys.exit` / stdout-scraping in the call path.
|
|
67
|
+
- Structured results and structured introspection (`list_rulesets`/`list_rules`).
|
|
68
|
+
- Content-or-path input; never writes to disk unless a `write_path` is given.
|
|
69
|
+
- The `RuleHandle` is the uniform, code-addressable unit an MCP tool or a plugin
|
|
70
|
+
loader can enumerate and invoke without knowing which tier a rule came from.
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "galaxy-tool-refactor-mcp"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "MCP server exposing the Galaxy tool refactoring facade to AI agents."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = "MIT"
|
|
8
|
+
authors = [{ name = "Richard Burhans" }]
|
|
9
|
+
# A tier-4 sibling of the CLI: both wrap the registry facade (tier 3.6). The
|
|
10
|
+
# server is a thin protocol adapter — `service.py` (facade -> JSON, no `mcp`
|
|
11
|
+
# import) carries the logic; `server.py` binds it to FastMCP.
|
|
12
|
+
dependencies = [
|
|
13
|
+
"galaxy-tool-refactor-rules==0.2.0",
|
|
14
|
+
"galaxy-tool-refactor-registry==0.2.0",
|
|
15
|
+
"galaxy-tool-source==0.2.0",
|
|
16
|
+
"mcp>=1.2",
|
|
17
|
+
]
|
|
18
|
+
|
|
19
|
+
[project.scripts]
|
|
20
|
+
galaxy-tool-refactor-mcp = "galaxy_tool_refactor_mcp.server:main"
|
|
21
|
+
|
|
22
|
+
[build-system]
|
|
23
|
+
requires = ["hatchling"]
|
|
24
|
+
build-backend = "hatchling.build"
|
|
25
|
+
|
|
26
|
+
[dependency-groups]
|
|
27
|
+
dev = [
|
|
28
|
+
"pytest>=8",
|
|
29
|
+
"ruff>=0.5",
|
|
30
|
+
"mypy>=1.10",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
[tool.ruff]
|
|
34
|
+
src = ["src"]
|
|
35
|
+
target-version = "py310"
|
|
36
|
+
|
|
37
|
+
[tool.ruff.lint]
|
|
38
|
+
select = ["E", "F", "I", "B", "UP", "SIM", "PTH"]
|
|
39
|
+
|
|
40
|
+
[tool.mypy]
|
|
41
|
+
files = ["src"]
|
|
42
|
+
strict = true
|
|
43
|
+
|
|
44
|
+
[tool.pytest.ini_options]
|
|
45
|
+
testpaths = ["tests"]
|
|
46
|
+
|
|
47
|
+
[tool.uv.sources]
|
|
48
|
+
galaxy-tool-refactor-rules = { workspace = true }
|
|
49
|
+
galaxy-tool-refactor-registry = { workspace = true }
|
|
50
|
+
galaxy-tool-source = { workspace = true }
|
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
"""The FastMCP binding — a thin protocol shell over ``service``.
|
|
2
|
+
|
|
3
|
+
Each MCP tool is a small handler with agent-facing named arguments that delegates
|
|
4
|
+
to the protocol-agnostic ``service`` adapter. The handlers are the **error
|
|
5
|
+
boundary** (the MCP analogue of the CLI's): they translate the facade's typed
|
|
6
|
+
``UnknownRuleset`` / ``UnknownRuleCode`` and tier-1's ``ToolXmlSyntaxError`` into a
|
|
7
|
+
plain ``ValueError`` whose message FastMCP returns as a tool error, so a malformed
|
|
8
|
+
tool or an unknown ruleset is a clean error result rather than a crashed server.
|
|
9
|
+
|
|
10
|
+
Run it with the ``galaxy-tool-refactor-mcp`` console script (stdio transport).
|
|
11
|
+
``build_server()`` is factored out so tests can introspect the registered tools
|
|
12
|
+
without starting a transport. See ``docs/decisions.md`` D1; agent-authored rules
|
|
13
|
+
(vision Goal 2) are out of scope.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
from __future__ import annotations
|
|
17
|
+
|
|
18
|
+
from typing import TYPE_CHECKING
|
|
19
|
+
|
|
20
|
+
from galaxy_tool_refactor_registry.errors import UnknownRuleCode, UnknownRuleset
|
|
21
|
+
from galaxy_tool_source.binding import ToolXmlSyntaxError
|
|
22
|
+
from mcp.server.fastmcp import FastMCP
|
|
23
|
+
|
|
24
|
+
from galaxy_tool_refactor_mcp import service
|
|
25
|
+
|
|
26
|
+
if TYPE_CHECKING:
|
|
27
|
+
from collections.abc import Callable
|
|
28
|
+
from typing import TypeVar
|
|
29
|
+
|
|
30
|
+
T = TypeVar("T")
|
|
31
|
+
|
|
32
|
+
# Name passed to the FastMCP constructor in build_server().
_SERVER_NAME = "galaxy-tool-refactor"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
def _guarded(produce: Callable[[], T], /) -> T:
|
|
36
|
+
"""Run *produce*, mapping the facade/parse errors to an agent-facing message."""
|
|
37
|
+
try:
|
|
38
|
+
return produce()
|
|
39
|
+
except (UnknownRuleset, UnknownRuleCode) as error:
|
|
40
|
+
raise ValueError(str(error)) from error
|
|
41
|
+
except ToolXmlSyntaxError as error:
|
|
42
|
+
raise ValueError(f"invalid tool XML: {error}") from error
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _format_tool(
    xml: str,
    rulesets: list[str] | None = None,
    select: list[str] | None = None,
    ignore: list[str] | None = None,
) -> dict[str, object]:
    """Apply a ruleset's fixable rules then format; return canonical XML + notes."""

    def produce() -> dict[str, object]:
        # An omitted (None) or empty list reaches the service as an empty tuple.
        return service.format_tool(
            xml,
            rulesets=rulesets or (),
            select=select or (),
            ignore=ignore or (),
        )

    # _guarded rethrows the typed selection/parse errors as ValueError.
    return _guarded(produce)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def _upgrade_tool(
    xml: str,
    select: list[str] | None = None,
    ignore: list[str] | None = None,
) -> dict[str, object]:
    """Profile-upgrade then format; return upgraded XML, steps applied, and notes."""

    def produce() -> dict[str, object]:
        # An omitted (None) or empty list reaches the service as an empty tuple.
        return service.upgrade_tool(
            xml,
            select=select or (),
            ignore=ignore or (),
        )

    # _guarded rethrows the typed selection/parse errors as ValueError.
    return _guarded(produce)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _check_tool(
    xml: str,
    rulesets: list[str] | None = None,
    select: list[str] | None = None,
    ignore: list[str] | None = None,
) -> dict[str, object]:
    """Report-only detect over the selected rules; never mutates the tool."""

    def produce() -> dict[str, object]:
        # An omitted (None) or empty list reaches the service as an empty tuple.
        return service.check_tool(
            xml,
            rulesets=rulesets or (),
            select=select or (),
            ignore=ignore or (),
        )

    # _guarded rethrows the typed selection/parse errors as ValueError.
    return _guarded(produce)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def _convert_help_tool(xml: str) -> dict[str, object]:
    """Convert an RST <help> to Markdown when provably render-equivalent (opt-in)."""

    def produce() -> dict[str, object]:
        return service.convert_help_tool(xml)

    # _guarded rethrows the typed selection/parse errors as ValueError.
    return _guarded(produce)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def _tokenize_version_tool(xml: str) -> dict[str, object]:
    """Factor a literal version into @TOOL_VERSION@ tokens when provable (opt-in)."""

    def produce() -> dict[str, object]:
        return service.tokenize_version_tool(xml)

    # _guarded rethrows the typed selection/parse errors as ValueError.
    return _guarded(produce)
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _list_rulesets() -> list[dict[str, object]]:
    """The baked-in rulesets (name / codes / is_default / description)."""
    # Takes no XML or selection input, so the call is made directly rather
    # than through _guarded.
    rulesets = service.list_rulesets()
    return rulesets
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def _list_rules(include_upgrade: bool = False) -> list[dict[str, object]]:
    """The baked-in rules as JSON — every RuleInfo field (incl. cite)."""
    # The flag is forwarded unchanged; the call is made directly rather than
    # through _guarded.
    rules = service.list_rules(include_upgrade=include_upgrade)
    return rules
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def build_server() -> FastMCP:
    """Construct the FastMCP server with every tool registered (no transport)."""
    server = FastMCP(_SERVER_NAME)
    # (handler, public tool name) pairs; registration follows this order.
    registrations: tuple[tuple[Callable[..., object], str], ...] = (
        (_format_tool, "format_tool"),
        (_upgrade_tool, "upgrade_tool"),
        (_check_tool, "check_tool"),
        (_convert_help_tool, "convert_help_tool"),
        (_tokenize_version_tool, "tokenize_version_tool"),
        (_list_rulesets, "list_rulesets"),
        (_list_rules, "list_rules"),
    )
    for handler, tool_name in registrations:
        server.add_tool(handler, name=tool_name)
    return server
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def main() -> None:
    """Console-script entry point: serve over stdio."""
    # Build the fully registered server first, then start it with the
    # default run() arguments.
    server = build_server()
    server.run()
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""The protocol-agnostic adapter: registry facade → JSON-serialisable ``dict``s.
|
|
2
|
+
|
|
3
|
+
This is the substance of the MCP server, with **no ``mcp`` import** — it takes
|
|
4
|
+
agent-friendly inputs (XML as a ``str``, ruleset names, code lists) and returns
|
|
5
|
+
plain JSON-able structures by calling the tier-3.6 facade. ``server`` wraps these
|
|
6
|
+
as MCP tools. Keeping the logic here means it is unit-testable without a transport
|
|
7
|
+
and the FastMCP binding stays a thin shell (the vision's "thin adapter").
|
|
8
|
+
|
|
9
|
+
XML content arrives as a ``str`` and is encoded to ``bytes`` before the facade
|
|
10
|
+
sees it, so it is always parsed as *content*, never mistaken for a path. Nothing
|
|
11
|
+
here writes to disk — agents supply content and get content back. Selection /
|
|
12
|
+
parse errors propagate as the facade's typed ``UnknownRuleset`` /
|
|
13
|
+
``UnknownRuleCode`` and tier-1's ``ToolXmlSyntaxError``; the *server* is the error
|
|
14
|
+
boundary that turns them into MCP error responses (mirroring the CLI).
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from typing import TYPE_CHECKING
|
|
20
|
+
|
|
21
|
+
from galaxy_tool_refactor_registry import facade
|
|
22
|
+
from galaxy_tool_refactor_registry.resolve import resolve_codes, resolve_upgrade_codes
|
|
23
|
+
|
|
24
|
+
if TYPE_CHECKING:
|
|
25
|
+
from collections.abc import Sequence
|
|
26
|
+
|
|
27
|
+
from galaxy_tool_refactor_registry.results import (
|
|
28
|
+
ConvertHelpResult,
|
|
29
|
+
DetectResult,
|
|
30
|
+
FormatResult,
|
|
31
|
+
RuleInfo,
|
|
32
|
+
RulesetInfo,
|
|
33
|
+
TokenizeVersionResult,
|
|
34
|
+
UpgradeResult,
|
|
35
|
+
)
|
|
36
|
+
from galaxy_tool_refactor_rules.violation import Violation
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _violation_to_dict(violation: Violation, /) -> dict[str, object]:
|
|
40
|
+
"""Serialise a Violation to a JSON-able dict for an agent.
|
|
41
|
+
|
|
42
|
+
``code`` is the **precise rule code**, including a partition sub-rule's dotted
|
|
43
|
+
child code (``GTR020.1`` for the fix, ``GTR020.2`` for the advisory) — *not* the
|
|
44
|
+
parent display code. Agents get the exact sub-rule so they can distinguish the
|
|
45
|
+
fixable half from the advisory residual; the human CLI collapses both to the
|
|
46
|
+
parent (``GTR020``) via ``display_code``. Intentional asymmetry (registry D10).
|
|
47
|
+
"""
|
|
48
|
+
return {
|
|
49
|
+
"code": violation.code,
|
|
50
|
+
"sourceline": violation.sourceline,
|
|
51
|
+
"xpath": violation.xpath,
|
|
52
|
+
"message": violation.message,
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _format_result_to_dict(result: FormatResult, /) -> dict[str, object]:
    """Serialise a FormatResult as ``formatted`` / ``advisory`` / ``notes``.

    ``formatted`` is the result's bytes decoded as UTF-8, ``advisory`` is one
    violation dict per advisory finding, and ``notes`` is copied into a list.
    """
    xml_text = result.formatted.decode("utf-8")
    advisories = [_violation_to_dict(finding) for finding in result.advisory]
    remarks = list(result.notes)
    return {"formatted": xml_text, "advisory": advisories, "notes": remarks}
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def _upgrade_result_to_dict(result: UpgradeResult, /) -> dict[str, object]:
    """Serialise an UpgradeResult into a JSON-able dict.

    The XML bytes are decoded as UTF-8, ``steps_applied`` and ``notes`` are
    copied into lists, advisory findings become violation dicts, and
    ``missing_upgrade`` / ``behavior_preserving`` are passed through as-is.
    """
    xml_text = result.formatted.decode("utf-8")
    steps = list(result.steps_applied)
    missing = result.missing_upgrade
    preserving = result.behavior_preserving
    advisories = [_violation_to_dict(finding) for finding in result.advisory]
    remarks = list(result.notes)
    return {
        "formatted": xml_text,
        "steps_applied": steps,
        "missing_upgrade": missing,
        "behavior_preserving": preserving,
        "advisory": advisories,
        "notes": remarks,
    }
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _convert_help_result_to_dict(result: ConvertHelpResult, /) -> dict[str, object]:
    """Serialise a ConvertHelpResult as ``converted`` / ``formatted`` / ``skip_reason``.

    ``formatted`` is the result's bytes decoded as UTF-8; the other two fields
    are passed through unchanged.
    """
    was_converted = result.converted
    xml_text = result.formatted.decode("utf-8")
    reason = result.skip_reason
    return {"converted": was_converted, "formatted": xml_text, "skip_reason": reason}
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _tokenize_version_result_to_dict(
    result: TokenizeVersionResult, /
) -> dict[str, object]:
    """Serialise a TokenizeVersionResult as ``tokenized`` / ``formatted`` / ``skip_reason``.

    ``formatted`` is the result's bytes decoded as UTF-8; the other two fields
    are passed through unchanged.
    """
    was_tokenized = result.tokenized
    xml_text = result.formatted.decode("utf-8")
    reason = result.skip_reason
    return {"tokenized": was_tokenized, "formatted": xml_text, "skip_reason": reason}
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _detect_result_to_dict(result: DetectResult, /) -> dict[str, object]:
    """Serialise a DetectResult as ``violations`` plus sorted ``advisory_codes``.

    Each violation dict carries the usual violation fields and an extra
    ``advisory`` entry holding ``result.is_advisory(violation)``.
    """
    findings = []
    for violation in result.violations:
        entry = _violation_to_dict(violation)
        # Appended last, so the flag follows the violation's own fields.
        entry["advisory"] = result.is_advisory(violation)
        findings.append(entry)
    return {
        "violations": findings,
        "advisory_codes": sorted(result.advisory_codes),
    }
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _ruleset_info_to_dict(info: RulesetInfo, /) -> dict[str, object]:
    """Serialise a RulesetInfo as ``name`` / ``codes`` / ``is_default`` / ``description``.

    ``codes`` is copied into a list; the other fields are passed through.
    """
    serialised: dict[str, object] = {"name": info.name}
    serialised["codes"] = list(info.codes)
    serialised["is_default"] = info.is_default
    serialised["description"] = info.description
    return serialised
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def _rule_info_to_dict(info: RuleInfo, /) -> dict[str, object]:
    """Serialise a RuleInfo into a JSON-able dict.

    ``rulesets`` and ``planemo_linters`` are copied into lists; ``code``,
    ``summary``, ``family``, ``fixable``, ``since`` and ``cite`` are passed
    through unchanged.
    """
    serialised: dict[str, object] = {}
    serialised["code"] = info.code
    serialised["summary"] = info.summary
    serialised["family"] = info.family
    serialised["fixable"] = info.fixable
    serialised["rulesets"] = list(info.rulesets)
    serialised["planemo_linters"] = list(info.planemo_linters)
    serialised["since"] = info.since
    serialised["cite"] = info.cite
    return serialised
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def format_tool(
    xml: str,
    /,
    *,
    rulesets: Sequence[str] = (),
    select: Sequence[str] = (),
    ignore: Sequence[str] = (),
) -> dict[str, object]:
    """Apply a ruleset's fixable rules then format; return the canonical XML + notes.

    The ``rulesets`` / ``select`` / ``ignore`` selection is resolved to rule
    codes first; the XML text is then UTF-8 encoded and handed to
    ``facade.run``, whose result is serialised to a dict.
    """
    selected = resolve_codes(rulesets=rulesets, select=select, ignore=ignore)
    payload = xml.encode("utf-8")
    outcome = facade.run(payload, codes=selected)
    return _format_result_to_dict(outcome)
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def upgrade_tool(
    xml: str,
    /,
    *,
    select: Sequence[str] = (),
    ignore: Sequence[str] = (),
) -> dict[str, object]:
    """Profile-upgrade then format; return the upgraded XML, steps, and notes.

    The ``select`` / ``ignore`` selection is resolved to upgrade codes first;
    the XML text is then UTF-8 encoded and handed to ``facade.upgrade``, whose
    result is serialised to a dict.
    """
    selected = resolve_upgrade_codes(select=select, ignore=ignore)
    payload = xml.encode("utf-8")
    outcome = facade.upgrade(payload, codes=selected)
    return _upgrade_result_to_dict(outcome)
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def check_tool(
    xml: str,
    /,
    *,
    rulesets: Sequence[str] = (),
    select: Sequence[str] = (),
    ignore: Sequence[str] = (),
) -> dict[str, object]:
    """Report-only detect over the selected rules; never mutates the tool.

    The ``rulesets`` / ``select`` / ``ignore`` selection is resolved to rule
    codes first; the XML text is then UTF-8 encoded and handed to
    ``facade.detect``, whose result is serialised to a dict.
    """
    selected = resolve_codes(rulesets=rulesets, select=select, ignore=ignore)
    payload = xml.encode("utf-8")
    findings = facade.detect(payload, codes=selected)
    return _detect_result_to_dict(findings)
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def convert_help_tool(xml: str, /) -> dict[str, object]:
    """Convert an RST ``<help>`` to Markdown when provable; else report why not.

    This is the opt-in GTR092 conversion (registry D18). It is gated on
    profile >= 24.2 (the XSD gate — the skip reason says to run
    ``upgrade_tool`` first) and on the tier-1 render-equivalence gate.
    ``converted=False`` is a normal outcome rather than an error; in that case
    ``formatted`` echoes the (serialised) unchanged tool.
    """
    payload = xml.encode("utf-8")
    outcome = facade.convert_help(payload)
    return _convert_help_result_to_dict(outcome)
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def tokenize_version_tool(xml: str, /) -> dict[str, object]:
    """Factor a literal version into @TOOL_VERSION@ tokens when provable (GTR094).

    This is the opt-in tokenization (registry D19): fail-closed preconditions
    plus the expansion-equality gate. ``tokenized=False`` is a normal outcome
    and carries the codemod's own ``skip_reason``. Like every MCP tool this one
    is content-based, so a tool whose ``<macros>`` imports files fails closed
    (the gate cannot resolve imports without a source directory) — the skip
    reason says so; use the path-based CLI ``tokenize-version`` for those.
    """
    payload = xml.encode("utf-8")
    outcome = facade.tokenize_version(payload)
    return _tokenize_version_result_to_dict(outcome)
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def list_rulesets() -> list[dict[str, object]]:
    """The baked-in rulesets (name / codes / is_default / description)."""
    serialised: list[dict[str, object]] = []
    for ruleset in facade.list_rulesets():
        serialised.append(_ruleset_info_to_dict(ruleset))
    return serialised
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def list_rules(*, include_upgrade: bool = False) -> list[dict[str, object]]:
    """The baked-in rules as JSON — every RuleInfo field (incl. cite).

    ``include_upgrade`` is forwarded unchanged to ``facade.list_rules``.
    """
    serialised: list[dict[str, object]] = []
    for rule in facade.list_rules(include_upgrade=include_upgrade):
        serialised.append(_rule_info_to_dict(rule))
    return serialised
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Tests for the FastMCP binding: tool registration + the error boundary."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from galaxy_tool_refactor_mcp.server import (
|
|
10
|
+
_check_tool,
|
|
11
|
+
_format_tool,
|
|
12
|
+
build_server,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
# Minimal single-line tool document shared by the handler tests below.
_TOOL = "".join(
    (
        '<tool id="m" name="M" version="1.0.0" profile="24.0">',
        "<command><![CDATA[echo x]]></command>",
        '<inputs><param value="v" type="text" name="a"/></inputs>',
        '<outputs><data name="o"/></outputs></tool>',
    )
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_build_server_registers_every_tool() -> None:
    """The built server lists exactly the expected set of tool names."""
    expected_names = {
        "format_tool",
        "upgrade_tool",
        "check_tool",
        "convert_help_tool",
        "tokenize_version_tool",
        "list_rulesets",
        "list_rules",
    }
    mcp_server = build_server()
    # list_tools() is a coroutine, so drive it to completion synchronously.
    registered = asyncio.run(mcp_server.list_tools())
    registered_names = {entry.name for entry in registered}
    assert registered_names == expected_names
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_handler_maps_unknown_ruleset_to_plain_valueerror() -> None:
    """The error boundary downgrades the typed UnknownRuleset to a plain message."""
    unknown = ["does-not-exist"]
    with pytest.raises(ValueError) as caught:
        _format_tool(_TOOL, rulesets=unknown)
    # Exact-type check on purpose: the agent must get a bare ValueError, not
    # the UnknownRuleset subclass.
    raised = caught.value
    assert type(raised) is ValueError
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_handler_maps_malformed_xml_to_value_error() -> None:
    """Malformed XML surfaces as a ValueError matching ``invalid tool XML``."""
    broken_xml = "<tool><unclosed></tool>"
    with pytest.raises(ValueError, match="invalid tool XML"):
        _format_tool(broken_xml)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_handler_success_path_returns_dict() -> None:
    """A well-formed tool yields a dict containing a ``violations`` entry."""
    outcome = _check_tool(_TOOL)
    assert isinstance(outcome, dict)
    assert "violations" in outcome
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
"""Tests for the protocol-agnostic service adapter (facade → JSON dicts)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
from galaxy_tool_refactor_registry.errors import UnknownRuleCode, UnknownRuleset
|
|
7
|
+
from galaxy_tool_source.binding import ToolXmlSyntaxError
|
|
8
|
+
from galaxy_tool_source.profiles import latest_profile
|
|
9
|
+
|
|
10
|
+
from galaxy_tool_refactor_mcp import service
|
|
11
|
+
|
|
12
|
+
# The <param> attributes are deliberately out of order (value, type, name).
# GTR002 reorders them, so `format` rewrites this tool and `check` reports a
# fixable finding for it.
_MESSY = "".join(
    (
        '<tool id="m" name="M" version="1.0.0" profile="24.0">',
        "<command><![CDATA[echo x]]></command>",
        '<inputs><param value="v" type="text" name="a"/></inputs>',
        '<outputs><data name="o"/></outputs></tool>',
    )
)
|
|
20
|
+
# Profile 24.1 tool; its BAM format is normalised by the 24.1 -> 24.2 bump.
_UPGRADABLE = "".join(
    (
        '<tool id="m" name="M" version="1.0.0" profile="24.1">',
        "<command><![CDATA[echo x]]></command>",
        '<inputs><param name="i" type="data" format="BAM"/></inputs>',
        '<outputs><data name="o"/></outputs></tool>',
    )
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_format_tool_returns_canonical_xml() -> None:
    """Formatting the messy tool yields text with reordered param attributes."""
    outcome = service.format_tool(_MESSY)
    xml_text = outcome["formatted"]
    assert isinstance(xml_text, str)
    # Only the text after the first "<param" matters: GTR002 should have put
    # the attributes in name, type, value order.
    _, _, after_param = xml_text.partition("<param")
    assert (
        after_param.index("name=")
        < after_param.index("type=")
        < after_param.index("value=")
    )
    for key in ("advisory", "notes"):
        assert isinstance(outcome[key], list)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def test_format_tool_unknown_ruleset_raises() -> None:
    """An unrecognised ruleset name raises the typed UnknownRuleset."""
    bogus_rulesets = ["does-not-exist"]
    with pytest.raises(UnknownRuleset):
        service.format_tool(_MESSY, rulesets=bogus_rulesets)
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def test_format_tool_unknown_select_code_raises() -> None:
    """Selecting an unrecognised rule code raises the typed UnknownRuleCode."""
    bogus_codes = ["GTR999"]
    with pytest.raises(UnknownRuleCode):
        service.format_tool(_MESSY, select=bogus_codes)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def test_check_tool_reports_violations() -> None:
    """Checking the messy tool reports GTR002 among its violations."""
    report = service.check_tool(_MESSY)
    findings = report["violations"]
    assert isinstance(findings, list)
    reported = {finding["code"] for finding in findings}  # type: ignore[index]
    # GTR002 is the out-of-order param attributes finding.
    assert "GTR002" in reported
    assert isinstance(report["advisory_codes"], list)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_check_tool_strict_marks_advisory() -> None:
    """Under the strict ruleset at least one violation is flagged advisory."""
    report = service.check_tool(_MESSY, rulesets=["strict"])
    codes = report["advisory_codes"]
    assert isinstance(codes, list)
    # Strict adds the advisory checks. Advisory status is a per-violation
    # flag, not a code prefix.
    flagged = [item for item in report["violations"] if item["advisory"]]  # type: ignore[index]
    assert flagged
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def test_upgrade_tool_bumps_profile() -> None:
    """Upgrading the 24.1 tool lands on the latest profile via the 24.1 step."""
    outcome = service.upgrade_tool(_UPGRADABLE)
    xml_text = outcome["formatted"]
    assert isinstance(xml_text, str)
    expected_attr = f'profile="{latest_profile()}"'
    assert expected_attr in xml_text
    assert "24.1" in outcome["steps_applied"]  # type: ignore[operator]
    assert outcome["missing_upgrade"] is None
    assert outcome["behavior_preserving"] in (True, False, None)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_list_rulesets_includes_default() -> None:
    """The listing is non-empty, names ``default``, and flags a default entry."""
    listed = service.list_rulesets()
    assert listed
    assert "default" in {entry["name"] for entry in listed}
    assert any(entry["is_default"] for entry in listed)
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def test_list_rules_has_codes_and_families() -> None:
    """Every rule code is in the GTR namespace and the core families appear."""
    catalogue = service.list_rules()
    rule_codes = {entry["code"] for entry in catalogue}
    # Unified namespace: every code starts with GTR.
    assert all(code.startswith("GTR") for code in rule_codes)
    seen_families = {entry["family"] for entry in catalogue}
    assert {"codemod", "fmt", "check"} <= seen_families
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_list_rules_include_upgrade_adds_more() -> None:
    """Asking for upgrade rules returns strictly more rules than the default."""
    default_count = len(service.list_rules())
    with_upgrade_count = len(service.list_rules(include_upgrade=True))
    assert with_upgrade_count > default_count
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_malformed_xml_raises_syntax_error() -> None:
    """At the service layer malformed XML raises the typed ToolXmlSyntaxError."""
    broken_xml = "<tool><unclosed></tool>"
    with pytest.raises(ToolXmlSyntaxError):
        service.format_tool(broken_xml)
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
# Profile 24.2 tool with a small RST <help> (underlined title plus bold text).
_CONVERTIBLE = "".join(
    (
        '<tool id="m" name="M" version="1.0.0" profile="24.2">',
        "<command><![CDATA[echo x]]></command>",
        "<help>Title\n=====\n\nSome **bold** text.\n</help></tool>",
    )
)
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def test_convert_help_tool_converts() -> None:
    """The convertible tool comes back converted, with Markdown help content."""
    outcome = service.convert_help_tool(_CONVERTIBLE)
    assert outcome["converted"] is True
    assert outcome["skip_reason"] is None
    xml_text = outcome["formatted"]
    assert isinstance(xml_text, str)
    for expected in ('format="markdown"', "# Title"):
        assert expected in xml_text
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def test_convert_help_tool_reports_profile_skip() -> None:
    """With the profile attribute removed, conversion is skipped with a reason."""
    without_profile = _CONVERTIBLE.replace(' profile="24.2"', "")
    outcome = service.convert_help_tool(without_profile)
    assert outcome["converted"] is False
    reason = outcome["skip_reason"]
    # The reason is expected to mention upgrading.
    assert isinstance(reason, str) and "upgrade" in reason
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
# Tool version "1.20+galaxy0" paired with a package requirement at version 1.20.
_TOKENIZABLE = "".join(
    (
        '<tool id="m" name="M" version="1.20+galaxy0" profile="24.0">',
        "<command><![CDATA[echo x]]></command>",
        '<requirements><requirement type="package" version="1.20">samtools',
        "</requirement></requirements>",
        '<inputs><param name="i" type="text"/></inputs>',
        '<outputs><data name="o"/></outputs></tool>',
    )
)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def test_tokenize_version_tool_tokenizes() -> None:
    """The tokenizable tool gains version tokens and a @TOOL_VERSION@ definition."""
    outcome = service.tokenize_version_tool(_TOKENIZABLE)
    assert outcome["tokenized"] is True
    assert outcome["skip_reason"] is None
    xml_text = outcome["formatted"]
    assert isinstance(xml_text, str)
    expected_fragments = (
        'version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"',
        '<token name="@TOOL_VERSION@">1.20</token>',
    )
    for fragment in expected_fragments:
        assert fragment in xml_text
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def test_tokenize_version_tool_reports_skip() -> None:
    """Without a ``+galaxy`` suffix the tool is left untokenized, with a reason."""
    no_suffix = _TOKENIZABLE.replace('version="1.20+galaxy0"', 'version="1.20"')
    outcome = service.tokenize_version_tool(no_suffix)
    assert outcome["tokenized"] is False
    reason = outcome["skip_reason"]
    # The reason is expected to mention the +galaxy suffix.
    assert isinstance(reason, str) and "+galaxy" in reason
|