galaxy-tool-codemod 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- galaxy_tool_codemod/__init__.py +12 -0
- galaxy_tool_codemod/_version.py +21 -0
- galaxy_tool_codemod/canonical.py +109 -0
- galaxy_tool_codemod/catalog.py +90 -0
- galaxy_tool_codemod/certify.py +47 -0
- galaxy_tool_codemod/change.py +63 -0
- galaxy_tool_codemod/codemod.py +136 -0
- galaxy_tool_codemod/codemods/__init__.py +7 -0
- galaxy_tool_codemod/codemods/_attribute_ordering.py +24 -0
- galaxy_tool_codemod/codemods/_cdata.py +46 -0
- galaxy_tool_codemod/codemods/_coarse_detect.py +65 -0
- galaxy_tool_codemod/codemods/_interpreter.py +125 -0
- galaxy_tool_codemod/codemods/_runtime_gated.py +21 -0
- galaxy_tool_codemod/codemods/_validation_repair.py +32 -0
- galaxy_tool_codemod/codemods/convert_help_markdown.py +117 -0
- galaxy_tool_codemod/codemods/drop_redundant_param_name.py +88 -0
- galaxy_tool_codemod/codemods/fix_from_work_dir_whitespace.py +57 -0
- galaxy_tool_codemod/codemods/fix_interpreter.py +126 -0
- galaxy_tool_codemod/codemods/fix_output_format_input.py +150 -0
- galaxy_tool_codemod/codemods/fix_typos.py +184 -0
- galaxy_tool_codemod/codemods/normalize_boolean_values.py +138 -0
- galaxy_tool_codemod/codemods/reorder_param_attributes.py +61 -0
- galaxy_tool_codemod/codemods/reorder_tool_attributes.py +52 -0
- galaxy_tool_codemod/codemods/reorder_tool_children.py +68 -0
- galaxy_tool_codemod/codemods/repair_help_rst.py +71 -0
- galaxy_tool_codemod/codemods/replace_output_element.py +113 -0
- galaxy_tool_codemod/codemods/single_quote_command_vars.py +139 -0
- galaxy_tool_codemod/codemods/tokenize_version.py +93 -0
- galaxy_tool_codemod/codemods/trim_attribute_whitespace.py +84 -0
- galaxy_tool_codemod/codemods/update_profile.py +110 -0
- galaxy_tool_codemod/codemods/upgrade_19_01.py +96 -0
- galaxy_tool_codemod/codemods/upgrade_21_09.py +245 -0
- galaxy_tool_codemod/codemods/upgrade_24_0.py +98 -0
- galaxy_tool_codemod/codemods/upgrade_24_1.py +74 -0
- galaxy_tool_codemod/codemods/upgrade_25_1.py +48 -0
- galaxy_tool_codemod/codemods/wrap_command_cdata.py +42 -0
- galaxy_tool_codemod/codemods/wrap_help_cdata.py +42 -0
- galaxy_tool_codemod/cursor.py +281 -0
- galaxy_tool_codemod/datatype_format.py +63 -0
- galaxy_tool_codemod/eligibility.py +81 -0
- galaxy_tool_codemod/module.py +65 -0
- galaxy_tool_codemod/parse.py +47 -0
- galaxy_tool_codemod/profile_semantics.py +620 -0
- galaxy_tool_codemod/py.typed +0 -0
- galaxy_tool_codemod/runtime_fixes.py +77 -0
- galaxy_tool_codemod/upgrades.py +137 -0
- galaxy_tool_codemod-0.2.0.dist-info/METADATA +134 -0
- galaxy_tool_codemod-0.2.0.dist-info/RECORD +50 -0
- galaxy_tool_codemod-0.2.0.dist-info/WHEEL +4 -0
- galaxy_tool_codemod-0.2.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
"""LibCST-shaped framework for structural refactors of Galaxy tool XML.
|
|
2
|
+
|
|
3
|
+
Tier 2 of the Galaxy tool refactoring architecture (see ``README.md``).
|
|
4
|
+
Per dignified-python this package does not re-export from its
|
|
5
|
+
submodules — import the symbols you need directly:
|
|
6
|
+
|
|
7
|
+
from galaxy_tool_codemod.parse import parse_module
|
|
8
|
+
from galaxy_tool_codemod.canonical import canonical_codemods
|
|
9
|
+
from galaxy_tool_codemod.module import Module
|
|
10
|
+
from galaxy_tool_codemod.cursor import Cursor
|
|
11
|
+
from galaxy_tool_codemod.codemod import CodemodCommand
|
|
12
|
+
"""
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
"""Shared version-parse helper for the codemod tier.
|
|
2
|
+
|
|
3
|
+
``packaging`` exposes no validity predicate, so a ``try``/``except`` over
|
|
4
|
+
``Version`` is the sanctioned third-party boundary. Both the profile-semantics
|
|
5
|
+
catalogue (``profile_semantics``) and the runtime-gated-fix crossing gate
|
|
6
|
+
(``runtime_fixes``) need to place a possibly-unparseable profile string, so the
|
|
7
|
+
helper lives here once rather than being mirrored in each (architecture audit
|
|
8
|
+
2026-06-03, ``../../docs/architecture_audit.md``).
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from packaging.version import InvalidVersion, Version
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def version_or_none(value: str, /) -> Version | None:
    """Return *value* parsed as a ``Version``, or ``None`` when it is not one.

    Args:
        value: Candidate version string (positional-only).

    Returns:
        The ``Version`` built from *value*, or ``None`` when constructing it
        raises ``InvalidVersion``.
    """
    try:
        parsed = Version(value)
    except InvalidVersion:
        # An unparseable string is an expected outcome for callers, not an error.
        return None
    return parsed
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""The bundled codemod pipelines — ordered contracts consumed by the app tier.
|
|
2
|
+
|
|
3
|
+
Tier 2 (this package) and Tier 3 (``galaxy-tool-fmt``) are independent
|
|
4
|
+
siblings of Tier 1 (``galaxy-tool-source``); neither runs the user-facing
|
|
5
|
+
workflow. The orchestration — read a file, apply a pipeline, write the result
|
|
6
|
+
via fmt's serializer — lives in the top-level app tier
|
|
7
|
+
(``galaxy-tool-refactor-cli``). This module only declares *which* codemods run
|
|
8
|
+
and *in what order*; the app consumes these tuples.
|
|
9
|
+
|
|
10
|
+
Two pipelines, separated because profile upgrade is semantic and opt-in while
|
|
11
|
+
canonicalisation is safe and idempotent:
|
|
12
|
+
|
|
13
|
+
``canonical_codemods()`` — the structural canonical pipeline (the app's ``format``
|
|
14
|
+
command, run before fmt's cosmetic rules), derived from the codemods that declare
|
|
15
|
+
the ``"default"`` ruleset, ordered by ``meta.order``. Front-to-back:
|
|
16
|
+
|
|
17
|
+
1. ``FixTypos`` — repair near-miss spelling typos. A no-op unless the tool
|
|
18
|
+
validates at no profile, so it only acts on broken tools; running it first
|
|
19
|
+
lets the rest of the pipeline see a validatable tree.
|
|
20
|
+
2. ``NormalizeBooleanValues`` — canonicalize Python-style boolean attribute
|
|
21
|
+
values (``True``/``Yes``/…) to ``xs:boolean`` (``true``/``false``) on
|
|
22
|
+
schema-boolean attributes. Like ``FixTypos`` a no-op unless the tool validates
|
|
23
|
+
nowhere; behaviour-preserving and the sibling repair ``FixTypos`` cannot reach
|
|
24
|
+
(the lenient model accepts ``True``).
|
|
25
|
+
3. ``RepairHelpRst`` — repair the deterministically-fixable invalid ``<help>``
|
|
26
|
+
reStructuredText (GTR089.1, the fixable half of the GTR089 partition) behind
|
|
27
|
+
tier 1's behaviour-preserving gate. A no-op on valid or macro-bearing help;
|
|
28
|
+
what it can't reach stays the ``GTR089.2`` advisory residual. See
|
|
29
|
+
``docs/decisions.md`` §37.
|
|
30
|
+
4. ``TrimAttributeWhitespace`` / ``ReplaceOutputElement`` /
|
|
31
|
+
``DropRedundantParamName`` — the planemo-parity fixes (GTR035–GTR037):
|
|
32
|
+
value-level repairs that settle attribute *content* before the reorders tidy
|
|
33
|
+
attribute *order*.
|
|
34
|
+
5. ``ReorderParamAttributes`` / ``ReorderToolAttributes`` — tidy attribute order
|
|
35
|
+
once the tree is settled.
|
|
36
|
+
6. ``ReorderToolChildren`` — reorder the root ``<tool>``'s child elements to the
|
|
37
|
+
IUC convention (element-level tidying after attribute-level). Validity-safe:
|
|
38
|
+
the schema's ``<tool>`` content model is order-free (``xs:all``).
|
|
39
|
+
7. ``WrapCommandCdata`` / ``WrapHelpCdata`` — wrap a pure-text ``<command>`` /
|
|
40
|
+
``<help>`` body in ``<![CDATA[…]]>`` (IUC #34/#42). Behaviour-preserving — lxml
|
|
41
|
+
exposes the entity-unescaped text, so only the serialised bytes change, not the
|
|
42
|
+
value Galaxy runs/renders. Content-level tidying, so it runs after the
|
|
43
|
+
structural reorders; independent of them (it never touches child order). See
|
|
44
|
+
``docs/decisions.md`` §29.
|
|
45
|
+
8. ``SingleQuoteCommandVars`` — single-quote the *provably*-single-valued unquoted
|
|
46
|
+
Cheetah ``$var``\\ s in ``<command>`` (GTR020.1, the fixable half of the GTR020
|
|
47
|
+
partition).
|
|
48
|
+
Acts only on references whose value can never contain whitespace for a working
|
|
49
|
+
tool (bare single-token params, ``$__…__`` path built-ins, space-free attrs),
|
|
50
|
+
so it is behaviour-preserving like the CDATA wraps. It runs **after**
|
|
51
|
+
``WrapCommandCdata`` so it sees the body already in its canonical CDATA form and
|
|
52
|
+
preserves it. Unlike the rest of this pipeline it changes the default ``format``
|
|
53
|
+
output for tools that were never previously rewritten — a deliberate, data-backed
|
|
54
|
+
reversal of the GTR020.2-stays-advisory stance (``docs/decisions.md`` §30). The
|
|
55
|
+
advisory ``GTR020.2`` check still reports the non-provable residual this skips.
|
|
56
|
+
|
|
57
|
+
It deliberately does **not** change ``profile=`` or apply version migrations —
|
|
58
|
+
that is the upgrade pipeline's job.
|
|
59
|
+
|
|
60
|
+
``AUTO_UPGRADE_CODEMODS`` — the opt-in profile-upgrade pipeline (the app's
|
|
61
|
+
``upgrade`` command). Front-to-back:
|
|
62
|
+
|
|
63
|
+
1. ``FixTypos`` / ``NormalizeBooleanValues`` — repair first, so a broken-and-
|
|
64
|
+
outdated tool becomes validatable and therefore upgradable in one pass.
|
|
65
|
+
2. ``UpgradeToLatest`` — iteratively upgrade the (now possibly repaired) tool
|
|
66
|
+
toward the latest profile, re-declaring its profile between steps. This
|
|
67
|
+
subsumes ``UpdateProfile`` (it runs it internally each round).
|
|
68
|
+
|
|
69
|
+
``FixTypos`` / ``NormalizeBooleanValues`` intentionally appear in both pipelines;
|
|
70
|
+
both are idempotent, so running them in whichever pipeline the user invokes is
|
|
71
|
+
harmless.
|
|
72
|
+
"""
|
|
73
|
+
|
|
74
|
+
from __future__ import annotations
|
|
75
|
+
|
|
76
|
+
from functools import cache
|
|
77
|
+
|
|
78
|
+
from galaxy_tool_codemod.catalog import coded_codemods
|
|
79
|
+
from galaxy_tool_codemod.codemod import CodemodCommand
|
|
80
|
+
from galaxy_tool_codemod.codemods.fix_typos import FixTypos
|
|
81
|
+
from galaxy_tool_codemod.codemods.normalize_boolean_values import (
|
|
82
|
+
NormalizeBooleanValues,
|
|
83
|
+
)
|
|
84
|
+
from galaxy_tool_codemod.upgrades import UpgradeToLatest
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
@cache
def canonical_codemods() -> tuple[type[CodemodCommand], ...]:
    """Return the structural canonical/``format`` pipeline, computed from the rules.

    The pipeline is every class returned by ``coded_codemods()`` whose
    ``meta.rulesets`` contains ``"default"``, ordered by ``meta.order``. Nothing
    is listed by hand here: membership and position come from each codemod's
    ``meta``. The result is memoised with ``functools.cache``, so repeated calls
    return the same tuple.

    Returns:
        The selected codemod classes as a tuple, in ascending ``meta.order``.
    """
    selected = [
        command for command in coded_codemods() if "default" in command.meta.rulesets
    ]
    # ``list.sort`` is stable, so classes sharing an ``order`` keep the relative
    # position ``coded_codemods()`` gave them.
    selected.sort(key=lambda command: command.meta.order)
    return tuple(selected)
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
# The opt-in profile-upgrade pipeline, listed front-to-back: the two repair
# codemods come first, followed by the ``UpgradeToLatest`` orchestrator.
AUTO_UPGRADE_CODEMODS: tuple[type[CodemodCommand], ...] = (
    FixTypos,
    NormalizeBooleanValues,
    UpgradeToLatest,
)
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""The full set of GTR-coded codemods, for documentation and registry use.
|
|
2
|
+
|
|
3
|
+
Distinct from ``canonical_codemods()`` (``canonical.py``): that tuple is the
|
|
4
|
+
*ordered pipeline* fmt's CLI runs, and it omits the single-step ``upgrade_vN``
|
|
5
|
+
codemods because ``UpgradeToLatest`` drives them internally. This catalog lists
|
|
6
|
+
*every* codemod that carries a ``RuleMeta`` GTR code, so a cross-tier rule
|
|
7
|
+
registry (such as the corpus-format stat page) can enumerate them alongside the
|
|
8
|
+
formatter tier's ``all_rules()``.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from galaxy_tool_codemod.codemod import CodemodCommand
|
|
14
|
+
from galaxy_tool_codemod.codemods.convert_help_markdown import (
|
|
15
|
+
ConvertHelpToMarkdown,
|
|
16
|
+
)
|
|
17
|
+
from galaxy_tool_codemod.codemods.drop_redundant_param_name import (
|
|
18
|
+
DropRedundantParamName,
|
|
19
|
+
)
|
|
20
|
+
from galaxy_tool_codemod.codemods.fix_from_work_dir_whitespace import (
|
|
21
|
+
FixFromWorkDirWhitespace,
|
|
22
|
+
)
|
|
23
|
+
from galaxy_tool_codemod.codemods.fix_interpreter import FixInterpreter
|
|
24
|
+
from galaxy_tool_codemod.codemods.fix_output_format_input import (
|
|
25
|
+
FixOutputFormatInput,
|
|
26
|
+
)
|
|
27
|
+
from galaxy_tool_codemod.codemods.fix_typos import FixTypos
|
|
28
|
+
from galaxy_tool_codemod.codemods.normalize_boolean_values import (
|
|
29
|
+
NormalizeBooleanValues,
|
|
30
|
+
)
|
|
31
|
+
from galaxy_tool_codemod.codemods.reorder_param_attributes import (
|
|
32
|
+
ReorderParamAttributes,
|
|
33
|
+
)
|
|
34
|
+
from galaxy_tool_codemod.codemods.reorder_tool_attributes import (
|
|
35
|
+
ReorderToolAttributes,
|
|
36
|
+
)
|
|
37
|
+
from galaxy_tool_codemod.codemods.reorder_tool_children import (
|
|
38
|
+
ReorderToolChildren,
|
|
39
|
+
)
|
|
40
|
+
from galaxy_tool_codemod.codemods.repair_help_rst import RepairHelpRst
|
|
41
|
+
from galaxy_tool_codemod.codemods.replace_output_element import (
|
|
42
|
+
ReplaceOutputElement,
|
|
43
|
+
)
|
|
44
|
+
from galaxy_tool_codemod.codemods.single_quote_command_vars import (
|
|
45
|
+
SingleQuoteCommandVars,
|
|
46
|
+
)
|
|
47
|
+
from galaxy_tool_codemod.codemods.tokenize_version import TokenizeVersion
|
|
48
|
+
from galaxy_tool_codemod.codemods.trim_attribute_whitespace import (
|
|
49
|
+
TrimAttributeWhitespace,
|
|
50
|
+
)
|
|
51
|
+
from galaxy_tool_codemod.codemods.update_profile import UpdateProfile
|
|
52
|
+
from galaxy_tool_codemod.codemods.upgrade_19_01 import Upgrade19_01
|
|
53
|
+
from galaxy_tool_codemod.codemods.upgrade_21_09 import Upgrade21_09
|
|
54
|
+
from galaxy_tool_codemod.codemods.upgrade_24_0 import Upgrade24_0
|
|
55
|
+
from galaxy_tool_codemod.codemods.upgrade_24_1 import Upgrade24_1
|
|
56
|
+
from galaxy_tool_codemod.codemods.upgrade_25_1 import Upgrade25_1
|
|
57
|
+
from galaxy_tool_codemod.codemods.wrap_command_cdata import WrapCommandCdata
|
|
58
|
+
from galaxy_tool_codemod.codemods.wrap_help_cdata import WrapHelpCdata
|
|
59
|
+
from galaxy_tool_codemod.upgrades import UpgradeToLatest
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def coded_codemods() -> tuple[type[CodemodCommand], ...]:
    """Return every GTR-coded codemod class, ordered by ``meta.code``.

    The classes are enumerated explicitly below and then sorted, so the order
    in which they are written here only matters for breaking ties between
    equal codes (the sort is stable).

    Returns:
        A tuple of codemod classes in ascending ``meta.code`` order.
    """
    registry: tuple[type[CodemodCommand], ...] = (
        FixTypos,
        ReorderParamAttributes,
        ReorderToolAttributes,
        ReorderToolChildren,
        UpdateProfile,
        Upgrade19_01,
        Upgrade21_09,
        Upgrade24_0,
        Upgrade24_1,
        Upgrade25_1,
        UpgradeToLatest,
        FixFromWorkDirWhitespace,
        FixOutputFormatInput,
        FixInterpreter,
        NormalizeBooleanValues,
        RepairHelpRst,
        WrapCommandCdata,
        WrapHelpCdata,
        SingleQuoteCommandVars,
        TrimAttributeWhitespace,
        ReplaceOutputElement,
        DropRedundantParamName,
        ConvertHelpToMarkdown,
        TokenizeVersion,
    )
    by_code = sorted(registry, key=lambda command: command.meta.code)
    return tuple(by_code)
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""The ``EditCertifier`` seam: a pluggable per-edit behaviour-preservation oracle.
|
|
2
|
+
|
|
3
|
+
GTR020.1 (``SingleQuoteCommandVars``) decides, per occurrence, whether single-quoting
|
|
4
|
+
a Cheetah ``$var`` in ``<command>`` preserves behaviour. By **default** it uses the
|
|
5
|
+
tier-1 static policy ``galaxy_tool_source.shell_oracle.quote_is_behavior_preserving``
|
|
6
|
+
— the
|
|
7
|
+
bashlex shell-context classifier composed with the value-domain rule, degrading to the
|
|
8
|
+
pure value-domain ``provably_quotable`` when the optional
|
|
9
|
+
``galaxy-tool-source[shell-oracle]`` extra is absent.
|
|
10
|
+
|
|
11
|
+
This Protocol reserves the seam (shipped consulting ``None`` = the static policy) for
|
|
12
|
+
the Phase-2 CT3 *render* certifier (``--certify=render``): an ``EditCertifier`` injected
|
|
13
|
+
into the codemod overrides the default and may only *narrow* the candidate set. The
|
|
14
|
+
codemod calls ``should_quote`` with the same arguments as the static policy so the two
|
|
15
|
+
are interchangeable. See
|
|
16
|
+
``../../docs/upgrade_research/cheetah_bashlex_boundary_oracle.md`` §4.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from typing import TYPE_CHECKING, Protocol, runtime_checkable
|
|
22
|
+
|
|
23
|
+
if TYPE_CHECKING:
|
|
24
|
+
from galaxy_tool_source.command_text import UnquotedVar
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@runtime_checkable
class EditCertifier(Protocol):
    """Structural interface for an oracle that approves one single-quote edit.

    Any object exposing a ``should_quote`` method satisfies this protocol.
    Because the class is ``runtime_checkable``, ``isinstance`` checks against
    it test only that the method is present, not its signature.
    """

    def should_quote(
        self,
        body: str,
        /,
        *,
        occurrence: UnquotedVar,
        kinds: dict[str, str],
        structural: set[str],
    ) -> bool:
        """Decide whether single-quoting *occurrence* in *body* keeps behaviour.

        The parameters mirror
        ``galaxy_tool_source.shell_oracle.quote_is_behavior_preserving`` so an
        implementation can stand in for that default static policy.

        Args:
            body: The ``<command>`` body text (positional-only).
            occurrence: The unquoted variable occurrence under consideration.
            kinds: Keyword-only ``str`` to ``str`` mapping, passed through in the
                same form the static policy receives it.
            structural: Keyword-only set of ``str``, passed through in the same
                form the static policy receives it.

        Returns:
            ``True`` when the edit is certified as behaviour-preserving.
        """
        ...
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""``Change`` — one detected structural mutation, applied via a thunk.
|
|
2
|
+
|
|
3
|
+
A codemod's **detect** phase yields ``Change``s without touching the tree: each
|
|
4
|
+
carries the diagnostic data (``code``, ``sourceline``, ``xpath``, ``message`` —
|
|
5
|
+
the same fields as a tier-0.5 ``Violation``) plus a zero-argument ``mutate``
|
|
6
|
+
thunk that performs the mutation through the existing ``Cursor`` primitives. The
|
|
7
|
+
detect list *is* the report; running ``apply_changes`` over it is the fix. One
|
|
8
|
+
mutation site (the thunk body), one source of truth — the change a codemod
|
|
9
|
+
reports is exactly the change it applies, with no risk of the two drifting.
|
|
10
|
+
|
|
11
|
+
See ``galaxy-tool-fmt``'s ``edits.py`` for the cosmetic-tier analogue; the
|
|
12
|
+
difference is that an ``Edit`` is a pure-data union dispatched by ``match/case``
|
|
13
|
+
whereas a ``Change`` carries its mutation as a closure over a ``Cursor`` call
|
|
14
|
+
(``docs/decisions.md`` § on the detect/fix split records why the structural tier
|
|
15
|
+
reuses the cursor rather than re-enumerating every mutation kind).
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
from collections.abc import Callable, Iterable
|
|
21
|
+
from dataclasses import dataclass, field
|
|
22
|
+
|
|
23
|
+
from galaxy_tool_refactor_rules.violation import Violation
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass(frozen=True)
class Change:
    """A single structural mutation found by a codemod, plus how to apply it.

    Instances are immutable. Equality, hashing and ``repr`` consider only the
    four diagnostic fields; the ``mutate`` callable is left out, so two changes
    describing the same finding compare equal even when their closures differ.

    Attributes:
        code: The reporting codemod's ``RuleMeta.code`` (e.g. ``"GTR002"``).
        sourceline: 1-based source line of the affected element, or ``0``.
        xpath: Absolute xpath of the affected element.
        message: One-line, human-readable description of the change.
        mutate: Zero-argument callable that performs the mutation when invoked.
    """

    code: str
    sourceline: int
    xpath: str
    message: str
    mutate: Callable[[], None] = field(compare=False, repr=False)

    def to_violation(self) -> Violation:
        """Build a ``Violation`` carrying this change's diagnostic fields.

        Returns:
            A ``Violation`` constructed from ``code``, ``sourceline``, ``xpath``
            and ``message``; the ``mutate`` callable is not forwarded.
        """
        diagnostic = {
            "code": self.code,
            "sourceline": self.sourceline,
            "xpath": self.xpath,
            "message": self.message,
        }
        return Violation(**diagnostic)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def apply_changes(changes: Iterable[Change], /) -> None:
    """Run the ``mutate`` callable of each change, in iteration order.

    This is the one place structural mutations are dispatched; a caller that
    only wants the report can iterate the detected changes and never call this.

    Args:
        changes: The changes to apply (positional-only). The iterable is
            consumed lazily, one change at a time.
    """
    for pending in changes:
        thunk = pending.mutate
        thunk()
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
"""``CodemodCommand`` base class and the detect-dispatch harness.
|
|
2
|
+
|
|
3
|
+
A structural codemod subclasses ``CodemodCommand`` and defines one or more
|
|
4
|
+
``detect_<TagPascalCase>`` methods. ``detect(module)`` walks the lxml tree in
|
|
5
|
+
document order; for each element it looks up ``detect_<TagPascalCase>`` and, if
|
|
6
|
+
present, yields the ``Change``s it returns — **without mutating the tree**. The
|
|
7
|
+
yielded change list *is* the lint report. ``apply(module)`` is derived: it
|
|
8
|
+
materialises ``detect(module)`` and runs each change's ``mutate`` thunk, so the
|
|
9
|
+
change a codemod reports is exactly the change it applies. Comment and
|
|
10
|
+
ProcessingInstruction nodes are skipped by ``Cursor.children()`` so detectors
|
|
11
|
+
only see real elements.
|
|
12
|
+
|
|
13
|
+
Validation-driven codemods (``FixTypos``, ``UpgradeToLatest`` and the per-step
|
|
14
|
+
upgrades) cannot pre-compute a static change list — they branch on
|
|
15
|
+
re-validation — so they override ``apply`` with bespoke logic and supply a
|
|
16
|
+
**coarse** ``detect`` (see ``codemods._coarse_detect``).
|
|
17
|
+
|
|
18
|
+
Dispatch is by **tag name** (``<param>`` → ``detect_Param``,
|
|
19
|
+
``<change_format>`` → ``detect_ChangeFormat``). The architecture targets
|
|
20
|
+
typed-model class names long-term — these coincide with PascalCase tags
|
|
21
|
+
for unambiguous elements like ``<param>`` and ``<tool>``, and diverge
|
|
22
|
+
only for elements with multiple per-context typed classes (``<when>``).
|
|
23
|
+
Per-context dispatch is deferred until a codemod needs it.
|
|
24
|
+
|
|
25
|
+
**Macro-mode handling is not yet implemented.** A future milestone will
|
|
26
|
+
add a per-codemod declaration of how macros should be treated (expand /
|
|
27
|
+
strip / skip / leave as-is) and a harness that honours it. Codemods
|
|
28
|
+
written today operate on the source tree as-parsed; do not assume any
|
|
29
|
+
macro-aware behaviour.
|
|
30
|
+
|
|
31
|
+
See ``docs/architecture.md`` § Cursor-walk constraint and
|
|
32
|
+
``PLAN.md`` § M3 for the design notes.
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
from __future__ import annotations
|
|
36
|
+
|
|
37
|
+
from functools import cache
|
|
38
|
+
from typing import TYPE_CHECKING, ClassVar
|
|
39
|
+
|
|
40
|
+
from galaxy_tool_codemod.change import Change, apply_changes
|
|
41
|
+
from galaxy_tool_codemod.cursor import Cursor
|
|
42
|
+
from galaxy_tool_codemod.eligibility import corpus_test_profile
|
|
43
|
+
|
|
44
|
+
if TYPE_CHECKING:
|
|
45
|
+
from collections.abc import Iterable, Iterator
|
|
46
|
+
|
|
47
|
+
from galaxy_tool_refactor_rules.meta import RuleMeta
|
|
48
|
+
from galaxy_tool_source.document import ToolDocument
|
|
49
|
+
|
|
50
|
+
from galaxy_tool_codemod.module import Module
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@cache
def _detect_method_name(tag: str) -> str:
    """Map an XML tag name to the name of its detector method.

    The tag is split on underscores, the first character of every piece is
    upper-cased (the rest is left untouched), and the pieces are joined behind
    a ``detect_`` prefix:

    ``"param"`` → ``"detect_Param"``;
    ``"change_format"`` → ``"detect_ChangeFormat"``.

    Results are memoised per tag with ``functools.cache``.
    """
    segments = tag.split("_")
    pascal_segments = [f"{segment[:1].upper()}{segment[1:]}" for segment in segments]
    return "detect_" + "".join(pascal_segments)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class CodemodCommand:
    """Base class every structural-refactor codemod derives from.

    Subclasses declare a ``meta: ClassVar[RuleMeta]`` descriptor and one or
    more ``detect_<TagPascalCase>`` methods. The base class supplies the tree
    walk that finds and calls those methods (``detect``), the derived fix
    (``apply``), and the default corpus-sweep hooks.
    """

    meta: ClassVar[RuleMeta]

    def detect(self, module: Module, /) -> Iterable[Change]:
        """Yield the changes this codemod would make to *module*.

        Starts a walk at a ``Cursor`` over ``module.document.root`` and yields
        whatever the matching ``detect_<Tag>`` methods produce. The walk itself
        performs no mutation; subclasses may replace this method entirely.
        """
        root_cursor = Cursor(module.document.root)
        yield from self._detect_dispatch(root_cursor)

    def _detect_dispatch(self, cursor: Cursor) -> Iterator[Change]:
        """Visit *cursor*, then each of its children, in pre-order.

        For every visited cursor the detector named after its tag is looked up
        on ``self``; when one exists, the changes it returns are yielded before
        the walk descends into ``cursor.children()``.
        """
        handler = getattr(self, _detect_method_name(cursor.tag), None)
        if handler is not None:
            yield from handler(cursor)
        for descendant in cursor.children():
            yield from self._detect_dispatch(descendant)

    def apply(self, module: Module, /) -> None:
        """Apply this codemod to *module* in place.

        All changes are collected into a list first, so every read finishes
        before the first mutation runs; ``apply_changes`` then invokes each
        change's thunk. No snapshot or rollback is taken here, so a caller
        that needs atomicity must arrange it itself.
        """
        pending = list(self.detect(module))
        apply_changes(pending)

    def upgrade_steps_applied(self) -> tuple[str, ...]:
        """Return the from-versions the last ``apply`` upgraded the tool past.

        The base implementation always returns an empty tuple; an upgrade
        orchestrator overrides it to report the steps it performed.
        """
        return ()

    @classmethod
    def corpus_eligible(cls, document: ToolDocument, /) -> bool:
        """Say whether a corpus sweep should run this codemod on *document*.

        By default a document is eligible exactly when
        ``corpus_test_profile(document)`` returns something other than
        ``None``. Codemods aimed at a different population override this.
        """
        profile = corpus_test_profile(document)
        return profile is not None

    @classmethod
    def corpus_validation_profile(cls, document: ToolDocument, /) -> str | None:
        """Return the profile at which to validate *document* after the codemod.

        The default simply returns ``corpus_test_profile(document)``; codemods
        that alter which profiles validate override this.
        """
        return corpus_test_profile(document)
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"""Codemod implementations bundled with the framework.
|
|
2
|
+
|
|
3
|
+
Each module here defines one codemod (verb-noun name) — see
|
|
4
|
+
``canonical.py`` for the set fmt's CLI runs to produce conformant output.
|
|
5
|
+
Underscore-prefixed modules (e.g. ``_attribute_ordering``) are shared
|
|
6
|
+
helpers, not codemods.
|
|
7
|
+
"""
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"""Shared helper for attribute-reordering codemods.
|
|
2
|
+
|
|
3
|
+
Given an element's current attribute names and a priority map (attribute
|
|
4
|
+
name → integer; lower runs first), returns the canonical order:
|
|
5
|
+
priority-ascending, with unknown attributes sorting alphabetically after
|
|
6
|
+
the known ones. Originally lived in ``galaxy-tool-fmt`` as
|
|
7
|
+
``attribute_ordering``; moved here when the attribute-reorder rules
|
|
8
|
+
became structural codemods.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
from collections.abc import Iterable, Mapping
|
|
14
|
+
|
|
15
|
+
# Rank given to any name that is missing from the priority mapping.
_UNKNOWN_PRIORITY = 100


def canonical_order(
    names: Iterable[str], priority: Mapping[str, int]
) -> tuple[str, ...]:
    """Return *names* in canonical order: by priority, then alphabetically.

    Each name is ranked by ``priority[name]``; a name missing from *priority*
    is ranked ``_UNKNOWN_PRIORITY`` (100). Ties are broken by the name itself,
    so unknown names come out alphabetically after every known name whose
    priority is below 100. A mapped priority of 100 or more is not ranked
    ahead of the unknown names.

    Args:
        names: The attribute names to order; any iterable, consumed once.
        priority: Attribute name to rank; lower ranks sort first.

    Returns:
        The ordered names as a tuple.
    """

    def sort_key(name: str) -> tuple[int, str]:
        return priority.get(name, _UNKNOWN_PRIORITY), name

    ordered = list(names)
    ordered.sort(key=sort_key)
    return tuple(ordered)
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
"""Shared CDATA-wrapping detect logic for ``WrapCommandCdata`` / ``WrapHelpCdata``.
|
|
2
|
+
|
|
3
|
+
Galaxy ``<command>`` and ``<help>`` bodies are best written inside a
|
|
4
|
+
``<![CDATA[…]]>`` section so shell operators (``&&``, ``<``, ``|``) and markup
|
|
5
|
+
stay literal — the IUC ``tool_xml`` best practices (#34 for ``<command>``, #42 for
|
|
6
|
+
``<help>``). When a body is *pure text* — non-whitespace, no child nodes, not
|
|
7
|
+
already CDATA-wrapped, and free of the ``]]>`` terminator that can't live inside a
|
|
8
|
+
single section — wrapping it is **behaviour-preserving**: lxml already exposes the
|
|
9
|
+
entity-unescaped text, so only the serialised bytes change (entities become literal
|
|
10
|
+
inside CDATA), not the value Galaxy ultimately runs or renders.
|
|
11
|
+
|
|
12
|
+
Mixed-content bodies (text interleaved with child elements or comments) and
|
|
13
|
+
already-wrapped bodies are left untouched; the advisory sub-rules GTR018.2 / GTR019.2
|
|
14
|
+
flag the rare residual these fix sub-rules deliberately skip.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from __future__ import annotations
|
|
18
|
+
|
|
19
|
+
from galaxy_tool_source.cdata import cdata_wrappable
|
|
20
|
+
|
|
21
|
+
from galaxy_tool_codemod.change import Change
|
|
22
|
+
from galaxy_tool_codemod.cursor import Cursor
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def cdata_wrap_change(cursor: Cursor, /, *, code: str, element: str) -> Change | None:
    """Build the ``Change`` that CDATA-wraps *cursor*'s body, if it qualifies.

    Eligibility is decided solely by the shared ``cdata_wrappable`` predicate
    applied to ``cursor.element``. The body text is captured when the change is
    created, and the change's thunk writes that same text back through
    ``cursor.set_text(..., cdata=True)``.

    Args:
        cursor: Cursor positioned on the element to wrap (positional-only).
        code: Rule code recorded on the returned change.
        element: Element name interpolated into the change message.

    Returns:
        The wrapping ``Change``, or ``None`` when the predicate rejects the
        element or the element has no text.
    """
    # ``cursor.text`` is read only after the predicate has accepted the element.
    body = cursor.text if cdata_wrappable(cursor.element) else None
    if body is None:
        # Covers both a rejected element and absent text.
        return None
    return Change(
        code=code,
        sourceline=cursor.sourceline,
        xpath=cursor.xpath,
        message=f"<{element}> body is not wrapped in CDATA",
        mutate=lambda: cursor.set_text(body, cdata=True),
    )
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
"""Coarse detection for validation-driven codemods.
|
|
2
|
+
|
|
3
|
+
The structural reorderers compute a per-occurrence change list directly. The
|
|
4
|
+
validation-driven codemods (``FixTypos``, ``UpdateProfile``, ``UpgradeToLatest``
|
|
5
|
+
and the per-step upgrades) cannot: they branch on re-validation, so there is no
|
|
6
|
+
static change list to pre-compute. Their detect phase is therefore **coarse** —
|
|
7
|
+
it answers only "would applying this codemod change the tool?" by running the
|
|
8
|
+
codemod on a throwaway copy and comparing the serialised tree. When the answer
|
|
9
|
+
is yes it yields a single ``Change`` located at the root ``<tool>`` whose thunk
|
|
10
|
+
runs the real ``apply``; otherwise it yields nothing.
|
|
11
|
+
|
|
12
|
+
This keeps detect/apply parity (detect yields ⇔ apply mutates) for the sweep's
|
|
13
|
+
parity gate without pretending to a precision these codemods cannot offer; the
|
|
14
|
+
per-occurrence lint value concentrates in the structural and detect-only rules.
|
|
15
|
+
See ``docs/decisions.md`` § on the detect/fix split.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import copy
|
|
21
|
+
from collections.abc import Iterator
|
|
22
|
+
|
|
23
|
+
from galaxy_tool_source.document import ToolDocument
|
|
24
|
+
from lxml import etree
|
|
25
|
+
|
|
26
|
+
from galaxy_tool_codemod.change import Change
|
|
27
|
+
from galaxy_tool_codemod.codemod import CodemodCommand
|
|
28
|
+
from galaxy_tool_codemod.module import Module
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def coarse_detect(
    codemod: CodemodCommand, module: Module, /, *, message: str
) -> Iterator[Change]:
    """Yield a single root-level ``Change`` iff *codemod* would alter *module*.

    A new instance of *codemod*'s class (constructed with no arguments) is
    applied to a deep copy of the module's tree. The copy is serialised with
    ``etree.tostring`` before and after; both snapshots come from the copy, so
    only a difference produced by ``apply`` counts. The copy is given the
    original document's ``source_path``.

    Args:
        codemod: The codemod being probed (positional-only). Its ``meta.code``
            labels the yielded change and its own ``apply`` backs the thunk.
        module: The module to probe (positional-only); it is not mutated here.
        message: Description recorded on the yielded change.

    Yields:
        At most one ``Change``, located at ``module.cursor``, whose thunk calls
        ``codemod.apply(module)`` on the original module.
    """
    scratch = Module(
        ToolDocument(
            copy.deepcopy(module.document.tree),
            source_path=module.document.source_path,
        )
    )
    snapshot = etree.tostring(scratch.document.tree)
    # Probe with a fresh instance so the caller's codemod object is untouched.
    type(codemod)().apply(scratch)
    if etree.tostring(scratch.document.tree) != snapshot:
        anchor = module.cursor
        yield Change(
            code=codemod.meta.code,
            sourceline=anchor.sourceline,
            xpath=anchor.xpath,
            message=message,
            mutate=lambda: codemod.apply(module),
        )
|