galaxy-tool-codemod 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. galaxy_tool_codemod/__init__.py +12 -0
  2. galaxy_tool_codemod/_version.py +21 -0
  3. galaxy_tool_codemod/canonical.py +109 -0
  4. galaxy_tool_codemod/catalog.py +90 -0
  5. galaxy_tool_codemod/certify.py +47 -0
  6. galaxy_tool_codemod/change.py +63 -0
  7. galaxy_tool_codemod/codemod.py +136 -0
  8. galaxy_tool_codemod/codemods/__init__.py +7 -0
  9. galaxy_tool_codemod/codemods/_attribute_ordering.py +24 -0
  10. galaxy_tool_codemod/codemods/_cdata.py +46 -0
  11. galaxy_tool_codemod/codemods/_coarse_detect.py +65 -0
  12. galaxy_tool_codemod/codemods/_interpreter.py +125 -0
  13. galaxy_tool_codemod/codemods/_runtime_gated.py +21 -0
  14. galaxy_tool_codemod/codemods/_validation_repair.py +32 -0
  15. galaxy_tool_codemod/codemods/convert_help_markdown.py +117 -0
  16. galaxy_tool_codemod/codemods/drop_redundant_param_name.py +88 -0
  17. galaxy_tool_codemod/codemods/fix_from_work_dir_whitespace.py +57 -0
  18. galaxy_tool_codemod/codemods/fix_interpreter.py +126 -0
  19. galaxy_tool_codemod/codemods/fix_output_format_input.py +150 -0
  20. galaxy_tool_codemod/codemods/fix_typos.py +184 -0
  21. galaxy_tool_codemod/codemods/normalize_boolean_values.py +138 -0
  22. galaxy_tool_codemod/codemods/reorder_param_attributes.py +61 -0
  23. galaxy_tool_codemod/codemods/reorder_tool_attributes.py +52 -0
  24. galaxy_tool_codemod/codemods/reorder_tool_children.py +68 -0
  25. galaxy_tool_codemod/codemods/repair_help_rst.py +71 -0
  26. galaxy_tool_codemod/codemods/replace_output_element.py +113 -0
  27. galaxy_tool_codemod/codemods/single_quote_command_vars.py +139 -0
  28. galaxy_tool_codemod/codemods/tokenize_version.py +93 -0
  29. galaxy_tool_codemod/codemods/trim_attribute_whitespace.py +84 -0
  30. galaxy_tool_codemod/codemods/update_profile.py +110 -0
  31. galaxy_tool_codemod/codemods/upgrade_19_01.py +96 -0
  32. galaxy_tool_codemod/codemods/upgrade_21_09.py +245 -0
  33. galaxy_tool_codemod/codemods/upgrade_24_0.py +98 -0
  34. galaxy_tool_codemod/codemods/upgrade_24_1.py +74 -0
  35. galaxy_tool_codemod/codemods/upgrade_25_1.py +48 -0
  36. galaxy_tool_codemod/codemods/wrap_command_cdata.py +42 -0
  37. galaxy_tool_codemod/codemods/wrap_help_cdata.py +42 -0
  38. galaxy_tool_codemod/cursor.py +281 -0
  39. galaxy_tool_codemod/datatype_format.py +63 -0
  40. galaxy_tool_codemod/eligibility.py +81 -0
  41. galaxy_tool_codemod/module.py +65 -0
  42. galaxy_tool_codemod/parse.py +47 -0
  43. galaxy_tool_codemod/profile_semantics.py +620 -0
  44. galaxy_tool_codemod/py.typed +0 -0
  45. galaxy_tool_codemod/runtime_fixes.py +77 -0
  46. galaxy_tool_codemod/upgrades.py +137 -0
  47. galaxy_tool_codemod-0.2.0.dist-info/METADATA +134 -0
  48. galaxy_tool_codemod-0.2.0.dist-info/RECORD +50 -0
  49. galaxy_tool_codemod-0.2.0.dist-info/WHEEL +4 -0
  50. galaxy_tool_codemod-0.2.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,12 @@
1
+ """LibCST-shaped framework for structural refactors of Galaxy tool XML.
2
+
3
+ Tier 2 of the Galaxy tool refactoring architecture (see ``README.md``).
4
+ Per dignified-python this package does not re-export from its
5
+ submodules — import the symbols you need directly:
6
+
7
+ from galaxy_tool_codemod.parse import parse_module
8
+ from galaxy_tool_codemod.canonical import canonical_codemods
9
+ from galaxy_tool_codemod.module import Module
10
+ from galaxy_tool_codemod.cursor import Cursor
11
+ from galaxy_tool_codemod.codemod import CodemodCommand
12
+ """
@@ -0,0 +1,21 @@
1
+ """Shared version-parse helper for the codemod tier.
2
+
3
+ ``packaging`` exposes no validity predicate, so a ``try``/``except`` over
4
+ ``Version`` is the sanctioned third-party boundary. Both the profile-semantics
5
+ catalogue (``profile_semantics``) and the runtime-gated-fix crossing gate
6
+ (``runtime_fixes``) need to place a possibly-unparseable profile string, so the
7
+ helper lives here once rather than being mirrored in each (architecture audit
8
+ 2026-06-03, ``../../docs/architecture_audit.md``).
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from packaging.version import InvalidVersion, Version
14
+
15
+
16
def version_or_none(value: str, /) -> Version | None:
    """Return *value* parsed as a ``Version``, or ``None`` when it is not valid.

    ``packaging`` offers no validity predicate, so catching ``InvalidVersion``
    around the constructor is how parseability is tested here.
    """
    try:
        parsed = Version(value)
    except InvalidVersion:
        # Not a version string: report "no version" instead of raising.
        return None
    else:
        return parsed
@@ -0,0 +1,109 @@
1
+ """The bundled codemod pipelines — ordered contracts consumed by the app tier.
2
+
3
+ Tier 2 (this package) and Tier 3 (``galaxy-tool-fmt``) are independent
4
+ siblings of Tier 1 (``galaxy-tool-source``); neither runs the user-facing
5
+ workflow. The orchestration — read a file, apply a pipeline, write the result
6
+ via fmt's serializer — lives in the top-level app tier
7
+ (``galaxy-tool-refactor-cli``). This module only declares *which* codemods run
8
+ and *in what order*; the app consumes these tuples.
9
+
10
+ Two pipelines, separated because profile upgrade is semantic and opt-in while
11
+ canonicalisation is safe and idempotent:
12
+
13
+ ``canonical_codemods()`` — the structural canonical pipeline (the app's ``format``
14
+ command, run before fmt's cosmetic rules), derived from the codemods that declare
15
+ the ``"default"`` ruleset, ordered by ``meta.order``. Front-to-back:
16
+
17
+ 1. ``FixTypos`` — repair near-miss spelling typos. A no-op unless the tool
18
+ validates at no profile, so it only acts on broken tools; running it first
19
+ lets the rest of the pipeline see a validatable tree.
20
+ 2. ``NormalizeBooleanValues`` — canonicalize Python-style boolean attribute
21
+ values (``True``/``Yes``/…) to ``xs:boolean`` (``true``/``false``) on
22
+ schema-boolean attributes. Like ``FixTypos`` a no-op unless the tool validates
23
+ nowhere; behaviour-preserving and the sibling repair ``FixTypos`` cannot reach
24
+ (the lenient model accepts ``True``).
25
+ 3. ``RepairHelpRst`` — repair the deterministically-fixable invalid ``<help>``
26
+ reStructuredText (GTR089.1, the fixable half of the GTR089 partition) behind
27
+ tier 1's behaviour-preserving gate. A no-op on valid or macro-bearing help;
28
+ what it can't reach stays the ``GTR089.2`` advisory residual. See
29
+ ``docs/decisions.md`` §37.
30
+ 4. ``TrimAttributeWhitespace`` / ``ReplaceOutputElement`` /
31
+ ``DropRedundantParamName`` — the planemo-parity fixes (GTR035–GTR037):
32
+ value-level repairs that settle attribute *content* before the reorders tidy
33
+ attribute *order*.
34
+ 5. ``ReorderParamAttributes`` / ``ReorderToolAttributes`` — tidy attribute order
35
+ once the tree is settled.
36
+ 6. ``ReorderToolChildren`` — reorder the root ``<tool>``'s child elements to the
37
+ IUC convention (element-level tidying after attribute-level). Validity-safe:
38
+ the schema's ``<tool>`` content model is order-free (``xs:all``).
39
+ 7. ``WrapCommandCdata`` / ``WrapHelpCdata`` — wrap a pure-text ``<command>`` /
40
+ ``<help>`` body in ``<![CDATA[…]]>`` (IUC #34/#42). Behaviour-preserving — lxml
41
+ exposes the entity-unescaped text, so only the serialised bytes change, not the
42
+ value Galaxy runs/renders. Content-level tidying, so it runs after the
43
+ structural reorders; independent of them (it never touches child order). See
44
+ ``docs/decisions.md`` §29.
45
+ 8. ``SingleQuoteCommandVars`` — single-quote the *provably*-single-valued unquoted
46
+ Cheetah ``$var``\\ s in ``<command>`` (GTR020.1, the fixable half of the GTR020
47
+ partition).
48
+ Acts only on references whose value can never contain whitespace for a working
49
+ tool (bare single-token params, ``$__…__`` path built-ins, space-free attrs),
50
+ so it is behaviour-preserving like the CDATA wraps. It runs **after**
51
+ ``WrapCommandCdata`` so it sees the body already in its canonical CDATA form and
52
+ preserves it. Unlike the rest of this pipeline it changes the default ``format``
53
+ output for tools that were never previously rewritten — a deliberate, data-backed
54
+ reversal of the GTR020.2-stays-advisory stance (``docs/decisions.md`` §30). The
55
+ advisory ``GTR020.2`` check still reports the non-provable residual this skips.
56
+
57
+ It deliberately does **not** change ``profile=`` or apply version migrations —
58
+ that is the upgrade pipeline's job.
59
+
60
+ ``AUTO_UPGRADE_CODEMODS`` — the opt-in profile-upgrade pipeline (the app's
61
+ ``upgrade`` command). Front-to-back:
62
+
63
+ 1. ``FixTypos`` / ``NormalizeBooleanValues`` — repair first, so a broken-and-
64
+ outdated tool becomes validatable and therefore upgradable in one pass.
65
+ 2. ``UpgradeToLatest`` — iteratively upgrade the (now possibly repaired) tool
66
+ toward the latest profile, re-declaring its profile between steps. This
67
+ subsumes ``UpdateProfile`` (it runs it internally each round).
68
+
69
+ ``FixTypos`` / ``NormalizeBooleanValues`` intentionally appear in both pipelines;
70
+ both are idempotent, so running them in whichever pipeline the user invokes is
71
+ harmless.
72
+ """
73
+
74
+ from __future__ import annotations
75
+
76
+ from functools import cache
77
+
78
+ from galaxy_tool_codemod.catalog import coded_codemods
79
+ from galaxy_tool_codemod.codemod import CodemodCommand
80
+ from galaxy_tool_codemod.codemods.fix_typos import FixTypos
81
+ from galaxy_tool_codemod.codemods.normalize_boolean_values import (
82
+ NormalizeBooleanValues,
83
+ )
84
+ from galaxy_tool_codemod.upgrades import UpgradeToLatest
85
+
86
+
87
+ @cache
88
+ def canonical_codemods() -> tuple[type[CodemodCommand], ...]:
89
+ """The structural canonical/``format`` pipeline — **derived, not hardcoded**.
90
+
91
+ Every codemod that declares the ``"default"`` ruleset, ordered by ``meta.order``.
92
+ Membership and application order now live on each codemod's ``RuleMeta``
93
+ (``rulesets`` / ``order``), so this is computed from the rules rather than being
94
+ a second hand-maintained source of truth. The front-to-back order it yields is
95
+ the one documented above (``FixTypos`` → … → ``SingleQuoteCommandVars``).
96
+ """
97
+ return tuple(
98
+ sorted(
99
+ (cls for cls in coded_codemods() if "default" in cls.meta.rulesets),
100
+ key=lambda cls: cls.meta.order,
101
+ )
102
+ )
103
+
104
+
105
# The opt-in profile-upgrade pipeline, listed in application order: the two
# repair codemods come first, followed by ``UpgradeToLatest``.
AUTO_UPGRADE_CODEMODS: tuple[type[CodemodCommand], ...] = (
    FixTypos,
    NormalizeBooleanValues,
    UpgradeToLatest,
)
@@ -0,0 +1,90 @@
1
+ """The full set of GTR-coded codemods, for documentation and registry use.
2
+
3
+ Distinct from ``canonical_codemods()`` (``canonical.py``): that function returns the
4
+ *ordered pipeline* the app tier's ``format`` command runs, and it omits the single-step ``upgrade_vN``
5
+ codemods because ``UpgradeToLatest`` drives them internally. This catalog lists
6
+ *every* codemod that carries a ``RuleMeta`` GTR code, so a cross-tier rule
7
+ registry (such as the corpus-format stat page) can enumerate them alongside the
8
+ formatter tier's ``all_rules()``.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from galaxy_tool_codemod.codemod import CodemodCommand
14
+ from galaxy_tool_codemod.codemods.convert_help_markdown import (
15
+ ConvertHelpToMarkdown,
16
+ )
17
+ from galaxy_tool_codemod.codemods.drop_redundant_param_name import (
18
+ DropRedundantParamName,
19
+ )
20
+ from galaxy_tool_codemod.codemods.fix_from_work_dir_whitespace import (
21
+ FixFromWorkDirWhitespace,
22
+ )
23
+ from galaxy_tool_codemod.codemods.fix_interpreter import FixInterpreter
24
+ from galaxy_tool_codemod.codemods.fix_output_format_input import (
25
+ FixOutputFormatInput,
26
+ )
27
+ from galaxy_tool_codemod.codemods.fix_typos import FixTypos
28
+ from galaxy_tool_codemod.codemods.normalize_boolean_values import (
29
+ NormalizeBooleanValues,
30
+ )
31
+ from galaxy_tool_codemod.codemods.reorder_param_attributes import (
32
+ ReorderParamAttributes,
33
+ )
34
+ from galaxy_tool_codemod.codemods.reorder_tool_attributes import (
35
+ ReorderToolAttributes,
36
+ )
37
+ from galaxy_tool_codemod.codemods.reorder_tool_children import (
38
+ ReorderToolChildren,
39
+ )
40
+ from galaxy_tool_codemod.codemods.repair_help_rst import RepairHelpRst
41
+ from galaxy_tool_codemod.codemods.replace_output_element import (
42
+ ReplaceOutputElement,
43
+ )
44
+ from galaxy_tool_codemod.codemods.single_quote_command_vars import (
45
+ SingleQuoteCommandVars,
46
+ )
47
+ from galaxy_tool_codemod.codemods.tokenize_version import TokenizeVersion
48
+ from galaxy_tool_codemod.codemods.trim_attribute_whitespace import (
49
+ TrimAttributeWhitespace,
50
+ )
51
+ from galaxy_tool_codemod.codemods.update_profile import UpdateProfile
52
+ from galaxy_tool_codemod.codemods.upgrade_19_01 import Upgrade19_01
53
+ from galaxy_tool_codemod.codemods.upgrade_21_09 import Upgrade21_09
54
+ from galaxy_tool_codemod.codemods.upgrade_24_0 import Upgrade24_0
55
+ from galaxy_tool_codemod.codemods.upgrade_24_1 import Upgrade24_1
56
+ from galaxy_tool_codemod.codemods.upgrade_25_1 import Upgrade25_1
57
+ from galaxy_tool_codemod.codemods.wrap_command_cdata import WrapCommandCdata
58
+ from galaxy_tool_codemod.codemods.wrap_help_cdata import WrapHelpCdata
59
+ from galaxy_tool_codemod.upgrades import UpgradeToLatest
60
+
61
+
62
def coded_codemods() -> tuple[type[CodemodCommand], ...]:
    """Return all GTR-coded codemod classes as a tuple ordered by ``meta.code``."""
    registered: tuple[type[CodemodCommand], ...] = (
        FixTypos,
        ReorderParamAttributes,
        ReorderToolAttributes,
        ReorderToolChildren,
        UpdateProfile,
        Upgrade19_01,
        Upgrade21_09,
        Upgrade24_0,
        Upgrade24_1,
        Upgrade25_1,
        UpgradeToLatest,
        FixFromWorkDirWhitespace,
        FixOutputFormatInput,
        FixInterpreter,
        NormalizeBooleanValues,
        RepairHelpRst,
        WrapCommandCdata,
        WrapHelpCdata,
        SingleQuoteCommandVars,
        TrimAttributeWhitespace,
        ReplaceOutputElement,
        DropRedundantParamName,
        ConvertHelpToMarkdown,
        TokenizeVersion,
    )
    # sorted() is stable, so classes with equal codes keep the listing order above.
    by_code = sorted(registered, key=lambda codemod_cls: codemod_cls.meta.code)
    return tuple(by_code)
@@ -0,0 +1,47 @@
1
+ """The ``EditCertifier`` seam: a pluggable per-edit behaviour-preservation oracle.
2
+
3
+ GTR020.1 (``SingleQuoteCommandVars``) decides, per occurrence, whether single-quoting
4
+ a Cheetah ``$var`` in ``<command>`` preserves behaviour. By **default** it uses the
5
+ tier-1 static policy ``galaxy_tool_source.shell_oracle.quote_is_behavior_preserving``
6
+ — the
7
+ bashlex shell-context classifier composed with the value-domain rule, degrading to the
8
+ pure value-domain ``provably_quotable`` when the optional
9
+ ``galaxy-tool-source[shell-oracle]`` extra is absent.
10
+
11
+ This Protocol reserves the seam (shipped consulting ``None`` = the static policy) for
12
+ the Phase-2 CT3 *render* certifier (``--certify=render``): an ``EditCertifier`` injected
13
+ into the codemod overrides the default and may only *narrow* the candidate set. The
14
+ codemod calls ``should_quote`` with the same arguments as the static policy so the two
15
+ are interchangeable. See
16
+ ``../../docs/upgrade_research/cheetah_bashlex_boundary_oracle.md`` §4.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from typing import TYPE_CHECKING, Protocol, runtime_checkable
22
+
23
+ if TYPE_CHECKING:
24
+ from galaxy_tool_source.command_text import UnquotedVar
25
+
26
+
27
+ @runtime_checkable
28
+ class EditCertifier(Protocol):
29
+ """Certifies whether a single-quote edit on one ``<command>`` occurrence is safe."""
30
+
31
+ def should_quote(
32
+ self,
33
+ body: str,
34
+ /,
35
+ *,
36
+ occurrence: UnquotedVar,
37
+ kinds: dict[str, str],
38
+ structural: set[str],
39
+ ) -> bool:
40
+ """Whether single-quoting *occurrence* in ``<command>`` *body* keeps behaviour.
41
+
42
+ Signature-compatible with
43
+ ``galaxy_tool_source.shell_oracle.quote_is_behavior_preserving`` so a
44
+ certifier and
45
+ the default static policy are drop-in interchangeable.
46
+ """
47
+ ...
@@ -0,0 +1,63 @@
1
+ """``Change`` — one detected structural mutation, applied via a thunk.
2
+
3
+ A codemod's **detect** phase yields ``Change``s without touching the tree: each
4
+ carries the diagnostic data (``code``, ``sourceline``, ``xpath``, ``message`` —
5
+ the same fields as a tier-0.5 ``Violation``) plus a zero-argument ``mutate``
6
+ thunk that performs the mutation through the existing ``Cursor`` primitives. The
7
+ detect list *is* the report; running ``apply_changes`` over it is the fix. One
8
+ mutation site (the thunk body), one source of truth — the change a codemod
9
+ reports is exactly the change it applies, with no risk of the two drifting.
10
+
11
+ See ``galaxy-tool-fmt``'s ``edits.py`` for the cosmetic-tier analogue; the
12
+ difference is that an ``Edit`` is a pure-data union dispatched by ``match/case``
13
+ whereas a ``Change`` carries its mutation as a closure over a ``Cursor`` call
14
+ (``docs/decisions.md`` § on the detect/fix split records why the structural tier
15
+ reuses the cursor rather than re-enumerating every mutation kind).
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ from collections.abc import Callable, Iterable
21
+ from dataclasses import dataclass, field
22
+
23
+ from galaxy_tool_refactor_rules.violation import Violation
24
+
25
+
26
@dataclass(frozen=True)
class Change:
    """A single detected structural mutation plus the thunk that performs it.

    Attributes:
        code: ``RuleMeta.code`` of the reporting codemod (e.g. ``"GTR002"``).
        sourceline: 1-based source line of the affected element, or ``0``.
        xpath: Absolute xpath of the affected element.
        message: One-line, human-readable description of the change.
        mutate: Zero-argument callable that carries out the mutation. It is
            left out of equality and ``repr``, so two changes compare equal
            whenever their diagnostic fields match, whatever closure they hold.
    """

    code: str
    sourceline: int
    xpath: str
    message: str
    mutate: Callable[[], None] = field(compare=False, repr=False)

    def to_violation(self) -> Violation:
        """Build a tier-0.5 ``Violation`` from this change's diagnostic fields."""
        diagnostic = {
            "code": self.code,
            "sourceline": self.sourceline,
            "xpath": self.xpath,
            "message": self.message,
        }
        return Violation(**diagnostic)
54
+
55
+
56
def apply_changes(changes: Iterable[Change], /) -> None:
    """Run the ``mutate`` thunk of each change, in the order *changes* yields them.

    This is the one place structural mutations are dispatched; callers that
    only need the report iterate ``detect`` and never call this function.
    """
    for pending in changes:
        run_mutation = pending.mutate
        run_mutation()
@@ -0,0 +1,136 @@
1
+ """``CodemodCommand`` base class and the detect-dispatch harness.
2
+
3
+ A structural codemod subclasses ``CodemodCommand`` and defines one or more
4
+ ``detect_<TagPascalCase>`` methods. ``detect(module)`` walks the lxml tree in
5
+ document order; for each element it looks up ``detect_<TagPascalCase>`` and, if
6
+ present, yields the ``Change``s it returns — **without mutating the tree**. The
7
+ yielded change list *is* the lint report. ``apply(module)`` is derived: it
8
+ materialises ``detect(module)`` and runs each change's ``mutate`` thunk, so the
9
+ change a codemod reports is exactly the change it applies. Comment and
10
+ ProcessingInstruction nodes are skipped by ``Cursor.children()`` so detectors
11
+ only see real elements.
12
+
13
+ Validation-driven codemods (``FixTypos``, ``UpgradeToLatest`` and the per-step
14
+ upgrades) cannot pre-compute a static change list — they branch on
15
+ re-validation — so they override ``apply`` with bespoke logic and supply a
16
+ **coarse** ``detect`` (see ``codemods._coarse_detect``).
17
+
18
+ Dispatch is by **tag name** (``<param>`` → ``detect_Param``,
19
+ ``<change_format>`` → ``detect_ChangeFormat``). The architecture targets
20
+ typed-model class names long-term — these coincide with PascalCase tags
21
+ for unambiguous elements like ``<param>`` and ``<tool>``, and diverge
22
+ only for elements with multiple per-context typed classes (``<when>``).
23
+ Per-context dispatch is deferred until a codemod needs it.
24
+
25
+ **Macro-mode handling is not yet implemented.** A future milestone will
26
+ add a per-codemod declaration of how macros should be treated (expand /
27
+ strip / skip / leave as-is) and a harness that honours it. Codemods
28
+ written today operate on the source tree as-parsed; do not assume any
29
+ macro-aware behaviour.
30
+
31
+ See ``docs/architecture.md`` § Cursor-walk constraint and
32
+ ``PLAN.md`` § M3 for the design notes.
33
+ """
34
+
35
+ from __future__ import annotations
36
+
37
+ from functools import cache
38
+ from typing import TYPE_CHECKING, ClassVar
39
+
40
+ from galaxy_tool_codemod.change import Change, apply_changes
41
+ from galaxy_tool_codemod.cursor import Cursor
42
+ from galaxy_tool_codemod.eligibility import corpus_test_profile
43
+
44
+ if TYPE_CHECKING:
45
+ from collections.abc import Iterable, Iterator
46
+
47
+ from galaxy_tool_refactor_rules.meta import RuleMeta
48
+ from galaxy_tool_source.document import ToolDocument
49
+
50
+ from galaxy_tool_codemod.module import Module
51
+
52
+
53
+ @cache
54
+ def _detect_method_name(tag: str) -> str:
55
+ """Convert an XML tag to its detector method name.
56
+
57
+ ``"param"`` → ``"detect_Param"``;
58
+ ``"change_format"`` → ``"detect_ChangeFormat"``.
59
+ """
60
+ parts = tag.split("_")
61
+ pascal = "".join(part[:1].upper() + part[1:] for part in parts)
62
+ return f"detect_{pascal}"
63
+
64
+
65
+ class CodemodCommand:
66
+ """Base class for structural-refactor codemods.
67
+
68
+ Every bundled codemod carries a ``meta: ClassVar[RuleMeta]`` GTR descriptor
69
+ (shared with the formatter tier via ``galaxy-tool-refactor-rules``) so the
70
+ two tiers expose one uniform rule registry. The enumerated set of coded
71
+ codemods is ``catalog.coded_codemods()``.
72
+ """
73
+
74
+ meta: ClassVar[RuleMeta]
75
+
76
+ def detect(self, module: Module, /) -> Iterable[Change]:
77
+ """Yield the ``Change``s this codemod would make, without mutating.
78
+
79
+ Walks ``module``'s lxml tree in document order, dispatching
80
+ ``detect_<Tag>`` for each element and yielding the changes it returns.
81
+ The default walk drives the structural (cursor-walk) codemods;
82
+ validation-driven codemods override this with a coarse detector.
83
+ """
84
+ yield from self._detect_dispatch(Cursor(module.document.root))
85
+
86
+ def _detect_dispatch(self, cursor: Cursor) -> Iterator[Change]:
87
+ method_name = _detect_method_name(cursor.tag)
88
+ detector = getattr(self, method_name, None)
89
+ if detector is not None:
90
+ yield from detector(cursor)
91
+ for child in cursor.children():
92
+ yield from self._detect_dispatch(child)
93
+
94
+ def apply(self, module: Module, /) -> None:
95
+ """Apply this codemod by running every detected change's thunk.
96
+
97
+ Detection is materialised first (all reads complete before any
98
+ mutation), then ``apply_changes`` runs the thunks. Mutations apply
99
+ immediately to the underlying tree; atomicity (deep-copy snapshot) is
100
+ the responsibility of whatever harness invokes ``apply`` — for the
101
+ canonical-pipeline CLI that's the app tier; for sweep tooling that's
102
+ the relevant subcommand.
103
+ """
104
+ apply_changes(list(self.detect(module)))
105
+
106
+ def upgrade_steps_applied(self) -> tuple[str, ...]:
107
+ """From-versions whose upgrade the last ``apply`` advanced the tool past.
108
+
109
+ Empty for every codemod except an upgrade orchestrator like
110
+ ``UpgradeToLatest``; the corpus sweep reads it to keep per-step upgrade
111
+ statistics (how many tools each ``upgrade_vN`` codemod advanced).
112
+ """
113
+ return ()
114
+
115
+ @classmethod
116
+ def corpus_eligible(cls, document: ToolDocument, /) -> bool:
117
+ """Whether a corpus sweep should run this codemod on *document*.
118
+
119
+ Default: eligible iff the codemod-sweep policy can pick a test profile
120
+ (i.e. the tool validates somewhere). A codemod that targets a different
121
+ population — e.g. ``FixTypos``, which repairs tools that validate
122
+ nowhere — overrides this. Evaluated on the pre-codemod document.
123
+ """
124
+ return corpus_test_profile(document) is not None
125
+
126
+ @classmethod
127
+ def corpus_validation_profile(cls, document: ToolDocument, /) -> str | None:
128
+ """The profile to validate the post-codemod document at.
129
+
130
+ Default mirrors the sweep policy. The sweep evaluates this *after*
131
+ ``apply``; for the structural codemods that leave the validating-profile
132
+ set unchanged it equals the pre-codemod choice, so behaviour is the same
133
+ as validating at the policy profile. Codemods that change which profiles
134
+ validate (``FixTypos``) override this to report the post-repair profile.
135
+ """
136
+ return corpus_test_profile(document)
@@ -0,0 +1,7 @@
1
+ """Codemod implementations bundled with the framework.
2
+
3
+ Each module here defines one codemod (verb-noun name) — see
4
+ ``canonical.py`` for the set the app tier's ``format`` command runs to produce conformant output.
5
+ Underscore-prefixed modules (e.g. ``_attribute_ordering``) are shared
6
+ helpers, not codemods.
7
+ """
@@ -0,0 +1,24 @@
1
+ """Shared helper for attribute-reordering codemods.
2
+
3
+ Given an element's current attribute names and a priority map (attribute
4
+ name → integer; lower runs first), returns the canonical order:
5
+ priority-ascending, with unknown attributes sorting alphabetically after
6
+ the known ones. Originally lived in ``galaxy-tool-fmt`` as
7
+ ``attribute_ordering``; moved here when the attribute-reorder rules
8
+ became structural codemods.
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from collections.abc import Iterable, Mapping
14
+
15
+ _UNKNOWN_PRIORITY = 100
16
+
17
+
18
+ def canonical_order(
19
+ names: Iterable[str], priority: Mapping[str, int]
20
+ ) -> tuple[str, ...]:
21
+ """Return *names* sorted by *priority*; unknowns alphabetical at the end."""
22
+ return tuple(
23
+ sorted(names, key=lambda name: (priority.get(name, _UNKNOWN_PRIORITY), name))
24
+ )
@@ -0,0 +1,46 @@
1
+ """Shared CDATA-wrapping detect logic for ``WrapCommandCdata`` / ``WrapHelpCdata``.
2
+
3
+ Galaxy ``<command>`` and ``<help>`` bodies are best written inside a
4
+ ``<![CDATA[…]]>`` section so shell operators (``&&``, ``<``, ``|``) and markup
5
+ stay literal — the IUC ``tool_xml`` best practices (#34 for ``<command>``, #42 for
6
+ ``<help>``). When a body is *pure text* — non-whitespace, no child nodes, not
7
+ already CDATA-wrapped, and free of the ``]]>`` terminator that can't live inside a
8
+ single section — wrapping it is **behaviour-preserving**: lxml already exposes the
9
+ entity-unescaped text, so only the serialised bytes change (entities become literal
10
+ inside CDATA), not the value Galaxy ultimately runs or renders.
11
+
12
+ Mixed-content bodies (text interleaved with child elements or comments) and
13
+ already-wrapped bodies are left untouched; the advisory sub-rules GTR018.2 / GTR019.2
14
+ flag the rare residual these fix sub-rules deliberately skip.
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from galaxy_tool_source.cdata import cdata_wrappable
20
+
21
+ from galaxy_tool_codemod.change import Change
22
+ from galaxy_tool_codemod.cursor import Cursor
23
+
24
+
25
def cdata_wrap_change(cursor: Cursor, /, *, code: str, element: str) -> Change | None:
    """Build the Change that CDATA-wraps *cursor*'s body, or ``None`` if ineligible.

    Eligibility is delegated to the shared tier-1 ``cdata_wrappable``
    predicate, so this fix and the advisory GTR018.2 / GTR019.2 residual
    (``needs_cdata and not cdata_wrappable``) cannot drift apart. Bodies the
    predicate rejects are left for the advisory sub-rule: whitespace-only,
    mixed-content (any child node), already wrapped, or containing ``]]>``
    (which cannot be expressed in one CDATA section).

    Args:
        cursor: Cursor positioned on the element whose body may be wrapped.
        code: Rule code recorded on the returned change.
        element: Tag name interpolated into the change message.
    """
    if not cdata_wrappable(cursor.element):
        return None
    body_text = cursor.text
    if body_text is None:
        # cdata_wrappable guarantees non-None; this guard keeps mypy + LBYL happy.
        return None

    def _wrap_in_cdata() -> None:
        # Re-set the same text, this time flagged as a CDATA section.
        cursor.set_text(body_text, cdata=True)

    return Change(
        code=code,
        sourceline=cursor.sourceline,
        xpath=cursor.xpath,
        message=f"<{element}> body is not wrapped in CDATA",
        mutate=_wrap_in_cdata,
    )
@@ -0,0 +1,65 @@
1
+ """Coarse detection for validation-driven codemods.
2
+
3
+ The structural reorderers compute a per-occurrence change list directly. The
4
+ validation-driven codemods (``FixTypos``, ``UpdateProfile``, ``UpgradeToLatest``
5
+ and the per-step upgrades) cannot: they branch on re-validation, so there is no
6
+ static change list to pre-compute. Their detect phase is therefore **coarse** —
7
+ it answers only "would applying this codemod change the tool?" by running the
8
+ codemod on a throwaway copy and comparing the serialised tree. When the answer
9
+ is yes it yields a single ``Change`` located at the root ``<tool>`` whose thunk
10
+ runs the real ``apply``; otherwise it yields nothing.
11
+
12
+ This keeps detect/apply parity (detect yields ⇔ apply mutates) for the sweep's
13
+ parity gate without pretending to a precision these codemods cannot offer; the
14
+ per-occurrence lint value concentrates in the structural and detect-only rules.
15
+ See ``docs/decisions.md`` § on the detect/fix split.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import copy
21
+ from collections.abc import Iterator
22
+
23
+ from galaxy_tool_source.document import ToolDocument
24
+ from lxml import etree
25
+
26
+ from galaxy_tool_codemod.change import Change
27
+ from galaxy_tool_codemod.codemod import CodemodCommand
28
+ from galaxy_tool_codemod.module import Module
29
+
30
+
31
+ def coarse_detect(
32
+ codemod: CodemodCommand, module: Module, /, *, message: str
33
+ ) -> Iterator[Change]:
34
+ """Yield one root-level ``Change`` iff applying *codemod* would alter *module*.
35
+
36
+ Runs a fresh instance of *codemod* on a deep copy of *module* and compares
37
+ the serialised tree before and after. Both snapshots come from the copy, so
38
+ any representation shift introduced by ``deepcopy`` cancels out and only a
39
+ real mutation registers. The yielded change is located on the *original*
40
+ tree's root and its thunk applies *codemod* to the original module.
41
+
42
+ The copy keeps the original's ``source_path`` so the validation-driven
43
+ codemods resolve macro ``<import>``s the same way they do on the real
44
+ document — without it the copy would validate differently and detect would
45
+ drift from apply.
46
+ """
47
+ work = Module(
48
+ ToolDocument(
49
+ copy.deepcopy(module.document.tree),
50
+ source_path=module.document.source_path,
51
+ )
52
+ )
53
+ before = etree.tostring(work.document.tree)
54
+ type(codemod)().apply(work)
55
+ after = etree.tostring(work.document.tree)
56
+ if after == before:
57
+ return
58
+ root = module.cursor
59
+ yield Change(
60
+ code=codemod.meta.code,
61
+ sourceline=root.sourceline,
62
+ xpath=root.xpath,
63
+ message=message,
64
+ mutate=lambda: codemod.apply(module),
65
+ )