galaxy-tool-source 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of galaxy-tool-source might be problematic. Click here for more details.

Files changed (115) hide show
  1. galaxy_tool_source/__init__.py +6 -0
  2. galaxy_tool_source/_codegen.py +159 -0
  3. galaxy_tool_source/binding.py +338 -0
  4. galaxy_tool_source/boolean_values.py +201 -0
  5. galaxy_tool_source/bundle.py +186 -0
  6. galaxy_tool_source/cdata.py +70 -0
  7. galaxy_tool_source/cheetah_cdm.py +165 -0
  8. galaxy_tool_source/cheetah_refs.py +290 -0
  9. galaxy_tool_source/cheetah_rename.py +801 -0
  10. galaxy_tool_source/cli.py +129 -0
  11. galaxy_tool_source/command_text.py +151 -0
  12. galaxy_tool_source/command_vars.py +225 -0
  13. galaxy_tool_source/corrections.py +245 -0
  14. galaxy_tool_source/document.py +196 -0
  15. galaxy_tool_source/macros.py +365 -0
  16. galaxy_tool_source/models/__init__.py +10 -0
  17. galaxy_tool_source/models/any_tool.py +32 -0
  18. galaxy_tool_source/models/registry.py +46 -0
  19. galaxy_tool_source/models/v16_10/__init__.py +201 -0
  20. galaxy_tool_source/models/v16_10/galaxy.py +5576 -0
  21. galaxy_tool_source/models/v17_01/__init__.py +207 -0
  22. galaxy_tool_source/models/v17_01/galaxy.py +5692 -0
  23. galaxy_tool_source/models/v17_05/__init__.py +207 -0
  24. galaxy_tool_source/models/v17_05/galaxy.py +5793 -0
  25. galaxy_tool_source/models/v17_09/__init__.py +209 -0
  26. galaxy_tool_source/models/v17_09/galaxy.py +5878 -0
  27. galaxy_tool_source/models/v18_01/__init__.py +211 -0
  28. galaxy_tool_source/models/v18_01/galaxy.py +5965 -0
  29. galaxy_tool_source/models/v18_05/__init__.py +211 -0
  30. galaxy_tool_source/models/v18_05/galaxy.py +5996 -0
  31. galaxy_tool_source/models/v18_09/__init__.py +211 -0
  32. galaxy_tool_source/models/v18_09/galaxy.py +6044 -0
  33. galaxy_tool_source/models/v19_01/__init__.py +215 -0
  34. galaxy_tool_source/models/v19_01/galaxy.py +6120 -0
  35. galaxy_tool_source/models/v19_05/__init__.py +223 -0
  36. galaxy_tool_source/models/v19_05/galaxy.py +6526 -0
  37. galaxy_tool_source/models/v19_09/__init__.py +239 -0
  38. galaxy_tool_source/models/v19_09/galaxy.py +6779 -0
  39. galaxy_tool_source/models/v20_01/__init__.py +239 -0
  40. galaxy_tool_source/models/v20_01/galaxy.py +6839 -0
  41. galaxy_tool_source/models/v20_05/__init__.py +239 -0
  42. galaxy_tool_source/models/v20_05/galaxy.py +6875 -0
  43. galaxy_tool_source/models/v20_09/__init__.py +241 -0
  44. galaxy_tool_source/models/v20_09/galaxy.py +6950 -0
  45. galaxy_tool_source/models/v21_01/__init__.py +249 -0
  46. galaxy_tool_source/models/v21_01/galaxy.py +7380 -0
  47. galaxy_tool_source/models/v21_05/__init__.py +249 -0
  48. galaxy_tool_source/models/v21_05/galaxy.py +7419 -0
  49. galaxy_tool_source/models/v21_09/__init__.py +259 -0
  50. galaxy_tool_source/models/v21_09/galaxy.py +7620 -0
  51. galaxy_tool_source/models/v22_01/__init__.py +291 -0
  52. galaxy_tool_source/models/v22_01/galaxy.py +8605 -0
  53. galaxy_tool_source/models/v22_05/__init__.py +297 -0
  54. galaxy_tool_source/models/v22_05/galaxy.py +8737 -0
  55. galaxy_tool_source/models/v23_0/__init__.py +297 -0
  56. galaxy_tool_source/models/v23_0/galaxy.py +8807 -0
  57. galaxy_tool_source/models/v23_1/__init__.py +303 -0
  58. galaxy_tool_source/models/v23_1/galaxy.py +9078 -0
  59. galaxy_tool_source/models/v23_2/__init__.py +309 -0
  60. galaxy_tool_source/models/v23_2/galaxy.py +9248 -0
  61. galaxy_tool_source/models/v24_0/__init__.py +319 -0
  62. galaxy_tool_source/models/v24_0/galaxy.py +9460 -0
  63. galaxy_tool_source/models/v24_1/__init__.py +333 -0
  64. galaxy_tool_source/models/v24_1/galaxy.py +9985 -0
  65. galaxy_tool_source/models/v24_2/__init__.py +343 -0
  66. galaxy_tool_source/models/v24_2/galaxy.py +11327 -0
  67. galaxy_tool_source/models/v25_0/__init__.py +347 -0
  68. galaxy_tool_source/models/v25_0/galaxy.py +11437 -0
  69. galaxy_tool_source/models/v25_1/__init__.py +353 -0
  70. galaxy_tool_source/models/v25_1/galaxy.py +11686 -0
  71. galaxy_tool_source/models/v26_0/__init__.py +349 -0
  72. galaxy_tool_source/models/v26_0/galaxy.py +11678 -0
  73. galaxy_tool_source/models/v26_1/__init__.py +355 -0
  74. galaxy_tool_source/models/v26_1/galaxy.py +11805 -0
  75. galaxy_tool_source/profiles.py +190 -0
  76. galaxy_tool_source/py.typed +0 -0
  77. galaxy_tool_source/rst.py +222 -0
  78. galaxy_tool_source/rst_markdown.py +407 -0
  79. galaxy_tool_source/schema/PROVENANCE.md +55 -0
  80. galaxy_tool_source/schema/galaxy-16.10.xsd +5125 -0
  81. galaxy_tool_source/schema/galaxy-17.01.xsd +5214 -0
  82. galaxy_tool_source/schema/galaxy-17.05.xsd +5336 -0
  83. galaxy_tool_source/schema/galaxy-17.09.xsd +5389 -0
  84. galaxy_tool_source/schema/galaxy-18.01.xsd +5431 -0
  85. galaxy_tool_source/schema/galaxy-18.05.xsd +5452 -0
  86. galaxy_tool_source/schema/galaxy-18.09.xsd +5470 -0
  87. galaxy_tool_source/schema/galaxy-19.01.xsd +5507 -0
  88. galaxy_tool_source/schema/galaxy-19.05.xsd +5594 -0
  89. galaxy_tool_source/schema/galaxy-19.09.xsd +5773 -0
  90. galaxy_tool_source/schema/galaxy-20.01.xsd +5793 -0
  91. galaxy_tool_source/schema/galaxy-20.05.xsd +5807 -0
  92. galaxy_tool_source/schema/galaxy-20.09.xsd +5855 -0
  93. galaxy_tool_source/schema/galaxy-21.01.xsd +6250 -0
  94. galaxy_tool_source/schema/galaxy-21.05.xsd +6289 -0
  95. galaxy_tool_source/schema/galaxy-21.09.xsd +6407 -0
  96. galaxy_tool_source/schema/galaxy-22.01.xsd +7045 -0
  97. galaxy_tool_source/schema/galaxy-22.05.xsd +7151 -0
  98. galaxy_tool_source/schema/galaxy-23.0.xsd +7230 -0
  99. galaxy_tool_source/schema/galaxy-23.1.xsd +7353 -0
  100. galaxy_tool_source/schema/galaxy-23.2.xsd +7494 -0
  101. galaxy_tool_source/schema/galaxy-24.0.xsd +7704 -0
  102. galaxy_tool_source/schema/galaxy-24.1.xsd +7966 -0
  103. galaxy_tool_source/schema/galaxy-24.2.xsd +8249 -0
  104. galaxy_tool_source/schema/galaxy-25.0.xsd +8351 -0
  105. galaxy_tool_source/schema/galaxy-25.1.xsd +8569 -0
  106. galaxy_tool_source/schema/galaxy-26.0.xsd +8565 -0
  107. galaxy_tool_source/schema/galaxy-26.1.xsd +8637 -0
  108. galaxy_tool_source/schema/manifest.json +229 -0
  109. galaxy_tool_source/schema_content.py +103 -0
  110. galaxy_tool_source/shell_oracle.py +346 -0
  111. galaxy_tool_source-0.1.0.dist-info/METADATA +126 -0
  112. galaxy_tool_source-0.1.0.dist-info/RECORD +115 -0
  113. galaxy_tool_source-0.1.0.dist-info/WHEEL +4 -0
  114. galaxy_tool_source-0.1.0.dist-info/entry_points.txt +2 -0
  115. galaxy_tool_source-0.1.0.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,6 @@
1
+ """Foundation library for parsing, profile-aware validation, and typed inspection
2
+ of Galaxy tool definition XML.
3
+
4
+ The supported public API is declared in prose in ``README.md`` and ``CLAUDE.md``.
5
+ This package intentionally exposes no re-exports here.
6
+ """
@@ -0,0 +1,159 @@
1
+ """Build-time codegen: generate the per-version xsdata model packages.
2
+
3
+ Each vendored XSD becomes its own model package under ``galaxy_tool_source/models/``
4
+ (``v16_10`` … ``v26_1``). xsdata caches its resolved output path within a
5
+ process, so every version is generated in a fresh subprocess via this module's
6
+ ``python -m galaxy_tool_source._codegen`` entry point.
7
+
8
+ This module is imported by the hatchling build hook, ``scripts/regenerate.py``,
9
+ and the codegen test. It must not import hatchling, and imports xsdata only on
10
+ the ``__main__`` path, so merely importing the module stays cheap.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import logging
17
+ import os
18
+ import shutil
19
+ import subprocess
20
+ import sys
21
+ import tempfile
22
+ from concurrent.futures import ThreadPoolExecutor
23
+ from functools import cache, partial
24
+ from pathlib import Path
25
+
26
+ from galaxy_tool_source.models.registry import version_to_module
27
+
28
# Dotted module path passed to ``python -m`` when ``generate_one`` re-invokes
# this module in a subprocess; also echoed in the ``_main`` usage message.
_MODULE_NAME = "galaxy_tool_source._codegen"
29
+
30
+
31
@cache
def _package_dir() -> Path:
    """Return the resolved directory containing this module (memoised)."""
    this_file = Path(__file__).resolve()
    return this_file.parent
34
+
35
+
36
def _vendored_versions() -> list[str]:
    """List the versions declared in ``schema/manifest.json``.

    The keys of the manifest's ``"schemas"`` mapping are returned sorted as
    strings, which the callers treat as oldest-first order.
    """
    manifest_path = _package_dir() / "schema" / "manifest.json"
    with manifest_path.open(encoding="utf-8") as handle:
        manifest = json.load(handle)
    versions: list[str] = list(manifest["schemas"])
    versions.sort()
    return versions
43
+
44
+
45
def _run_xsdata(version: str, *, models_dir: Path) -> None:
    """Generate one version's model package as ``models_dir/v{slug}/``.

    Runs xsdata in the current process — only ever called inside the dedicated
    subprocess spawned by ``generate_one``, because xsdata caches its resolved
    output path process-wide. xsdata writes into a throwaway directory; only the
    leaf package is copied into ``models_dir``, so hand-written files alongside
    it (``__init__.py``, ``registry.py``) are never touched.

    The working directory is switched to the throwaway directory only for the
    duration of the xsdata run and restored afterwards, so the temporary
    directory is never the process's cwd while it is being removed (removal
    of the current directory fails on Windows) and a relative ``models_dir``
    is interpreted against the caller's cwd.

    ``version`` is a vendored schema version (the ``galaxy-{version}.xsd`` file
    must exist under ``schema/``); ``models_dir`` is the directory that
    receives the generated package, replacing any previous copy.
    """
    from xsdata.codegen.transformer import ResourceTransformer
    from xsdata.models.config import GeneratorConfig

    module = version_to_module(version)
    config = GeneratorConfig()
    config.output.package = f"galaxy_tool_source.models.{module}"
    # unnest_classes works around an xsdata 26.2 bug: with nested inner classes
    # its circular-reference detector raises KeyError on the Galaxy 24.2+ schema.
    config.output.unnest_classes = True
    previous_cwd = os.getcwd()
    with tempfile.TemporaryDirectory() as tmp:
        staged_xsd = Path(tmp) / "galaxy.xsd"
        shutil.copy(_package_dir() / "schema" / f"galaxy-{version}.xsd", staged_xsd)
        os.chdir(tmp)  # xsdata writes the package tree relative to cwd
        try:
            ResourceTransformer(config=config).process([staged_xsd.as_uri()])
        finally:
            # Leave the temp dir before it is cleaned up, even on failure.
            os.chdir(previous_cwd)
        generated = Path(tmp) / "galaxy_tool_source" / "models" / module
        target = models_dir / module
        # Replace any stale package wholesale rather than merging into it.
        shutil.rmtree(target, ignore_errors=True)
        shutil.copytree(generated, target)
72
+
73
+
74
def generate_one(version: str, *, models_dir: Path) -> None:
    """Run the codegen for a single version in a child interpreter.

    The child is started as ``python -m galaxy_tool_source._codegen`` with the
    package's parent directory prepended to ``PYTHONPATH``. A non-zero exit
    status is turned into a ``RuntimeError`` carrying the child's stderr.
    """
    search_path = [str(_package_dir().parent)]
    inherited = os.environ.get("PYTHONPATH", "")
    if inherited:
        search_path.append(inherited)

    child_env = dict(os.environ)
    child_env["PYTHONPATH"] = os.pathsep.join(search_path)

    command = [sys.executable, "-m", _MODULE_NAME, version, str(models_dir)]
    completed = subprocess.run(
        command,
        env=child_env,
        capture_output=True,
        text=True,
        check=False,
    )
    if completed.returncode == 0:
        return
    raise RuntimeError(
        f"model codegen failed for Galaxy {version}:\n{completed.stderr}"
    )
92
+
93
+
94
def _all_present(versions: list[str]) -> bool:
    """Report whether ``any_tool.py`` and every version's package directory exist."""
    models_dir = _package_dir() / "models"
    union_module = models_dir / "any_tool.py"
    if not union_module.is_file():
        return False
    for version in versions:
        package = models_dir / version_to_module(version)
        if not package.is_dir():
            return False
    return True
102
+
103
+
104
def _write_any_tool(versions: list[str]) -> None:
    """Emit ``models/any_tool.py`` defining ``AnyTool``.

    The file imports each version's ``Tool`` class under an underscore-prefixed
    alias and declares ``AnyTool`` as the ``|`` union of those aliases, in the
    order given by ``versions``.
    """
    modules = [version_to_module(version) for version in versions]
    import_lines = [
        f"from galaxy_tool_source.models.{module} import Tool as _{module}"
        for module in modules
    ]
    union = " | ".join(f"_{module}" for module in modules)
    content = "\n".join(
        [
            '"""Generated: the union of every per-version ``Tool`` model."""',
            "",
            *import_lines,
            "",
            f"AnyTool = {union}",
            "",
        ]
    )
    destination = _package_dir() / "models" / "any_tool.py"
    destination.write_text(content, encoding="utf-8")
116
+
117
+
118
def regenerate_all_models(*, force: bool = False) -> None:
    """Build the model package for every vendored schema version.

    Nothing happens when all packages and ``any_tool.py`` are already on disk,
    unless ``force`` is set. Otherwise each version is generated through
    ``generate_one`` (one subprocess per version) from a thread pool of at
    most eight workers, and ``any_tool.py`` is rewritten afterwards.
    """
    versions = _vendored_versions()
    if force or not _all_present(versions):
        models_dir = _package_dir() / "models"
        build = partial(generate_one, models_dir=models_dir)
        worker_count = min(os.cpu_count() or 4, 8)
        with ThreadPoolExecutor(max_workers=worker_count) as pool:
            # Drain the result iterator purely so that an exception raised in
            # any worker is re-raised here; the results themselves are None.
            for _ in pool.map(build, versions):
                pass
        _write_any_tool(versions)
135
+
136
+
137
def clean_generated() -> None:
    """Delete the generated model packages and ``any_tool.py``, if present.

    Missing directories and a missing ``any_tool.py`` are ignored.
    """
    models_dir = _package_dir() / "models"
    package_dirs = (
        models_dir / version_to_module(version) for version in _vendored_versions()
    )
    for package_dir in package_dirs:
        shutil.rmtree(package_dir, ignore_errors=True)
    union_module = models_dir / "any_tool.py"
    union_module.unlink(missing_ok=True)
143
+
144
+
145
def _main(argv: list[str]) -> int:
    """Command-line driver for ``python -m galaxy_tool_source._codegen``.

    ``argv`` must be ``[program, version, models_dir]``. Returns ``0`` after a
    successful generation, or ``2`` (after printing a usage line to stderr)
    when the argument count is wrong.
    """
    if len(argv) == 3:
        _, version, models_dir = argv
        # xsdata is verbose; stay quiet on success
        logging.disable(logging.CRITICAL)
        _run_xsdata(version, models_dir=Path(models_dir).resolve())
        return 0
    print(
        f"usage: python -m {_MODULE_NAME} <version> <models_dir>",
        file=sys.stderr,
    )
    return 2
156
+
157
+
158
# Script entry point: the process exit status is whatever ``_main`` returns.
if __name__ == "__main__":
    sys.exit(_main(sys.argv))
@@ -0,0 +1,338 @@
1
+ """Parse and validate entry points, plus the result and error types.
2
+
3
+ Parsing and validation live together because validation parses first and both
4
+ share the ``XmlError`` type. The result-returning functions (``parse_tool``,
5
+ ``validate_tool``) are the preferred API and never raise on malformed XML —
6
+ they collect every error into their result. ``load_tool`` is the strict
7
+ variant, raising ``ToolXmlSyntaxError`` instead.
8
+
9
+ XML is always read into ``bytes`` once and parsed once, never decoded to ``str``
10
+ first, so lxml honours the document's own encoding declaration.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from dataclasses import dataclass
16
+ from pathlib import Path
17
+ from typing import BinaryIO
18
+
19
+ from lxml import etree
20
+
21
+ from galaxy_tool_source.document import MacroDocument, ToolDocument
22
+ from galaxy_tool_source.macros import (
23
+ MacroError,
24
+ expand_from_path,
25
+ expand_from_tree,
26
+ has_macros,
27
+ strip_macros,
28
+ )
29
+ from galaxy_tool_source.profiles import (
30
+ available_profiles,
31
+ compiled_schema,
32
+ resolve_profile,
33
+ )
34
+
35
+ Source = str | Path | bytes | BinaryIO
36
+
37
+ _MACRO_MODES = frozenset({"off", "skip", "strip", "expand"})
38
+ _STRING_SOURCE = "<string>"
39
+
40
+
41
@dataclass(frozen=True)
class XmlError:
    """One well-formedness or schema-validation problem found in an XML source."""

    line: int
    column: int
    message: str
    source: str

    def __str__(self) -> str:
        # Rendered as ``source:line:column: message``.
        location = ":".join((str(self.source), str(self.line), str(self.column)))
        return f"{location}: {self.message}"
52
+
53
+
54
@dataclass(frozen=True)
class ParseResult:
    """What a lenient parse produced: a document (if any) plus its syntax errors."""

    document: ToolDocument | None
    syntax_errors: list[XmlError]

    @property
    def well_formed(self) -> bool:
        """``True`` when no well-formedness errors were recorded."""
        return len(self.syntax_errors) == 0
65
+
66
+
67
@dataclass(frozen=True)
class ValidationResult:
    """What a profile-aware validation run produced."""

    validated: bool
    schema_version: str
    declared_profile: str | None
    macro_handling: str
    macros_present: bool
    syntax_errors: list[XmlError]
    errors: list[XmlError]
    macro_errors: list[MacroError]

    @property
    def valid(self) -> bool:
        """``True`` only if validation ran and every error list is empty."""
        problem_lists = (self.syntax_errors, self.errors, self.macro_errors)
        return self.validated and not any(problem_lists)
89
+
90
+
91
class ToolXmlSyntaxError(Exception):
    """Signals that strictly parsed XML was not well-formed.

    The collected errors are kept on ``errors``; the exception message is
    their string forms joined with ``"; "``, or a generic fallback when that
    join is empty.
    """

    def __init__(self, errors: list[XmlError]) -> None:
        self.errors = errors
        rendered = "; ".join(map(str, errors))
        super().__init__(rendered if rendered else "malformed tool XML")
98
+
99
+
100
def _read_source(source: Source) -> tuple[bytes, Path | None, str]:
    """Load a source and return ``(xml_bytes, source_path, source_label)``.

    A ``str`` or ``Path`` is treated as a file to read, with the path doubling
    as the label. Raw ``bytes`` are used as-is and anything else is read via
    its ``read()`` method; both of those have no path and get the generic
    string label.
    """
    if isinstance(source, (str, Path)):
        path = Path(source)
        return path.read_bytes(), path, str(path)
    payload = source if isinstance(source, bytes) else source.read()
    return payload, None, _STRING_SOURCE
108
+
109
+
110
def _to_xml_error(entry: etree._LogEntry, source: str) -> XmlError:
    """Build an ``XmlError`` from an lxml log entry, tagged with ``source``."""
    return XmlError(entry.line, entry.column, entry.message, source)
118
+
119
+
120
def _parse_bytes(
    xml_bytes: bytes, source: str
) -> tuple[etree._ElementTree | None, list[XmlError]]:
    """Leniently parse XML bytes and gather all well-formedness errors.

    The parser runs in recovery mode with ``strip_cdata=False``. Its error log
    is converted right after parsing, before anything else uses the parser.
    Returns ``(tree, errors)``; the tree is ``None`` when no root element could
    be recovered, in which case at least one error is always reported.
    """
    parser = etree.XMLParser(recover=True, strip_cdata=False)
    # third-party API: no LBYL form — even in recovery mode lxml may raise
    # XMLSyntaxError on input it cannot salvage at all.
    try:
        root = etree.fromstring(xml_bytes, parser)
    except etree.XMLSyntaxError:
        root = None
    logged = [_to_xml_error(entry, source) for entry in parser.error_log]
    if root is not None:
        return root.getroottree(), logged
    if logged:
        return None, logged
    # No root and nothing logged: still give the caller something to report.
    return None, [XmlError(0, 0, "document could not be parsed", source)]
142
+
143
+
144
def load_tool(source: Source) -> ToolDocument:
    """Strictly parse a tool.

    Raises ``ToolXmlSyntaxError`` when no tree could be built or when any
    well-formedness error was recorded, even if a tree was recovered.
    """
    xml_bytes, source_path, label = _read_source(source)
    tree, syntax_errors = _parse_bytes(xml_bytes, label)
    if tree is not None and not syntax_errors:
        return ToolDocument(tree, source_path=source_path)
    raise ToolXmlSyntaxError(syntax_errors)
151
+
152
+
153
def parse_tool(source: Source) -> ParseResult:
    """Leniently parse a tool and report every well-formedness error.

    Whenever recovery produced a tree, the result carries a ``ToolDocument``
    wrapping it, alongside whatever errors were logged; otherwise the
    document is ``None``.
    """
    xml_bytes, source_path, label = _read_source(source)
    tree, syntax_errors = _parse_bytes(xml_bytes, label)
    if tree is None:
        document = None
    else:
        document = ToolDocument(tree, source_path=source_path)
    return ParseResult(document=document, syntax_errors=syntax_errors)
163
+
164
+
165
def load_macros(source: Source) -> MacroDocument:
    """Strictly parse a macro-library file into a ``MacroDocument``.

    This mirrors ``load_tool`` for macro files: the source is only parsed,
    never validated, and ``ToolXmlSyntaxError`` is raised if no tree could be
    built or any well-formedness error was logged. The parser keeps CDATA
    sections (``strip_cdata=False``). The root tag is not checked here, so a
    caller that must tell a macro file from a tool should look at
    ``document.root.tag``.
    """
    xml_bytes, source_path, label = _read_source(source)
    tree, syntax_errors = _parse_bytes(xml_bytes, label)
    if tree is not None and not syntax_errors:
        return MacroDocument(tree, source_path=source_path)
    raise ToolXmlSyntaxError(syntax_errors)
181
+
182
+
183
def _source_label(document: ToolDocument) -> str:
    """Pick the label for error messages: the document's path, else the string label."""
    path = document.source_path
    if path:
        return str(path)
    return _STRING_SOURCE
186
+
187
+
188
def _schema_errors(
    schema: etree.XMLSchema, tree: etree._ElementTree, source: str
) -> list[XmlError]:
    """Run ``schema`` over ``tree`` and return its errors (empty when valid)."""
    is_valid = schema.validate(tree)
    if not is_valid:
        return [_to_xml_error(entry, source) for entry in schema.error_log]
    return []
195
+
196
+
197
+ def _tree_to_validate(
198
+ document: ToolDocument,
199
+ *,
200
+ macro_handling: str,
201
+ macros_present: bool,
202
+ path_target: Path | None,
203
+ ) -> tuple[etree._ElementTree | None, list[MacroError]]:
204
+ """Select the tree to validate per ``macro_handling``.
205
+
206
+ Returns ``(tree, macro_errors)``; a ``None`` tree means XSD validation must
207
+ be skipped (``skip`` mode with macros, or expansion produced no tree).
208
+ """
209
+ if not macros_present or macro_handling == "off":
210
+ return document.tree, []
211
+ if macro_handling == "skip":
212
+ return None, []
213
+ if macro_handling == "strip":
214
+ return strip_macros(document.tree), []
215
+ if path_target is not None:
216
+ return expand_from_path(path_target)
217
+ source_dir = document.source_path.parent if document.source_path else None
218
+ return expand_from_tree(document.root, source_dir=source_dir)
219
+
220
+
221
def validate_tool(
    target: Source | ToolDocument,
    *,
    profile: str | None = None,
    on_missing: str = "nearest",
    macro_handling: str = "expand",
) -> ValidationResult:
    """Validate a tool against the vendored XSD selected by its profile.

    ``target`` may be a source (path, ``bytes``, or binary stream) or a
    ``ToolDocument`` that was parsed earlier. The schema version comes from
    ``resolve_profile``, fed with the explicit ``profile`` argument when given
    and the tool's own ``profile`` attribute otherwise, together with
    ``on_missing``. See ``profiles.resolve_profile`` / ``docs/decisions.md``
    §1.5 for how an unset or unknown profile is resolved.

    ``macro_handling`` selects what is validated when the tool uses macros:
    ``off`` validates the tree as-is, ``skip`` performs no validation,
    ``strip`` validates the result of ``strip_macros``, and ``expand`` (the
    default) validates the macro-expanded tool. When nothing could be parsed,
    or no tree is available to validate, the result has ``validated=False``.

    Raises ``ValueError`` for an unrecognised ``macro_handling``. Per
    ``docs/decisions.md``, ``UnknownProfileError`` is raised only when
    ``on_missing="exact"``.
    """
    if macro_handling not in _MACRO_MODES:
        raise ValueError(f"macro_handling must be one of {sorted(_MACRO_MODES)}")

    document: ToolDocument | None
    syntax_errors: list[XmlError]
    path_target: Path | None = None
    if isinstance(target, ToolDocument):
        document, syntax_errors = target, []
    else:
        parsed = parse_tool(target)
        document, syntax_errors = parsed.document, parsed.syntax_errors
        if isinstance(target, (str, Path)):
            # Remember the path so macro expansion can start from the file.
            path_target = Path(target)

    if document is None:
        # Nothing parseable: report the syntax errors without validating.
        return ValidationResult(
            validated=False,
            schema_version="",
            declared_profile=None,
            macro_handling=macro_handling,
            macros_present=False,
            syntax_errors=syntax_errors,
            errors=[],
            macro_errors=[],
        )

    declared_profile = document.profile
    macros_present = has_macros(document.root)
    requested_profile = declared_profile if profile is None else profile
    schema_version = resolve_profile(requested_profile, on_missing=on_missing)
    tree, macro_errors = _tree_to_validate(
        document,
        macro_handling=macro_handling,
        macros_present=macros_present,
        path_target=path_target,
    )

    if tree is None:
        # No tree to check, so the schema is never consulted.
        validated = False
        errors: list[XmlError] = []
    else:
        validated = True
        schema = compiled_schema(schema_version)
        errors = _schema_errors(schema, tree, _source_label(document))

    return ValidationResult(
        validated=validated,
        schema_version=schema_version,
        declared_profile=declared_profile,
        macro_handling=macro_handling,
        macros_present=macros_present,
        syntax_errors=syntax_errors,
        errors=errors,
        macro_errors=macro_errors,
    )
306
+
307
+
308
def newest_valid_profile(target: Source | ToolDocument) -> str | None:
    """Find the newest vendored profile under which the tool validates.

    ``available_profiles()`` is walked in reverse and the tool is validated
    with ``validate_tool``'s default macro handling against each entry; the
    first profile whose result is ``valid`` is returned. ``None`` is returned
    when no profile validates, which includes a source that cannot be parsed
    at all.

    The search stops at the first hit and infers nothing about the remaining
    profiles — per ``docs/decisions.md`` §10.3 a tool's valid profiles are
    often not a contiguous range (2.58% have gaps). Only one validation is
    needed when the tool validates at the latest profile, reported as the
    case for 90.1% of unique tools in the 2026-05-27 combined sweep (§10.5).
    """
    probe: Source | ToolDocument
    if isinstance(target, ToolDocument):
        # A ToolDocument may carry a mutated tree, so it is validated as-is.
        probe = target
    elif isinstance(target, (str, Path)):
        # Keep paths as paths: validate_tool then expands macros via
        # expand_from_path, which follows transitive <import>s.
        probe = Path(target)
    else:
        # Bytes or a stream: parse once up front and reuse the document.
        parsed = parse_tool(target).document
        if parsed is None:
            return None
        probe = parsed

    newest_first = reversed(available_profiles())
    return next(
        (
            version
            for version in newest_first
            if validate_tool(probe, profile=version).valid
        ),
        None,
    )