galaxy-tool-source 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of galaxy-tool-source might be problematic. Click here for more details.
- galaxy_tool_source/__init__.py +6 -0
- galaxy_tool_source/_codegen.py +159 -0
- galaxy_tool_source/binding.py +338 -0
- galaxy_tool_source/boolean_values.py +201 -0
- galaxy_tool_source/bundle.py +186 -0
- galaxy_tool_source/cdata.py +70 -0
- galaxy_tool_source/cheetah_cdm.py +165 -0
- galaxy_tool_source/cheetah_refs.py +290 -0
- galaxy_tool_source/cheetah_rename.py +801 -0
- galaxy_tool_source/cli.py +129 -0
- galaxy_tool_source/command_text.py +151 -0
- galaxy_tool_source/command_vars.py +225 -0
- galaxy_tool_source/corrections.py +245 -0
- galaxy_tool_source/document.py +196 -0
- galaxy_tool_source/macros.py +365 -0
- galaxy_tool_source/models/__init__.py +10 -0
- galaxy_tool_source/models/any_tool.py +32 -0
- galaxy_tool_source/models/registry.py +46 -0
- galaxy_tool_source/models/v16_10/__init__.py +201 -0
- galaxy_tool_source/models/v16_10/galaxy.py +5576 -0
- galaxy_tool_source/models/v17_01/__init__.py +207 -0
- galaxy_tool_source/models/v17_01/galaxy.py +5692 -0
- galaxy_tool_source/models/v17_05/__init__.py +207 -0
- galaxy_tool_source/models/v17_05/galaxy.py +5793 -0
- galaxy_tool_source/models/v17_09/__init__.py +209 -0
- galaxy_tool_source/models/v17_09/galaxy.py +5878 -0
- galaxy_tool_source/models/v18_01/__init__.py +211 -0
- galaxy_tool_source/models/v18_01/galaxy.py +5965 -0
- galaxy_tool_source/models/v18_05/__init__.py +211 -0
- galaxy_tool_source/models/v18_05/galaxy.py +5996 -0
- galaxy_tool_source/models/v18_09/__init__.py +211 -0
- galaxy_tool_source/models/v18_09/galaxy.py +6044 -0
- galaxy_tool_source/models/v19_01/__init__.py +215 -0
- galaxy_tool_source/models/v19_01/galaxy.py +6120 -0
- galaxy_tool_source/models/v19_05/__init__.py +223 -0
- galaxy_tool_source/models/v19_05/galaxy.py +6526 -0
- galaxy_tool_source/models/v19_09/__init__.py +239 -0
- galaxy_tool_source/models/v19_09/galaxy.py +6779 -0
- galaxy_tool_source/models/v20_01/__init__.py +239 -0
- galaxy_tool_source/models/v20_01/galaxy.py +6839 -0
- galaxy_tool_source/models/v20_05/__init__.py +239 -0
- galaxy_tool_source/models/v20_05/galaxy.py +6875 -0
- galaxy_tool_source/models/v20_09/__init__.py +241 -0
- galaxy_tool_source/models/v20_09/galaxy.py +6950 -0
- galaxy_tool_source/models/v21_01/__init__.py +249 -0
- galaxy_tool_source/models/v21_01/galaxy.py +7380 -0
- galaxy_tool_source/models/v21_05/__init__.py +249 -0
- galaxy_tool_source/models/v21_05/galaxy.py +7419 -0
- galaxy_tool_source/models/v21_09/__init__.py +259 -0
- galaxy_tool_source/models/v21_09/galaxy.py +7620 -0
- galaxy_tool_source/models/v22_01/__init__.py +291 -0
- galaxy_tool_source/models/v22_01/galaxy.py +8605 -0
- galaxy_tool_source/models/v22_05/__init__.py +297 -0
- galaxy_tool_source/models/v22_05/galaxy.py +8737 -0
- galaxy_tool_source/models/v23_0/__init__.py +297 -0
- galaxy_tool_source/models/v23_0/galaxy.py +8807 -0
- galaxy_tool_source/models/v23_1/__init__.py +303 -0
- galaxy_tool_source/models/v23_1/galaxy.py +9078 -0
- galaxy_tool_source/models/v23_2/__init__.py +309 -0
- galaxy_tool_source/models/v23_2/galaxy.py +9248 -0
- galaxy_tool_source/models/v24_0/__init__.py +319 -0
- galaxy_tool_source/models/v24_0/galaxy.py +9460 -0
- galaxy_tool_source/models/v24_1/__init__.py +333 -0
- galaxy_tool_source/models/v24_1/galaxy.py +9985 -0
- galaxy_tool_source/models/v24_2/__init__.py +343 -0
- galaxy_tool_source/models/v24_2/galaxy.py +11327 -0
- galaxy_tool_source/models/v25_0/__init__.py +347 -0
- galaxy_tool_source/models/v25_0/galaxy.py +11437 -0
- galaxy_tool_source/models/v25_1/__init__.py +353 -0
- galaxy_tool_source/models/v25_1/galaxy.py +11686 -0
- galaxy_tool_source/models/v26_0/__init__.py +349 -0
- galaxy_tool_source/models/v26_0/galaxy.py +11678 -0
- galaxy_tool_source/models/v26_1/__init__.py +355 -0
- galaxy_tool_source/models/v26_1/galaxy.py +11805 -0
- galaxy_tool_source/profiles.py +190 -0
- galaxy_tool_source/py.typed +0 -0
- galaxy_tool_source/rst.py +222 -0
- galaxy_tool_source/rst_markdown.py +407 -0
- galaxy_tool_source/schema/PROVENANCE.md +55 -0
- galaxy_tool_source/schema/galaxy-16.10.xsd +5125 -0
- galaxy_tool_source/schema/galaxy-17.01.xsd +5214 -0
- galaxy_tool_source/schema/galaxy-17.05.xsd +5336 -0
- galaxy_tool_source/schema/galaxy-17.09.xsd +5389 -0
- galaxy_tool_source/schema/galaxy-18.01.xsd +5431 -0
- galaxy_tool_source/schema/galaxy-18.05.xsd +5452 -0
- galaxy_tool_source/schema/galaxy-18.09.xsd +5470 -0
- galaxy_tool_source/schema/galaxy-19.01.xsd +5507 -0
- galaxy_tool_source/schema/galaxy-19.05.xsd +5594 -0
- galaxy_tool_source/schema/galaxy-19.09.xsd +5773 -0
- galaxy_tool_source/schema/galaxy-20.01.xsd +5793 -0
- galaxy_tool_source/schema/galaxy-20.05.xsd +5807 -0
- galaxy_tool_source/schema/galaxy-20.09.xsd +5855 -0
- galaxy_tool_source/schema/galaxy-21.01.xsd +6250 -0
- galaxy_tool_source/schema/galaxy-21.05.xsd +6289 -0
- galaxy_tool_source/schema/galaxy-21.09.xsd +6407 -0
- galaxy_tool_source/schema/galaxy-22.01.xsd +7045 -0
- galaxy_tool_source/schema/galaxy-22.05.xsd +7151 -0
- galaxy_tool_source/schema/galaxy-23.0.xsd +7230 -0
- galaxy_tool_source/schema/galaxy-23.1.xsd +7353 -0
- galaxy_tool_source/schema/galaxy-23.2.xsd +7494 -0
- galaxy_tool_source/schema/galaxy-24.0.xsd +7704 -0
- galaxy_tool_source/schema/galaxy-24.1.xsd +7966 -0
- galaxy_tool_source/schema/galaxy-24.2.xsd +8249 -0
- galaxy_tool_source/schema/galaxy-25.0.xsd +8351 -0
- galaxy_tool_source/schema/galaxy-25.1.xsd +8569 -0
- galaxy_tool_source/schema/galaxy-26.0.xsd +8565 -0
- galaxy_tool_source/schema/galaxy-26.1.xsd +8637 -0
- galaxy_tool_source/schema/manifest.json +229 -0
- galaxy_tool_source/schema_content.py +103 -0
- galaxy_tool_source/shell_oracle.py +346 -0
- galaxy_tool_source-0.1.0.dist-info/METADATA +126 -0
- galaxy_tool_source-0.1.0.dist-info/RECORD +115 -0
- galaxy_tool_source-0.1.0.dist-info/WHEEL +4 -0
- galaxy_tool_source-0.1.0.dist-info/entry_points.txt +2 -0
- galaxy_tool_source-0.1.0.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
"""Build-time codegen: generate the per-version xsdata model packages.
|
|
2
|
+
|
|
3
|
+
Each vendored XSD becomes its own model package under ``galaxy_tool_source/models/``
|
|
4
|
+
(``v16_10`` … ``v26_1``). xsdata caches its resolved output path within a
|
|
5
|
+
process, so every version is generated in a fresh subprocess via this module's
|
|
6
|
+
``python -m galaxy_tool_source._codegen`` entry point.
|
|
7
|
+
|
|
8
|
+
This module is imported by the hatchling build hook, ``scripts/regenerate.py``,
|
|
9
|
+
and the codegen test. It must not import hatchling, and imports xsdata only on
|
|
10
|
+
the ``__main__`` path, so merely importing the module stays cheap.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import json
|
|
16
|
+
import logging
|
|
17
|
+
import os
|
|
18
|
+
import shutil
|
|
19
|
+
import subprocess
|
|
20
|
+
import sys
|
|
21
|
+
import tempfile
|
|
22
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
23
|
+
from functools import cache, partial
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
from galaxy_tool_source.models.registry import version_to_module
|
|
27
|
+
|
|
28
|
+
# Dotted path of this module: handed to ``python -m`` by ``generate_one`` and
# echoed in the usage message printed by ``_main``.
_MODULE_NAME = "galaxy_tool_source._codegen"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
@cache
def _package_dir() -> Path:
    """Return the resolved directory that contains this module (memoised)."""
    this_file = Path(__file__)
    return this_file.resolve().parent
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _vendored_versions() -> list[str]:
    """List the version keys of the schema manifest in sorted order.

    Reads ``schema/manifest.json`` next to this module and returns the keys of
    its ``"schemas"`` mapping. The keys are sorted as plain strings, which is
    oldest-first as long as the version strings compare that way lexically.
    """
    manifest_path = _package_dir() / "schema" / "manifest.json"
    with manifest_path.open(encoding="utf-8") as handle:
        manifest = json.load(handle)
    schemas: dict[str, object] = manifest["schemas"]
    versions = list(schemas)
    versions.sort()
    return versions
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _run_xsdata(version: str, *, models_dir: Path) -> None:
    """Generate one version's model package as ``models_dir/v{slug}/``.

    Runs xsdata in the current process — only ever called inside the dedicated
    subprocess spawned by ``generate_one``, because xsdata caches its resolved
    output path process-wide. xsdata writes into a throwaway directory; only the
    leaf package is copied into ``models_dir``, so hand-written files alongside
    it (``__init__.py``, ``registry.py``) are never touched.

    The working directory is switched to the throwaway directory only for the
    duration of the xsdata run and is restored afterwards, so that

    * a relative ``models_dir`` is resolved against the caller's directory
      rather than the temporary one, and
    * the temporary directory is no longer the process's cwd when it is
      deleted (removing the current directory fails on Windows).

    Args:
        version: Vendored schema version; selects ``schema/galaxy-{version}.xsd``.
        models_dir: Directory that receives the generated ``v{slug}`` package.
            Any existing package of the same name is replaced.
    """
    from xsdata.codegen.transformer import ResourceTransformer
    from xsdata.models.config import GeneratorConfig

    module = version_to_module(version)
    config = GeneratorConfig()
    config.output.package = f"galaxy_tool_source.models.{module}"
    # unnest_classes works around an xsdata 26.2 bug: with nested inner classes
    # its circular-reference detector raises KeyError on the Galaxy 24.2+ schema.
    config.output.unnest_classes = True
    previous_cwd = os.getcwd()
    with tempfile.TemporaryDirectory() as tmp:
        staged_xsd = Path(tmp) / "galaxy.xsd"
        shutil.copy(_package_dir() / "schema" / f"galaxy-{version}.xsd", staged_xsd)
        os.chdir(tmp)  # xsdata writes the package tree relative to cwd
        try:
            ResourceTransformer(config=config).process([staged_xsd.as_uri()])
        finally:
            # Leave the temp dir before it is copied from and cleaned up.
            os.chdir(previous_cwd)
        generated = Path(tmp) / "galaxy_tool_source" / "models" / module
        target = models_dir / module
        # Replace, never merge: stale files from an older run must not survive.
        shutil.rmtree(target, ignore_errors=True)
        shutil.copytree(generated, target)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def generate_one(version: str, *, models_dir: Path) -> None:
    """Run this module as a script in a new interpreter to build one version.

    The child is started with ``PYTHONPATH`` set to the directory containing
    the package, followed by any inherited ``PYTHONPATH``. Its output is
    captured rather than shown.

    Raises:
        RuntimeError: if the child exits with a non-zero status; the message
            includes the child's stderr.
    """
    search_path = [str(_package_dir().parent), os.environ.get("PYTHONPATH", "")]
    child_env = dict(os.environ)
    # Drop empty entries so no stray separator ends up in the variable.
    child_env["PYTHONPATH"] = os.pathsep.join(filter(None, search_path))
    command = [sys.executable, "-m", _MODULE_NAME, version, str(models_dir)]
    completed = subprocess.run(
        command,
        env=child_env,
        capture_output=True,
        text=True,
        check=False,
    )
    if completed.returncode == 0:
        return
    raise RuntimeError(
        f"model codegen failed for Galaxy {version}:\n{completed.stderr}"
    )
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def _all_present(versions: list[str]) -> bool:
    """Report whether ``any_tool.py`` and every version's package directory exist."""
    models_dir = _package_dir() / "models"
    union_module = models_dir / "any_tool.py"
    if not union_module.is_file():
        return False
    for version in versions:
        package = models_dir / version_to_module(version)
        if not package.is_dir():
            return False
    return True
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _write_any_tool(versions: list[str]) -> None:
    """Emit ``models/any_tool.py``, defining ``AnyTool`` as a union of all ``Tool`` models.

    Each version's ``Tool`` class is imported under the alias ``_<module>`` and
    the aliases are joined with ``|`` in the order of ``versions``.
    """
    modules = [version_to_module(version) for version in versions]
    import_lines = [
        f"from galaxy_tool_source.models.{module} import Tool as _{module}"
        for module in modules
    ]
    union = " | ".join(f"_{module}" for module in modules)
    content = "\n".join(
        [
            '"""Generated: the union of every per-version ``Tool`` model."""',
            "",
            *import_lines,
            "",
            f"AnyTool = {union}",
            "",  # makes the file end with a newline
        ]
    )
    destination = _package_dir() / "models" / "any_tool.py"
    destination.write_text(content, encoding="utf-8")
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def regenerate_all_models(*, force: bool = False) -> None:
    """Build the model package for every vendored schema version.

    Does nothing when all packages and ``any_tool.py`` are already present,
    unless ``force`` is true. Versions are generated concurrently, each in its
    own subprocess (see ``generate_one``), and ``any_tool.py`` is rewritten
    once they have all finished.
    """
    versions = _vendored_versions()
    needs_generation = force or not _all_present(versions)
    if not needs_generation:
        return
    models_dir = _package_dir() / "models"
    worker = partial(generate_one, models_dir=models_dir)
    max_workers = min(os.cpu_count() or 4, 8)
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Drain the result iterator: a worker's exception is re-raised when
        # its result is retrieved, and the results themselves are all None.
        for _done in pool.map(worker, versions):
            continue
    _write_any_tool(versions)
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def clean_generated() -> None:
    """Delete each version's generated package directory and ``any_tool.py``.

    Missing directories and a missing ``any_tool.py`` are silently ignored.
    """
    models_dir = _package_dir() / "models"
    for module in map(version_to_module, _vendored_versions()):
        shutil.rmtree(models_dir / module, ignore_errors=True)
    union_module = models_dir / "any_tool.py"
    union_module.unlink(missing_ok=True)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def _main(argv: list[str]) -> int:
    """Command-line driver for ``python -m galaxy_tool_source._codegen <version> <models_dir>``.

    Returns the process exit status: ``0`` after generating the package, ``2``
    (with a usage line on stderr) when the argument count is wrong.
    """
    if len(argv) == 3:
        _program, version, models_dir = argv
        # xsdata is verbose; stay quiet on success
        logging.disable(logging.CRITICAL)
        _run_xsdata(version, models_dir=Path(models_dir).resolve())
        return 0
    print(
        f"usage: python -m {_MODULE_NAME} <version> <models_dir>",
        file=sys.stderr,
    )
    return 2
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
# Script entry point: run ``_main`` and exit with the status code it returns.
if __name__ == "__main__":
    raise SystemExit(_main(sys.argv))
|
|
@@ -0,0 +1,338 @@
|
|
|
1
|
+
"""Parse and validate entry points, plus the result and error types.
|
|
2
|
+
|
|
3
|
+
Parsing and validation live together because validation parses first and both
|
|
4
|
+
share the ``XmlError`` type. The result-returning functions (``parse_tool``,
|
|
5
|
+
``validate_tool``) are the preferred API and never raise on malformed XML —
|
|
6
|
+
they collect every error into their result. ``load_tool`` is the strict
|
|
7
|
+
variant, raising ``ToolXmlSyntaxError`` instead.
|
|
8
|
+
|
|
9
|
+
XML is always read into ``bytes`` once and parsed once, never decoded to ``str``
|
|
10
|
+
first, so lxml honours the document's own encoding declaration.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
from dataclasses import dataclass
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import BinaryIO
|
|
18
|
+
|
|
19
|
+
from lxml import etree
|
|
20
|
+
|
|
21
|
+
from galaxy_tool_source.document import MacroDocument, ToolDocument
|
|
22
|
+
from galaxy_tool_source.macros import (
|
|
23
|
+
MacroError,
|
|
24
|
+
expand_from_path,
|
|
25
|
+
expand_from_tree,
|
|
26
|
+
has_macros,
|
|
27
|
+
strip_macros,
|
|
28
|
+
)
|
|
29
|
+
from galaxy_tool_source.profiles import (
|
|
30
|
+
available_profiles,
|
|
31
|
+
compiled_schema,
|
|
32
|
+
resolve_profile,
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
# Anything a tool can be read from: a filesystem path (``str`` or ``Path``),
# raw XML ``bytes``, or an object with a ``read()`` method returning bytes.
Source = str | Path | bytes | BinaryIO

# Accepted values for the ``macro_handling`` argument of ``validate_tool``.
_MACRO_MODES = frozenset({"off", "skip", "strip", "expand"})
# Source label used in error messages when the XML did not come from a path.
_STRING_SOURCE = "<string>"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(frozen=True)
class XmlError:
    """One reported problem: an XML well-formedness or XSD-validation error."""

    line: int
    column: int
    message: str
    source: str  # label of the document the error belongs to

    def __str__(self) -> str:
        """Render as ``source:line:column: message``."""
        return "{}:{}:{}: {}".format(
            self.source, self.line, self.column, self.message
        )
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass(frozen=True)
class ParseResult:
    """Result of a lenient parse: the document (if any) plus collected errors."""

    # ``None`` when no usable tree could be recovered from the source.
    document: ToolDocument | None
    syntax_errors: list[XmlError]

    @property
    def well_formed(self) -> bool:
        """True when the parse recorded no well-formedness errors."""
        has_errors = bool(self.syntax_errors)
        return not has_errors
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
@dataclass(frozen=True)
class ValidationResult:
    """Result of validating a tool against a profile's schema."""

    validated: bool  # whether XSD validation was actually run
    schema_version: str
    declared_profile: str | None
    macro_handling: str
    macros_present: bool
    syntax_errors: list[XmlError]
    errors: list[XmlError]
    macro_errors: list[MacroError]

    @property
    def valid(self) -> bool:
        """True only if validation ran and every error list is empty."""
        any_problem = self.syntax_errors or self.errors or self.macro_errors
        return self.validated and not any_problem
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class ToolXmlSyntaxError(Exception):
    """Signals that source XML handed to a strict loader was not well-formed.

    The individual errors are kept on ``errors``; the exception message is
    their string forms joined with ``"; "``, or a generic text when the list
    is empty.
    """

    def __init__(self, errors: list[XmlError]) -> None:
        self.errors = errors
        summary = "; ".join(map(str, errors))
        if not summary:
            summary = "malformed tool XML"
        super().__init__(summary)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def _read_source(source: Source) -> tuple[bytes, Path | None, str]:
    """Load a source and return ``(xml_bytes, source_path, source_label)``.

    A ``str`` or ``Path`` is treated as a file to read; its path is returned
    and doubles as the label. ``bytes`` are used as-is and anything else is
    read via its ``read()`` method; both get no path and the generic
    string-source label.
    """
    if isinstance(source, (str, Path)):
        location = Path(source)
        return location.read_bytes(), location, str(location)
    if isinstance(source, bytes):
        payload = source
    else:
        payload = source.read()
    return payload, None, _STRING_SOURCE
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def _to_xml_error(entry: etree._LogEntry, source: str) -> XmlError:
    """Build an ``XmlError`` from an lxml log entry and a source label."""
    line, column, message = entry.line, entry.column, entry.message
    return XmlError(line, column, message, source)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _parse_bytes(
    xml_bytes: bytes, source: str
) -> tuple[etree._ElementTree | None, list[XmlError]]:
    """Parse XML bytes with a recovering parser and report every syntax error.

    The parser is created with ``strip_cdata=False``, so CDATA sections are
    kept. Its error log is converted to ``XmlError`` objects right after
    parsing. Returns ``(tree, errors)``; ``tree`` is ``None`` when no root
    element could be produced, in which case ``errors`` is never empty.
    """
    parser = etree.XMLParser(recover=True, strip_cdata=False)
    # third-party API: no LBYL form — even in recover mode lxml may raise
    # XMLSyntaxError for input it cannot salvage at all (e.g. a binary file
    # with no XML structure).
    try:
        root = etree.fromstring(xml_bytes, parser)
    except etree.XMLSyntaxError:
        root = None
    syntax_errors = [_to_xml_error(entry, source) for entry in parser.error_log]
    if root is not None:
        return root.getroottree(), syntax_errors
    # No tree: make sure the caller still gets at least one error to report.
    fallback = [XmlError(0, 0, "document could not be parsed", source)]
    return None, syntax_errors or fallback
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def load_tool(source: Source) -> ToolDocument:
    """Strictly parse a tool and return its ``ToolDocument``.

    Raises:
        ToolXmlSyntaxError: if no tree could be built or any well-formedness
            error was recorded.
    """
    raw, origin, label = _read_source(source)
    tree, problems = _parse_bytes(raw, label)
    parsed_cleanly = tree is not None and not problems
    if parsed_cleanly:
        return ToolDocument(tree, source_path=origin)
    raise ToolXmlSyntaxError(problems)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def parse_tool(source: Source) -> ParseResult:
    """Leniently parse a tool, returning errors instead of raising on bad XML.

    The result's ``document`` is built from the recovered tree whenever the
    parser produced one, even if errors were recorded; it is ``None``
    otherwise.
    """
    raw, origin, label = _read_source(source)
    tree, problems = _parse_bytes(raw, label)
    if tree is None:
        return ParseResult(document=None, syntax_errors=problems)
    recovered = ToolDocument(tree, source_path=origin)
    return ParseResult(document=recovered, syntax_errors=problems)
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def load_macros(source: Source) -> MacroDocument:
    """Strictly parse a macro-library file and return its ``MacroDocument``.

    This mirrors ``load_tool`` for macro files. A macro library (a ``<macros>``
    root holding ``<token>`` / ``<xml>`` / ``<macro>`` / ``<import>``) has no
    standalone XSD, so the file is only parsed, never validated. The returned
    document carries the mutable tree (CDATA, comments, and attribute order
    preserved) and ``source_path``. The root tag is not checked here; callers
    that must tell a macro file from a tool inspect ``document.root.tag``.

    Raises:
        ToolXmlSyntaxError: if the XML is malformed.
    """
    raw, origin, label = _read_source(source)
    tree, problems = _parse_bytes(raw, label)
    parsed_cleanly = tree is not None and not problems
    if parsed_cleanly:
        return MacroDocument(tree, source_path=origin)
    raise ToolXmlSyntaxError(problems)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _source_label(document: ToolDocument) -> str:
    """Pick the label used in error messages: the path if known, else the generic one."""
    origin = document.source_path
    if origin:
        return str(origin)
    return _STRING_SOURCE
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def _schema_errors(
    schema: etree.XMLSchema, tree: etree._ElementTree, source: str
) -> list[XmlError]:
    """Run XSD validation and return the schema's errors (empty when valid)."""
    is_valid = schema.validate(tree)
    if not is_valid:
        return [_to_xml_error(entry, source) for entry in schema.error_log]
    return []
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def _tree_to_validate(
    document: ToolDocument,
    *,
    macro_handling: str,
    macros_present: bool,
    path_target: Path | None,
) -> tuple[etree._ElementTree | None, list[MacroError]]:
    """Choose which tree gets validated, according to ``macro_handling``.

    Returns ``(tree, macro_errors)``. A ``None`` tree tells the caller to skip
    XSD validation: either ``skip`` mode met a tool with macros, or macro
    expansion produced no tree.
    """
    # Nothing to do about macros: validate the document's own tree.
    if macro_handling == "off" or not macros_present:
        return document.tree, []
    if macro_handling == "skip":
        return None, []
    if macro_handling == "strip":
        return strip_macros(document.tree), []
    # Any other mode expands; prefer expanding from the file when a path is known.
    if path_target is None:
        source_dir = document.source_path.parent if document.source_path else None
        return expand_from_tree(document.root, source_dir=source_dir)
    return expand_from_path(path_target)
|
|
219
|
+
|
|
220
|
+
|
|
221
|
+
def validate_tool(
    target: Source | ToolDocument,
    *,
    profile: str | None = None,
    on_missing: str = "nearest",
    macro_handling: str = "expand",
) -> ValidationResult:
    """Validate a tool against the vendored XSD that matches its profile.

    ``target`` may be a source (path, ``bytes``, or binary stream) or an
    already parsed ``ToolDocument``. The profile comes from ``profile`` if
    given, otherwise from the tool's own ``profile`` attribute; when neither
    is set, Galaxy's ``16.01`` legacy default applies, which resolves to the
    nearest vendored XSD (``16.10``, the oldest). See
    ``profiles.resolve_profile`` / ``docs/decisions.md`` §1.5.

    The Galaxy XSD describes tools *after* macro expansion, so
    ``macro_handling`` selects what is validated:

    * ``off`` — the tree exactly as parsed;
    * ``skip`` — nothing, when macros are present;
    * ``strip`` — the tree with ``<expand>``/``<macros>`` removed;
    * ``expand`` (default) — the fully expanded tool.

    The ``ToolDocument``'s tree is never mutated.

    Raises:
        ValueError: if ``macro_handling`` is not one of the modes above.
        UnknownProfileError: only when ``on_missing="exact"``.
    """
    if macro_handling not in _MACRO_MODES:
        raise ValueError(f"macro_handling must be one of {sorted(_MACRO_MODES)}")

    path_target: Path | None = None
    if isinstance(target, ToolDocument):
        document: ToolDocument | None = target
        syntax_errors: list[XmlError] = []
    else:
        parsed = parse_tool(target)
        document, syntax_errors = parsed.document, parsed.syntax_errors
        if isinstance(target, (str, Path)):
            path_target = Path(target)

    # Unparseable source: report the syntax errors and nothing else.
    if document is None:
        return ValidationResult(
            validated=False,
            schema_version="",
            declared_profile=None,
            macro_handling=macro_handling,
            macros_present=False,
            syntax_errors=syntax_errors,
            errors=[],
            macro_errors=[],
        )

    declared_profile = document.profile
    macros_present = has_macros(document.root)
    requested_profile = declared_profile if profile is None else profile
    schema_version = resolve_profile(requested_profile, on_missing=on_missing)
    tree, macro_errors = _tree_to_validate(
        document,
        macro_handling=macro_handling,
        macros_present=macros_present,
        path_target=path_target,
    )

    # A missing tree means validation is skipped and no schema errors exist.
    validated = tree is not None
    if validated:
        errors = _schema_errors(
            compiled_schema(schema_version), tree, _source_label(document)
        )
    else:
        errors = []
    return ValidationResult(
        validated=validated,
        schema_version=schema_version,
        declared_profile=declared_profile,
        macro_handling=macro_handling,
        macros_present=macros_present,
        syntax_errors=syntax_errors,
        errors=errors,
        macro_errors=macro_errors,
    )
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def newest_valid_profile(target: Source | ToolDocument) -> str | None:
    """Find the newest vendored profile whose XSD the tool validates against.

    Profiles are tried from newest to oldest with ``validate_tool``'s default
    macro handling (expansion); the first one that validates cleanly wins.
    ``None`` means none did — which includes a malformed tool or macros that
    cannot be expanded.

    The search stops at the first hit and assumes nothing about older
    profiles: a tool's valid profiles are often *not* a contiguous range of
    releases (2.58% have gaps; see ``docs/decisions.md`` §10.3). It needs a
    single validation when the tool validates at the latest profile, which is
    the case for 90.1% of unique tools in the 2026-05-27 combined sweep
    (§10.5).
    """
    # Keep a filesystem path as a path: validate_tool then expands macros via
    # expand_from_path, which follows transitive <import>s. A ToolDocument may
    # carry a mutated tree, so it is validated as-is. Other sources are parsed
    # once up front.
    if isinstance(target, (str, Path)):
        probe: Source | ToolDocument = Path(target)
    elif isinstance(target, ToolDocument):
        probe = target
    else:
        parsed = parse_tool(target).document
        if parsed is None:
            return None
        probe = parsed
    newest_first = reversed(available_profiles())
    return next(
        (
            version
            for version in newest_first
            if validate_tool(probe, profile=version).valid
        ),
        None,
    )
|