bactopia 2.0.0__tar.gz → 2.0.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bactopia-2.0.0 → bactopia-2.0.2}/PKG-INFO +11 -11
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/catalog.py +37 -79
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/rules/module_rules.py +87 -2
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/rules/subworkflow_rules.py +254 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/runner.py +4 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/nf.py +200 -43
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/outputs.py +11 -4
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/templates/nextflow/nextflow.config.j2 +5 -5
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/utils.py +4 -2
- {bactopia-2.0.0 → bactopia-2.0.2}/pyproject.toml +11 -11
- {bactopia-2.0.0 → bactopia-2.0.2}/LICENSE +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/README.md +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/atb.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/atb/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/atb/atb_downloader.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/atb/atb_formatter.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/citations.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/datasets.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/download.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/helpers/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/helpers/merge_schemas.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/jsonify.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/lint.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/bracken_to_excel.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/check_assembly_accession.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/check_fastqs.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/cleanup_coverage.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/kraken_bracken_summary.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/mask_consensus.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/scrubber_summary.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/teton_prepare.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/prepare.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/prune.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pubmlst/build.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pubmlst/setup.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/review.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/search.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/status.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/summary.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/testing.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/update.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/workflows.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/databases/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/databases/ena.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/databases/ncbi.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/databases/pubmlst/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/databases/pubmlst/constants.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/databases/pubmlst/utils.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/models.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/rules/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/rules/workflow_rules.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parse.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/amrfinderplus.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/annotator.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/ariba.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/assembler.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/blast.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/citations.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/coverage.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/error.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/gather.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/generic.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/kraken.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/mapping.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/mlst.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/nextflow.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/parsables.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/qc.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/sketcher.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/variants.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/versions.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/workflows.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/reports/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/reports/templates/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/summary.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/templates/__init__.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/templates/logos.py +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/templates/nextflow/params.config.j2 +0 -0
- {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/templates/nextflow/process.config.j2 +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: bactopia
|
|
3
|
-
Version: 2.0.0
|
|
3
|
+
Version: 2.0.2
|
|
4
4
|
Summary: A Python package for working with Bactopia
|
|
5
5
|
License: MIT
|
|
6
6
|
License-File: LICENSE
|
|
@@ -15,16 +15,16 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.12
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.13
|
|
17
17
|
Classifier: Programming Language :: Python :: 3.14
|
|
18
|
-
Requires-Dist: biopython (>=1.80
|
|
19
|
-
Requires-Dist: jinja2 (>=3.1.6
|
|
20
|
-
Requires-Dist: openpyxl (>=3.1.0
|
|
21
|
-
Requires-Dist: pandas (>=2.2.0
|
|
22
|
-
Requires-Dist: pyyaml (>=6.0
|
|
23
|
-
Requires-Dist: rauth (>=0.7.3
|
|
24
|
-
Requires-Dist: requests (>=2.28.2
|
|
25
|
-
Requires-Dist: rich (>=13.3.1
|
|
26
|
-
Requires-Dist: rich-click (>=1.6.1
|
|
27
|
-
Requires-Dist: tqdm (>=4.66.5
|
|
18
|
+
Requires-Dist: biopython (>=1.80)
|
|
19
|
+
Requires-Dist: jinja2 (>=3.1.6)
|
|
20
|
+
Requires-Dist: openpyxl (>=3.1.0)
|
|
21
|
+
Requires-Dist: pandas (>=2.2.0)
|
|
22
|
+
Requires-Dist: pyyaml (>=6.0)
|
|
23
|
+
Requires-Dist: rauth (>=0.7.3)
|
|
24
|
+
Requires-Dist: requests (>=2.28.2)
|
|
25
|
+
Requires-Dist: rich (>=13.3.1)
|
|
26
|
+
Requires-Dist: rich-click (>=1.6.1)
|
|
27
|
+
Requires-Dist: tqdm (>=4.66.5)
|
|
28
28
|
Project-URL: Homepage, https://bactopia.github.io/
|
|
29
29
|
Project-URL: Repository, https://github.com/bactopia/bactopia-py
|
|
30
30
|
Description-Content-Type: text/markdown
|
|
@@ -16,7 +16,9 @@ from rich.logging import RichHandler
|
|
|
16
16
|
import bactopia
|
|
17
17
|
from bactopia.nf import (
|
|
18
18
|
find_main_nf,
|
|
19
|
+
get_bactopia_version,
|
|
19
20
|
parse_groovydoc_full,
|
|
21
|
+
parse_includes,
|
|
20
22
|
parse_main_nf_structure,
|
|
21
23
|
parse_module_config_full,
|
|
22
24
|
parse_workflow_config,
|
|
@@ -28,71 +30,6 @@ rich.traceback.install(console=stderr, width=200, word_wrap=True, extra_lines=1)
|
|
|
28
30
|
click.rich_click.USE_RICH_MARKUP = True
|
|
29
31
|
|
|
30
32
|
|
|
31
|
-
def _parse_includes(main_nf: Path, bactopia_path: Path) -> dict:
|
|
32
|
-
"""Parse include statements from a main.nf file.
|
|
33
|
-
|
|
34
|
-
Resolves source paths against the file's directory and the repo root
|
|
35
|
-
to derive normalized component keys (lowercase, underscore-separated).
|
|
36
|
-
|
|
37
|
-
Returns dict with:
|
|
38
|
-
modules: list of module keys (e.g., "abricate_run")
|
|
39
|
-
subworkflows: list of subworkflow keys (e.g., "bactopia_gather")
|
|
40
|
-
plugins: list of plugin function names
|
|
41
|
-
"""
|
|
42
|
-
result = {"modules": [], "subworkflows": [], "plugins": []}
|
|
43
|
-
if not main_nf.exists():
|
|
44
|
-
return result
|
|
45
|
-
|
|
46
|
-
try:
|
|
47
|
-
text = main_nf.read_text()
|
|
48
|
-
except OSError:
|
|
49
|
-
return result
|
|
50
|
-
|
|
51
|
-
seen_modules = set()
|
|
52
|
-
seen_subworkflows = set()
|
|
53
|
-
|
|
54
|
-
for m in re.finditer(
|
|
55
|
-
r"include\s*\{\s*(\w+)(?:\s+as\s+\w+)?\s*\}\s*from\s*['\"]([^'\"]+)['\"]",
|
|
56
|
-
text,
|
|
57
|
-
):
|
|
58
|
-
source = m.group(2)
|
|
59
|
-
|
|
60
|
-
if "plugin/" in source:
|
|
61
|
-
result["plugins"].append(m.group(1))
|
|
62
|
-
continue
|
|
63
|
-
|
|
64
|
-
# Resolve the source path relative to the file's directory
|
|
65
|
-
# Nextflow source paths omit .nf extension; parent of resolved path
|
|
66
|
-
# is the component directory
|
|
67
|
-
resolved = (main_nf.parent / source).resolve()
|
|
68
|
-
|
|
69
|
-
try:
|
|
70
|
-
rel_str = str(resolved.relative_to(bactopia_path))
|
|
71
|
-
except ValueError:
|
|
72
|
-
continue
|
|
73
|
-
|
|
74
|
-
if rel_str.startswith("modules/"):
|
|
75
|
-
# e.g., "modules/abricate/run/main" -> "abricate/run"
|
|
76
|
-
component = rel_str.removeprefix("modules/")
|
|
77
|
-
if component.endswith("/main"):
|
|
78
|
-
component = component[:-5]
|
|
79
|
-
key = component.replace("/", "_")
|
|
80
|
-
if key not in seen_modules:
|
|
81
|
-
seen_modules.add(key)
|
|
82
|
-
result["modules"].append(key)
|
|
83
|
-
elif rel_str.startswith("subworkflows/"):
|
|
84
|
-
# e.g., "subworkflows/bactopia/gather/main" -> "bactopia/gather"
|
|
85
|
-
component = rel_str.removeprefix("subworkflows/")
|
|
86
|
-
if component.endswith("/main"):
|
|
87
|
-
component = component[:-5]
|
|
88
|
-
key = component.replace("/", "_")
|
|
89
|
-
if key not in seen_subworkflows:
|
|
90
|
-
seen_subworkflows.add(key)
|
|
91
|
-
result["subworkflows"].append(key)
|
|
92
|
-
|
|
93
|
-
return result
|
|
94
|
-
|
|
95
|
-
|
|
96
33
|
def _extract_description(groovydoc: dict) -> str:
|
|
97
34
|
"""Extract the first line description from GroovyDoc raw lines."""
|
|
98
35
|
if not groovydoc.get("has_doc") or not groovydoc.get("raw_lines"):
|
|
@@ -116,7 +53,7 @@ def _parse_output_fields(raw_lines: list[str]) -> dict[str, list[str]]:
|
|
|
116
53
|
Dict mapping channel names to lists of field names, e.g.,
|
|
117
54
|
{"sample_outputs": ["gff", "gbk", ...], "run_outputs": []}.
|
|
118
55
|
"""
|
|
119
|
-
field_pattern = re.compile(r"\*\s*-\s*`(\w
|
|
56
|
+
field_pattern = re.compile(r"\*\s*-\s*`(\w+\??)`\s*:")
|
|
120
57
|
output_pattern = re.compile(r"\*\s*@output\s+(\S+)")
|
|
121
58
|
tag_pattern = re.compile(r"\*\s*@(?!output)\w+")
|
|
122
59
|
|
|
@@ -140,7 +77,7 @@ def _parse_output_fields(raw_lines: list[str]) -> dict[str, list[str]]:
|
|
|
140
77
|
if current_channel is not None:
|
|
141
78
|
fm = field_pattern.search(line)
|
|
142
79
|
if fm:
|
|
143
|
-
channels[current_channel].append(fm.group(1))
|
|
80
|
+
channels[current_channel].append(fm.group(1).rstrip("?"))
|
|
144
81
|
|
|
145
82
|
return channels
|
|
146
83
|
|
|
@@ -210,14 +147,16 @@ def _clean_scope(raw: str) -> str:
|
|
|
210
147
|
return raw.strip().strip('"').strip("'")
|
|
211
148
|
|
|
212
149
|
|
|
213
|
-
def _build_module_entry(component_name: str, main_nf: Path) -> dict:
|
|
150
|
+
def _build_module_entry(
|
|
151
|
+
component_name: str, main_nf: Path, bactopia_path: Path
|
|
152
|
+
) -> dict:
|
|
214
153
|
"""Build a catalog entry for a module."""
|
|
215
154
|
groovydoc = parse_groovydoc_full(main_nf)
|
|
216
155
|
config = parse_module_config_full(main_nf.parent / "module.config")
|
|
217
156
|
|
|
218
157
|
entry = {
|
|
219
158
|
"description": _extract_description(groovydoc),
|
|
220
|
-
"path": str(main_nf.parent.relative_to(
|
|
159
|
+
"path": str(main_nf.parent.relative_to(bactopia_path)) + "/",
|
|
221
160
|
}
|
|
222
161
|
|
|
223
162
|
# Scope and process_name from config
|
|
@@ -235,6 +174,11 @@ def _build_module_entry(component_name: str, main_nf: Path) -> dict:
|
|
|
235
174
|
fields = groovydoc["doc_input_records"][0].get("fields", [])
|
|
236
175
|
if fields:
|
|
237
176
|
entry["takes"] = [f for f in fields if f != "meta"]
|
|
177
|
+
optional_input = groovydoc.get("doc_optional_input_fields", set())
|
|
178
|
+
if optional_input:
|
|
179
|
+
takes_opt = [f for f in entry["takes"] if f in optional_input]
|
|
180
|
+
if takes_opt:
|
|
181
|
+
entry["takes_optional"] = takes_opt
|
|
238
182
|
|
|
239
183
|
# Emits from GroovyDoc @output (named fields only)
|
|
240
184
|
if groovydoc.get("doc_output_fields"):
|
|
@@ -242,6 +186,11 @@ def _build_module_entry(component_name: str, main_nf: Path) -> dict:
|
|
|
242
186
|
named = [f for f in groovydoc["doc_output_fields"] if f not in standard]
|
|
243
187
|
if named:
|
|
244
188
|
entry["emits"] = named
|
|
189
|
+
optional_output = groovydoc.get("doc_optional_output_fields", set())
|
|
190
|
+
if optional_output:
|
|
191
|
+
emits_opt = [f for f in named if f in optional_output]
|
|
192
|
+
if emits_opt:
|
|
193
|
+
entry["emits_optional"] = emits_opt
|
|
245
194
|
|
|
246
195
|
# Tags from GroovyDoc @tags
|
|
247
196
|
parsed_tags = _parse_tags(groovydoc)
|
|
@@ -256,11 +205,11 @@ def _build_subworkflow_entry(
|
|
|
256
205
|
) -> dict:
|
|
257
206
|
"""Build a catalog entry for a subworkflow."""
|
|
258
207
|
groovydoc = parse_groovydoc_full(main_nf)
|
|
259
|
-
includes =
|
|
208
|
+
includes = parse_includes(main_nf, bactopia_path)
|
|
260
209
|
|
|
261
210
|
entry = {
|
|
262
211
|
"description": _extract_description(groovydoc),
|
|
263
|
-
"path": str(main_nf.parent.relative_to(
|
|
212
|
+
"path": str(main_nf.parent.relative_to(bactopia_path)) + "/",
|
|
264
213
|
}
|
|
265
214
|
|
|
266
215
|
# Takes from GroovyDoc @input
|
|
@@ -268,8 +217,18 @@ def _build_subworkflow_entry(
|
|
|
268
217
|
fields = groovydoc["doc_input_records"][0].get("fields", [])
|
|
269
218
|
if fields:
|
|
270
219
|
entry["takes"] = [f for f in fields if f != "meta"]
|
|
220
|
+
optional_input = groovydoc.get("doc_optional_input_fields", set())
|
|
221
|
+
if optional_input:
|
|
222
|
+
takes_opt = [f for f in entry["takes"] if f in optional_input]
|
|
223
|
+
if takes_opt:
|
|
224
|
+
entry["takes_optional"] = takes_opt
|
|
271
225
|
if groovydoc.get("doc_input_params"):
|
|
272
226
|
entry["takes_params"] = groovydoc["doc_input_params"]
|
|
227
|
+
optional_params = groovydoc.get("doc_optional_input_params", set())
|
|
228
|
+
if optional_params:
|
|
229
|
+
params_opt = [p for p in entry["takes_params"] if p in optional_params]
|
|
230
|
+
if params_opt:
|
|
231
|
+
entry["takes_params_optional"] = params_opt
|
|
273
232
|
|
|
274
233
|
# Emits from GroovyDoc @output -- structured as channel -> fields dict
|
|
275
234
|
tags = groovydoc.get("tags", {})
|
|
@@ -305,14 +264,14 @@ def _build_workflow_entry(
|
|
|
305
264
|
) -> dict:
|
|
306
265
|
"""Build a catalog entry for a workflow."""
|
|
307
266
|
groovydoc = parse_groovydoc_full(main_nf)
|
|
308
|
-
includes =
|
|
267
|
+
includes = parse_includes(main_nf, bactopia_path)
|
|
309
268
|
|
|
310
269
|
# Determine type
|
|
311
270
|
is_tool = "bactopia-tools/" in str(main_nf)
|
|
312
|
-
wf_path = str(main_nf.parent.relative_to(
|
|
271
|
+
wf_path = str(main_nf.parent.relative_to(bactopia_path))
|
|
313
272
|
# Add trailing slash for tool/named workflow directories, but not for the
|
|
314
273
|
# root bactopia workflow which uses a Nextflow convention path
|
|
315
|
-
if is_tool or wf_path != "
|
|
274
|
+
if is_tool or wf_path != ".":
|
|
316
275
|
wf_path += "/"
|
|
317
276
|
entry = {
|
|
318
277
|
"description": _extract_description(groovydoc),
|
|
@@ -345,14 +304,11 @@ def generate_catalog(bactopia_path: Path) -> dict:
|
|
|
345
304
|
The catalog dict ready for JSON serialization.
|
|
346
305
|
"""
|
|
347
306
|
# Extract versions from nextflow.config
|
|
348
|
-
bactopia_version =
|
|
307
|
+
bactopia_version = get_bactopia_version(bactopia_path)
|
|
349
308
|
plugin_version = "unknown"
|
|
350
309
|
nf_config = bactopia_path / "nextflow.config"
|
|
351
310
|
if nf_config.exists():
|
|
352
311
|
for line in nf_config.read_text().splitlines():
|
|
353
|
-
m = re.match(r"\s*params\.bactopia_version\s*=\s*['\"]([^'\"]+)['\"]", line)
|
|
354
|
-
if m:
|
|
355
|
-
bactopia_version = m.group(1)
|
|
356
312
|
m = re.match(r"\s*id\s+['\"]nf-bactopia@([^'\"]+)['\"]", line)
|
|
357
313
|
if m:
|
|
358
314
|
plugin_version = m.group(1)
|
|
@@ -381,7 +337,9 @@ def generate_catalog(bactopia_path: Path) -> dict:
|
|
|
381
337
|
component_name = str(rel).replace("modules/", "")
|
|
382
338
|
# Normalize key: slash to underscore (e.g., "abricate/run" -> "abricate_run")
|
|
383
339
|
key = component_name.replace("/", "_")
|
|
384
|
-
catalog["modules"][key] = _build_module_entry(component_name, main_nf)
|
|
340
|
+
catalog["modules"][key] = _build_module_entry(
|
|
341
|
+
component_name, main_nf, bactopia_path
|
|
342
|
+
)
|
|
385
343
|
|
|
386
344
|
# Subworkflows
|
|
387
345
|
subworkflows_dir = bactopia_path / "subworkflows"
|
|
@@ -291,7 +291,7 @@ PASSTHROUGH_OUTPUT_FIELDS = {"r1", "r2", "se", "lr"}
|
|
|
291
291
|
|
|
292
292
|
|
|
293
293
|
def rule_m017(component: str, ctx: dict) -> list[LintResult]:
|
|
294
|
-
"""prefix = task.ext.prefix ?: "${
|
|
294
|
+
"""prefix = task.ext.prefix ?: "${_meta.name}" present."""
|
|
295
295
|
rid = "M017"
|
|
296
296
|
if ctx["structure"]["has_prefix_definition"]:
|
|
297
297
|
return [_pass(rid, component, "prefix definition present")]
|
|
@@ -299,7 +299,7 @@ def rule_m017(component: str, ctx: dict) -> list[LintResult]:
|
|
|
299
299
|
_fail(
|
|
300
300
|
rid,
|
|
301
301
|
component,
|
|
302
|
-
'Missing: prefix = task.ext.prefix ?: "${
|
|
302
|
+
'Missing: prefix = task.ext.prefix ?: "${_meta.name}"',
|
|
303
303
|
)
|
|
304
304
|
]
|
|
305
305
|
|
|
@@ -958,6 +958,90 @@ def rule_m032(component: str, ctx: dict) -> list[LintResult]:
|
|
|
958
958
|
return [_fail(rid, component, f"@input record field mismatch: {'; '.join(msgs)}")]
|
|
959
959
|
|
|
960
960
|
|
|
961
|
+
def rule_m033(component: str, ctx: dict) -> list[LintResult]:
|
|
962
|
+
"""Optionality markers (?) match between GroovyDoc and code."""
|
|
963
|
+
rid = "M033"
|
|
964
|
+
doc = ctx["groovydoc"]
|
|
965
|
+
struct = ctx["structure"]
|
|
966
|
+
if not doc["has_doc"]:
|
|
967
|
+
return [] # M006 covers this
|
|
968
|
+
|
|
969
|
+
mismatches = []
|
|
970
|
+
|
|
971
|
+
# --- Input record field optionality ---
|
|
972
|
+
doc_records = doc.get("doc_input_records", [])
|
|
973
|
+
code_input_fields = struct.get("input_record_fields", [])
|
|
974
|
+
if doc_records and code_input_fields:
|
|
975
|
+
doc_optional = doc.get("doc_optional_input_fields", set())
|
|
976
|
+
code_optional = struct.get("code_optional_input_fields", set())
|
|
977
|
+
common = set(doc_records[0]["fields"]) & set(code_input_fields)
|
|
978
|
+
for field in sorted(common):
|
|
979
|
+
in_doc = field in doc_optional
|
|
980
|
+
in_code = field in code_optional
|
|
981
|
+
if in_doc and not in_code:
|
|
982
|
+
mismatches.append(
|
|
983
|
+
f"input record field '{field}': doc has ? but code does not"
|
|
984
|
+
)
|
|
985
|
+
elif in_code and not in_doc:
|
|
986
|
+
mismatches.append(
|
|
987
|
+
f"input record field '{field}': code has ? but doc does not"
|
|
988
|
+
)
|
|
989
|
+
|
|
990
|
+
# --- Input params optionality ---
|
|
991
|
+
doc_params = doc.get("doc_input_params", [])
|
|
992
|
+
code_params = struct.get("input_params", [])
|
|
993
|
+
if doc_params and code_params:
|
|
994
|
+
doc_opt_params = doc.get("doc_optional_input_params", set())
|
|
995
|
+
code_opt_params = struct.get("code_optional_input_params", set())
|
|
996
|
+
common = set(doc_params) & set(code_params)
|
|
997
|
+
for param in sorted(common):
|
|
998
|
+
in_doc = param in doc_opt_params
|
|
999
|
+
in_code = param in code_opt_params
|
|
1000
|
+
if in_doc and not in_code:
|
|
1001
|
+
mismatches.append(f"input param '{param}': doc has ? but code does not")
|
|
1002
|
+
elif in_code and not in_doc:
|
|
1003
|
+
mismatches.append(f"input param '{param}': code has ? but doc does not")
|
|
1004
|
+
|
|
1005
|
+
# --- Output record field optionality ---
|
|
1006
|
+
doc_output_fields = doc.get("doc_output_fields", [])
|
|
1007
|
+
code_output_fields = struct.get("output_record_fields", [])
|
|
1008
|
+
if doc_output_fields and code_output_fields:
|
|
1009
|
+
doc_opt_output = doc.get("doc_optional_output_fields", set())
|
|
1010
|
+
code_opt_output = struct.get("code_optional_output_fields", set())
|
|
1011
|
+
common = set(doc_output_fields) & set(code_output_fields)
|
|
1012
|
+
common -= STANDARD_OUTPUT_FIELDS
|
|
1013
|
+
for field in sorted(common):
|
|
1014
|
+
in_doc = field in doc_opt_output
|
|
1015
|
+
in_code = field in code_opt_output
|
|
1016
|
+
if in_doc and not in_code:
|
|
1017
|
+
mismatches.append(
|
|
1018
|
+
f"output field '{field}': doc has ? but code missing optional: true"
|
|
1019
|
+
)
|
|
1020
|
+
elif in_code and not in_doc:
|
|
1021
|
+
mismatches.append(
|
|
1022
|
+
f"output field '{field}': code has optional: true but doc missing ?"
|
|
1023
|
+
)
|
|
1024
|
+
|
|
1025
|
+
if mismatches:
|
|
1026
|
+
return [_fail(rid, component, f"Optionality mismatch: {'; '.join(mismatches)}")]
|
|
1027
|
+
|
|
1028
|
+
# Only PASS if there was something to check
|
|
1029
|
+
has_checks = (
|
|
1030
|
+
(doc_records and code_input_fields)
|
|
1031
|
+
or (doc_params and code_params)
|
|
1032
|
+
or (doc_output_fields and code_output_fields)
|
|
1033
|
+
)
|
|
1034
|
+
if has_checks:
|
|
1035
|
+
return [
|
|
1036
|
+
_pass(
|
|
1037
|
+
rid,
|
|
1038
|
+
component,
|
|
1039
|
+
"Optionality markers match between GroovyDoc and code",
|
|
1040
|
+
)
|
|
1041
|
+
]
|
|
1042
|
+
return []
|
|
1043
|
+
|
|
1044
|
+
|
|
961
1045
|
def rule_m034(component: str, ctx: dict) -> list[LintResult]:
|
|
962
1046
|
"""@output does not describe standard fields (meta, results, logs, nf_logs, versions)."""
|
|
963
1047
|
rid = "M034"
|
|
@@ -1158,6 +1242,7 @@ MODULE_RULES = [
|
|
|
1158
1242
|
# GroovyDoc accuracy
|
|
1159
1243
|
rule_m031,
|
|
1160
1244
|
rule_m032,
|
|
1245
|
+
rule_m033,
|
|
1161
1246
|
rule_m034,
|
|
1162
1247
|
rule_m035,
|
|
1163
1248
|
rule_m036,
|
|
@@ -285,6 +285,252 @@ def rule_s016(component: str, ctx: dict) -> list[LintResult]:
|
|
|
285
285
|
]
|
|
286
286
|
|
|
287
287
|
|
|
288
|
+
def _parse_doc_component_list(tag_value: str) -> set[str]:
|
|
289
|
+
"""Parse a @modules or @subworkflows tag value into a set of normalized names.
|
|
290
|
+
|
|
291
|
+
Handles comma-separated names with optional 'as alias' notation.
|
|
292
|
+
E.g., "prokka as prokka_module, csvtk_concat" -> {"prokka", "csvtk_concat"}
|
|
293
|
+
"""
|
|
294
|
+
names = set()
|
|
295
|
+
if not tag_value:
|
|
296
|
+
return names
|
|
297
|
+
for entry in tag_value.split(","):
|
|
298
|
+
entry = entry.strip()
|
|
299
|
+
if not entry:
|
|
300
|
+
continue
|
|
301
|
+
# Handle "name as alias" notation -- extract base name
|
|
302
|
+
parts = entry.split()
|
|
303
|
+
if len(parts) >= 3 and parts[1] == "as":
|
|
304
|
+
names.add(parts[0])
|
|
305
|
+
else:
|
|
306
|
+
names.add(parts[0])
|
|
307
|
+
return names
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
def rule_s017(component: str, ctx: dict) -> list[LintResult]:
|
|
311
|
+
"""@modules match actual module includes."""
|
|
312
|
+
rid = "S017"
|
|
313
|
+
doc = ctx["groovydoc"]
|
|
314
|
+
if not doc["has_doc"]:
|
|
315
|
+
return []
|
|
316
|
+
includes = ctx.get("includes", {})
|
|
317
|
+
actual_modules = set(includes.get("modules", []))
|
|
318
|
+
doc_value = doc["tags"].get("modules", "")
|
|
319
|
+
doc_modules = _parse_doc_component_list(doc_value)
|
|
320
|
+
# Skip if neither GroovyDoc nor includes mention modules
|
|
321
|
+
if not actual_modules and not doc_modules:
|
|
322
|
+
return []
|
|
323
|
+
if doc_modules == actual_modules:
|
|
324
|
+
return [_pass(rid, component, "@modules match actual includes")]
|
|
325
|
+
missing = actual_modules - doc_modules
|
|
326
|
+
extra = doc_modules - actual_modules
|
|
327
|
+
parts = []
|
|
328
|
+
if missing:
|
|
329
|
+
parts.append(f"missing from @modules: {', '.join(sorted(missing))}")
|
|
330
|
+
if extra:
|
|
331
|
+
parts.append(f"extra in @modules: {', '.join(sorted(extra))}")
|
|
332
|
+
return [_fail(rid, component, f"@modules mismatch: {'; '.join(parts)}")]
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
def rule_s018(component: str, ctx: dict) -> list[LintResult]:
|
|
336
|
+
"""@subworkflows match actual subworkflow includes."""
|
|
337
|
+
rid = "S018"
|
|
338
|
+
doc = ctx["groovydoc"]
|
|
339
|
+
if not doc["has_doc"]:
|
|
340
|
+
return []
|
|
341
|
+
includes = ctx.get("includes", {})
|
|
342
|
+
actual_subs = set(includes.get("subworkflows", []))
|
|
343
|
+
doc_value = doc["tags"].get("subworkflows", "")
|
|
344
|
+
doc_subs = _parse_doc_component_list(doc_value)
|
|
345
|
+
# Skip if neither GroovyDoc nor includes mention subworkflows
|
|
346
|
+
if not actual_subs and not doc_subs:
|
|
347
|
+
return []
|
|
348
|
+
if doc_subs == actual_subs:
|
|
349
|
+
return [_pass(rid, component, "@subworkflows match actual includes")]
|
|
350
|
+
missing = actual_subs - doc_subs
|
|
351
|
+
extra = doc_subs - actual_subs
|
|
352
|
+
parts = []
|
|
353
|
+
if missing:
|
|
354
|
+
parts.append(f"missing from @subworkflows: {', '.join(sorted(missing))}")
|
|
355
|
+
if extra:
|
|
356
|
+
parts.append(f"extra in @subworkflows: {', '.join(sorted(extra))}")
|
|
357
|
+
return [_fail(rid, component, f"@subworkflows mismatch: {'; '.join(parts)}")]
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def rule_s019(component: str, ctx: dict) -> list[LintResult]:
|
|
361
|
+
"""@citation keys exist in data/citations.yml."""
|
|
362
|
+
rid = "S019"
|
|
363
|
+
doc = ctx["groovydoc"]
|
|
364
|
+
if not doc["has_doc"]:
|
|
365
|
+
return []
|
|
366
|
+
citation_value = doc["tags"].get("citation", "")
|
|
367
|
+
if not citation_value:
|
|
368
|
+
return [] # S003 covers missing @citation
|
|
369
|
+
citation_keys = ctx.get("citation_keys", set())
|
|
370
|
+
if not citation_keys:
|
|
371
|
+
return [] # citations.yml not available -- skip check
|
|
372
|
+
keys = [k.strip() for k in citation_value.split(",")]
|
|
373
|
+
invalid = [k for k in keys if k and k not in citation_keys]
|
|
374
|
+
if invalid:
|
|
375
|
+
return [
|
|
376
|
+
_fail(
|
|
377
|
+
rid,
|
|
378
|
+
component,
|
|
379
|
+
f"@citation keys not in citations.yml: {', '.join(invalid)}",
|
|
380
|
+
)
|
|
381
|
+
]
|
|
382
|
+
return [_pass(rid, component, "All @citation keys are valid")]
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def rule_s020(component: str, ctx: dict) -> list[LintResult]:
|
|
386
|
+
"""@tags complexity value is valid."""
|
|
387
|
+
rid = "S020"
|
|
388
|
+
tags = ctx["groovydoc"]["tags"]
|
|
389
|
+
tags_value = tags.get("tags", "")
|
|
390
|
+
if not tags_value:
|
|
391
|
+
return []
|
|
392
|
+
parsed = _parse_tags_field(tags_value)
|
|
393
|
+
complexity = parsed.get("complexity", "")
|
|
394
|
+
if not complexity:
|
|
395
|
+
return []
|
|
396
|
+
valid = {"simple", "moderate", "complex"}
|
|
397
|
+
if complexity in valid:
|
|
398
|
+
return [_pass(rid, component, f"complexity:{complexity} is valid")]
|
|
399
|
+
return [
|
|
400
|
+
_warn(
|
|
401
|
+
rid,
|
|
402
|
+
component,
|
|
403
|
+
f"Invalid complexity value '{complexity}', expected one of: {', '.join(sorted(valid))}",
|
|
404
|
+
)
|
|
405
|
+
]
|
|
406
|
+
|
|
407
|
+
|
|
408
|
+
def rule_s021(component: str, ctx: dict) -> list[LintResult]:
|
|
409
|
+
"""@tags input-type value is valid."""
|
|
410
|
+
rid = "S021"
|
|
411
|
+
tags = ctx["groovydoc"]["tags"]
|
|
412
|
+
tags_value = tags.get("tags", "")
|
|
413
|
+
if not tags_value:
|
|
414
|
+
return []
|
|
415
|
+
parsed = _parse_tags_field(tags_value)
|
|
416
|
+
input_type = parsed.get("input-type", "")
|
|
417
|
+
if not input_type:
|
|
418
|
+
return []
|
|
419
|
+
valid = {"none", "single", "multiple", "parameter"}
|
|
420
|
+
if input_type in valid:
|
|
421
|
+
return [_pass(rid, component, f"input-type:{input_type} is valid")]
|
|
422
|
+
return [
|
|
423
|
+
_warn(
|
|
424
|
+
rid,
|
|
425
|
+
component,
|
|
426
|
+
f"Invalid input-type value '{input_type}', expected one of: {', '.join(sorted(valid))}",
|
|
427
|
+
)
|
|
428
|
+
]
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
def rule_s022(component: str, ctx: dict) -> list[LintResult]:
|
|
432
|
+
"""@tags output-type value is valid."""
|
|
433
|
+
rid = "S022"
|
|
434
|
+
tags = ctx["groovydoc"]["tags"]
|
|
435
|
+
tags_value = tags.get("tags", "")
|
|
436
|
+
if not tags_value:
|
|
437
|
+
return []
|
|
438
|
+
parsed = _parse_tags_field(tags_value)
|
|
439
|
+
output_type = parsed.get("output-type", "")
|
|
440
|
+
if not output_type:
|
|
441
|
+
return []
|
|
442
|
+
valid = {"single", "multiple"}
|
|
443
|
+
if output_type in valid:
|
|
444
|
+
return [_pass(rid, component, f"output-type:{output_type} is valid")]
|
|
445
|
+
return [
|
|
446
|
+
_warn(
|
|
447
|
+
rid,
|
|
448
|
+
component,
|
|
449
|
+
f"Invalid output-type value '{output_type}', expected one of: {', '.join(sorted(valid))}",
|
|
450
|
+
)
|
|
451
|
+
]
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
VALID_FEATURES = {
|
|
455
|
+
"aggregation",
|
|
456
|
+
"alternative-execution",
|
|
457
|
+
"archive-output",
|
|
458
|
+
"components",
|
|
459
|
+
"compression",
|
|
460
|
+
"conditional-input",
|
|
461
|
+
"conditional-logic",
|
|
462
|
+
"database-dependent",
|
|
463
|
+
"internet-access",
|
|
464
|
+
"no-test",
|
|
465
|
+
"resource-download",
|
|
466
|
+
}
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
def rule_s023(component: str, ctx: dict) -> list[LintResult]:
|
|
470
|
+
"""@tags features values are valid."""
|
|
471
|
+
rid = "S023"
|
|
472
|
+
tags = ctx["groovydoc"]["tags"]
|
|
473
|
+
tags_value = tags.get("tags", "")
|
|
474
|
+
if not tags_value:
|
|
475
|
+
return []
|
|
476
|
+
parsed = _parse_tags_field(tags_value)
|
|
477
|
+
features = parsed.get("features", "")
|
|
478
|
+
if not features:
|
|
479
|
+
return []
|
|
480
|
+
feature_list = [f.strip() for f in features.split(",")]
|
|
481
|
+
invalid = [f for f in feature_list if f and f not in VALID_FEATURES]
|
|
482
|
+
if invalid:
|
|
483
|
+
return [
|
|
484
|
+
_fail(
|
|
485
|
+
rid,
|
|
486
|
+
component,
|
|
487
|
+
f"Invalid feature values: {', '.join(invalid)} "
|
|
488
|
+
f"(valid: {', '.join(sorted(VALID_FEATURES))})",
|
|
489
|
+
)
|
|
490
|
+
]
|
|
491
|
+
return [_pass(rid, component, "All feature values are valid")]
|
|
492
|
+
|
|
493
|
+
|
|
494
|
+
# Canonical tag order for subworkflows
|
|
495
|
+
SUBWORKFLOW_TAG_ORDER = [
|
|
496
|
+
"status",
|
|
497
|
+
"keywords",
|
|
498
|
+
"tags",
|
|
499
|
+
"citation",
|
|
500
|
+
"modules",
|
|
501
|
+
"subworkflows",
|
|
502
|
+
"note",
|
|
503
|
+
"input",
|
|
504
|
+
"output",
|
|
505
|
+
]
|
|
506
|
+
|
|
507
|
+
|
|
508
|
+
def rule_s024(component: str, ctx: dict) -> list[LintResult]:
|
|
509
|
+
"""GroovyDoc tag ordering."""
|
|
510
|
+
rid = "S024"
|
|
511
|
+
doc = ctx["groovydoc"]
|
|
512
|
+
if not doc["has_doc"]:
|
|
513
|
+
return []
|
|
514
|
+
actual_order = doc.get("doc_tag_order", [])
|
|
515
|
+
if not actual_order:
|
|
516
|
+
return []
|
|
517
|
+
known_order = [t for t in actual_order if t in SUBWORKFLOW_TAG_ORDER]
|
|
518
|
+
expected_positions = {t: i for i, t in enumerate(SUBWORKFLOW_TAG_ORDER)}
|
|
519
|
+
for i in range(len(known_order) - 1):
|
|
520
|
+
curr = known_order[i]
|
|
521
|
+
nxt = known_order[i + 1]
|
|
522
|
+
if expected_positions[curr] > expected_positions[nxt]:
|
|
523
|
+
return [
|
|
524
|
+
_warn(
|
|
525
|
+
rid,
|
|
526
|
+
component,
|
|
527
|
+
f"Tag ordering incorrect: @{curr} appears before @{nxt} "
|
|
528
|
+
f"(expected: {' -> '.join('@' + t for t in SUBWORKFLOW_TAG_ORDER if t in known_order)})",
|
|
529
|
+
)
|
|
530
|
+
]
|
|
531
|
+
return [_pass(rid, component, "GroovyDoc tag ordering is correct")]
|
|
532
|
+
|
|
533
|
+
|
|
288
534
|
SUBWORKFLOW_RULES = [
|
|
289
535
|
rule_s001,
|
|
290
536
|
rule_s002,
|
|
@@ -302,4 +548,12 @@ SUBWORKFLOW_RULES = [
|
|
|
302
548
|
rule_s014,
|
|
303
549
|
rule_s015,
|
|
304
550
|
rule_s016,
|
|
551
|
+
rule_s017,
|
|
552
|
+
rule_s018,
|
|
553
|
+
rule_s019,
|
|
554
|
+
rule_s020,
|
|
555
|
+
rule_s021,
|
|
556
|
+
rule_s022,
|
|
557
|
+
rule_s023,
|
|
558
|
+
rule_s024,
|
|
305
559
|
]
|
|
@@ -12,6 +12,7 @@ from bactopia.nf import (
|
|
|
12
12
|
check_file_whitespace,
|
|
13
13
|
find_main_nf,
|
|
14
14
|
parse_groovydoc_full,
|
|
15
|
+
parse_includes,
|
|
15
16
|
parse_main_nf_structure,
|
|
16
17
|
parse_module_config_full,
|
|
17
18
|
parse_schema_json,
|
|
@@ -220,6 +221,9 @@ def run_lint(
|
|
|
220
221
|
if "/utils/" in component_name:
|
|
221
222
|
continue
|
|
222
223
|
ctx = _build_simple_context(main_nf)
|
|
224
|
+
ctx["citation_keys"] = citation_keys
|
|
225
|
+
ctx["bactopia_path"] = bactopia_path
|
|
226
|
+
ctx["includes"] = parse_includes(main_nf, bactopia_path)
|
|
223
227
|
ignored = _collect_ignores(main_nf.parent)
|
|
224
228
|
results = _run_rules(component_name, ctx, SUBWORKFLOW_RULES, ignored)
|
|
225
229
|
all_results.extend(results)
|