bactopia 2.0.1.tar.gz → 2.0.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bactopia-2.0.1 → bactopia-2.0.2}/PKG-INFO +1 -1
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/catalog.py +35 -75
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/rules/module_rules.py +87 -2
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/rules/subworkflow_rules.py +254 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/runner.py +4 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/nf.py +152 -23
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/outputs.py +11 -4
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/templates/nextflow/nextflow.config.j2 +5 -5
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/utils.py +4 -2
- {bactopia-2.0.1 → bactopia-2.0.2}/pyproject.toml +1 -1
- {bactopia-2.0.1 → bactopia-2.0.2}/LICENSE +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/README.md +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/atb.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/atb/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/atb/atb_downloader.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/atb/atb_formatter.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/citations.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/datasets.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/download.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/helpers/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/helpers/merge_schemas.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/jsonify.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/lint.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/bracken_to_excel.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/check_assembly_accession.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/check_fastqs.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/cleanup_coverage.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/kraken_bracken_summary.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/mask_consensus.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/scrubber_summary.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/teton_prepare.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/prepare.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/prune.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pubmlst/build.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pubmlst/setup.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/review.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/search.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/status.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/summary.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/testing.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/update.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/workflows.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/databases/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/databases/ena.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/databases/ncbi.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/databases/pubmlst/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/databases/pubmlst/constants.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/databases/pubmlst/utils.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/models.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/rules/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/rules/workflow_rules.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parse.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/amrfinderplus.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/annotator.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/ariba.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/assembler.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/blast.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/citations.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/coverage.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/error.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/gather.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/generic.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/kraken.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/mapping.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/mlst.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/nextflow.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/parsables.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/qc.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/sketcher.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/variants.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/versions.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/workflows.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/reports/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/reports/templates/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/summary.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/templates/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/templates/logos.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/templates/nextflow/params.config.j2 +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/templates/nextflow/process.config.j2 +0 -0
{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/catalog.py

@@ -18,6 +18,7 @@ from bactopia.nf import (
     find_main_nf,
     get_bactopia_version,
     parse_groovydoc_full,
+    parse_includes,
     parse_main_nf_structure,
     parse_module_config_full,
     parse_workflow_config,
@@ -29,71 +30,6 @@ rich.traceback.install(console=stderr, width=200, word_wrap=True, extra_lines=1)
 click.rich_click.USE_RICH_MARKUP = True
 
 
-def _parse_includes(main_nf: Path, bactopia_path: Path) -> dict:
-    """Parse include statements from a main.nf file.
-
-    Resolves source paths against the file's directory and the repo root
-    to derive normalized component keys (lowercase, underscore-separated).
-
-    Returns dict with:
-        modules: list of module keys (e.g., "abricate_run")
-        subworkflows: list of subworkflow keys (e.g., "bactopia_gather")
-        plugins: list of plugin function names
-    """
-    result = {"modules": [], "subworkflows": [], "plugins": []}
-    if not main_nf.exists():
-        return result
-
-    try:
-        text = main_nf.read_text()
-    except OSError:
-        return result
-
-    seen_modules = set()
-    seen_subworkflows = set()
-
-    for m in re.finditer(
-        r"include\s*\{\s*(\w+)(?:\s+as\s+\w+)?\s*\}\s*from\s*['\"]([^'\"]+)['\"]",
-        text,
-    ):
-        source = m.group(2)
-
-        if "plugin/" in source:
-            result["plugins"].append(m.group(1))
-            continue
-
-        # Resolve the source path relative to the file's directory
-        # Nextflow source paths omit .nf extension; parent of resolved path
-        # is the component directory
-        resolved = (main_nf.parent / source).resolve()
-
-        try:
-            rel_str = str(resolved.relative_to(bactopia_path))
-        except ValueError:
-            continue
-
-        if rel_str.startswith("modules/"):
-            # e.g., "modules/abricate/run/main" -> "abricate/run"
-            component = rel_str.removeprefix("modules/")
-            if component.endswith("/main"):
-                component = component[:-5]
-            key = component.replace("/", "_")
-            if key not in seen_modules:
-                seen_modules.add(key)
-                result["modules"].append(key)
-        elif rel_str.startswith("subworkflows/"):
-            # e.g., "subworkflows/bactopia/gather/main" -> "bactopia/gather"
-            component = rel_str.removeprefix("subworkflows/")
-            if component.endswith("/main"):
-                component = component[:-5]
-            key = component.replace("/", "_")
-            if key not in seen_subworkflows:
-                seen_subworkflows.add(key)
-                result["subworkflows"].append(key)
-
-    return result
-
-
 def _extract_description(groovydoc: dict) -> str:
     """Extract the first line description from GroovyDoc raw lines."""
     if not groovydoc.get("has_doc") or not groovydoc.get("raw_lines"):
@@ -117,7 +53,7 @@ def _parse_output_fields(raw_lines: list[str]) -> dict[str, list[str]]:
     Dict mapping channel names to lists of field names, e.g.,
     {"sample_outputs": ["gff", "gbk", ...], "run_outputs": []}.
     """
-    field_pattern = re.compile(r"\*\s*-\s*`(\w+)`\s*:")
+    field_pattern = re.compile(r"\*\s*-\s*`(\w+\??)`\s*:")
     output_pattern = re.compile(r"\*\s*@output\s+(\S+)")
     tag_pattern = re.compile(r"\*\s*@(?!output)\w+")
 
@@ -141,7 +77,7 @@ def _parse_output_fields(raw_lines: list[str]) -> dict[str, list[str]]:
         if current_channel is not None:
             fm = field_pattern.search(line)
             if fm:
-                channels[current_channel].append(fm.group(1))
+                channels[current_channel].append(fm.group(1).rstrip("?"))
 
     return channels
 
@@ -211,14 +147,16 @@ def _clean_scope(raw: str) -> str:
     return raw.strip().strip('"').strip("'")
 
 
-def _build_module_entry(component_name: str, main_nf: Path) -> dict:
+def _build_module_entry(
+    component_name: str, main_nf: Path, bactopia_path: Path
+) -> dict:
     """Build a catalog entry for a module."""
     groovydoc = parse_groovydoc_full(main_nf)
     config = parse_module_config_full(main_nf.parent / "module.config")
 
     entry = {
         "description": _extract_description(groovydoc),
-        "path": str(main_nf.parent.relative_to(
+        "path": str(main_nf.parent.relative_to(bactopia_path)) + "/",
     }
 
     # Scope and process_name from config
@@ -236,6 +174,11 @@ def _build_module_entry(component_name: str, main_nf: Path) -> dict:
         fields = groovydoc["doc_input_records"][0].get("fields", [])
         if fields:
             entry["takes"] = [f for f in fields if f != "meta"]
+            optional_input = groovydoc.get("doc_optional_input_fields", set())
+            if optional_input:
+                takes_opt = [f for f in entry["takes"] if f in optional_input]
+                if takes_opt:
+                    entry["takes_optional"] = takes_opt
 
     # Emits from GroovyDoc @output (named fields only)
     if groovydoc.get("doc_output_fields"):
@@ -243,6 +186,11 @@ def _build_module_entry(component_name: str, main_nf: Path) -> dict:
         named = [f for f in groovydoc["doc_output_fields"] if f not in standard]
         if named:
             entry["emits"] = named
+            optional_output = groovydoc.get("doc_optional_output_fields", set())
+            if optional_output:
+                emits_opt = [f for f in named if f in optional_output]
+                if emits_opt:
+                    entry["emits_optional"] = emits_opt
 
     # Tags from GroovyDoc @tags
     parsed_tags = _parse_tags(groovydoc)
@@ -257,11 +205,11 @@ def _build_subworkflow_entry(
 ) -> dict:
     """Build a catalog entry for a subworkflow."""
     groovydoc = parse_groovydoc_full(main_nf)
-    includes = _parse_includes(main_nf, bactopia_path)
+    includes = parse_includes(main_nf, bactopia_path)
 
     entry = {
         "description": _extract_description(groovydoc),
-        "path": str(main_nf.parent.relative_to(
+        "path": str(main_nf.parent.relative_to(bactopia_path)) + "/",
     }
 
     # Takes from GroovyDoc @input
@@ -269,8 +217,18 @@ def _build_subworkflow_entry(
         fields = groovydoc["doc_input_records"][0].get("fields", [])
         if fields:
             entry["takes"] = [f for f in fields if f != "meta"]
+            optional_input = groovydoc.get("doc_optional_input_fields", set())
+            if optional_input:
+                takes_opt = [f for f in entry["takes"] if f in optional_input]
+                if takes_opt:
+                    entry["takes_optional"] = takes_opt
     if groovydoc.get("doc_input_params"):
         entry["takes_params"] = groovydoc["doc_input_params"]
+        optional_params = groovydoc.get("doc_optional_input_params", set())
+        if optional_params:
+            params_opt = [p for p in entry["takes_params"] if p in optional_params]
+            if params_opt:
+                entry["takes_params_optional"] = params_opt
 
     # Emits from GroovyDoc @output -- structured as channel -> fields dict
     tags = groovydoc.get("tags", {})
@@ -306,14 +264,14 @@ def _build_workflow_entry(
 ) -> dict:
     """Build a catalog entry for a workflow."""
     groovydoc = parse_groovydoc_full(main_nf)
-    includes = _parse_includes(main_nf, bactopia_path)
+    includes = parse_includes(main_nf, bactopia_path)
 
     # Determine type
     is_tool = "bactopia-tools/" in str(main_nf)
-    wf_path = str(main_nf.parent.relative_to(
+    wf_path = str(main_nf.parent.relative_to(bactopia_path))
     # Add trailing slash for tool/named workflow directories, but not for the
     # root bactopia workflow which uses a Nextflow convention path
-    if is_tool or wf_path != "
+    if is_tool or wf_path != ".":
         wf_path += "/"
     entry = {
         "description": _extract_description(groovydoc),
@@ -379,7 +337,9 @@ def generate_catalog(bactopia_path: Path) -> dict:
         component_name = str(rel).replace("modules/", "")
         # Normalize key: slash to underscore (e.g., "abricate/run" -> "abricate_run")
         key = component_name.replace("/", "_")
-        catalog["modules"][key] = _build_module_entry(component_name, main_nf)
+        catalog["modules"][key] = _build_module_entry(
+            component_name, main_nf, bactopia_path
+        )
 
     # Subworkflows
     subworkflows_dir = bactopia_path / "subworkflows"

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/rules/module_rules.py

@@ -291,7 +291,7 @@ PASSTHROUGH_OUTPUT_FIELDS = {"r1", "r2", "se", "lr"}
 
 
 def rule_m017(component: str, ctx: dict) -> list[LintResult]:
-    """prefix = task.ext.prefix ?: "${
+    """prefix = task.ext.prefix ?: "${_meta.name}" present."""
     rid = "M017"
     if ctx["structure"]["has_prefix_definition"]:
         return [_pass(rid, component, "prefix definition present")]
@@ -299,7 +299,7 @@ def rule_m017(component: str, ctx: dict) -> list[LintResult]:
         _fail(
             rid,
             component,
-            'Missing: prefix = task.ext.prefix ?: "${
+            'Missing: prefix = task.ext.prefix ?: "${_meta.name}"',
         )
     ]
 
@@ -958,6 +958,90 @@ def rule_m032(component: str, ctx: dict) -> list[LintResult]:
     return [_fail(rid, component, f"@input record field mismatch: {'; '.join(msgs)}")]
 
 
+def rule_m033(component: str, ctx: dict) -> list[LintResult]:
+    """Optionality markers (?) match between GroovyDoc and code."""
+    rid = "M033"
+    doc = ctx["groovydoc"]
+    struct = ctx["structure"]
+    if not doc["has_doc"]:
+        return []  # M006 covers this
+
+    mismatches = []
+
+    # --- Input record field optionality ---
+    doc_records = doc.get("doc_input_records", [])
+    code_input_fields = struct.get("input_record_fields", [])
+    if doc_records and code_input_fields:
+        doc_optional = doc.get("doc_optional_input_fields", set())
+        code_optional = struct.get("code_optional_input_fields", set())
+        common = set(doc_records[0]["fields"]) & set(code_input_fields)
+        for field in sorted(common):
+            in_doc = field in doc_optional
+            in_code = field in code_optional
+            if in_doc and not in_code:
+                mismatches.append(
+                    f"input record field '{field}': doc has ? but code does not"
+                )
+            elif in_code and not in_doc:
+                mismatches.append(
+                    f"input record field '{field}': code has ? but doc does not"
+                )
+
+    # --- Input params optionality ---
+    doc_params = doc.get("doc_input_params", [])
+    code_params = struct.get("input_params", [])
+    if doc_params and code_params:
+        doc_opt_params = doc.get("doc_optional_input_params", set())
+        code_opt_params = struct.get("code_optional_input_params", set())
+        common = set(doc_params) & set(code_params)
+        for param in sorted(common):
+            in_doc = param in doc_opt_params
+            in_code = param in code_opt_params
+            if in_doc and not in_code:
+                mismatches.append(f"input param '{param}': doc has ? but code does not")
+            elif in_code and not in_doc:
+                mismatches.append(f"input param '{param}': code has ? but doc does not")
+
+    # --- Output record field optionality ---
+    doc_output_fields = doc.get("doc_output_fields", [])
+    code_output_fields = struct.get("output_record_fields", [])
+    if doc_output_fields and code_output_fields:
+        doc_opt_output = doc.get("doc_optional_output_fields", set())
+        code_opt_output = struct.get("code_optional_output_fields", set())
+        common = set(doc_output_fields) & set(code_output_fields)
+        common -= STANDARD_OUTPUT_FIELDS
+        for field in sorted(common):
+            in_doc = field in doc_opt_output
+            in_code = field in code_opt_output
+            if in_doc and not in_code:
+                mismatches.append(
+                    f"output field '{field}': doc has ? but code missing optional: true"
+                )
+            elif in_code and not in_doc:
+                mismatches.append(
+                    f"output field '{field}': code has optional: true but doc missing ?"
+                )
+
+    if mismatches:
+        return [_fail(rid, component, f"Optionality mismatch: {'; '.join(mismatches)}")]
+
+    # Only PASS if there was something to check
+    has_checks = (
+        (doc_records and code_input_fields)
+        or (doc_params and code_params)
+        or (doc_output_fields and code_output_fields)
+    )
+    if has_checks:
+        return [
+            _pass(
+                rid,
+                component,
+                "Optionality markers match between GroovyDoc and code",
+            )
+        ]
+    return []
+
+
 def rule_m034(component: str, ctx: dict) -> list[LintResult]:
     """@output does not describe standard fields (meta, results, logs, nf_logs, versions)."""
     rid = "M034"
@@ -1158,6 +1242,7 @@ MODULE_RULES = [
     # GroovyDoc accuracy
     rule_m031,
     rule_m032,
+    rule_m033,
     rule_m034,
     rule_m035,
     rule_m036,

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/rules/subworkflow_rules.py

@@ -285,6 +285,252 @@ def rule_s016(component: str, ctx: dict) -> list[LintResult]:
     ]
 
 
+def _parse_doc_component_list(tag_value: str) -> set[str]:
+    """Parse a @modules or @subworkflows tag value into a set of normalized names.
+
+    Handles comma-separated names with optional 'as alias' notation.
+    E.g., "prokka as prokka_module, csvtk_concat" -> {"prokka", "csvtk_concat"}
+    """
+    names = set()
+    if not tag_value:
+        return names
+    for entry in tag_value.split(","):
+        entry = entry.strip()
+        if not entry:
+            continue
+        # Handle "name as alias" notation -- extract base name
+        parts = entry.split()
+        if len(parts) >= 3 and parts[1] == "as":
+            names.add(parts[0])
+        else:
+            names.add(parts[0])
+    return names
+
+
+def rule_s017(component: str, ctx: dict) -> list[LintResult]:
+    """@modules match actual module includes."""
+    rid = "S017"
+    doc = ctx["groovydoc"]
+    if not doc["has_doc"]:
+        return []
+    includes = ctx.get("includes", {})
+    actual_modules = set(includes.get("modules", []))
+    doc_value = doc["tags"].get("modules", "")
+    doc_modules = _parse_doc_component_list(doc_value)
+    # Skip if neither GroovyDoc nor includes mention modules
+    if not actual_modules and not doc_modules:
+        return []
+    if doc_modules == actual_modules:
+        return [_pass(rid, component, "@modules match actual includes")]
+    missing = actual_modules - doc_modules
+    extra = doc_modules - actual_modules
+    parts = []
+    if missing:
+        parts.append(f"missing from @modules: {', '.join(sorted(missing))}")
+    if extra:
+        parts.append(f"extra in @modules: {', '.join(sorted(extra))}")
+    return [_fail(rid, component, f"@modules mismatch: {'; '.join(parts)}")]
+
+
+def rule_s018(component: str, ctx: dict) -> list[LintResult]:
+    """@subworkflows match actual subworkflow includes."""
+    rid = "S018"
+    doc = ctx["groovydoc"]
+    if not doc["has_doc"]:
+        return []
+    includes = ctx.get("includes", {})
+    actual_subs = set(includes.get("subworkflows", []))
+    doc_value = doc["tags"].get("subworkflows", "")
+    doc_subs = _parse_doc_component_list(doc_value)
+    # Skip if neither GroovyDoc nor includes mention subworkflows
+    if not actual_subs and not doc_subs:
+        return []
+    if doc_subs == actual_subs:
+        return [_pass(rid, component, "@subworkflows match actual includes")]
+    missing = actual_subs - doc_subs
+    extra = doc_subs - actual_subs
+    parts = []
+    if missing:
+        parts.append(f"missing from @subworkflows: {', '.join(sorted(missing))}")
+    if extra:
+        parts.append(f"extra in @subworkflows: {', '.join(sorted(extra))}")
+    return [_fail(rid, component, f"@subworkflows mismatch: {'; '.join(parts)}")]
+
+
+def rule_s019(component: str, ctx: dict) -> list[LintResult]:
+    """@citation keys exist in data/citations.yml."""
+    rid = "S019"
+    doc = ctx["groovydoc"]
+    if not doc["has_doc"]:
+        return []
+    citation_value = doc["tags"].get("citation", "")
+    if not citation_value:
+        return []  # S003 covers missing @citation
+    citation_keys = ctx.get("citation_keys", set())
+    if not citation_keys:
+        return []  # citations.yml not available -- skip check
+    keys = [k.strip() for k in citation_value.split(",")]
+    invalid = [k for k in keys if k and k not in citation_keys]
+    if invalid:
+        return [
+            _fail(
+                rid,
+                component,
+                f"@citation keys not in citations.yml: {', '.join(invalid)}",
+            )
+        ]
+    return [_pass(rid, component, "All @citation keys are valid")]
+
+
+def rule_s020(component: str, ctx: dict) -> list[LintResult]:
+    """@tags complexity value is valid."""
+    rid = "S020"
+    tags = ctx["groovydoc"]["tags"]
+    tags_value = tags.get("tags", "")
+    if not tags_value:
+        return []
+    parsed = _parse_tags_field(tags_value)
+    complexity = parsed.get("complexity", "")
+    if not complexity:
+        return []
+    valid = {"simple", "moderate", "complex"}
+    if complexity in valid:
+        return [_pass(rid, component, f"complexity:{complexity} is valid")]
+    return [
+        _warn(
+            rid,
+            component,
+            f"Invalid complexity value '{complexity}', expected one of: {', '.join(sorted(valid))}",
+        )
+    ]
+
+
+def rule_s021(component: str, ctx: dict) -> list[LintResult]:
+    """@tags input-type value is valid."""
+    rid = "S021"
+    tags = ctx["groovydoc"]["tags"]
+    tags_value = tags.get("tags", "")
+    if not tags_value:
+        return []
+    parsed = _parse_tags_field(tags_value)
+    input_type = parsed.get("input-type", "")
+    if not input_type:
+        return []
+    valid = {"none", "single", "multiple", "parameter"}
+    if input_type in valid:
+        return [_pass(rid, component, f"input-type:{input_type} is valid")]
+    return [
+        _warn(
+            rid,
+            component,
+            f"Invalid input-type value '{input_type}', expected one of: {', '.join(sorted(valid))}",
+        )
+    ]
+
+
+def rule_s022(component: str, ctx: dict) -> list[LintResult]:
+    """@tags output-type value is valid."""
+    rid = "S022"
+    tags = ctx["groovydoc"]["tags"]
+    tags_value = tags.get("tags", "")
+    if not tags_value:
+        return []
+    parsed = _parse_tags_field(tags_value)
+    output_type = parsed.get("output-type", "")
+    if not output_type:
+        return []
+    valid = {"single", "multiple"}
+    if output_type in valid:
+        return [_pass(rid, component, f"output-type:{output_type} is valid")]
+    return [
+        _warn(
+            rid,
+            component,
+            f"Invalid output-type value '{output_type}', expected one of: {', '.join(sorted(valid))}",
+        )
+    ]
+
+
+VALID_FEATURES = {
+    "aggregation",
+    "alternative-execution",
+    "archive-output",
+    "components",
+    "compression",
+    "conditional-input",
+    "conditional-logic",
+    "database-dependent",
+    "internet-access",
+    "no-test",
+    "resource-download",
+}
+
+
+def rule_s023(component: str, ctx: dict) -> list[LintResult]:
+    """@tags features values are valid."""
+    rid = "S023"
+    tags = ctx["groovydoc"]["tags"]
+    tags_value = tags.get("tags", "")
+    if not tags_value:
+        return []
+    parsed = _parse_tags_field(tags_value)
+    features = parsed.get("features", "")
+    if not features:
+        return []
+    feature_list = [f.strip() for f in features.split(",")]
+    invalid = [f for f in feature_list if f and f not in VALID_FEATURES]
+    if invalid:
+        return [
+            _fail(
+                rid,
+                component,
+                f"Invalid feature values: {', '.join(invalid)} "
+                f"(valid: {', '.join(sorted(VALID_FEATURES))})",
+            )
+        ]
+    return [_pass(rid, component, "All feature values are valid")]
+
+
+# Canonical tag order for subworkflows
+SUBWORKFLOW_TAG_ORDER = [
+    "status",
+    "keywords",
+    "tags",
+    "citation",
+    "modules",
+    "subworkflows",
+    "note",
+    "input",
+    "output",
+]
+
+
+def rule_s024(component: str, ctx: dict) -> list[LintResult]:
+    """GroovyDoc tag ordering."""
+    rid = "S024"
+    doc = ctx["groovydoc"]
+    if not doc["has_doc"]:
+        return []
+    actual_order = doc.get("doc_tag_order", [])
+    if not actual_order:
+        return []
+    known_order = [t for t in actual_order if t in SUBWORKFLOW_TAG_ORDER]
+    expected_positions = {t: i for i, t in enumerate(SUBWORKFLOW_TAG_ORDER)}
+    for i in range(len(known_order) - 1):
+        curr = known_order[i]
+        nxt = known_order[i + 1]
+        if expected_positions[curr] > expected_positions[nxt]:
+            return [
+                _warn(
+                    rid,
+                    component,
+                    f"Tag ordering incorrect: @{curr} appears before @{nxt} "
+                    f"(expected: {' -> '.join('@' + t for t in SUBWORKFLOW_TAG_ORDER if t in known_order)})",
+                )
+            ]
+    return [_pass(rid, component, "GroovyDoc tag ordering is correct")]
+
+
 SUBWORKFLOW_RULES = [
     rule_s001,
     rule_s002,
@@ -302,4 +548,12 @@ SUBWORKFLOW_RULES = [
     rule_s014,
     rule_s015,
     rule_s016,
+    rule_s017,
+    rule_s018,
+    rule_s019,
+    rule_s020,
+    rule_s021,
+    rule_s022,
+    rule_s023,
+    rule_s024,
 ]

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/runner.py

@@ -12,6 +12,7 @@ from bactopia.nf import (
     check_file_whitespace,
     find_main_nf,
     parse_groovydoc_full,
+    parse_includes,
     parse_main_nf_structure,
     parse_module_config_full,
     parse_schema_json,
@@ -220,6 +221,9 @@ def run_lint(
         if "/utils/" in component_name:
             continue
         ctx = _build_simple_context(main_nf)
+        ctx["citation_keys"] = citation_keys
+        ctx["bactopia_path"] = bactopia_path
+        ctx["includes"] = parse_includes(main_nf, bactopia_path)
         ignored = _collect_ignores(main_nf.parent)
         results = _run_rules(component_name, ctx, SUBWORKFLOW_RULES, ignored)
         all_results.extend(results)

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/nf.py

@@ -281,6 +281,11 @@ def parse_workflows(bactopia_path, input_wf, include_merlin=False, build_all=Fal
             # Fallback: derive path from key
             module_path = f"modules/{module.replace('_', '/')}"
             config_path = f"{bactopia_path}/{module_path}/module.config"
+            if not Path(config_path).exists():
+                logging.warning(
+                    f"module.config not found at {config_path} for module '{module}', skipping"
+                )
+                continue
             logging.debug(f"Adding module {module}: {config_path}")
             final_workflows[wf][module] = config_path
 
@@ -494,11 +499,15 @@ def parse_groovydoc_full(main_nf: Path) -> dict:
         "raw_lines": [],
         "links": [],
         # Parsed GroovyDoc fields for lint rules M031-M037
-        "doc_output_fields": [],  # field names from @output record(...)
-        "doc_input_records": [],  # list of {fields: [...]} per @input record(...)
-        "doc_input_params": [],  # non-record @input names
-        "doc_output_described_fields": [],  # fields with description lines
+        "doc_output_fields": [],  # field names from @output record(...), ? stripped
+        "doc_input_records": [],  # list of {fields: [...]} per @input record(...), ? stripped
+        "doc_input_params": [],  # non-record @input names, ? stripped
+        "doc_output_described_fields": [],  # fields with description lines, ? stripped
        "doc_tag_order": [],  # ordered list of tag names as they appear
+        # Optionality tracking (base names of fields that had ? suffix in GroovyDoc)
+        "doc_optional_output_fields": set(),
+        "doc_optional_input_fields": set(),
+        "doc_optional_input_params": set(),
     }
     lines = _read_lines(main_nf)
     if not lines:
@@ -511,19 +520,30 @@ def parse_groovydoc_full(main_nf: Path) -> dict:
     result["raw_lines"] = doc_lines
 
     # Extract tags with their values
+    # Multi-value tags are stored as lists; single-value tags as strings.
+    # Continuation lines (lines with * but no @tag) are appended to the
+    # previous single-value tag (e.g., multi-line @modules or @subworkflows).
+    multi_value_tags = {"input", "output", "note", "publish", "section", "results"}
     tag_pattern = re.compile(r"\*\s*@(\w+)\s*(.*)")
+    continuation_pattern = re.compile(r"\*\s+([^@\s].+)")
+    last_single_tag = None
     for line in doc_lines:
         m = tag_pattern.search(line)
         if m:
             tag_name = m.group(1)
             tag_value = m.group(2).strip()
-
-            # store as a list
-            if tag_name in ("input", "output", "note", "publish", "section", "results"):
+            if tag_name in multi_value_tags:
                 result["tags"].setdefault(tag_name, [])
                 result["tags"][tag_name].append(tag_value)
+                last_single_tag = None
             else:
                 result["tags"][tag_name] = tag_value
+                last_single_tag = tag_name
+        elif last_single_tag:
+            # Continuation line for a single-value tag
+            cm = continuation_pattern.search(line)
+            if cm:
+                result["tags"][last_single_tag] += " " + cm.group(1).strip()
 
     # Extract URLs
     url_pattern = re.compile(r"https?://[^\s\)>]+")
@@ -546,31 +566,52 @@ def parse_groovydoc_full(main_nf: Path) -> dict:
             seen_tags.append(tag_name)
     result["doc_tag_order"] = seen_tags
 
-    # Parse @output record(...) fields
+    # Parse @output record(...) fields (strip ? suffix, track optionality)
     output_tags = result["tags"].get("output", [])
     for oval in output_tags:
         record_match = re.match(r"record\(([^)]+)\)", oval)
         if record_match:
-            fields = [
+            fields = []
+            for raw in record_match.group(1).split(","):
+                raw = raw.strip()
+                if raw.endswith("?"):
+                    base = raw[:-1]
+                    result["doc_optional_output_fields"].add(base)
+                    fields.append(base)
+                else:
+                    fields.append(raw)
             result["doc_output_fields"] = fields
 
-    # Parse @input blocks
+    # Parse @input blocks (strip ? suffix, track optionality)
    input_tags = result["tags"].get("input", [])
     for ival in input_tags:
         # Check for record(meta, ...) syntax
         record_match = re.match(r"record\(([^)]+)\)", ival)
         if record_match:
-            fields = [
+            fields = []
+            for raw in record_match.group(1).split(","):
+                raw = raw.strip()
+                if raw.endswith("?"):
+                    base = raw[:-1]
+                    result["doc_optional_input_fields"].add(base)
+                    fields.append(base)
+                else:
+                    fields.append(raw)
             result["doc_input_records"].append({"fields": fields})
         else:
-            # Non-record input (e.g., "db", "proteins")
+            # Non-record input (e.g., "db", "proteins", "proteins?")
             param_name = ival.split()[0] if ival.strip() else ""
             if param_name:
-
+                if param_name.endswith("?"):
+                    base = param_name[:-1]
+                    result["doc_optional_input_params"].add(base)
+                    result["doc_input_params"].append(base)
+                else:
+                    result["doc_input_params"].append(param_name)
 
     # Parse @output description lines to find which fields are described
-    # Pattern: * - `field`: description
-    desc_pattern = re.compile(r"\*\s*-\s*`(\w+)`\s*:")
+    # Pattern: * - `field`: description (field may have ? suffix)
+    desc_pattern = re.compile(r"\*\s*-\s*`(\w+\??)`\s*:")
     in_output_section = False
     for line in doc_lines:
         if re.search(r"\*\s*@output", line):
@@ -582,7 +623,8 @@ def parse_groovydoc_full(main_nf: Path) -> dict:
         if in_output_section:
             dm = desc_pattern.search(line)
             if dm:
-
+                field_name = dm.group(1).rstrip("?")
+                result["doc_output_described_fields"].append(field_name)
 
     return result
 
@@ -634,9 +676,13 @@ def parse_main_nf_structure(main_nf: Path) -> dict:
         "output_has_nf_logs": False,
         "output_versions_uses_files": False,
         "output_generic_using_file": [],
-        # Input parsing for M031/M032
+        # Input parsing for M031/M032/M033
         "input_record_fields": [],  # fields from (meta: Map, field: Type): Record
         "input_params": [],  # non-record input names (db, proteins, etc.)
+        # Optionality tracking for M033
+        "code_optional_input_fields": set(),  # input record fields with Type?
+        "code_optional_input_params": set(),  # non-record input params with Type?
+        "code_optional_output_fields": set(),  # output fields with optional: true
         # Workflow-specific fields (W011-W020)
         "first_line": "",
         "todos": [],  # list of {"line_num": int, "text": str}
@@ -719,6 +765,13 @@ def parse_main_nf_structure(main_nf: Path) -> dict:
                 prefix_text = record_text[line_start : fm.start()].strip()
                 if not prefix_text.startswith("//"):
                     result["output_record_fields"].append(field_name)
+                    # Check for optional: true on the same line (M033)
+                    line_end = record_text.find("\n", fm.end())
+                    if line_end == -1:
+                        line_end = len(record_text)
+                    rest_of_line = record_text[fm.end() : line_end]
+                    if re.search(r"optional\s*:\s*true", rest_of_line):
+                        result["code_optional_output_fields"].add(field_name)
 
     # --- Output record detail parsing (M023-M030) ---
 
@@ -827,8 +880,11 @@ def parse_main_nf_structure(main_nf: Path) -> dict:
     if record_input_match:
         for part in record_input_match.group(1).split(","):
             part = part.strip()
-
+            pieces = part.split(":")
+            name = pieces[0].strip()
             result["input_record_fields"].append(name)
+            if len(pieces) > 1 and pieces[1].strip().endswith("?"):
+                result["code_optional_input_fields"].add(name)
     # Match non-record inputs: name: Type (one per line, not inside parens)
     for line in input_text.split("\n"):
         stripped = line.strip()
@@ -837,10 +893,13 @@ def parse_main_nf_structure(main_nf: Path) -> dict:
            continue
         if stripped.startswith("("):
             continue
-        # Match "name: Type" or "name
-        param_match = re.match(r"(\w+)\s*:\s
+        # Match "name: Type" or "name: Type?" (optional)
+        param_match = re.match(r"(\w+)\s*:\s*(\w+\??)", stripped)
         if param_match:
-
+            param_name = param_match.group(1)
+            result["input_params"].append(param_name)
+            if param_match.group(2).endswith("?"):
+                result["code_optional_input_params"].add(param_name)
 
     # Check for versions.yml in script block
     result["has_versions_yml"] = "versions.yml" in full_text
@@ -860,10 +919,10 @@ def parse_main_nf_structure(main_nf: Path) -> dict:
         re.search(r"\btuple\b", full_text, re.IGNORECASE)
     )
 
-    # Check for prefix = task.ext.prefix ?: "${
+    # Check for prefix = task.ext.prefix ?: "${_meta.name}" (M017)
     result["has_prefix_definition"] = bool(
         re.search(
-            r'prefix\s*=\s*task\.ext\.prefix\s*\?:\s*"\$\{
+            r'prefix\s*=\s*task\.ext\.prefix\s*\?:\s*"\$\{_meta\.name\}"', full_text
         )
     )
 
@@ -1431,3 +1490,73 @@ def parse_workflow_config(config_path: Path) -> dict:
         result["ext_raw"] = ext_str_match.group(1)
         result["ext"] = None  # String format is invalid -- rule will flag this
     return result
+
+
+def parse_includes(main_nf: Path, bactopia_path: Path) -> dict:
+    """Parse include statements from a main.nf file.
+
+    Resolves source paths against the file's directory and the repo root
+    to derive normalized component keys (lowercase, underscore-separated).
+
+    Args:
+        main_nf: Path to a main.nf file.
+        bactopia_path: Root path of the Bactopia repo.
+
+    Returns:
+        A dict with:
+            modules: list of module keys (e.g., "abricate_run")
+            subworkflows: list of subworkflow keys (e.g., "bactopia_gather")
+            plugins: list of plugin function names
+    """
+    result: dict[str, list[str]] = {"modules": [], "subworkflows": [], "plugins": []}
+    if not main_nf.exists():
+        return result
+
+    try:
+        text = main_nf.read_text()
+    except OSError:
+        return result
+
+    seen_modules: set[str] = set()
+    seen_subworkflows: set[str] = set()
+
+    for m in re.finditer(
+        r"include\s*\{\s*(\w+)(?:\s+as\s+\w+)?\s*\}\s*from\s*['\"]([^'\"]+)['\"]",
+        text,
+    ):
+        source = m.group(2)
+
+        if "plugin/" in source:
+            result["plugins"].append(m.group(1))
+            continue
+
+        # Resolve the source path relative to the file's directory
+        # Nextflow source paths omit .nf extension; parent of resolved path
+        # is the component directory
+        resolved = (main_nf.parent / source).resolve()
+
+        try:
+            rel_str = str(resolved.relative_to(bactopia_path))
+        except ValueError:
+            continue
+
+        if rel_str.startswith("modules/"):
+            # e.g., "modules/abricate/run/main" -> "abricate_run"
+            component = rel_str.removeprefix("modules/")
+            if component.endswith("/main"):
+                component = component[:-5]
+            key = component.replace("/", "_")
+            if key not in seen_modules:
+                seen_modules.add(key)
+                result["modules"].append(key)
+        elif rel_str.startswith("subworkflows/"):
+            # e.g., "subworkflows/bactopia/gather/main" -> "bactopia_gather"
+            component = rel_str.removeprefix("subworkflows/")
+            if component.endswith("/main"):
+                component = component[:-5]
+            key = component.replace("/", "_")
+            if key not in seen_subworkflows:
+                seen_subworkflows.add(key)
+                result["subworkflows"].append(key)
+
+    return result

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/outputs.py

@@ -74,10 +74,13 @@ def parse_declared_outputs(meta_dir: Path) -> set[str]:
 
     for _channel, records in data.items():
         for rec in records:
-
-
-
-
+            if isinstance(rec, dict):
+                for field_name, field_val in rec.items():
+                    if field_name == "meta":
+                        continue
+                    _collect_paths(field_val, declared, declared_dirs)
+            else:
+                _collect_paths(rec, declared, declared_dirs)
 
     # Expand directory entries: all files under a declared directory are declared
     for dir_path in declared_dirs:
@@ -262,6 +265,10 @@ def scan_test_outputs(test_dir: Path) -> dict:
         if not meta_dir.exists() or not work_dir.exists():
             continue
 
+        # Skip if no output_*.json files (e.g., workflow-level tests don't produce them)
+        if not list(meta_dir.glob("output_*.json")):
+            continue
+
         # Map work dir hashes to process names
         trace_map = _parse_trace_csv(meta_dir)

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/templates/nextflow/nextflow.config.j2

@@ -69,31 +69,31 @@ includeConfig "{{ profiles }}"
 timeline {
     enabled = true
     overwrite = true
-    file = "${params.infodir}/${params.
+    file = "${params.infodir}/${params.wf}-timeline.html"
 }
 
 report {
     enabled = true
     overwrite = true
-    file = "${params.infodir}/${params.
+    file = "${params.infodir}/${params.wf}-report.html"
 }
 
 trace {
     enabled = true
     overwrite = true
-    file = "${params.infodir}/${params.
+    file = "${params.infodir}/${params.wf}-trace.txt"
     fields = 'task_id,hash,native_id,process,tag,name,status,exit,module,container,cpus,time,disk,memory,attempt,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem'
 }
 
 dag {
     enabled = true
     overwrite = true
-    file = "${params.infodir}/${params.
+    file = "${params.infodir}/${params.wf}-dag.svg"
 }
 
 // Plugins
 plugins {
-    id 'nf-bactopia@
+    id 'nf-bactopia@2.0.0'
 }
 
 bactopia {

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/utils.py

@@ -48,11 +48,13 @@ def execute(
         else:
             return command.returncode
     except subprocess.CalledProcessError as e:
-        logging.error(f'"{cmd}" return exit code {e.returncode}')
-        logging.error(e)
         if allow_fail:
+            logging.debug(f'"{cmd}" return exit code {e.returncode}')
+            logging.debug(e)
             return None
         else:
+            logging.error(f'"{cmd}" return exit code {e.returncode}')
+            logging.error(e)
             sys.exit(e.returncode)