bactopia 2.0.0__tar.gz → 2.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. {bactopia-2.0.0 → bactopia-2.0.2}/PKG-INFO +11 -11
  2. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/catalog.py +37 -79
  3. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/rules/module_rules.py +87 -2
  4. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/rules/subworkflow_rules.py +254 -0
  5. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/runner.py +4 -0
  6. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/nf.py +200 -43
  7. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/outputs.py +11 -4
  8. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/templates/nextflow/nextflow.config.j2 +5 -5
  9. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/utils.py +4 -2
  10. {bactopia-2.0.0 → bactopia-2.0.2}/pyproject.toml +11 -11
  11. {bactopia-2.0.0 → bactopia-2.0.2}/LICENSE +0 -0
  12. {bactopia-2.0.0 → bactopia-2.0.2}/README.md +0 -0
  13. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/__init__.py +0 -0
  14. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/atb.py +0 -0
  15. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/__init__.py +0 -0
  16. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/atb/__init__.py +0 -0
  17. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/atb/atb_downloader.py +0 -0
  18. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/atb/atb_formatter.py +0 -0
  19. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/citations.py +0 -0
  20. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/datasets.py +0 -0
  21. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/download.py +0 -0
  22. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/helpers/__init__.py +0 -0
  23. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/helpers/merge_schemas.py +0 -0
  24. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/jsonify.py +0 -0
  25. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/lint.py +0 -0
  26. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/__init__.py +0 -0
  27. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/bracken_to_excel.py +0 -0
  28. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/check_assembly_accession.py +0 -0
  29. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/check_fastqs.py +0 -0
  30. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/cleanup_coverage.py +0 -0
  31. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/kraken_bracken_summary.py +0 -0
  32. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/mask_consensus.py +0 -0
  33. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/scrubber_summary.py +0 -0
  34. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pipeline/teton_prepare.py +0 -0
  35. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/prepare.py +0 -0
  36. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/prune.py +0 -0
  37. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pubmlst/build.py +0 -0
  38. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/pubmlst/setup.py +0 -0
  39. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/review.py +0 -0
  40. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/search.py +0 -0
  41. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/status.py +0 -0
  42. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/summary.py +0 -0
  43. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/testing.py +0 -0
  44. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/update.py +0 -0
  45. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/cli/workflows.py +0 -0
  46. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/databases/__init__.py +0 -0
  47. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/databases/ena.py +0 -0
  48. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/databases/ncbi.py +0 -0
  49. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/databases/pubmlst/__init__.py +0 -0
  50. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/databases/pubmlst/constants.py +0 -0
  51. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/databases/pubmlst/utils.py +0 -0
  52. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/__init__.py +0 -0
  53. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/models.py +0 -0
  54. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/rules/__init__.py +0 -0
  55. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/lint/rules/workflow_rules.py +0 -0
  56. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parse.py +0 -0
  57. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/__init__.py +0 -0
  58. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/amrfinderplus.py +0 -0
  59. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/annotator.py +0 -0
  60. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/ariba.py +0 -0
  61. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/assembler.py +0 -0
  62. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/blast.py +0 -0
  63. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/citations.py +0 -0
  64. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/coverage.py +0 -0
  65. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/error.py +0 -0
  66. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/gather.py +0 -0
  67. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/generic.py +0 -0
  68. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/kraken.py +0 -0
  69. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/mapping.py +0 -0
  70. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/mlst.py +0 -0
  71. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/nextflow.py +0 -0
  72. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/parsables.py +0 -0
  73. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/qc.py +0 -0
  74. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/sketcher.py +0 -0
  75. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/variants.py +0 -0
  76. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/versions.py +0 -0
  77. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/parsers/workflows.py +0 -0
  78. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/reports/__init__.py +0 -0
  79. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/reports/templates/__init__.py +0 -0
  80. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/summary.py +0 -0
  81. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/templates/__init__.py +0 -0
  82. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/templates/logos.py +0 -0
  83. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/templates/nextflow/params.config.j2 +0 -0
  84. {bactopia-2.0.0 → bactopia-2.0.2}/bactopia/templates/nextflow/process.config.j2 +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: bactopia
3
- Version: 2.0.0
3
+ Version: 2.0.2
4
4
  Summary: A Python package for working with Bactopia
5
5
  License: MIT
6
6
  License-File: LICENSE
@@ -15,16 +15,16 @@ Classifier: Programming Language :: Python :: 3.11
15
15
  Classifier: Programming Language :: Python :: 3.12
16
16
  Classifier: Programming Language :: Python :: 3.13
17
17
  Classifier: Programming Language :: Python :: 3.14
18
- Requires-Dist: biopython (>=1.80,<2.0)
19
- Requires-Dist: jinja2 (>=3.1.6,<4.0.0)
20
- Requires-Dist: openpyxl (>=3.1.0,<4.0.0)
21
- Requires-Dist: pandas (>=2.2.0,<3.0.0)
22
- Requires-Dist: pyyaml (>=6.0,<7.0)
23
- Requires-Dist: rauth (>=0.7.3,<0.8.0)
24
- Requires-Dist: requests (>=2.28.2,<3.0.0)
25
- Requires-Dist: rich (>=13.3.1,<14.0.0)
26
- Requires-Dist: rich-click (>=1.6.1,<2.0.0)
27
- Requires-Dist: tqdm (>=4.66.5,<5.0.0)
18
+ Requires-Dist: biopython (>=1.80)
19
+ Requires-Dist: jinja2 (>=3.1.6)
20
+ Requires-Dist: openpyxl (>=3.1.0)
21
+ Requires-Dist: pandas (>=2.2.0)
22
+ Requires-Dist: pyyaml (>=6.0)
23
+ Requires-Dist: rauth (>=0.7.3)
24
+ Requires-Dist: requests (>=2.28.2)
25
+ Requires-Dist: rich (>=13.3.1)
26
+ Requires-Dist: rich-click (>=1.6.1)
27
+ Requires-Dist: tqdm (>=4.66.5)
28
28
  Project-URL: Homepage, https://bactopia.github.io/
29
29
  Project-URL: Repository, https://github.com/bactopia/bactopia-py
30
30
  Description-Content-Type: text/markdown
@@ -16,7 +16,9 @@ from rich.logging import RichHandler
16
16
  import bactopia
17
17
  from bactopia.nf import (
18
18
  find_main_nf,
19
+ get_bactopia_version,
19
20
  parse_groovydoc_full,
21
+ parse_includes,
20
22
  parse_main_nf_structure,
21
23
  parse_module_config_full,
22
24
  parse_workflow_config,
@@ -28,71 +30,6 @@ rich.traceback.install(console=stderr, width=200, word_wrap=True, extra_lines=1)
28
30
  click.rich_click.USE_RICH_MARKUP = True
29
31
 
30
32
 
31
- def _parse_includes(main_nf: Path, bactopia_path: Path) -> dict:
32
- """Parse include statements from a main.nf file.
33
-
34
- Resolves source paths against the file's directory and the repo root
35
- to derive normalized component keys (lowercase, underscore-separated).
36
-
37
- Returns dict with:
38
- modules: list of module keys (e.g., "abricate_run")
39
- subworkflows: list of subworkflow keys (e.g., "bactopia_gather")
40
- plugins: list of plugin function names
41
- """
42
- result = {"modules": [], "subworkflows": [], "plugins": []}
43
- if not main_nf.exists():
44
- return result
45
-
46
- try:
47
- text = main_nf.read_text()
48
- except OSError:
49
- return result
50
-
51
- seen_modules = set()
52
- seen_subworkflows = set()
53
-
54
- for m in re.finditer(
55
- r"include\s*\{\s*(\w+)(?:\s+as\s+\w+)?\s*\}\s*from\s*['\"]([^'\"]+)['\"]",
56
- text,
57
- ):
58
- source = m.group(2)
59
-
60
- if "plugin/" in source:
61
- result["plugins"].append(m.group(1))
62
- continue
63
-
64
- # Resolve the source path relative to the file's directory
65
- # Nextflow source paths omit .nf extension; parent of resolved path
66
- # is the component directory
67
- resolved = (main_nf.parent / source).resolve()
68
-
69
- try:
70
- rel_str = str(resolved.relative_to(bactopia_path))
71
- except ValueError:
72
- continue
73
-
74
- if rel_str.startswith("modules/"):
75
- # e.g., "modules/abricate/run/main" -> "abricate/run"
76
- component = rel_str.removeprefix("modules/")
77
- if component.endswith("/main"):
78
- component = component[:-5]
79
- key = component.replace("/", "_")
80
- if key not in seen_modules:
81
- seen_modules.add(key)
82
- result["modules"].append(key)
83
- elif rel_str.startswith("subworkflows/"):
84
- # e.g., "subworkflows/bactopia/gather/main" -> "bactopia/gather"
85
- component = rel_str.removeprefix("subworkflows/")
86
- if component.endswith("/main"):
87
- component = component[:-5]
88
- key = component.replace("/", "_")
89
- if key not in seen_subworkflows:
90
- seen_subworkflows.add(key)
91
- result["subworkflows"].append(key)
92
-
93
- return result
94
-
95
-
96
33
  def _extract_description(groovydoc: dict) -> str:
97
34
  """Extract the first line description from GroovyDoc raw lines."""
98
35
  if not groovydoc.get("has_doc") or not groovydoc.get("raw_lines"):
@@ -116,7 +53,7 @@ def _parse_output_fields(raw_lines: list[str]) -> dict[str, list[str]]:
116
53
  Dict mapping channel names to lists of field names, e.g.,
117
54
  {"sample_outputs": ["gff", "gbk", ...], "run_outputs": []}.
118
55
  """
119
- field_pattern = re.compile(r"\*\s*-\s*`(\w+)`\s*:")
56
+ field_pattern = re.compile(r"\*\s*-\s*`(\w+\??)`\s*:")
120
57
  output_pattern = re.compile(r"\*\s*@output\s+(\S+)")
121
58
  tag_pattern = re.compile(r"\*\s*@(?!output)\w+")
122
59
 
@@ -140,7 +77,7 @@ def _parse_output_fields(raw_lines: list[str]) -> dict[str, list[str]]:
140
77
  if current_channel is not None:
141
78
  fm = field_pattern.search(line)
142
79
  if fm:
143
- channels[current_channel].append(fm.group(1))
80
+ channels[current_channel].append(fm.group(1).rstrip("?"))
144
81
 
145
82
  return channels
146
83
 
@@ -210,14 +147,16 @@ def _clean_scope(raw: str) -> str:
210
147
  return raw.strip().strip('"').strip("'")
211
148
 
212
149
 
213
- def _build_module_entry(component_name: str, main_nf: Path) -> dict:
150
+ def _build_module_entry(
151
+ component_name: str, main_nf: Path, bactopia_path: Path
152
+ ) -> dict:
214
153
  """Build a catalog entry for a module."""
215
154
  groovydoc = parse_groovydoc_full(main_nf)
216
155
  config = parse_module_config_full(main_nf.parent / "module.config")
217
156
 
218
157
  entry = {
219
158
  "description": _extract_description(groovydoc),
220
- "path": str(main_nf.parent.relative_to(main_nf.parents[3])) + "/",
159
+ "path": str(main_nf.parent.relative_to(bactopia_path)) + "/",
221
160
  }
222
161
 
223
162
  # Scope and process_name from config
@@ -235,6 +174,11 @@ def _build_module_entry(component_name: str, main_nf: Path) -> dict:
235
174
  fields = groovydoc["doc_input_records"][0].get("fields", [])
236
175
  if fields:
237
176
  entry["takes"] = [f for f in fields if f != "meta"]
177
+ optional_input = groovydoc.get("doc_optional_input_fields", set())
178
+ if optional_input:
179
+ takes_opt = [f for f in entry["takes"] if f in optional_input]
180
+ if takes_opt:
181
+ entry["takes_optional"] = takes_opt
238
182
 
239
183
  # Emits from GroovyDoc @output (named fields only)
240
184
  if groovydoc.get("doc_output_fields"):
@@ -242,6 +186,11 @@ def _build_module_entry(component_name: str, main_nf: Path) -> dict:
242
186
  named = [f for f in groovydoc["doc_output_fields"] if f not in standard]
243
187
  if named:
244
188
  entry["emits"] = named
189
+ optional_output = groovydoc.get("doc_optional_output_fields", set())
190
+ if optional_output:
191
+ emits_opt = [f for f in named if f in optional_output]
192
+ if emits_opt:
193
+ entry["emits_optional"] = emits_opt
245
194
 
246
195
  # Tags from GroovyDoc @tags
247
196
  parsed_tags = _parse_tags(groovydoc)
@@ -256,11 +205,11 @@ def _build_subworkflow_entry(
256
205
  ) -> dict:
257
206
  """Build a catalog entry for a subworkflow."""
258
207
  groovydoc = parse_groovydoc_full(main_nf)
259
- includes = _parse_includes(main_nf, bactopia_path)
208
+ includes = parse_includes(main_nf, bactopia_path)
260
209
 
261
210
  entry = {
262
211
  "description": _extract_description(groovydoc),
263
- "path": str(main_nf.parent.relative_to(main_nf.parents[3])) + "/",
212
+ "path": str(main_nf.parent.relative_to(bactopia_path)) + "/",
264
213
  }
265
214
 
266
215
  # Takes from GroovyDoc @input
@@ -268,8 +217,18 @@ def _build_subworkflow_entry(
268
217
  fields = groovydoc["doc_input_records"][0].get("fields", [])
269
218
  if fields:
270
219
  entry["takes"] = [f for f in fields if f != "meta"]
220
+ optional_input = groovydoc.get("doc_optional_input_fields", set())
221
+ if optional_input:
222
+ takes_opt = [f for f in entry["takes"] if f in optional_input]
223
+ if takes_opt:
224
+ entry["takes_optional"] = takes_opt
271
225
  if groovydoc.get("doc_input_params"):
272
226
  entry["takes_params"] = groovydoc["doc_input_params"]
227
+ optional_params = groovydoc.get("doc_optional_input_params", set())
228
+ if optional_params:
229
+ params_opt = [p for p in entry["takes_params"] if p in optional_params]
230
+ if params_opt:
231
+ entry["takes_params_optional"] = params_opt
273
232
 
274
233
  # Emits from GroovyDoc @output -- structured as channel -> fields dict
275
234
  tags = groovydoc.get("tags", {})
@@ -305,14 +264,14 @@ def _build_workflow_entry(
305
264
  ) -> dict:
306
265
  """Build a catalog entry for a workflow."""
307
266
  groovydoc = parse_groovydoc_full(main_nf)
308
- includes = _parse_includes(main_nf, bactopia_path)
267
+ includes = parse_includes(main_nf, bactopia_path)
309
268
 
310
269
  # Determine type
311
270
  is_tool = "bactopia-tools/" in str(main_nf)
312
- wf_path = str(main_nf.parent.relative_to(main_nf.parents[3 if is_tool else 2]))
271
+ wf_path = str(main_nf.parent.relative_to(bactopia_path))
313
272
  # Add trailing slash for tool/named workflow directories, but not for the
314
273
  # root bactopia workflow which uses a Nextflow convention path
315
- if is_tool or wf_path != "bactopia/bactopia":
274
+ if is_tool or wf_path != ".":
316
275
  wf_path += "/"
317
276
  entry = {
318
277
  "description": _extract_description(groovydoc),
@@ -345,14 +304,11 @@ def generate_catalog(bactopia_path: Path) -> dict:
345
304
  The catalog dict ready for JSON serialization.
346
305
  """
347
306
  # Extract versions from nextflow.config
348
- bactopia_version = "unknown"
307
+ bactopia_version = get_bactopia_version(bactopia_path)
349
308
  plugin_version = "unknown"
350
309
  nf_config = bactopia_path / "nextflow.config"
351
310
  if nf_config.exists():
352
311
  for line in nf_config.read_text().splitlines():
353
- m = re.match(r"\s*params\.bactopia_version\s*=\s*['\"]([^'\"]+)['\"]", line)
354
- if m:
355
- bactopia_version = m.group(1)
356
312
  m = re.match(r"\s*id\s+['\"]nf-bactopia@([^'\"]+)['\"]", line)
357
313
  if m:
358
314
  plugin_version = m.group(1)
@@ -381,7 +337,9 @@ def generate_catalog(bactopia_path: Path) -> dict:
381
337
  component_name = str(rel).replace("modules/", "")
382
338
  # Normalize key: slash to underscore (e.g., "abricate/run" -> "abricate_run")
383
339
  key = component_name.replace("/", "_")
384
- catalog["modules"][key] = _build_module_entry(component_name, main_nf)
340
+ catalog["modules"][key] = _build_module_entry(
341
+ component_name, main_nf, bactopia_path
342
+ )
385
343
 
386
344
  # Subworkflows
387
345
  subworkflows_dir = bactopia_path / "subworkflows"
@@ -291,7 +291,7 @@ PASSTHROUGH_OUTPUT_FIELDS = {"r1", "r2", "se", "lr"}
291
291
 
292
292
 
293
293
  def rule_m017(component: str, ctx: dict) -> list[LintResult]:
294
- """prefix = task.ext.prefix ?: "${meta.name}" present."""
294
+ """prefix = task.ext.prefix ?: "${_meta.name}" present."""
295
295
  rid = "M017"
296
296
  if ctx["structure"]["has_prefix_definition"]:
297
297
  return [_pass(rid, component, "prefix definition present")]
@@ -299,7 +299,7 @@ def rule_m017(component: str, ctx: dict) -> list[LintResult]:
299
299
  _fail(
300
300
  rid,
301
301
  component,
302
- 'Missing: prefix = task.ext.prefix ?: "${meta.name}"',
302
+ 'Missing: prefix = task.ext.prefix ?: "${_meta.name}"',
303
303
  )
304
304
  ]
305
305
 
@@ -958,6 +958,90 @@ def rule_m032(component: str, ctx: dict) -> list[LintResult]:
958
958
  return [_fail(rid, component, f"@input record field mismatch: {'; '.join(msgs)}")]
959
959
 
960
960
 
961
+ def rule_m033(component: str, ctx: dict) -> list[LintResult]:
962
+ """Optionality markers (?) match between GroovyDoc and code."""
963
+ rid = "M033"
964
+ doc = ctx["groovydoc"]
965
+ struct = ctx["structure"]
966
+ if not doc["has_doc"]:
967
+ return [] # M006 covers this
968
+
969
+ mismatches = []
970
+
971
+ # --- Input record field optionality ---
972
+ doc_records = doc.get("doc_input_records", [])
973
+ code_input_fields = struct.get("input_record_fields", [])
974
+ if doc_records and code_input_fields:
975
+ doc_optional = doc.get("doc_optional_input_fields", set())
976
+ code_optional = struct.get("code_optional_input_fields", set())
977
+ common = set(doc_records[0]["fields"]) & set(code_input_fields)
978
+ for field in sorted(common):
979
+ in_doc = field in doc_optional
980
+ in_code = field in code_optional
981
+ if in_doc and not in_code:
982
+ mismatches.append(
983
+ f"input record field '{field}': doc has ? but code does not"
984
+ )
985
+ elif in_code and not in_doc:
986
+ mismatches.append(
987
+ f"input record field '{field}': code has ? but doc does not"
988
+ )
989
+
990
+ # --- Input params optionality ---
991
+ doc_params = doc.get("doc_input_params", [])
992
+ code_params = struct.get("input_params", [])
993
+ if doc_params and code_params:
994
+ doc_opt_params = doc.get("doc_optional_input_params", set())
995
+ code_opt_params = struct.get("code_optional_input_params", set())
996
+ common = set(doc_params) & set(code_params)
997
+ for param in sorted(common):
998
+ in_doc = param in doc_opt_params
999
+ in_code = param in code_opt_params
1000
+ if in_doc and not in_code:
1001
+ mismatches.append(f"input param '{param}': doc has ? but code does not")
1002
+ elif in_code and not in_doc:
1003
+ mismatches.append(f"input param '{param}': code has ? but doc does not")
1004
+
1005
+ # --- Output record field optionality ---
1006
+ doc_output_fields = doc.get("doc_output_fields", [])
1007
+ code_output_fields = struct.get("output_record_fields", [])
1008
+ if doc_output_fields and code_output_fields:
1009
+ doc_opt_output = doc.get("doc_optional_output_fields", set())
1010
+ code_opt_output = struct.get("code_optional_output_fields", set())
1011
+ common = set(doc_output_fields) & set(code_output_fields)
1012
+ common -= STANDARD_OUTPUT_FIELDS
1013
+ for field in sorted(common):
1014
+ in_doc = field in doc_opt_output
1015
+ in_code = field in code_opt_output
1016
+ if in_doc and not in_code:
1017
+ mismatches.append(
1018
+ f"output field '{field}': doc has ? but code missing optional: true"
1019
+ )
1020
+ elif in_code and not in_doc:
1021
+ mismatches.append(
1022
+ f"output field '{field}': code has optional: true but doc missing ?"
1023
+ )
1024
+
1025
+ if mismatches:
1026
+ return [_fail(rid, component, f"Optionality mismatch: {'; '.join(mismatches)}")]
1027
+
1028
+ # Only PASS if there was something to check
1029
+ has_checks = (
1030
+ (doc_records and code_input_fields)
1031
+ or (doc_params and code_params)
1032
+ or (doc_output_fields and code_output_fields)
1033
+ )
1034
+ if has_checks:
1035
+ return [
1036
+ _pass(
1037
+ rid,
1038
+ component,
1039
+ "Optionality markers match between GroovyDoc and code",
1040
+ )
1041
+ ]
1042
+ return []
1043
+
1044
+
961
1045
  def rule_m034(component: str, ctx: dict) -> list[LintResult]:
962
1046
  """@output does not describe standard fields (meta, results, logs, nf_logs, versions)."""
963
1047
  rid = "M034"
@@ -1158,6 +1242,7 @@ MODULE_RULES = [
1158
1242
  # GroovyDoc accuracy
1159
1243
  rule_m031,
1160
1244
  rule_m032,
1245
+ rule_m033,
1161
1246
  rule_m034,
1162
1247
  rule_m035,
1163
1248
  rule_m036,
@@ -285,6 +285,252 @@ def rule_s016(component: str, ctx: dict) -> list[LintResult]:
285
285
  ]
286
286
 
287
287
 
288
+ def _parse_doc_component_list(tag_value: str) -> set[str]:
289
+ """Parse a @modules or @subworkflows tag value into a set of normalized names.
290
+
291
+ Handles comma-separated names with optional 'as alias' notation.
292
+ E.g., "prokka as prokka_module, csvtk_concat" -> {"prokka", "csvtk_concat"}
293
+ """
294
+ names = set()
295
+ if not tag_value:
296
+ return names
297
+ for entry in tag_value.split(","):
298
+ entry = entry.strip()
299
+ if not entry:
300
+ continue
301
+ # Handle "name as alias" notation -- extract base name
302
+ parts = entry.split()
303
+ if len(parts) >= 3 and parts[1] == "as":
304
+ names.add(parts[0])
305
+ else:
306
+ names.add(parts[0])
307
+ return names
308
+
309
+
310
+ def rule_s017(component: str, ctx: dict) -> list[LintResult]:
311
+ """@modules match actual module includes."""
312
+ rid = "S017"
313
+ doc = ctx["groovydoc"]
314
+ if not doc["has_doc"]:
315
+ return []
316
+ includes = ctx.get("includes", {})
317
+ actual_modules = set(includes.get("modules", []))
318
+ doc_value = doc["tags"].get("modules", "")
319
+ doc_modules = _parse_doc_component_list(doc_value)
320
+ # Skip if neither GroovyDoc nor includes mention modules
321
+ if not actual_modules and not doc_modules:
322
+ return []
323
+ if doc_modules == actual_modules:
324
+ return [_pass(rid, component, "@modules match actual includes")]
325
+ missing = actual_modules - doc_modules
326
+ extra = doc_modules - actual_modules
327
+ parts = []
328
+ if missing:
329
+ parts.append(f"missing from @modules: {', '.join(sorted(missing))}")
330
+ if extra:
331
+ parts.append(f"extra in @modules: {', '.join(sorted(extra))}")
332
+ return [_fail(rid, component, f"@modules mismatch: {'; '.join(parts)}")]
333
+
334
+
335
+ def rule_s018(component: str, ctx: dict) -> list[LintResult]:
336
+ """@subworkflows match actual subworkflow includes."""
337
+ rid = "S018"
338
+ doc = ctx["groovydoc"]
339
+ if not doc["has_doc"]:
340
+ return []
341
+ includes = ctx.get("includes", {})
342
+ actual_subs = set(includes.get("subworkflows", []))
343
+ doc_value = doc["tags"].get("subworkflows", "")
344
+ doc_subs = _parse_doc_component_list(doc_value)
345
+ # Skip if neither GroovyDoc nor includes mention subworkflows
346
+ if not actual_subs and not doc_subs:
347
+ return []
348
+ if doc_subs == actual_subs:
349
+ return [_pass(rid, component, "@subworkflows match actual includes")]
350
+ missing = actual_subs - doc_subs
351
+ extra = doc_subs - actual_subs
352
+ parts = []
353
+ if missing:
354
+ parts.append(f"missing from @subworkflows: {', '.join(sorted(missing))}")
355
+ if extra:
356
+ parts.append(f"extra in @subworkflows: {', '.join(sorted(extra))}")
357
+ return [_fail(rid, component, f"@subworkflows mismatch: {'; '.join(parts)}")]
358
+
359
+
360
+ def rule_s019(component: str, ctx: dict) -> list[LintResult]:
361
+ """@citation keys exist in data/citations.yml."""
362
+ rid = "S019"
363
+ doc = ctx["groovydoc"]
364
+ if not doc["has_doc"]:
365
+ return []
366
+ citation_value = doc["tags"].get("citation", "")
367
+ if not citation_value:
368
+ return [] # S003 covers missing @citation
369
+ citation_keys = ctx.get("citation_keys", set())
370
+ if not citation_keys:
371
+ return [] # citations.yml not available -- skip check
372
+ keys = [k.strip() for k in citation_value.split(",")]
373
+ invalid = [k for k in keys if k and k not in citation_keys]
374
+ if invalid:
375
+ return [
376
+ _fail(
377
+ rid,
378
+ component,
379
+ f"@citation keys not in citations.yml: {', '.join(invalid)}",
380
+ )
381
+ ]
382
+ return [_pass(rid, component, "All @citation keys are valid")]
383
+
384
+
385
+ def rule_s020(component: str, ctx: dict) -> list[LintResult]:
386
+ """@tags complexity value is valid."""
387
+ rid = "S020"
388
+ tags = ctx["groovydoc"]["tags"]
389
+ tags_value = tags.get("tags", "")
390
+ if not tags_value:
391
+ return []
392
+ parsed = _parse_tags_field(tags_value)
393
+ complexity = parsed.get("complexity", "")
394
+ if not complexity:
395
+ return []
396
+ valid = {"simple", "moderate", "complex"}
397
+ if complexity in valid:
398
+ return [_pass(rid, component, f"complexity:{complexity} is valid")]
399
+ return [
400
+ _warn(
401
+ rid,
402
+ component,
403
+ f"Invalid complexity value '{complexity}', expected one of: {', '.join(sorted(valid))}",
404
+ )
405
+ ]
406
+
407
+
408
+ def rule_s021(component: str, ctx: dict) -> list[LintResult]:
409
+ """@tags input-type value is valid."""
410
+ rid = "S021"
411
+ tags = ctx["groovydoc"]["tags"]
412
+ tags_value = tags.get("tags", "")
413
+ if not tags_value:
414
+ return []
415
+ parsed = _parse_tags_field(tags_value)
416
+ input_type = parsed.get("input-type", "")
417
+ if not input_type:
418
+ return []
419
+ valid = {"none", "single", "multiple", "parameter"}
420
+ if input_type in valid:
421
+ return [_pass(rid, component, f"input-type:{input_type} is valid")]
422
+ return [
423
+ _warn(
424
+ rid,
425
+ component,
426
+ f"Invalid input-type value '{input_type}', expected one of: {', '.join(sorted(valid))}",
427
+ )
428
+ ]
429
+
430
+
431
+ def rule_s022(component: str, ctx: dict) -> list[LintResult]:
432
+ """@tags output-type value is valid."""
433
+ rid = "S022"
434
+ tags = ctx["groovydoc"]["tags"]
435
+ tags_value = tags.get("tags", "")
436
+ if not tags_value:
437
+ return []
438
+ parsed = _parse_tags_field(tags_value)
439
+ output_type = parsed.get("output-type", "")
440
+ if not output_type:
441
+ return []
442
+ valid = {"single", "multiple"}
443
+ if output_type in valid:
444
+ return [_pass(rid, component, f"output-type:{output_type} is valid")]
445
+ return [
446
+ _warn(
447
+ rid,
448
+ component,
449
+ f"Invalid output-type value '{output_type}', expected one of: {', '.join(sorted(valid))}",
450
+ )
451
+ ]
452
+
453
+
454
+ VALID_FEATURES = {
455
+ "aggregation",
456
+ "alternative-execution",
457
+ "archive-output",
458
+ "components",
459
+ "compression",
460
+ "conditional-input",
461
+ "conditional-logic",
462
+ "database-dependent",
463
+ "internet-access",
464
+ "no-test",
465
+ "resource-download",
466
+ }
467
+
468
+
469
+ def rule_s023(component: str, ctx: dict) -> list[LintResult]:
470
+ """@tags features values are valid."""
471
+ rid = "S023"
472
+ tags = ctx["groovydoc"]["tags"]
473
+ tags_value = tags.get("tags", "")
474
+ if not tags_value:
475
+ return []
476
+ parsed = _parse_tags_field(tags_value)
477
+ features = parsed.get("features", "")
478
+ if not features:
479
+ return []
480
+ feature_list = [f.strip() for f in features.split(",")]
481
+ invalid = [f for f in feature_list if f and f not in VALID_FEATURES]
482
+ if invalid:
483
+ return [
484
+ _fail(
485
+ rid,
486
+ component,
487
+ f"Invalid feature values: {', '.join(invalid)} "
488
+ f"(valid: {', '.join(sorted(VALID_FEATURES))})",
489
+ )
490
+ ]
491
+ return [_pass(rid, component, "All feature values are valid")]
492
+
493
+
494
+ # Canonical tag order for subworkflows
495
+ SUBWORKFLOW_TAG_ORDER = [
496
+ "status",
497
+ "keywords",
498
+ "tags",
499
+ "citation",
500
+ "modules",
501
+ "subworkflows",
502
+ "note",
503
+ "input",
504
+ "output",
505
+ ]
506
+
507
+
508
+ def rule_s024(component: str, ctx: dict) -> list[LintResult]:
509
+ """GroovyDoc tag ordering."""
510
+ rid = "S024"
511
+ doc = ctx["groovydoc"]
512
+ if not doc["has_doc"]:
513
+ return []
514
+ actual_order = doc.get("doc_tag_order", [])
515
+ if not actual_order:
516
+ return []
517
+ known_order = [t for t in actual_order if t in SUBWORKFLOW_TAG_ORDER]
518
+ expected_positions = {t: i for i, t in enumerate(SUBWORKFLOW_TAG_ORDER)}
519
+ for i in range(len(known_order) - 1):
520
+ curr = known_order[i]
521
+ nxt = known_order[i + 1]
522
+ if expected_positions[curr] > expected_positions[nxt]:
523
+ return [
524
+ _warn(
525
+ rid,
526
+ component,
527
+ f"Tag ordering incorrect: @{curr} appears before @{nxt} "
528
+ f"(expected: {' -> '.join('@' + t for t in SUBWORKFLOW_TAG_ORDER if t in known_order)})",
529
+ )
530
+ ]
531
+ return [_pass(rid, component, "GroovyDoc tag ordering is correct")]
532
+
533
+
288
534
  SUBWORKFLOW_RULES = [
289
535
  rule_s001,
290
536
  rule_s002,
@@ -302,4 +548,12 @@ SUBWORKFLOW_RULES = [
302
548
  rule_s014,
303
549
  rule_s015,
304
550
  rule_s016,
551
+ rule_s017,
552
+ rule_s018,
553
+ rule_s019,
554
+ rule_s020,
555
+ rule_s021,
556
+ rule_s022,
557
+ rule_s023,
558
+ rule_s024,
305
559
  ]
@@ -12,6 +12,7 @@ from bactopia.nf import (
12
12
  check_file_whitespace,
13
13
  find_main_nf,
14
14
  parse_groovydoc_full,
15
+ parse_includes,
15
16
  parse_main_nf_structure,
16
17
  parse_module_config_full,
17
18
  parse_schema_json,
@@ -220,6 +221,9 @@ def run_lint(
220
221
  if "/utils/" in component_name:
221
222
  continue
222
223
  ctx = _build_simple_context(main_nf)
224
+ ctx["citation_keys"] = citation_keys
225
+ ctx["bactopia_path"] = bactopia_path
226
+ ctx["includes"] = parse_includes(main_nf, bactopia_path)
223
227
  ignored = _collect_ignores(main_nf.parent)
224
228
  results = _run_rules(component_name, ctx, SUBWORKFLOW_RULES, ignored)
225
229
  all_results.extend(results)