bactopia 2.0.1.tar.gz → 2.0.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {bactopia-2.0.1 → bactopia-2.0.2}/PKG-INFO +1 -1
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/catalog.py +35 -75
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/rules/module_rules.py +87 -2
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/rules/subworkflow_rules.py +254 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/runner.py +4 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/nf.py +152 -23
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/outputs.py +11 -4
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/templates/nextflow/nextflow.config.j2 +5 -5
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/utils.py +4 -2
- {bactopia-2.0.1 → bactopia-2.0.2}/pyproject.toml +1 -1
- {bactopia-2.0.1 → bactopia-2.0.2}/LICENSE +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/README.md +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/atb.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/atb/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/atb/atb_downloader.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/atb/atb_formatter.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/citations.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/datasets.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/download.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/helpers/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/helpers/merge_schemas.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/jsonify.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/lint.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/bracken_to_excel.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/check_assembly_accession.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/check_fastqs.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/cleanup_coverage.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/kraken_bracken_summary.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/mask_consensus.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/scrubber_summary.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pipeline/teton_prepare.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/prepare.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/prune.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pubmlst/build.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/pubmlst/setup.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/review.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/search.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/status.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/summary.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/testing.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/update.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/workflows.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/databases/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/databases/ena.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/databases/ncbi.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/databases/pubmlst/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/databases/pubmlst/constants.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/databases/pubmlst/utils.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/models.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/rules/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/rules/workflow_rules.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parse.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/amrfinderplus.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/annotator.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/ariba.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/assembler.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/blast.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/citations.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/coverage.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/error.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/gather.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/generic.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/kraken.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/mapping.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/mlst.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/nextflow.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/parsables.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/qc.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/sketcher.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/variants.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/versions.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/parsers/workflows.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/reports/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/reports/templates/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/summary.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/templates/__init__.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/templates/logos.py +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/templates/nextflow/params.config.j2 +0 -0
- {bactopia-2.0.1 → bactopia-2.0.2}/bactopia/templates/nextflow/process.config.j2 +0 -0
{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/cli/catalog.py

@@ -18,6 +18,7 @@ from bactopia.nf import (
     find_main_nf,
     get_bactopia_version,
     parse_groovydoc_full,
+    parse_includes,
     parse_main_nf_structure,
     parse_module_config_full,
     parse_workflow_config,
@@ -29,71 +30,6 @@ rich.traceback.install(console=stderr, width=200, word_wrap=True, extra_lines=1)
 click.rich_click.USE_RICH_MARKUP = True
 
 
-def _parse_includes(main_nf: Path, bactopia_path: Path) -> dict:
-    """Parse include statements from a main.nf file.
-
-    Resolves source paths against the file's directory and the repo root
-    to derive normalized component keys (lowercase, underscore-separated).
-
-    Returns dict with:
-        modules: list of module keys (e.g., "abricate_run")
-        subworkflows: list of subworkflow keys (e.g., "bactopia_gather")
-        plugins: list of plugin function names
-    """
-    result = {"modules": [], "subworkflows": [], "plugins": []}
-    if not main_nf.exists():
-        return result
-
-    try:
-        text = main_nf.read_text()
-    except OSError:
-        return result
-
-    seen_modules = set()
-    seen_subworkflows = set()
-
-    for m in re.finditer(
-        r"include\s*\{\s*(\w+)(?:\s+as\s+\w+)?\s*\}\s*from\s*['\"]([^'\"]+)['\"]",
-        text,
-    ):
-        source = m.group(2)
-
-        if "plugin/" in source:
-            result["plugins"].append(m.group(1))
-            continue
-
-        # Resolve the source path relative to the file's directory
-        # Nextflow source paths omit .nf extension; parent of resolved path
-        # is the component directory
-        resolved = (main_nf.parent / source).resolve()
-
-        try:
-            rel_str = str(resolved.relative_to(bactopia_path))
-        except ValueError:
-            continue
-
-        if rel_str.startswith("modules/"):
-            # e.g., "modules/abricate/run/main" -> "abricate/run"
-            component = rel_str.removeprefix("modules/")
-            if component.endswith("/main"):
-                component = component[:-5]
-            key = component.replace("/", "_")
-            if key not in seen_modules:
-                seen_modules.add(key)
-                result["modules"].append(key)
-        elif rel_str.startswith("subworkflows/"):
-            # e.g., "subworkflows/bactopia/gather/main" -> "bactopia/gather"
-            component = rel_str.removeprefix("subworkflows/")
-            if component.endswith("/main"):
-                component = component[:-5]
-            key = component.replace("/", "_")
-            if key not in seen_subworkflows:
-                seen_subworkflows.add(key)
-                result["subworkflows"].append(key)
-
-    return result
-
-
 def _extract_description(groovydoc: dict) -> str:
     """Extract the first line description from GroovyDoc raw lines."""
     if not groovydoc.get("has_doc") or not groovydoc.get("raw_lines"):
@@ -117,7 +53,7 @@ def _parse_output_fields(raw_lines: list[str]) -> dict[str, list[str]]:
     Dict mapping channel names to lists of field names, e.g.,
     {"sample_outputs": ["gff", "gbk", ...], "run_outputs": []}.
     """
-    field_pattern = re.compile(r"\*\s*-\s*`(\w+)`\s*:")
+    field_pattern = re.compile(r"\*\s*-\s*`(\w+\??)`\s*:")
     output_pattern = re.compile(r"\*\s*@output\s+(\S+)")
     tag_pattern = re.compile(r"\*\s*@(?!output)\w+")
 
@@ -141,7 +77,7 @@ def _parse_output_fields(raw_lines: list[str]) -> dict[str, list[str]]:
         if current_channel is not None:
             fm = field_pattern.search(line)
             if fm:
-                channels[current_channel].append(fm.group(1))
+                channels[current_channel].append(fm.group(1).rstrip("?"))
 
     return channels
 
@@ -211,14 +147,16 @@ def _clean_scope(raw: str) -> str:
     return raw.strip().strip('"').strip("'")
 
 
-def _build_module_entry(component_name: str, main_nf: Path) -> dict:
+def _build_module_entry(
+    component_name: str, main_nf: Path, bactopia_path: Path
+) -> dict:
     """Build a catalog entry for a module."""
     groovydoc = parse_groovydoc_full(main_nf)
     config = parse_module_config_full(main_nf.parent / "module.config")
 
     entry = {
         "description": _extract_description(groovydoc),
-        "path": str(main_nf.parent.relative_to(
+        "path": str(main_nf.parent.relative_to(bactopia_path)) + "/",
     }
 
     # Scope and process_name from config
@@ -236,6 +174,11 @@ def _build_module_entry(component_name: str, main_nf: Path) -> dict:
         fields = groovydoc["doc_input_records"][0].get("fields", [])
         if fields:
             entry["takes"] = [f for f in fields if f != "meta"]
+            optional_input = groovydoc.get("doc_optional_input_fields", set())
+            if optional_input:
+                takes_opt = [f for f in entry["takes"] if f in optional_input]
+                if takes_opt:
+                    entry["takes_optional"] = takes_opt
 
     # Emits from GroovyDoc @output (named fields only)
     if groovydoc.get("doc_output_fields"):
@@ -243,6 +186,11 @@ def _build_module_entry(component_name: str, main_nf: Path) -> dict:
         named = [f for f in groovydoc["doc_output_fields"] if f not in standard]
         if named:
             entry["emits"] = named
+            optional_output = groovydoc.get("doc_optional_output_fields", set())
+            if optional_output:
+                emits_opt = [f for f in named if f in optional_output]
+                if emits_opt:
+                    entry["emits_optional"] = emits_opt
 
     # Tags from GroovyDoc @tags
     parsed_tags = _parse_tags(groovydoc)
@@ -257,11 +205,11 @@ def _build_subworkflow_entry(
 ) -> dict:
     """Build a catalog entry for a subworkflow."""
     groovydoc = parse_groovydoc_full(main_nf)
-    includes = _parse_includes(main_nf, bactopia_path)
+    includes = parse_includes(main_nf, bactopia_path)
 
     entry = {
         "description": _extract_description(groovydoc),
-        "path": str(main_nf.parent.relative_to(
+        "path": str(main_nf.parent.relative_to(bactopia_path)) + "/",
     }
 
     # Takes from GroovyDoc @input
@@ -269,8 +217,18 @@ def _build_subworkflow_entry(
         fields = groovydoc["doc_input_records"][0].get("fields", [])
         if fields:
             entry["takes"] = [f for f in fields if f != "meta"]
+            optional_input = groovydoc.get("doc_optional_input_fields", set())
+            if optional_input:
+                takes_opt = [f for f in entry["takes"] if f in optional_input]
+                if takes_opt:
+                    entry["takes_optional"] = takes_opt
     if groovydoc.get("doc_input_params"):
         entry["takes_params"] = groovydoc["doc_input_params"]
+        optional_params = groovydoc.get("doc_optional_input_params", set())
+        if optional_params:
+            params_opt = [p for p in entry["takes_params"] if p in optional_params]
+            if params_opt:
+                entry["takes_params_optional"] = params_opt
 
     # Emits from GroovyDoc @output -- structured as channel -> fields dict
     tags = groovydoc.get("tags", {})
@@ -306,14 +264,14 @@ def _build_workflow_entry(
 ) -> dict:
     """Build a catalog entry for a workflow."""
     groovydoc = parse_groovydoc_full(main_nf)
-    includes = _parse_includes(main_nf, bactopia_path)
+    includes = parse_includes(main_nf, bactopia_path)
 
     # Determine type
     is_tool = "bactopia-tools/" in str(main_nf)
-    wf_path = str(main_nf.parent.relative_to(
+    wf_path = str(main_nf.parent.relative_to(bactopia_path))
     # Add trailing slash for tool/named workflow directories, but not for the
     # root bactopia workflow which uses a Nextflow convention path
-    if is_tool or wf_path != "
+    if is_tool or wf_path != ".":
         wf_path += "/"
     entry = {
         "description": _extract_description(groovydoc),
@@ -379,7 +337,9 @@ def generate_catalog(bactopia_path: Path) -> dict:
         component_name = str(rel).replace("modules/", "")
         # Normalize key: slash to underscore (e.g., "abricate/run" -> "abricate_run")
         key = component_name.replace("/", "_")
-        catalog["modules"][key] = _build_module_entry(component_name, main_nf)
+        catalog["modules"][key] = _build_module_entry(
+            component_name, main_nf, bactopia_path
+        )
 
     # Subworkflows
     subworkflows_dir = bactopia_path / "subworkflows"

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/rules/module_rules.py

@@ -291,7 +291,7 @@ PASSTHROUGH_OUTPUT_FIELDS = {"r1", "r2", "se", "lr"}
 
 
 def rule_m017(component: str, ctx: dict) -> list[LintResult]:
-    """prefix = task.ext.prefix ?: "${
+    """prefix = task.ext.prefix ?: "${_meta.name}" present."""
     rid = "M017"
     if ctx["structure"]["has_prefix_definition"]:
         return [_pass(rid, component, "prefix definition present")]
@@ -299,7 +299,7 @@ def rule_m017(component: str, ctx: dict) -> list[LintResult]:
         _fail(
             rid,
             component,
-            'Missing: prefix = task.ext.prefix ?: "${
+            'Missing: prefix = task.ext.prefix ?: "${_meta.name}"',
         )
     ]
 
@@ -958,6 +958,90 @@ def rule_m032(component: str, ctx: dict) -> list[LintResult]:
     return [_fail(rid, component, f"@input record field mismatch: {'; '.join(msgs)}")]
 
 
+def rule_m033(component: str, ctx: dict) -> list[LintResult]:
+    """Optionality markers (?) match between GroovyDoc and code."""
+    rid = "M033"
+    doc = ctx["groovydoc"]
+    struct = ctx["structure"]
+    if not doc["has_doc"]:
+        return []  # M006 covers this
+
+    mismatches = []
+
+    # --- Input record field optionality ---
+    doc_records = doc.get("doc_input_records", [])
+    code_input_fields = struct.get("input_record_fields", [])
+    if doc_records and code_input_fields:
+        doc_optional = doc.get("doc_optional_input_fields", set())
+        code_optional = struct.get("code_optional_input_fields", set())
+        common = set(doc_records[0]["fields"]) & set(code_input_fields)
+        for field in sorted(common):
+            in_doc = field in doc_optional
+            in_code = field in code_optional
+            if in_doc and not in_code:
+                mismatches.append(
+                    f"input record field '{field}': doc has ? but code does not"
+                )
+            elif in_code and not in_doc:
+                mismatches.append(
+                    f"input record field '{field}': code has ? but doc does not"
+                )
+
+    # --- Input params optionality ---
+    doc_params = doc.get("doc_input_params", [])
+    code_params = struct.get("input_params", [])
+    if doc_params and code_params:
+        doc_opt_params = doc.get("doc_optional_input_params", set())
+        code_opt_params = struct.get("code_optional_input_params", set())
+        common = set(doc_params) & set(code_params)
+        for param in sorted(common):
+            in_doc = param in doc_opt_params
+            in_code = param in code_opt_params
+            if in_doc and not in_code:
+                mismatches.append(f"input param '{param}': doc has ? but code does not")
+            elif in_code and not in_doc:
+                mismatches.append(f"input param '{param}': code has ? but doc does not")
+
+    # --- Output record field optionality ---
+    doc_output_fields = doc.get("doc_output_fields", [])
+    code_output_fields = struct.get("output_record_fields", [])
+    if doc_output_fields and code_output_fields:
+        doc_opt_output = doc.get("doc_optional_output_fields", set())
+        code_opt_output = struct.get("code_optional_output_fields", set())
+        common = set(doc_output_fields) & set(code_output_fields)
+        common -= STANDARD_OUTPUT_FIELDS
+        for field in sorted(common):
+            in_doc = field in doc_opt_output
+            in_code = field in code_opt_output
+            if in_doc and not in_code:
+                mismatches.append(
+                    f"output field '{field}': doc has ? but code missing optional: true"
+                )
+            elif in_code and not in_doc:
+                mismatches.append(
+                    f"output field '{field}': code has optional: true but doc missing ?"
+                )
+
+    if mismatches:
+        return [_fail(rid, component, f"Optionality mismatch: {'; '.join(mismatches)}")]
+
+    # Only PASS if there was something to check
+    has_checks = (
+        (doc_records and code_input_fields)
+        or (doc_params and code_params)
+        or (doc_output_fields and code_output_fields)
+    )
+    if has_checks:
+        return [
+            _pass(
+                rid,
+                component,
+                "Optionality markers match between GroovyDoc and code",
+            )
+        ]
+    return []
+
+
 def rule_m034(component: str, ctx: dict) -> list[LintResult]:
     """@output does not describe standard fields (meta, results, logs, nf_logs, versions)."""
     rid = "M034"
@@ -1158,6 +1242,7 @@ MODULE_RULES = [
     # GroovyDoc accuracy
     rule_m031,
     rule_m032,
+    rule_m033,
     rule_m034,
     rule_m035,
     rule_m036,

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/rules/subworkflow_rules.py

@@ -285,6 +285,252 @@ def rule_s016(component: str, ctx: dict) -> list[LintResult]:
     ]
 
 
+def _parse_doc_component_list(tag_value: str) -> set[str]:
+    """Parse a @modules or @subworkflows tag value into a set of normalized names.
+
+    Handles comma-separated names with optional 'as alias' notation.
+    E.g., "prokka as prokka_module, csvtk_concat" -> {"prokka", "csvtk_concat"}
+    """
+    names = set()
+    if not tag_value:
+        return names
+    for entry in tag_value.split(","):
+        entry = entry.strip()
+        if not entry:
+            continue
+        # Handle "name as alias" notation -- extract base name
+        parts = entry.split()
+        if len(parts) >= 3 and parts[1] == "as":
+            names.add(parts[0])
+        else:
+            names.add(parts[0])
+    return names
+
+
+def rule_s017(component: str, ctx: dict) -> list[LintResult]:
+    """@modules match actual module includes."""
+    rid = "S017"
+    doc = ctx["groovydoc"]
+    if not doc["has_doc"]:
+        return []
+    includes = ctx.get("includes", {})
+    actual_modules = set(includes.get("modules", []))
+    doc_value = doc["tags"].get("modules", "")
+    doc_modules = _parse_doc_component_list(doc_value)
+    # Skip if neither GroovyDoc nor includes mention modules
+    if not actual_modules and not doc_modules:
+        return []
+    if doc_modules == actual_modules:
+        return [_pass(rid, component, "@modules match actual includes")]
+    missing = actual_modules - doc_modules
+    extra = doc_modules - actual_modules
+    parts = []
+    if missing:
+        parts.append(f"missing from @modules: {', '.join(sorted(missing))}")
+    if extra:
+        parts.append(f"extra in @modules: {', '.join(sorted(extra))}")
+    return [_fail(rid, component, f"@modules mismatch: {'; '.join(parts)}")]
+
+
+def rule_s018(component: str, ctx: dict) -> list[LintResult]:
+    """@subworkflows match actual subworkflow includes."""
+    rid = "S018"
+    doc = ctx["groovydoc"]
+    if not doc["has_doc"]:
+        return []
+    includes = ctx.get("includes", {})
+    actual_subs = set(includes.get("subworkflows", []))
+    doc_value = doc["tags"].get("subworkflows", "")
+    doc_subs = _parse_doc_component_list(doc_value)
+    # Skip if neither GroovyDoc nor includes mention subworkflows
+    if not actual_subs and not doc_subs:
+        return []
+    if doc_subs == actual_subs:
+        return [_pass(rid, component, "@subworkflows match actual includes")]
+    missing = actual_subs - doc_subs
+    extra = doc_subs - actual_subs
+    parts = []
+    if missing:
+        parts.append(f"missing from @subworkflows: {', '.join(sorted(missing))}")
+    if extra:
+        parts.append(f"extra in @subworkflows: {', '.join(sorted(extra))}")
+    return [_fail(rid, component, f"@subworkflows mismatch: {'; '.join(parts)}")]
+
+
+def rule_s019(component: str, ctx: dict) -> list[LintResult]:
+    """@citation keys exist in data/citations.yml."""
+    rid = "S019"
+    doc = ctx["groovydoc"]
+    if not doc["has_doc"]:
+        return []
+    citation_value = doc["tags"].get("citation", "")
+    if not citation_value:
+        return []  # S003 covers missing @citation
+    citation_keys = ctx.get("citation_keys", set())
+    if not citation_keys:
+        return []  # citations.yml not available -- skip check
+    keys = [k.strip() for k in citation_value.split(",")]
+    invalid = [k for k in keys if k and k not in citation_keys]
+    if invalid:
+        return [
+            _fail(
+                rid,
+                component,
+                f"@citation keys not in citations.yml: {', '.join(invalid)}",
+            )
+        ]
+    return [_pass(rid, component, "All @citation keys are valid")]
+
+
+def rule_s020(component: str, ctx: dict) -> list[LintResult]:
+    """@tags complexity value is valid."""
+    rid = "S020"
+    tags = ctx["groovydoc"]["tags"]
+    tags_value = tags.get("tags", "")
+    if not tags_value:
+        return []
+    parsed = _parse_tags_field(tags_value)
+    complexity = parsed.get("complexity", "")
+    if not complexity:
+        return []
+    valid = {"simple", "moderate", "complex"}
+    if complexity in valid:
+        return [_pass(rid, component, f"complexity:{complexity} is valid")]
+    return [
+        _warn(
+            rid,
+            component,
+            f"Invalid complexity value '{complexity}', expected one of: {', '.join(sorted(valid))}",
+        )
+    ]
+
+
+def rule_s021(component: str, ctx: dict) -> list[LintResult]:
+    """@tags input-type value is valid."""
+    rid = "S021"
+    tags = ctx["groovydoc"]["tags"]
+    tags_value = tags.get("tags", "")
+    if not tags_value:
+        return []
+    parsed = _parse_tags_field(tags_value)
+    input_type = parsed.get("input-type", "")
+    if not input_type:
+        return []
+    valid = {"none", "single", "multiple", "parameter"}
+    if input_type in valid:
+        return [_pass(rid, component, f"input-type:{input_type} is valid")]
+    return [
+        _warn(
+            rid,
+            component,
+            f"Invalid input-type value '{input_type}', expected one of: {', '.join(sorted(valid))}",
+        )
+    ]
+
+
+def rule_s022(component: str, ctx: dict) -> list[LintResult]:
+    """@tags output-type value is valid."""
+    rid = "S022"
+    tags = ctx["groovydoc"]["tags"]
+    tags_value = tags.get("tags", "")
+    if not tags_value:
+        return []
+    parsed = _parse_tags_field(tags_value)
+    output_type = parsed.get("output-type", "")
+    if not output_type:
+        return []
+    valid = {"single", "multiple"}
+    if output_type in valid:
+        return [_pass(rid, component, f"output-type:{output_type} is valid")]
+    return [
+        _warn(
+            rid,
+            component,
+            f"Invalid output-type value '{output_type}', expected one of: {', '.join(sorted(valid))}",
+        )
+    ]
+
+
+VALID_FEATURES = {
+    "aggregation",
+    "alternative-execution",
+    "archive-output",
+    "components",
+    "compression",
+    "conditional-input",
+    "conditional-logic",
+    "database-dependent",
+    "internet-access",
+    "no-test",
+    "resource-download",
+}
+
+
+def rule_s023(component: str, ctx: dict) -> list[LintResult]:
+    """@tags features values are valid."""
+    rid = "S023"
+    tags = ctx["groovydoc"]["tags"]
+    tags_value = tags.get("tags", "")
+    if not tags_value:
+        return []
+    parsed = _parse_tags_field(tags_value)
+    features = parsed.get("features", "")
+    if not features:
+        return []
+    feature_list = [f.strip() for f in features.split(",")]
+    invalid = [f for f in feature_list if f and f not in VALID_FEATURES]
+    if invalid:
+        return [
+            _fail(
+                rid,
+                component,
+                f"Invalid feature values: {', '.join(invalid)} "
+                f"(valid: {', '.join(sorted(VALID_FEATURES))})",
+            )
+        ]
+    return [_pass(rid, component, "All feature values are valid")]
+
+
+# Canonical tag order for subworkflows
+SUBWORKFLOW_TAG_ORDER = [
+    "status",
+    "keywords",
+    "tags",
+    "citation",
+    "modules",
+    "subworkflows",
+    "note",
+    "input",
+    "output",
+]
+
+
+def rule_s024(component: str, ctx: dict) -> list[LintResult]:
+    """GroovyDoc tag ordering."""
+    rid = "S024"
+    doc = ctx["groovydoc"]
+    if not doc["has_doc"]:
+        return []
+    actual_order = doc.get("doc_tag_order", [])
+    if not actual_order:
+        return []
+    known_order = [t for t in actual_order if t in SUBWORKFLOW_TAG_ORDER]
+    expected_positions = {t: i for i, t in enumerate(SUBWORKFLOW_TAG_ORDER)}
+    for i in range(len(known_order) - 1):
+        curr = known_order[i]
+        nxt = known_order[i + 1]
+        if expected_positions[curr] > expected_positions[nxt]:
+            return [
+                _warn(
+                    rid,
+                    component,
+                    f"Tag ordering incorrect: @{curr} appears before @{nxt} "
+                    f"(expected: {' -> '.join('@' + t for t in SUBWORKFLOW_TAG_ORDER if t in known_order)})",
+                )
+            ]
+    return [_pass(rid, component, "GroovyDoc tag ordering is correct")]
+
+
 SUBWORKFLOW_RULES = [
     rule_s001,
     rule_s002,
@@ -302,4 +548,12 @@ SUBWORKFLOW_RULES = [
     rule_s014,
     rule_s015,
     rule_s016,
+    rule_s017,
+    rule_s018,
+    rule_s019,
+    rule_s020,
+    rule_s021,
+    rule_s022,
+    rule_s023,
+    rule_s024,
 ]

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/lint/runner.py

@@ -12,6 +12,7 @@ from bactopia.nf import (
     check_file_whitespace,
     find_main_nf,
     parse_groovydoc_full,
+    parse_includes,
     parse_main_nf_structure,
     parse_module_config_full,
     parse_schema_json,
@@ -220,6 +221,9 @@ def run_lint(
         if "/utils/" in component_name:
             continue
         ctx = _build_simple_context(main_nf)
+        ctx["citation_keys"] = citation_keys
+        ctx["bactopia_path"] = bactopia_path
+        ctx["includes"] = parse_includes(main_nf, bactopia_path)
         ignored = _collect_ignores(main_nf.parent)
         results = _run_rules(component_name, ctx, SUBWORKFLOW_RULES, ignored)
         all_results.extend(results)

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/nf.py

@@ -281,6 +281,11 @@ def parse_workflows(bactopia_path, input_wf, include_merlin=False, build_all=Fal
             # Fallback: derive path from key
             module_path = f"modules/{module.replace('_', '/')}"
             config_path = f"{bactopia_path}/{module_path}/module.config"
+            if not Path(config_path).exists():
+                logging.warning(
+                    f"module.config not found at {config_path} for module '{module}', skipping"
+                )
+                continue
             logging.debug(f"Adding module {module}: {config_path}")
             final_workflows[wf][module] = config_path
 
@@ -494,11 +499,15 @@ def parse_groovydoc_full(main_nf: Path) -> dict:
         "raw_lines": [],
         "links": [],
         # Parsed GroovyDoc fields for lint rules M031-M037
-        "doc_output_fields": [],  # field names from @output record(...)
-        "doc_input_records": [],  # list of {fields: [...]} per @input record(...)
-        "doc_input_params": [],  # non-record @input names
-        "doc_output_described_fields": [],  # fields with description lines
+        "doc_output_fields": [],  # field names from @output record(...), ? stripped
+        "doc_input_records": [],  # list of {fields: [...]} per @input record(...), ? stripped
+        "doc_input_params": [],  # non-record @input names, ? stripped
+        "doc_output_described_fields": [],  # fields with description lines, ? stripped
        "doc_tag_order": [],  # ordered list of tag names as they appear
+        # Optionality tracking (base names of fields that had ? suffix in GroovyDoc)
+        "doc_optional_output_fields": set(),
+        "doc_optional_input_fields": set(),
+        "doc_optional_input_params": set(),
     }
     lines = _read_lines(main_nf)
     if not lines:
@@ -511,19 +520,30 @@ def parse_groovydoc_full(main_nf: Path) -> dict:
     result["raw_lines"] = doc_lines
 
     # Extract tags with their values
+    # Multi-value tags are stored as lists; single-value tags as strings.
+    # Continuation lines (lines with * but no @tag) are appended to the
+    # previous single-value tag (e.g., multi-line @modules or @subworkflows).
+    multi_value_tags = {"input", "output", "note", "publish", "section", "results"}
     tag_pattern = re.compile(r"\*\s*@(\w+)\s*(.*)")
+    continuation_pattern = re.compile(r"\*\s+([^@\s].+)")
+    last_single_tag = None
     for line in doc_lines:
         m = tag_pattern.search(line)
         if m:
             tag_name = m.group(1)
             tag_value = m.group(2).strip()
-
-            # store as a list
-            if tag_name in ("input", "output", "note", "publish", "section", "results"):
+            if tag_name in multi_value_tags:
                 result["tags"].setdefault(tag_name, [])
                 result["tags"][tag_name].append(tag_value)
+                last_single_tag = None
             else:
                 result["tags"][tag_name] = tag_value
+                last_single_tag = tag_name
+        elif last_single_tag:
+            # Continuation line for a single-value tag
+            cm = continuation_pattern.search(line)
+            if cm:
+                result["tags"][last_single_tag] += " " + cm.group(1).strip()
 
     # Extract URLs
     url_pattern = re.compile(r"https?://[^\s\)>]+")
@@ -546,31 +566,52 @@ def parse_groovydoc_full(main_nf: Path) -> dict:
             seen_tags.append(tag_name)
     result["doc_tag_order"] = seen_tags
 
-    # Parse @output record(...) fields
+    # Parse @output record(...) fields (strip ? suffix, track optionality)
     output_tags = result["tags"].get("output", [])
     for oval in output_tags:
         record_match = re.match(r"record\(([^)]+)\)", oval)
         if record_match:
-            fields = [
+            fields = []
+            for raw in record_match.group(1).split(","):
+                raw = raw.strip()
+                if raw.endswith("?"):
+                    base = raw[:-1]
+                    result["doc_optional_output_fields"].add(base)
+                    fields.append(base)
+                else:
+                    fields.append(raw)
             result["doc_output_fields"] = fields
 
-    # Parse @input blocks
+    # Parse @input blocks (strip ? suffix, track optionality)
    input_tags = result["tags"].get("input", [])
     for ival in input_tags:
         # Check for record(meta, ...) syntax
         record_match = re.match(r"record\(([^)]+)\)", ival)
         if record_match:
-            fields = [
+            fields = []
+            for raw in record_match.group(1).split(","):
+                raw = raw.strip()
+                if raw.endswith("?"):
+                    base = raw[:-1]
+                    result["doc_optional_input_fields"].add(base)
+                    fields.append(base)
+                else:
+                    fields.append(raw)
             result["doc_input_records"].append({"fields": fields})
         else:
-            # Non-record input (e.g., "db", "proteins")
+            # Non-record input (e.g., "db", "proteins", "proteins?")
             param_name = ival.split()[0] if ival.strip() else ""
             if param_name:
-
+                if param_name.endswith("?"):
+                    base = param_name[:-1]
+                    result["doc_optional_input_params"].add(base)
+                    result["doc_input_params"].append(base)
+                else:
+                    result["doc_input_params"].append(param_name)
 
     # Parse @output description lines to find which fields are described
-    # Pattern: * - `field`: description
-    desc_pattern = re.compile(r"\*\s*-\s*`(\w+)`\s*:")
+    # Pattern: * - `field`: description (field may have ? suffix)
+    desc_pattern = re.compile(r"\*\s*-\s*`(\w+\??)`\s*:")
     in_output_section = False
     for line in doc_lines:
         if re.search(r"\*\s*@output", line):
@@ -582,7 +623,8 @@ def parse_groovydoc_full(main_nf: Path) -> dict:
         if in_output_section:
             dm = desc_pattern.search(line)
             if dm:
-
+                field_name = dm.group(1).rstrip("?")
+                result["doc_output_described_fields"].append(field_name)
 
     return result
 
@@ -634,9 +676,13 @@ def parse_main_nf_structure(main_nf: Path) -> dict:
         "output_has_nf_logs": False,
         "output_versions_uses_files": False,
         "output_generic_using_file": [],
-        # Input parsing for M031/M032
+        # Input parsing for M031/M032/M033
         "input_record_fields": [],  # fields from (meta: Map, field: Type): Record
         "input_params": [],  # non-record input names (db, proteins, etc.)
+        # Optionality tracking for M033
+        "code_optional_input_fields": set(),  # input record fields with Type?
+        "code_optional_input_params": set(),  # non-record input params with Type?
+        "code_optional_output_fields": set(),  # output fields with optional: true
         # Workflow-specific fields (W011-W020)
         "first_line": "",
         "todos": [],  # list of {"line_num": int, "text": str}
@@ -719,6 +765,13 @@ def parse_main_nf_structure(main_nf: Path) -> dict:
                 prefix_text = record_text[line_start : fm.start()].strip()
                 if not prefix_text.startswith("//"):
                     result["output_record_fields"].append(field_name)
+                    # Check for optional: true on the same line (M033)
+                    line_end = record_text.find("\n", fm.end())
+                    if line_end == -1:
+                        line_end = len(record_text)
+                    rest_of_line = record_text[fm.end() : line_end]
+                    if re.search(r"optional\s*:\s*true", rest_of_line):
+                        result["code_optional_output_fields"].add(field_name)
 
     # --- Output record detail parsing (M023-M030) ---
 
@@ -827,8 +880,11 @@ def parse_main_nf_structure(main_nf: Path) -> dict:
     if record_input_match:
         for part in record_input_match.group(1).split(","):
             part = part.strip()
-
+            pieces = part.split(":")
+            name = pieces[0].strip()
             result["input_record_fields"].append(name)
+            if len(pieces) > 1 and pieces[1].strip().endswith("?"):
+                result["code_optional_input_fields"].add(name)
     # Match non-record inputs: name: Type (one per line, not inside parens)
     for line in input_text.split("\n"):
         stripped = line.strip()
@@ -837,10 +893,13 @@ def parse_main_nf_structure(main_nf: Path) -> dict:
            continue
         if stripped.startswith("("):
             continue
-        # Match "name: Type" or "name
-        param_match = re.match(r"(\w+)\s*:\s
+        # Match "name: Type" or "name: Type?" (optional)
+        param_match = re.match(r"(\w+)\s*:\s*(\w+\??)", stripped)
         if param_match:
-
+            param_name = param_match.group(1)
+            result["input_params"].append(param_name)
+            if param_match.group(2).endswith("?"):
+                result["code_optional_input_params"].add(param_name)
 
     # Check for versions.yml in script block
     result["has_versions_yml"] = "versions.yml" in full_text
@@ -860,10 +919,10 @@ def parse_main_nf_structure(main_nf: Path) -> dict:
         re.search(r"\btuple\b", full_text, re.IGNORECASE)
     )
 
-    # Check for prefix = task.ext.prefix ?: "${
+    # Check for prefix = task.ext.prefix ?: "${_meta.name}" (M017)
     result["has_prefix_definition"] = bool(
         re.search(
-            r'prefix\s*=\s*task\.ext\.prefix\s*\?:\s*"\$\{
+            r'prefix\s*=\s*task\.ext\.prefix\s*\?:\s*"\$\{_meta\.name\}"', full_text
         )
     )
 
@@ -1431,3 +1490,73 @@ def parse_workflow_config(config_path: Path) -> dict:
         result["ext_raw"] = ext_str_match.group(1)
         result["ext"] = None  # String format is invalid -- rule will flag this
     return result
+
+
+def parse_includes(main_nf: Path, bactopia_path: Path) -> dict:
+    """Parse include statements from a main.nf file.
+
+    Resolves source paths against the file's directory and the repo root
+    to derive normalized component keys (lowercase, underscore-separated).
+
+    Args:
+        main_nf: Path to a main.nf file.
+        bactopia_path: Root path of the Bactopia repo.
+
+    Returns:
+        A dict with:
+            modules: list of module keys (e.g., "abricate_run")
+            subworkflows: list of subworkflow keys (e.g., "bactopia_gather")
+            plugins: list of plugin function names
+    """
+    result: dict[str, list[str]] = {"modules": [], "subworkflows": [], "plugins": []}
+    if not main_nf.exists():
+        return result
+
+    try:
+        text = main_nf.read_text()
+    except OSError:
+        return result
+
+    seen_modules: set[str] = set()
+    seen_subworkflows: set[str] = set()
+
+    for m in re.finditer(
+        r"include\s*\{\s*(\w+)(?:\s+as\s+\w+)?\s*\}\s*from\s*['\"]([^'\"]+)['\"]",
+        text,
+    ):
+        source = m.group(2)
+
+        if "plugin/" in source:
+            result["plugins"].append(m.group(1))
+            continue
+
+        # Resolve the source path relative to the file's directory
+        # Nextflow source paths omit .nf extension; parent of resolved path
+        # is the component directory
+        resolved = (main_nf.parent / source).resolve()
+
+        try:
+            rel_str = str(resolved.relative_to(bactopia_path))
+        except ValueError:
+            continue
+
+        if rel_str.startswith("modules/"):
+            # e.g., "modules/abricate/run/main" -> "abricate_run"
+            component = rel_str.removeprefix("modules/")
+            if component.endswith("/main"):
+                component = component[:-5]
+            key = component.replace("/", "_")
+            if key not in seen_modules:
+                seen_modules.add(key)
+                result["modules"].append(key)
+        elif rel_str.startswith("subworkflows/"):
+            # e.g., "subworkflows/bactopia/gather/main" -> "bactopia_gather"
+            component = rel_str.removeprefix("subworkflows/")
+            if component.endswith("/main"):
+                component = component[:-5]
+            key = component.replace("/", "_")
+            if key not in seen_subworkflows:
+                seen_subworkflows.add(key)
+                result["subworkflows"].append(key)
+
+    return result

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/outputs.py

@@ -74,10 +74,13 @@ def parse_declared_outputs(meta_dir: Path) -> set[str]:
 
     for _channel, records in data.items():
         for rec in records:
-
-
-
-
+            if isinstance(rec, dict):
+                for field_name, field_val in rec.items():
+                    if field_name == "meta":
+                        continue
+                    _collect_paths(field_val, declared, declared_dirs)
+            else:
+                _collect_paths(rec, declared, declared_dirs)
 
     # Expand directory entries: all files under a declared directory are declared
     for dir_path in declared_dirs:
@@ -262,6 +265,10 @@ def scan_test_outputs(test_dir: Path) -> dict:
         if not meta_dir.exists() or not work_dir.exists():
             continue
 
+        # Skip if no output_*.json files (e.g., workflow-level tests don't produce them)
+        if not list(meta_dir.glob("output_*.json")):
+            continue
+
         # Map work dir hashes to process names
         trace_map = _parse_trace_csv(meta_dir)

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/templates/nextflow/nextflow.config.j2

@@ -69,31 +69,31 @@ includeConfig "{{ profiles }}"
 timeline {
     enabled = true
     overwrite = true
-    file = "${params.infodir}/${params.
+    file = "${params.infodir}/${params.wf}-timeline.html"
 }
 
 report {
     enabled = true
     overwrite = true
-    file = "${params.infodir}/${params.
+    file = "${params.infodir}/${params.wf}-report.html"
 }
 
 trace {
     enabled = true
     overwrite = true
-    file = "${params.infodir}/${params.
+    file = "${params.infodir}/${params.wf}-trace.txt"
     fields = 'task_id,hash,native_id,process,tag,name,status,exit,module,container,cpus,time,disk,memory,attempt,start,complete,duration,realtime,queue,%cpu,%mem,rss,vmem'
 }
 
 dag {
     enabled = true
     overwrite = true
-    file = "${params.infodir}/${params.
+    file = "${params.infodir}/${params.wf}-dag.svg"
 }
 
 // Plugins
 plugins {
-    id 'nf-bactopia@
+    id 'nf-bactopia@2.0.0'
 }
 
 bactopia {

{bactopia-2.0.1 → bactopia-2.0.2}/bactopia/utils.py

@@ -48,11 +48,13 @@ def execute(
         else:
             return command.returncode
     except subprocess.CalledProcessError as e:
-        logging.error(f'"{cmd}" return exit code {e.returncode}')
-        logging.error(e)
         if allow_fail:
+            logging.debug(f'"{cmd}" return exit code {e.returncode}')
+            logging.debug(e)
             return None
         else:
+            logging.error(f'"{cmd}" return exit code {e.returncode}')
+            logging.error(e)
             sys.exit(e.returncode)