proofctl 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
proofctl/__init__.py ADDED
File without changes
proofctl/baseline.py ADDED
@@ -0,0 +1,63 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from collections import Counter
5
+ from datetime import datetime, timezone
6
+ from pathlib import Path
7
+ from typing import TYPE_CHECKING
8
+
9
+ if TYPE_CHECKING:
10
+ from .models import Finding
11
+
12
# Name of the baseline file written at (and loaded from) the scan root.
_BASELINE_FILE = ".proofctl-baseline.json"
13
+
14
+
15
+ def _fingerprint(f: Finding) -> tuple[str, str, str]:
16
+ # Stable across line-number shifts: same bug at a new line still matches.
17
+ return (f.rule_id, f.file, f.message)
18
+
19
+
20
def save_baseline(findings: list[Finding], root: Path) -> Path:
    """Write the current findings to the baseline file under *root*.

    The baseline records (rule_id, file, message) per finding — line numbers
    are omitted so entries stay stable across line shifts.

    Returns the path of the written baseline file.
    """
    baseline_path = root / _BASELINE_FILE
    data = {
        "created": datetime.now(timezone.utc).isoformat(),
        "proofctl_version": "0.1.0",
        "count": len(findings),
        "findings": [
            {"rule_id": f.rule_id, "file": f.file, "message": f.message}
            for f in findings
        ],
    }
    # Explicit UTF-8 so output does not depend on the process locale;
    # ensure_ascii=False keeps non-ASCII paths/messages readable on disk.
    baseline_path.write_text(
        json.dumps(data, indent=2, ensure_ascii=False), encoding="utf-8"
    )
    return baseline_path
33
+
34
+
35
def load_baseline(root: Path) -> list[dict] | None:
    """Load baseline findings from *root*.

    Returns the list of baseline finding dicts, or None when the baseline
    file is absent, unreadable, or malformed (the caller then treats every
    finding as new).
    """
    baseline_path = root / _BASELINE_FILE
    if not baseline_path.exists():
        return None
    try:
        # OSError guards the exists()/read race and permission errors.
        data = json.loads(baseline_path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        return None
    # A valid-JSON file whose top level is not an object (e.g. a bare list)
    # previously crashed on .get(); treat it as corrupt instead.
    if not isinstance(data, dict):
        return None
    findings = data.get("findings", [])
    return findings if isinstance(findings, list) else None
44
+
45
+
46
def filter_new_findings(findings: list[Finding], baseline: list[dict]) -> list[Finding]:
    """Return only findings not accounted for in the baseline.

    Baseline entries form a multiset: N identical entries suppress exactly
    N matching occurrences in the new run, so repeated findings are handled
    correctly.
    """
    budget: Counter[tuple] = Counter(
        (entry["rule_id"], entry["file"], entry["message"]) for entry in baseline
    )
    used: Counter[tuple] = Counter()
    fresh: list[Finding] = []
    for finding in findings:
        # Same identity key as the saved baseline: stable across line shifts.
        key = (finding.rule_id, finding.file, finding.message)
        if used[key] < budget[key]:
            used[key] += 1
        else:
            fresh.append(finding)
    return fresh
File without changes
@@ -0,0 +1,61 @@
1
+ from __future__ import annotations
2
+
3
+ import ast
4
+ from abc import ABC, abstractmethod
5
+ from pathlib import Path
6
+
7
+ from ..models import Finding
8
+
9
+
10
class FileChecker(ABC):
    """Per-file Python checker.

    Invoked once for every Python file with its path, raw source text, and
    parsed AST (``None`` when the file failed to parse with a SyntaxError).
    """

    @abstractmethod
    def check(self, path: Path, source: str, tree: ast.Module | None) -> list[Finding]:
        """Analyse one Python file and return any findings."""
16
+
17
+
18
class DirectoryChecker(ABC):
    """Per-scan-root checker used for cross-file Python analysis."""

    @abstractmethod
    def check(self, root: Path, py_files: list[Path]) -> list[Finding]:
        """Analyse the whole scan root and return any findings."""
24
+
25
+
26
class HclFileChecker(ABC):
    """Per-file HCL checker (.tf, .tfvars, .hcl).

    Subclasses declare which extensions they handle via the ``extensions``
    class variable. An empty tuple means "run on all HCL files".
    """

    # File suffixes this checker applies to; () = every HCL file.
    extensions: tuple[str, ...] = ()

    @abstractmethod
    def check(self, path: Path, source: str) -> list[Finding]:
        """Analyse one HCL file and return any findings."""
38
+
39
+
40
class HclDirChecker(ABC):
    """Per-scan-root checker for cross-file HCL analysis."""

    @abstractmethod
    def check(self, root: Path, hcl_files: list[Path]) -> list[Finding]:
        """Analyse all HCL files under the scan root and return any findings."""
46
+
47
+
48
class DockerfileChecker(ABC):
    """Checker for Dockerfile* files (plain text — no AST is provided)."""

    @abstractmethod
    def check(self, path: Path, source: str) -> list[Finding]:
        """Analyse one Dockerfile and return any findings."""
54
+
55
+
56
class YamlFileChecker(ABC):
    """Checker for .yml / .yaml files (parsed via PyYAML)."""

    @abstractmethod
    def check(self, path: Path, source: str) -> list[Finding]:
        """Analyse one YAML file and return any findings."""
@@ -0,0 +1,452 @@
1
+ """Dockerfile static analysis — PROOFCTL-DF-* rules."""
2
+ from __future__ import annotations
3
+
4
+ import re
5
+ from pathlib import Path
6
+
7
+ from ..models import Finding, Severity
8
+ from .base import DockerfileChecker
9
+
10
# ── helpers ───────────────────────────────────────────────────────────────────

# DF-001: an image reference pinned by content digest (immutable).
_SHA_DIGEST_RE = re.compile(r"@sha256:[a-f0-9]{64}")
# NOTE(review): _HAS_TAG_RE is not referenced anywhere in this module —
# DF-001 tests for a tag with a plain ":" check instead. Confirm and remove.
_HAS_TAG_RE = re.compile(r":[a-zA-Z0-9][\w.\-]*$")

# DF-007: curl/wget piped to a shell
_CURL_PIPE_RE = re.compile(r"\b(curl|wget)\b.+\|\s*(bash|sh|zsh|python[23]?|perl|ruby)")

# DF-009: dependency install patterns in RUN args
_DEP_INSTALL_RE = re.compile(
    r"\b(pip3?\s+install|npm\s+install|yarn\s+install|poetry\s+install|pipenv\s+install)\b"
)

# DF-009: broad COPY source tokens (whole build context copied at once)
_BROAD_COPY_SOURCES = {".", "./"}

# DF-010: OCI label prefix
_OCI_LABEL_PREFIX = "org.opencontainers.image."

# DF-005: ENV/ARG variable names that suggest a secret is being baked into
# the image (matched case-insensitively against the full variable name).
_SECRET_VAR_RE = re.compile(
    r"^(password|passwd|secret[_-]?key|secret|api[_-]?key|auth[_-]?token|"
    r"access[_-]?key|private[_-]?key|credentials?|client[_-]?secret|"
    r"db[_-]?pass(?:word)?|database[_-]?pass(?:word)?|jwt[_-]?secret|"
    r"encryption[_-]?key|bearer[_-]?token|session[_-]?secret)$",
    re.IGNORECASE,
)

# Archive extensions that make ADD legitimate (auto-extract)
_ARCHIVE_RE = re.compile(r"\.(tar(\.(gz|bz2|xz|lz4|zst))?|tgz|tbz2)$", re.IGNORECASE)
# URLs that make ADD legitimate (remote fetch)
_URL_RE = re.compile(r"^https?://")
41
+
42
+
43
+ def _parse_dockerfile(source: str) -> list[tuple[int, str, str]]:
44
+ """Return [(1-based lineno, INSTRUCTION, arguments), ...]."""
45
+ instructions: list[tuple[int, str, str]] = []
46
+ lines = source.splitlines()
47
+ i = 0
48
+ while i < len(lines):
49
+ raw = lines[i]
50
+ stripped = raw.strip()
51
+ if not stripped or stripped.startswith("#"):
52
+ i += 1
53
+ continue
54
+ lineno = i + 1
55
+ # Collect continuation lines
56
+ joined = stripped
57
+ while joined.endswith("\\") and i + 1 < len(lines):
58
+ joined = joined[:-1].rstrip()
59
+ i += 1
60
+ cont = lines[i].strip()
61
+ if cont.startswith("#"):
62
+ i += 1
63
+ continue
64
+ joined += " " + cont
65
+ parts = joined.split(None, 1)
66
+ if parts:
67
+ instr = parts[0].upper()
68
+ args = parts[1] if len(parts) > 1 else ""
69
+ instructions.append((lineno, instr, args))
70
+ i += 1
71
+ return instructions
72
+
73
+
74
+ def _from_image_ref(args: str) -> tuple[str, str | None]:
75
+ """Parse 'FROM [--platform=...] <image> [AS <name>]' → (image_ref, stage_name)."""
76
+ tokens = args.split()
77
+ # Strip --platform= flag
78
+ tokens = [t for t in tokens if not t.startswith("--")]
79
+ if not tokens:
80
+ return ("", None)
81
+ image = tokens[0]
82
+ stage = tokens[2] if len(tokens) >= 3 and tokens[1].upper() == "AS" else None
83
+ return image, stage
84
+
85
+
86
+ # ── rule implementations ──────────────────────────────────────────────────────
87
+
88
def _df001(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-001 — Unpinned base image.

    Severity scales with mutability: no tag (implicit :latest) and explicit
    :latest are errors; tag-only pinning is a warning; digest pinning is
    clean. Stage aliases and scratch/busybox bases are exempt.
    """
    # First pass: collect multi-stage aliases so `FROM <alias>` is skipped.
    aliases: set[str] = set()
    for _, instr, args in instructions:
        if instr == "FROM":
            _, stage = _from_image_ref(args)
            if stage:
                aliases.add(stage.lower())

    results: list[Finding] = []
    for lineno, instr, args in instructions:
        if instr != "FROM":
            continue
        image, _ = _from_image_ref(args)
        if not image:
            continue

        # Special base images / references to earlier build stages.
        base = re.split(r"[@:]", image)[0].lower()
        if base in ("scratch", "busybox") or base in aliases:
            continue

        # Digest-pinned references are immutable — nothing to report.
        if _SHA_DIGEST_RE.search(image):
            continue

        # Strip any digest portion before inspecting the tag.
        tag_part = image.split("@")[0]

        if ":" not in tag_part:
            severity = Severity.ERROR
            message = f"Base image '{image}' has no tag — resolves to :latest implicitly"
            hint = "Pin to a specific version and digest: FROM python:3.12@sha256:<digest>"
        elif tag_part.endswith(":latest"):
            severity = Severity.ERROR
            message = f"Base image '{image}' uses the mutable ':latest' tag"
            hint = "Replace :latest with a pinned version and digest."
        else:
            severity = Severity.WARNING
            message = f"Base image '{image}' is pinned to a tag but not a digest (still mutable)"
            hint = "Add digest pinning: FROM python:3.12@sha256:<digest>"

        results.append(Finding(
            file=str(path),
            line=lineno,
            col=0,
            rule_id="PROOFCTL-DF-001",
            rule_name="Unpinned base image",
            severity=severity,
            message=message,
            hint=hint,
            authority="SLSA Supply Chain – Base image provenance",
        ))
    return results
143
+
144
+
145
def _df002(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-002 — Running as root.

    One finding when there is no USER instruction at all; otherwise one per
    USER instruction that explicitly selects root (by name or uid 0).
    """
    users = [(ln, args) for ln, instr, args in instructions if instr == "USER"]
    if not users:
        return [Finding(
            file=str(path),
            line=None,
            col=None,
            rule_id="PROOFCTL-DF-002",
            rule_name="Running as root",
            severity=Severity.WARNING,
            message="No USER instruction — container runs as root by default",
            hint="Add 'USER nonroot' (or a named user) before the final CMD/ENTRYPOINT.",
            authority="CIS Docker Benchmark 4.1 – Do not run containers as root",
        )]

    results: list[Finding] = []
    for ln, args in users:
        tokens = args.split()
        user = tokens[0] if tokens else ""
        if user not in ("root", "0"):
            continue
        results.append(Finding(
            file=str(path),
            line=ln,
            col=0,
            rule_id="PROOFCTL-DF-002",
            rule_name="Running as root",
            severity=Severity.WARNING,
            message=f"USER instruction sets user to '{user}' (root)",
            hint="Use a non-root user: USER appuser",
            authority="CIS Docker Benchmark 4.1 – Do not run containers as root",
        ))
    return results
177
+
178
+
179
def _df003(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-003 — ADD used for local files (use COPY instead).

    Remote URLs and local archives are the two legitimate ADD uses and are
    not flagged.
    """
    results: list[Finding] = []
    for ln, instr, args in instructions:
        if instr != "ADD":
            continue
        tokens = args.split()
        if not tokens:
            continue
        # ADD may take several sources; the first token is the src we judge.
        src = tokens[0]
        if _URL_RE.match(src) or _ARCHIVE_RE.search(src):
            continue
        results.append(Finding(
            file=str(path),
            line=ln,
            col=0,
            rule_id="PROOFCTL-DF-003",
            rule_name="ADD used for local files",
            severity=Severity.WARNING,
            message=f"ADD '{src}' copies a local path — use COPY for predictable behaviour",
            hint="Replace ADD with COPY unless you need URL fetch or archive auto-extraction.",
            authority="Docker Best Practices – Prefer COPY over ADD",
        ))
    return results
204
+
205
+
206
def _df004(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-004 — apt-get install without --no-install-recommends."""
    results: list[Finding] = []
    for ln, instr, args in instructions:
        installs = "apt-get install" in args or "apt install" in args
        if instr != "RUN" or not installs:
            continue
        if "--no-install-recommends" in args:
            continue
        results.append(Finding(
            file=str(path),
            line=ln,
            col=0,
            rule_id="PROOFCTL-DF-004",
            rule_name="apt-get install without --no-install-recommends",
            severity=Severity.INFO,
            message="apt-get install missing --no-install-recommends — installs unnecessary packages",
            hint="Add --no-install-recommends to keep image layers lean.",
            authority="Docker Best Practices – Minimise image layers",
        ))
    return results
228
+
229
+
230
def _df005(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-005 — Secret variable name in ENV or ARG.

    ENV is an error (the value is baked into image metadata); ARG is a
    warning (still visible in build history).
    """
    results: list[Finding] = []
    for ln, instr, args in instructions:
        if instr not in ("ENV", "ARG"):
            continue
        text = args.strip()
        if not text:
            continue
        # The variable name is the first token, terminated by '=' or space
        # (covers ENV KEY=VAL, legacy ENV KEY VAL, ARG KEY[=DEFAULT]).
        key = re.split(r"[=\s]", text)[0]
        if not key or not _SECRET_VAR_RE.match(key):
            continue
        results.append(Finding(
            file=str(path),
            line=ln,
            col=0,
            rule_id="PROOFCTL-DF-005",
            rule_name="Secret in Dockerfile instruction",
            severity=Severity.ERROR if instr == "ENV" else Severity.WARNING,
            message=f"{instr} exposes a secret-named variable '{key}' — visible in image metadata",
            hint=(
                "Use Docker BuildKit secrets (--secret) or pass via runtime environment, "
                "never bake into the image layer."
            ),
            authority="OWASP Docker Security – Never store secrets in images",
        ))
    return results
258
+
259
+
260
def _df006(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-006 — Missing HEALTHCHECK instruction.

    Only runnable images (those with CMD/ENTRYPOINT) are checked, and
    scratch-based images are exempt.
    """
    runnable = any(instr in ("CMD", "ENTRYPOINT") for _, instr, _ in instructions)
    has_check = any(instr == "HEALTHCHECK" for _, instr, _ in instructions)
    scratch_based = any(
        instr == "FROM" and _from_image_ref(args)[0].lower() == "scratch"
        for _, instr, args in instructions
    )
    if not runnable or has_check or scratch_based:
        return []
    return [Finding(
        file=str(path),
        line=None,
        col=None,
        rule_id="PROOFCTL-DF-006",
        rule_name="Missing HEALTHCHECK",
        severity=Severity.INFO,
        message="Dockerfile has no HEALTHCHECK — orchestrators cannot detect unhealthy containers",
        hint="Add HEALTHCHECK --interval=30s --timeout=3s CMD curl -f http://localhost/health || exit 1",
        authority="Docker Best Practices – Container health checks",
    )]
282
+
283
+
284
def _df007(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-007 — curl|sh / wget|sh (remote script execution in RUN)."""
    return [
        Finding(
            file=str(path),
            line=ln,
            col=0,
            rule_id="PROOFCTL-DF-007",
            rule_name="Remote script execution via pipe",
            severity=Severity.ERROR,
            message="RUN instruction pipes a remote download directly to a shell — arbitrary code execution risk",
            hint=(
                "Download to a file, verify its checksum (sha256sum), then execute: "
                "RUN curl -fsSL https://... -o /tmp/script && sha256sum -c /tmp/script.sha256 && sh /tmp/script"
            ),
            authority="OWASP CICD-SEC-3 – Dependency Chain Abuse / CIS Docker Benchmark 4.6",
        )
        for ln, instr, args in instructions
        if instr == "RUN" and _CURL_PIPE_RE.search(args)
    ]
306
+
307
+
308
def _df008(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-008 — ADD <url> without checksum verification.

    Accepted as verified: BuildKit's inline --checksum=sha256:..., or a
    sha256sum / shasum / gpg verification RUN within the next three
    instructions.
    """
    def _verifies(run_args: str) -> bool:
        # Any of the common integrity-check commands counts.
        return (
            "sha256sum" in run_args
            or "shasum -a 256" in run_args
            or "gpg --verify" in run_args
        )

    results: list[Finding] = []
    for pos, (ln, instr, args) in enumerate(instructions):
        if instr != "ADD":
            continue
        tokens = args.split()
        if not tokens:
            continue
        url = tokens[0]
        if not _URL_RE.match(url):
            continue

        # BuildKit inline checksum syntax is sufficient on its own.
        if "--checksum=sha256:" in args:
            continue

        # Look ahead up to three instructions for a verification RUN.
        lookahead = instructions[pos + 1: pos + 4]
        if any(ni == "RUN" and _verifies(na) for _, ni, na in lookahead):
            continue

        results.append(Finding(
            file=str(path),
            line=ln,
            col=0,
            rule_id="PROOFCTL-DF-008",
            rule_name="ADD URL without checksum verification",
            severity=Severity.ERROR,
            message=f"ADD fetches '{url}' without checksum verification — supply chain integrity risk",
            hint="Use ADD with --checksum=sha256:<hash>, or RUN curl ... && sha256sum -c to verify before using.",
            authority="OWASP CICD-SEC-6 – Insufficient Artifact Integrity Validation",
        ))
    return results
350
+
351
+
352
def _df009(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-009 — COPY . . before dependency installation.

    Flags the first broad COPY (source '.' or './') when a dependency-install
    RUN appears after it: every source change then invalidates the install
    layer's build cache. (The unused local `_broad_dests` set from the
    previous revision — dead code, the destination is never inspected — has
    been removed.)
    """
    findings: list[Finding] = []

    # Find the first broad COPY (source is the whole build context).
    copy_lineno: int | None = None
    for lineno, instr, args in instructions:
        if instr != "COPY":
            continue
        tokens = args.split()
        if tokens and tokens[0] in _BROAD_COPY_SOURCES:
            copy_lineno = lineno
            break

    if copy_lineno is None:
        return findings

    # Find the first dependency-install RUN; flag only if the COPY precedes it.
    for lineno, instr, args in instructions:
        if instr != "RUN" or not _DEP_INSTALL_RE.search(args):
            continue
        if copy_lineno < lineno:
            findings.append(Finding(
                file=str(path),
                line=copy_lineno,
                col=0,
                rule_id="PROOFCTL-DF-009",
                rule_name="COPY . before dependency installation",
                severity=Severity.INFO,
                message="COPY . precedes dependency installation — every source change invalidates the dependency layer cache",
                hint=(
                    "Copy only dependency files first (requirements.txt / package.json), "
                    "run the install, then COPY . to maximise layer cache hits."
                ),
                authority="Docker Best Practices – Leverage build cache",
            ))
        break

    return findings
401
+
402
+
403
def _df010(instructions: list[tuple[int, str, str]], path: Path) -> list[Finding]:
    """PROOFCTL-DF-010 — Missing OCI image labels.

    Only runnable images (with CMD/ENTRYPOINT) are expected to carry
    org.opencontainers.image.* labels.
    """
    if not any(instr in ("CMD", "ENTRYPOINT") for _, instr, _ in instructions):
        return []
    labelled = any(
        instr == "LABEL" and _OCI_LABEL_PREFIX in args
        for _, instr, args in instructions
    )
    if labelled:
        return []
    return [Finding(
        file=str(path),
        line=None,
        col=None,
        rule_id="PROOFCTL-DF-010",
        rule_name="Missing OCI image labels",
        severity=Severity.INFO,
        message="Dockerfile has no OCI image labels — SBOM generation and provenance tracking are impaired",
        hint=(
            "Add LABEL org.opencontainers.image.source=https://github.com/org/repo "
            "org.opencontainers.image.version=1.0.0"
        ),
        authority="OCI Image Spec – Annotations / OWASP CICD-SEC-9",
    )]
430
+
431
+
432
+ # ── main checker ──────────────────────────────────────────────────────────────
433
+
434
class _DockerfileCheckerImpl(DockerfileChecker):
    """Aggregates every PROOFCTL-DF-* rule over one parsed Dockerfile."""

    # Rules run in ID order; findings are returned in that same order.
    _RULES = (
        _df001, _df002, _df003, _df004, _df005,
        _df006, _df007, _df008, _df009, _df010,
    )

    def check(self, path: Path, source: str) -> list[Finding]:
        parsed = _parse_dockerfile(source)
        results: list[Finding] = []
        for rule in self._RULES:
            results.extend(rule(parsed, path))
        return results
449
+
450
+
451
# Class handle used by the engine. NOTE(review): despite the old "singleton"
# comment, this binds the class itself, not an instance — the engine is
# expected to instantiate it.
DockerfileRulesChecker = _DockerfileCheckerImpl