npm - @intentsolutionsio/penetration-tester - Versions diffs - 2.0.0 → 3.0.4 - Mend

@intentsolutionsio/penetration-tester 2.0.0 → 3.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112) hide show

package/skills/defining-pentest-scope/scripts/define_scope.py ADDED Viewed

@@ -0,0 +1,472 @@
+#!/usr/bin/env python3
+"""defining-pentest-scope — parse + validate + normalize a scope definition.
+Reads a ROE YAML, parses every in-scope and out-of-scope target into a
+structured form (host / cidr / url / cloud-account / saas-tenant), validates
+syntax, detects overlap between in-scope and out-of-scope ranges, flags
+reserved or known third-party SaaS ranges, and emits Findings via
+lib/finding.py. Optionally writes a flat IP allowlist file ready for nmap /
+WAF / Burp consumption.
+Usage:
+    python3 define_scope.py [--roe FILE] [--extension FILE]
+                             [--emit-allowlist FILE] [--emit-targets FILE]
+                             [--output FILE] [--format json|jsonl|markdown]
+                             [--min-severity sev]
+"""
+from __future__ import annotations
+import argparse
+import ipaddress
+import json
+import re
+import sys
+from collections import Counter
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlparse
+# --- lib/ import -------------------------------------------------------------
+_LIB_ROOT = Path(__file__).resolve().parents[3]
+sys.path.insert(0, str(_LIB_ROOT))
+from lib.finding import Finding, Severity  # noqa: E402
+from lib import report  # noqa: E402
+try:
+    import yaml  # type: ignore[import-not-found]
+    _HAS_PYYAML = True
+except ImportError:
+    yaml = None
+    _HAS_PYYAML = False
+# Identifier stamped onto every Finding this script builds (see `_f`).
+SKILL_ID = "defining-pentest-scope"
+# Category label for this skill; not referenced in the visible part of this script.
+CATEGORY = "engagement-scope"
+# Known third-party SaaS / cloud edge ranges (illustrative, NOT exhaustive).
+# These are not authoritative; real engagements should consult published
+# IP ranges (e.g. AWS ip-ranges.json, Cloudflare cidr lists).
+# Maps a vendor label to CIDR strings; `detect_saas` returns the first vendor
+# whose range overlaps the entry, so dict order decides ties.
+KNOWN_SAAS_RANGES = {
+    "AWS": [
+        "3.0.0.0/9",
+        "13.32.0.0/15",
+        "52.0.0.0/8",
+    ],
+    "Cloudflare": [
+        "104.16.0.0/13",
+        "172.64.0.0/13",
+        "131.0.72.0/22",
+    ],
+    "GitHub": [
+        "140.82.112.0/20",
+        "143.55.64.0/20",
+    ],
+    "GCP": [
+        "34.0.0.0/8",
+        "35.184.0.0/13",
+    ],
+    "Azure": [
+        "13.64.0.0/11",
+        "20.0.0.0/8",
+    ],
+}
+# Special-purpose ranges checked by `detect_reserved`, which returns the first
+# overlapping entry in list order.
+RESERVED_RANGES = [
+    "10.0.0.0/8",  # RFC 1918 private
+    "172.16.0.0/12",  # RFC 1918 private
+    "192.168.0.0/16",  # RFC 1918 private
+    "169.254.0.0/16",  # IPv4 link-local
+    "127.0.0.0/8",  # IPv4 loopback
+    "224.0.0.0/4",  # IPv4 multicast
+    "::1/128",  # IPv6 loopback
+    "fe80::/10",  # IPv6 link-local
+    "fc00::/7",  # IPv6 unique local
+]
+# --- YAML loading ------------------------------------------------------------
+def _load_yaml(path: Path) -> dict[str, Any]:
+    """Load the YAML mapping stored at *path*.
+    Uses PyYAML's ``safe_load`` when available, otherwise the bundled
+    minimal parser. Returns an empty dict when the file does not exist, is
+    empty, or its top-level node is not a mapping (for example a bare list
+    or scalar), so callers can always use ``.get`` on the result.
+    """
+    # EAFP: read directly instead of exists()-then-read, which can race.
+    try:
+        text = path.read_text(encoding="utf-8")
+    except FileNotFoundError:
+        return {}
+    if _HAS_PYYAML:
+        data = yaml.safe_load(text)
+    else:
+        # Minimal fallback — relies on the simple structure of ROE files
+        data = _minimal_yaml_load(text)
+    # safe_load may legally return a list, scalar, or None; honour the dict contract.
+    return data if isinstance(data, dict) else {}
+def _minimal_yaml_load(text: str) -> dict[str, Any]:
+    """Parse the small YAML subset used by ROE files when PyYAML is missing.
+    Supported: nested block mappings, block sequences of scalars or of
+    ``key: value`` mappings (with continuation keys on following lines),
+    flow lists of scalars (``[a, b]``), quoted scalars, and ``#`` comments.
+    A colon only separates a key from its value when followed by a space or
+    the end of the line, so ``https://host``, ``aws:1234`` and ``fe80::1``
+    stay intact as scalars. A key with no value and no children is stored
+    as ``{}``. Lines outside this subset are skipped rather than raising.
+    All scalars are returned as strings.
+    """
+    def uncomment(s: str) -> str:
+        # Cut a "#" comment that starts the line or follows whitespace, outside quotes.
+        quote = ""
+        for i, ch in enumerate(s):
+            if quote:
+                if ch == quote:
+                    quote = ""
+            elif ch in "'\"":
+                quote = ch
+            elif ch == "#" and (i == 0 or s[i - 1] in " \t"):
+                return s[:i].rstrip()
+        return s
+    def scalar(token: str) -> Any:
+        # Unquote a value, or expand a flow list of scalars.
+        token = token.strip()
+        if len(token) >= 2 and token[0] in "'\"" and token[-1] == token[0]:
+            return token[1:-1]
+        if token.startswith("[") and token.endswith("]"):
+            return [scalar(part) for part in token[1:-1].split(",") if part.strip()]
+        return token
+    def split_pair(token: str) -> tuple[str, str] | None:
+        # Return (key, raw value) when token is "key: value" / "key:", else None.
+        if token[:1] in ("'", '"'):
+            close = token.find(token[0], 1)
+            if close == -1:
+                return None
+            tail = token[close + 1:].lstrip()
+            if tail == ":" or tail.startswith(": "):
+                return token[1:close], tail[1:]
+            return None
+        key, sep, rest = token.partition(": ")
+        if sep:
+            return key.strip(), rest
+        if token.endswith(":"):
+            return token[:-1].strip(), ""
+        return None
+    root: dict[str, Any] = {}
+    # Each frame is (indent of the lines that populate the container, container).
+    stack: list[tuple[int, Any]] = [(-1, root)]
+    # Key seen without an inline value: (owning dict, key, indent of that key).
+    pending: tuple[dict[str, Any], str, int] | None = None
+    for raw in text.splitlines():
+        line = uncomment(raw.strip())
+        if not line:
+            continue
+        indent = len(raw) - len(raw.lstrip())
+        is_item = line.startswith("- ")
+        if pending is not None:
+            owner, owner_key, key_indent = pending
+            pending = None
+            if indent > key_indent or (is_item and indent == key_indent):
+                # The first child line decides whether the key holds a list or a mapping.
+                child: Any = [] if is_item else {}
+                owner[owner_key] = child
+                stack.append((indent, child))
+        while len(stack) > 1 and stack[-1][0] > indent:
+            stack.pop()
+        if len(stack) > 1 and isinstance(stack[-1][1], list) and not is_item:
+            if indent > stack[-1][0]:
+                # Stray continuation of a scalar list item; not part of the subset.
+                continue
+            # A mapping line at the list's own indent ends a non-indented list.
+            stack.pop()
+        container = stack[-1][1]
+        if is_item:
+            if not isinstance(container, list):
+                continue
+            item = line[2:].strip()
+            pair = split_pair(item)
+            if pair is None:
+                container.append(scalar(item))
+                continue
+            key, rest = pair
+            entry: dict[str, Any] = {}
+            container.append(entry)
+            # Continuation keys of this item line up with the text after "- ".
+            indent += len(line) - len(item)
+            stack.append((indent, entry))
+            container = entry
+        else:
+            pair = split_pair(line)
+            if pair is None or not isinstance(container, dict):
+                continue
+            key, rest = pair
+        value = scalar(rest)
+        if value == "":
+            container[key] = {}
+            pending = (container, key, indent)
+        else:
+            container[key] = value
+    return root
+# --- Target classification --------------------------------------------------
+_HOST_LABEL_RE = re.compile(r"[A-Za-z0-9](?:[A-Za-z0-9-]{0,61}[A-Za-z0-9])?")
+def _is_valid_hostname(name: str) -> bool:
+    """Return True if *name* is a syntactically valid DNS hostname.
+    Every dot-separated label must be 1-63 letters, digits, or hyphens and
+    must not start or end with a hyphen; the whole name is capped at 253
+    characters. An all-numeric final label is rejected so that a mistyped
+    IPv4 address such as ``10.0.0.256`` is not accepted as a hostname.
+    """
+    if not name or len(name) > 253:
+        return False
+    labels = name.split(".")
+    if labels[-1].isdigit():
+        return False
+    return all(_HOST_LABEL_RE.fullmatch(label) for label in labels)
+def classify_target(entry: Any) -> tuple[str, str, str | None]:
+    """Return (raw, type, normalized) for a scope entry.
+    type ∈ {"hostname","wildcard","ipv4","ipv6","cidrv4","cidrv6","url","cloud","saas","malformed"}
+    Mapping entries are resolved through their ``host`` / ``cidr`` / ``url``
+    / ``cloud_account`` / ``saas_tenant`` key, checked in that order.
+    ``normalized`` is None exactly when the type is "malformed". CIDRs are
+    normalized to their network address (host bits cleared); hostnames and
+    wildcards are lower-cased.
+    """
+    if isinstance(entry, dict):
+        for key in ("host", "cidr", "url"):
+            if key in entry:
+                return classify_target(entry[key])
+        for key, kind in (("cloud_account", "cloud"), ("saas_tenant", "saas")):
+            if key in entry:
+                value = entry[key]
+                # YAML parses numeric account IDs as ints; callers expect strings.
+                text = "" if value is None else str(value).strip()
+                if not text:
+                    return str(entry), "malformed", None
+                return text, kind, text
+        return str(entry), "malformed", None
+    s = str(entry).strip()
+    if not s:
+        return s, "malformed", None
+    # URL (schemes are case-insensitive)
+    if s.lower().startswith(("http://", "https://")):
+        try:
+            has_host = bool(urlparse(s).netloc)
+        except ValueError:
+            has_host = False
+        if has_host:
+            return s, "url", s
+        return s, "malformed", None
+    # Cloud account / SaaS tenant prefixes
+    if re.match(r"^(aws|gcp|azure):", s, flags=re.I):
+        return s, "cloud", s
+    if re.match(r"^(okta|auth0|saml|google-workspace|microsoft365):", s, flags=re.I):
+        return s, "saas", s
+    # CIDR / IP
+    if "/" in s:
+        try:
+            net = ipaddress.ip_network(s, strict=False)
+        except ValueError:
+            return s, "malformed", None
+        return s, "cidrv6" if net.version == 6 else "cidrv4", str(net)
+    try:
+        addr = ipaddress.ip_address(s)
+    except ValueError:
+        pass
+    else:
+        return s, "ipv6" if addr.version == 6 else "ipv4", str(addr)
+    # Wildcard: "*." followed by a valid hostname
+    if s.startswith("*."):
+        if _is_valid_hostname(s[2:]):
+            return s, "wildcard", s.lower()
+        return s, "malformed", None
+    # Hostname
+    if _is_valid_hostname(s):
+        return s, "hostname", s.lower()
+    return s, "malformed", None
+# --- Overlap / SaaS / Reserved detection ------------------------------------
+def _to_network(entry: str) -> ipaddress.IPv4Network | ipaddress.IPv6Network | None:
+    """Parse *entry* (bare address or CIDR) into a network object, or None.
+    ``ipaddress.ip_network`` already treats a bare address as a single-host
+    network (/32 or /128), so no retry with an explicit prefix is needed.
+    ``strict=False`` lets a CIDR with host bits set (``10.0.0.5/24``)
+    resolve to its enclosing network instead of failing.
+    """
+    try:
+        return ipaddress.ip_network(entry, strict=False)
+    except ValueError:
+        return None
+def cidr_overlap(a: str, b: str) -> bool:
+    """Return True when *a* and *b* are same-version networks sharing an address.
+    Entries that do not parse, or that mix IPv4 with IPv6, never overlap.
+    """
+    left, right = _to_network(a), _to_network(b)
+    if left is not None and right is not None and left.version == right.version:
+        return left.overlaps(right)
+    return False
+def detect_saas(entry: str) -> str | None:
+    """Return the first vendor in KNOWN_SAAS_RANGES whose range overlaps *entry*.
+    Returns None when *entry* does not parse as an address/network or when
+    no listed range of the same IP version overlaps it.
+    """
+    candidate = _to_network(entry)
+    if candidate is None:
+        return None
+    for vendor, cidrs in KNOWN_SAAS_RANGES.items():
+        for cidr in cidrs:
+            try:
+                known = ipaddress.ip_network(cidr, strict=False)
+            except ValueError:
+                # Skip an unparsable table entry rather than abort the lookup.
+                continue
+            if candidate.version != known.version:
+                continue
+            if candidate.overlaps(known):
+                return vendor
+    return None
+def detect_reserved(entry: str) -> str | None:
+    """Return the first RESERVED_RANGES CIDR that overlaps *entry*, else None.
+    None is also returned when *entry* does not parse as an address/network.
+    """
+    candidate = _to_network(entry)
+    if candidate is None:
+        return None
+    for cidr in RESERVED_RANGES:
+        try:
+            special = ipaddress.ip_network(cidr)
+        except ValueError:
+            # Skip an unparsable table entry rather than abort the lookup.
+            continue
+        if candidate.version != special.version:
+            continue
+        if candidate.overlaps(special):
+            return cidr
+    return None
+# --- Finding generation -----------------------------------------------------
+def _f(
+    severity: Severity,
+    title: str,
+    target: str,
+    detail: str,
+    remediation: str,
+    evidence: tuple[tuple[str, Any], ...] = (),
+) -> Finding:
+    """Build a Finding for this skill, filling in SKILL_ID as its skill_id."""
+    fields: dict[str, Any] = {
+        "skill_id": SKILL_ID,
+        "title": title,
+        "severity": severity,
+        "target": target,
+        "detail": detail,
+        "remediation": remediation,
+        "evidence": evidence,
+    }
+    return Finding(**fields)
+def evaluate_scope(
+    in_scope: list[Any], out_of_scope: list[Any], target_label: str
+) -> tuple[list[Finding], list[dict[str, Any]], list[str]]:
+    """Validate scope entries and report every problem as a Finding.
+    Args:
+        in_scope: raw in-scope entries (strings or single-key mappings).
+        out_of_scope: raw out-of-scope entries in the same form.
+        target_label: kept for interface compatibility; not used here.
+    Returns:
+        ``(findings, normalized, allowlist)`` where ``normalized`` holds one
+        ``{"raw", "type", "normalized"}`` dict per well-formed in-scope entry
+        and ``allowlist`` holds the normalized in-scope IPs / CIDRs.
+    Findings cover: malformed entries on either list, in-scope IPs inside a
+    known SaaS or reserved range, duplicates (reported once per value),
+    wildcards, and any in-scope target that conflicts with an out-of-scope
+    one (overlapping IP ranges, or the same non-IP target on both lists).
+    """
+    ip_types = ("ipv4", "ipv6", "cidrv4", "cidrv6")
+    findings: list[Finding] = []
+    normalized: list[dict[str, Any]] = []
+    allowlist: list[str] = []
+    classified_in = [classify_target(entry) for entry in in_scope]
+    classified_out = [classify_target(entry) for entry in out_of_scope]
+    seen = Counter(norm or raw for raw, _, norm in classified_in)
+    reported_duplicates: set[Any] = set()
+    # Per-entry validation
+    for raw, t, norm in classified_in:
+        if t == "malformed":
+            findings.append(
+                _f(
+                    Severity.HIGH,
+                    f"malformed in-scope target: {raw}",
+                    raw,
+                    f"Entry `{raw}` does not parse as host / CIDR / URL / cloud account / SaaS tenant.",
+                    "Fix the entry's syntax in the ROE and re-run this skill.",
+                    evidence=(("raw", raw),),
+                )
+            )
+            continue
+        value = norm or raw
+        normalized.append({"raw": raw, "type": t, "normalized": norm})
+        if t in ip_types:
+            allowlist.append(value)
+            saas = detect_saas(value)
+            if saas is not None:
+                findings.append(
+                    _f(
+                        Severity.HIGH,
+                        f"{raw} appears to be in {saas} infrastructure",
+                        raw,
+                        f"`{raw}` overlaps a known {saas} range. Testing third-party "
+                        f"SaaS infrastructure requires SEPARATE authorization from "
+                        f"the SaaS vendor, not just the customer.",
+                        f"Either confirm {saas} has authorized testing OR remove this entry from scope.",
+                        evidence=(("vendor", saas), ("entry", raw)),
+                    )
+                )
+            # Applies to IPv6 as well, so the IPv6 rows of RESERVED_RANGES take effect.
+            reserved = detect_reserved(value)
+            if reserved is not None:
+                findings.append(
+                    _f(
+                        Severity.MEDIUM,
+                        f"{raw} is in a reserved range ({reserved})",
+                        raw,
+                        f"`{raw}` falls within reserved range {reserved}. Acceptable "
+                        f"for internal pentests; verify this is intentional.",
+                        "If this is an internal pentest, no action needed. Otherwise, remove from external scope.",
+                        evidence=(("reserved", reserved), ("entry", raw)),
+                    )
+                )
+        # Report each duplicated value once, not once per occurrence.
+        if seen[value] > 1 and value not in reported_duplicates:
+            reported_duplicates.add(value)
+            findings.append(
+                _f(
+                    Severity.INFO,
+                    f"duplicate in-scope entry: {raw}",
+                    raw,
+                    f"`{raw}` appears {seen[value]} times.",
+                    "Deduplicate; harmless but indicates ROE hygiene issue.",
+                )
+            )
+        if t == "wildcard":
+            findings.append(
+                _f(
+                    Severity.INFO,
+                    f"wildcard entry: {raw}",
+                    raw,
+                    f"`{raw}` will be expanded to specific subdomains at scan time. "
+                    f"Wildcard scope is broad; ensure the customer intended it.",
+                    "Document the expected scope of the wildcard in the ROE rules section.",
+                )
+            )
+    # An exclusion that does not parse cannot be enforced, so surface it loudly.
+    for raw, t, _ in classified_out:
+        if t == "malformed":
+            findings.append(
+                _f(
+                    Severity.HIGH,
+                    f"malformed out-of-scope target: {raw}",
+                    raw,
+                    f"Entry `{raw}` does not parse as host / CIDR / URL / cloud account / "
+                    f"SaaS tenant, so this exclusion cannot be checked against the in-scope list.",
+                    "Fix the entry's syntax in the ROE and re-run this skill.",
+                    evidence=(("raw", raw),),
+                )
+            )
+    # Conflict detection: overlapping IP ranges, or the same non-IP target on both lists.
+    for in_raw, in_t, in_norm in classified_in:
+        if in_t == "malformed":
+            continue
+        for out_raw, out_t, out_norm in classified_out:
+            if out_t == "malformed":
+                continue
+            if in_t in ip_types and out_t in ip_types:
+                conflict = cidr_overlap(in_norm or in_raw, out_norm or out_raw)
+            else:
+                conflict = in_t == out_t and (in_norm or in_raw) == (out_norm or out_raw)
+            if not conflict:
+                continue
+            findings.append(
+                _f(
+                    Severity.CRITICAL,
+                    f"in-scope {in_raw} overlaps out-of-scope {out_raw}",
+                    in_raw,
+                    f"In-scope entry `{in_raw}` overlaps with out-of-scope "
+                    f"entry `{out_raw}`. Probing the overlap is NOT authorized.",
+                    f"Either narrow `{in_raw}` to exclude `{out_raw}`, or remove "
+                    f"`{out_raw}` from the out-of-scope list (the authorizer's call).",
+                    evidence=(("in_scope", in_raw), ("out_of_scope", out_raw)),
+                )
+            )
+    return findings, normalized, allowlist
+# --- CLI ---------------------------------------------------------------------
+def _build_arg_parser() -> argparse.ArgumentParser:
+    """Build the command-line parser for this script.
+    The parser description is the first line of the module docstring; it is
+    omitted when docstrings are stripped (``python -OO``), where ``__doc__``
+    is None.
+    """
+    summary = (__doc__ or "").split("\n")[0] or None
+    p = argparse.ArgumentParser(description=summary)
+    p.add_argument("--roe", default="roe.yaml")
+    p.add_argument("--extension", default=None)
+    p.add_argument("--emit-allowlist", default=None)
+    p.add_argument("--emit-targets", default=None)
+    p.add_argument("--output", default=None)
+    p.add_argument("--format", default="markdown", choices=["json", "jsonl", "markdown"])
+    p.add_argument(
+        "--min-severity",
+        default="info",
+        choices=["info", "low", "medium", "high", "critical"],
+    )
+    return p
+def _filter_min_severity(findings: list[Finding], min_sev: str) -> list[Finding]:
+    """Keep the findings whose severity is at least *min_sev*, preserving order."""
+    threshold = Severity(min_sev).numeric
+    kept: list[Finding] = []
+    for finding in findings:
+        if finding.severity.numeric >= threshold:
+            kept.append(finding)
+    return kept
+def main(argv: list[str] | None = None) -> int:
+    """Run the scope check from the command line and return the exit code.
+    Loads the ROE (plus an optional extension that adds in-scope targets),
+    evaluates the scope, optionally writes the allowlist / normalized target
+    files, and emits the findings through ``report.emit``. Returns 1 when
+    the ROE or extension is missing or unreadable, or when no in-scope
+    target is defined; otherwise returns ``report.exit_code(findings)``.
+    """
+    args = _build_arg_parser().parse_args(argv)
+    roe_path = Path(args.roe).resolve()
+    def fail(title: str, target: str, detail: str, remediation: str) -> int:
+        # Emit one CRITICAL finding and signal failure to the caller.
+        finding = _f(Severity.CRITICAL, title, target, detail, remediation)
+        report.emit([finding], args.output, args.format, scan_target=str(roe_path))
+        return 1
+    def targets(data: Any, key: str) -> list[Any]:
+        # A lone scalar or mapping is one target; list() on a string would
+        # split it into single characters.
+        if not isinstance(data, dict):
+            return []
+        value = data.get(key) or []
+        return list(value) if isinstance(value, (list, tuple)) else [value]
+    # Unreadable or syntactically invalid files become findings, not tracebacks.
+    load_errors: tuple[type[BaseException], ...] = (OSError, ValueError)
+    if _HAS_PYYAML:
+        load_errors += (yaml.YAMLError,)
+    if not roe_path.exists():
+        return fail(
+            "ROE file missing",
+            str(roe_path),
+            f"No ROE at {roe_path}; cannot define scope.",
+            "Create or path-correct the ROE and re-run.",
+        )
+    try:
+        data = _load_yaml(roe_path)
+    except load_errors as exc:
+        return fail(
+            "ROE file unreadable",
+            str(roe_path),
+            f"Could not load {roe_path}: {exc}",
+            "Fix the ROE file's encoding / YAML syntax and re-run.",
+        )
+    in_scope = targets(data, "in_scope_targets")
+    out_of_scope = targets(data, "out_of_scope_targets")
+    if args.extension:
+        ext_path = Path(args.extension).resolve()
+        # A mistyped --extension path must not be silently ignored.
+        if not ext_path.exists():
+            return fail(
+                "scope extension file missing",
+                str(ext_path),
+                f"No scope extension at {ext_path}; cannot define scope.",
+                "Create or path-correct the extension file and re-run.",
+            )
+        try:
+            ext_data = _load_yaml(ext_path)
+        except load_errors as exc:
+            return fail(
+                "scope extension file unreadable",
+                str(ext_path),
+                f"Could not load {ext_path}: {exc}",
+                "Fix the extension file's encoding / YAML syntax and re-run.",
+            )
+        # NOTE(review): only in-scope targets are taken from the extension;
+        # confirm whether its out_of_scope_targets should be honoured too.
+        in_scope = in_scope + targets(ext_data, "in_scope_targets")
+    if not in_scope:
+        return fail(
+            "in_scope_targets is empty",
+            str(roe_path),
+            "Cannot define scope without at least one in-scope target.",
+            "Add in-scope targets to the ROE and re-sign.",
+        )
+    findings, normalized, allowlist = evaluate_scope(in_scope, out_of_scope, str(roe_path))
+    if args.emit_allowlist:
+        Path(args.emit_allowlist).write_text("\n".join(sorted(set(allowlist))) + "\n", encoding="utf-8")
+    if args.emit_targets:
+        Path(args.emit_targets).write_text(json.dumps(normalized, indent=2, sort_keys=True), encoding="utf-8")
+    if not findings:
+        findings = [
+            _f(
+                Severity.INFO,
+                "scope is clean",
+                str(roe_path),
+                f"All {len(normalized)} targets validated. No overlaps, no malformed "
+                f"entries, no third-party SaaS conflicts.",
+                "Hand off the allowlist + normalized target list to scan skills.",
+            )
+        ]
+    findings = _filter_min_severity(findings, args.min_severity)
+    report.emit(findings, args.output, args.format, scan_target=str(roe_path))
+    return report.exit_code(findings)
+if __name__ == "__main__":
+    sys.exit(main())

package/skills/detecting-command-injection-patterns/SKILL.md ADDED Viewed

@@ -0,0 +1,144 @@
+---
+name: detecting-command-injection-patterns
+description: |
+  Scan a source tree for command-injection vulnerable patterns:
+  shell=True calls in Python subprocess, os.system / os.popen with
+  interpolated strings, Node child_process.exec with template
+  literals, Ruby backticks / Kernel#system / Kernel#exec with
+  interpolation, Go exec.Command with shell wrapping, PHP system /
+  passthru / shell_exec / backticks with $-interpolation, Java
+  Runtime.exec with concatenated args.
+  Use when: pre-commit gate on code that calls out to shell utilities,
+  audit of file-processing / archive-handling / image-conversion
+  code, post-bug-report investigation for "we shell out to a tool."
+  Threshold: any shell-invocation API called with a string that
+  contains a variable interpolation, OR shell=True with anything
+  other than a fixed literal.
+  Trigger with: "scan command injection", "shell=True audit",
+  "find exec calls", "check os.system".
+allowed-tools:
+  - Read
+  - Bash(python3:*)
+  - Glob
+  - Grep
+disallowed-tools:
+  - Bash(rm:*)
+  - Bash(curl:*)
+version: 3.0.0-dev
+author: Jeremy Longshore <jeremy@intentsolutions.io>
+license: MIT
+compatibility: Designed for Claude Code
+tags:
+  - security
+  - static-analysis
+  - command-injection
+  - pentest
+---
+# Detecting Command Injection Patterns
+## Overview
+Command injection (CWE-78, OWASP A03:2021) shows up wherever an
+application shells out to a binary. Image conversion (`convert`),
+archive extraction (`tar`, `unzip`), video processing (`ffmpeg`),
+DNS lookup (`dig`), and "we just need to call this CLI tool once"
+are the common origins.
+The vulnerability shape is universal: a string is built including
+user input, then handed to a shell interpreter. The shell parses
+the string with normal shell semantics — including `;`, `|`, `&`,
+`$()`, backticks. Any of those in the user-controlled portion
+becomes shell-executable.
+## When the skill produces findings
+| Finding | Severity | Threshold | Affected control |
+|---|---|---|---|
+| Python `subprocess.run(..., shell=True)` with interpolation | **CRITICAL** | f-string / concat / format argument with `shell=True` | CWE-78 |
+| Python `os.system(...)` with interpolation | **CRITICAL** | non-literal argument | CWE-78 |
+| Python `os.popen(...)` with interpolation | **CRITICAL** | non-literal argument | CWE-78 |
+| Node `child_process.exec(...)` with template literal | **CRITICAL** | `${...}` in the command string | CWE-78 |
+| Node `child_process.execSync(...)` with template | **CRITICAL** | same | CWE-78 |
+| Ruby backticks with interpolation | **CRITICAL** | `` `cmd #{var}` `` | CWE-78 |
+| Ruby `Kernel#system(string)` with interpolation | **CRITICAL** | `system("cmd #{var}")` | CWE-78 |
+| Go `exec.Command("sh", "-c", ...)` with interpolation | **HIGH** | shell wrapper with var | CWE-78 |
+| PHP `system / exec / passthru / shell_exec` with $-interp | **CRITICAL** | `system("cmd $var")` | CWE-78 |
+| Java `Runtime.exec(String)` with concat | **HIGH** | single-string form (vs array) with var | CWE-78 |
+## Prerequisites
+- Python 3.9+
+- Target source tree on local filesystem
+## Instructions
+### Step 1 — Run the scanner
+```bash
+python3 ${CLAUDE_PLUGIN_ROOT}/skills/detecting-command-injection-patterns/scripts/scan_cmdi.py /path/to/repo
+```
+Options:
+```
+Usage: scan_cmdi.py PATH [OPTIONS]
+Options:
+  --output FILE      Write findings to FILE
+  --format FMT       json | jsonl | markdown (default: markdown)
+  --min-severity SEV (default: info)
+  --include-tests    Include test directories (default: excluded)
+  --languages LIST   Comma-separated subset to scan
+```
+### Step 2 — Interpret findings
+CRITICAL = user input flows directly into a shell command string. Fix immediately.
+HIGH = a shell layer is present, but whether user input can reach it
+still needs to be verified.
+### Step 3 — Remediation
+The universal fix: pass arguments as a list (array), not as a single
+string. Most APIs have a list form that bypasses the shell entirely.
+See `references/PLAYBOOK.md` for per-language patterns.
+## Examples
+### Example 1 — Pre-commit on a media-processing service
+```bash
+python3 ${CLAUDE_PLUGIN_ROOT}/skills/detecting-command-injection-patterns/scripts/scan_cmdi.py \
+    --min-severity high $(git diff --name-only main...HEAD | tr '\n' ' ')
+```
+### Example 2 — CI gate
+```yaml
+- name: Command-injection scan
+  run: |
+    python3 plugins/security/penetration-tester/skills/detecting-command-injection-patterns/scripts/scan_cmdi.py \
+        . --min-severity high
+```
+## Output
+JSON / JSONL / Markdown. Exit codes: 0 clean, 1 high/critical, 2 error.
+## Error Handling
+False positives are common in build scripts that interpolate fixed
+build constants. Verify each finding by checking whether the
+interpolated value can be reached from user input.
+## Resources
+- `references/THEORY.md` — Why shell=True is the default footgun,
+  per-language shell-out idioms, argument-vector vs command-string
+  semantics
+- `references/PLAYBOOK.md` — Per-language safe-shellout patterns
+  (Python subprocess list-args, Node spawn, Ruby Open3.capture3,
+  Go exec.Command list-args, Java ProcessBuilder, PHP escapeshellarg)