npm - @intentsolutionsio/penetration-tester - Versions diffs - 2.0.0 → 3.0.4 - Mend

@intentsolutionsio/penetration-tester 2.0.0 → 3.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (112) hide show

package/skills/recording-pentest-engagement/scripts/record_engagement.py ADDED Viewed

@@ -0,0 +1,461 @@
+#!/usr/bin/env python3
+"""recording-pentest-engagement — chain-of-custody packager.
+Walks an engagement directory, builds a SHA-256 manifest of every file,
+optionally signs the manifest with GPG, optionally creates a tarball of the
+directory + manifest. Emits Findings via lib/finding.py for any inconsistency
+between the manifest and the on-disk state.
+Usage:
+    python3 record_engagement.py PATH [--output FILE] [--format json|jsonl|markdown]
+                                       [--min-severity sev] [--manifest FILE]
+                                       [--tar FILE] [--sign] [--signer KEY]
+                                       [--exclude GLOB]
+"""
+from __future__ import annotations
+import argparse
+import fnmatch
+import hashlib
+import json
+import os
+import shutil
+import subprocess
+import sys
+import tarfile
+from datetime import datetime, timezone
+from pathlib import Path
+from typing import Any
+# --- lib/ import -------------------------------------------------------------
+_LIB_ROOT = Path(__file__).resolve().parents[3]
+sys.path.insert(0, str(_LIB_ROOT))
+from lib.finding import Finding, Severity  # noqa: E402
+from lib import report  # noqa: E402
+# Identifier passed as skill_id on every Finding built by _f().
+SKILL_ID = "recording-pentest-engagement"
+# NOTE(review): CATEGORY is not referenced anywhere in the visible script —
+# confirm whether lib/ or external tooling reads it.
+CATEGORY = "evidence-chain"
+# Built-in exclusion patterns. main() appends any --exclude values to these,
+# and walk_files() skips every file whose root-relative path matches one.
+DEFAULT_EXCLUDES = (
+    # fnmatch-style patterns. fnmatch's `*` crosses directory separators on
+    # POSIX (Linux + macOS), so a bare name plus a `*/<name>` variant
+    # together cover top-level and any nested instance. The pack's CI runs
+    # on ubuntu-latest only; on Windows, fnmatch's `*` semantics differ and
+    # these patterns would miss deeply-nested matches. Reviewer note
+    # (PR #837 review): if Windows support ever lands, swap to a
+    # per-depth pattern set or pathspec-based matching.
+    "manifest.sha256",
+    "*/manifest.sha256",
+    "manifest.sha256.asc",
+    "*/manifest.sha256.asc",
+    ".DS_Store",
+    "*/.DS_Store",
+    "*__pycache__*",
+)
+# --- Helpers ----------------------------------------------------------------
+def _f(
+    severity: Severity,
+    title: str,
+    target: str,
+    detail: str,
+    remediation: str,
+    evidence: tuple[tuple[str, Any], ...] = (),
+) -> Finding:
+    """Build a Finding attributed to this skill.
+    Each argument is forwarded unchanged to the Finding constructor under the
+    keyword of the same name; skill_id is always SKILL_ID.
+    """
+    fields = {
+        "skill_id": SKILL_ID,
+        "title": title,
+        "severity": severity,
+        "target": target,
+        "detail": detail,
+        "remediation": remediation,
+        "evidence": evidence,
+    }
+    return Finding(**fields)
+def _matches_any(path: str, patterns: list[str]) -> bool:
+    """Return True when *path* matches at least one fnmatch pattern in *patterns*."""
+    for pattern in patterns:
+        if fnmatch.fnmatch(path, pattern):
+            return True
+    return False
+def _sha256_file(path: Path) -> str:
+    """Return the hex SHA-256 digest of the file at *path*, read in 64 KiB chunks."""
+    digest = hashlib.sha256()
+    with open(path, "rb") as stream:
+        while True:
+            block = stream.read(65536)
+            if not block:
+                break
+            digest.update(block)
+    return digest.hexdigest()
+# --- Manifest building ------------------------------------------------------
+def walk_files(root: Path, excludes: list[str]) -> tuple[list[Path], list[Finding]]:
+    """Collect the files under *root* that should be hashed into the manifest.
+    Files whose root-relative path matches any fnmatch pattern in *excludes*
+    are skipped without a finding. Symlinks (to files or to directories) are
+    reported and left out; empty files are reported but kept; anything that
+    cannot be stat'ed, and any directory that cannot be listed, is reported so
+    that a gap in the manifest is never silent. Directories and files are
+    visited in sorted order so repeated runs report in the same order.
+    Args:
+        root: Engagement directory to walk.
+        excludes: fnmatch patterns applied to root-relative paths.
+    Returns:
+        (files, findings): paths to hash, plus Findings raised while walking.
+    """
+    files: list[Path] = []
+    findings: list[Finding] = []
+    if not root.exists():
+        findings.append(
+            _f(
+                Severity.CRITICAL,
+                f"engagement directory missing: {root}",
+                str(root),
+                f"Path {root} does not exist; cannot record engagement.",
+                "Create the engagement directory and re-run.",
+            )
+        )
+        return files, findings
+    def _symlink_finding(path: Path, rel: str) -> Finding:
+        # Shared by the directory and file loops so both report identically.
+        return _f(
+            Severity.MEDIUM,
+            f"symlink in tree: {rel}",
+            str(path),
+            "Symlinks break archive portability and complicate integrity verification.",
+            "Replace the symlink with the actual file, or exclude "
+            "the directory containing the symlink.",
+        )
+    def _on_walk_error(err: OSError) -> None:
+        # os.walk ignores listing errors unless given a handler; without this
+        # an unreadable directory would drop out of the manifest unnoticed.
+        where = err.filename or str(root)
+        findings.append(
+            _f(
+                Severity.HIGH,
+                f"cannot list directory {where}",
+                str(where),
+                f"OSError: {err}",
+                "Resolve permissions; re-run.",
+            )
+        )
+    for dirpath, dirnames, filenames in os.walk(root, onerror=_on_walk_error):
+        # Sorting in place also fixes the order in which os.walk descends.
+        dirnames.sort()
+        for name in dirnames:
+            d = Path(dirpath) / name
+            rel_dir = str(d.relative_to(root))
+            # Symlinks to directories are listed here (and not followed), so
+            # they never reach the file loop below.
+            if d.is_symlink() and not _matches_any(rel_dir, excludes):
+                findings.append(_symlink_finding(d, rel_dir))
+        for name in sorted(filenames):
+            p = Path(dirpath) / name
+            rel_str = str(p.relative_to(root))
+            if _matches_any(rel_str, excludes):
+                continue
+            try:
+                if p.is_symlink():
+                    findings.append(_symlink_finding(p, rel_str))
+                    continue
+                if p.stat().st_size == 0:
+                    findings.append(
+                        _f(
+                            Severity.INFO,
+                            f"empty file in tree: {rel_str}",
+                            str(p),
+                            "0-byte file; possibly an export error or placeholder.",
+                            "Verify the file is intentional; remove if not.",
+                        )
+                    )
+            except OSError as e:  # PermissionError is a subclass of OSError
+                findings.append(
+                    _f(
+                        Severity.HIGH,
+                        f"cannot stat {rel_str}",
+                        str(p),
+                        f"OSError: {e}",
+                        "Resolve permissions; re-run.",
+                    )
+                )
+                continue
+            files.append(p)
+    return files, findings
+def compute_manifest(root: Path, files: list[Path]) -> tuple[list[tuple[str, str]], list[Finding]]:
+    """Hash *files* in sorted order and pair each digest with its root-relative path.
+    A file that cannot be read yields a HIGH Finding and is left out of the
+    returned entries.
+    Returns:
+        (entries, findings) where each entry is (sha256_hex, relative_path).
+    """
+    manifest_rows: list[tuple[str, str]] = []
+    problems: list[Finding] = []
+    ordered = list(files)
+    ordered.sort()
+    for path in ordered:
+        try:
+            checksum = _sha256_file(path)
+        except (OSError, PermissionError) as exc:
+            problem = _f(
+                Severity.HIGH,
+                f"cannot read {path.relative_to(root)}",
+                str(path),
+                f"Cannot hash file: {exc}",
+                "Resolve permissions; re-run.",
+            )
+            problems.append(problem)
+        else:
+            manifest_rows.append((checksum, str(path.relative_to(root))))
+    return manifest_rows, problems
+def write_manifest(entries: list[tuple[str, str]], manifest_path: Path) -> None:
+    """Write *entries* to *manifest_path* as UTF-8 ``HASH<sp><sp>PATH`` lines, one per entry."""
+    body = "".join(f"{checksum}  {relpath}\n" for checksum, relpath in entries)
+    manifest_path.write_text(body, encoding="utf-8")
+# --- Manifest verification (when one already exists) -----------------------
+def load_existing_manifest(manifest_path: Path) -> list[tuple[str, str]] | None:
+    """Parse a sha256sum-style manifest into (digest, relpath) pairs.
+    Two line shapes are accepted: ``HASH<sp><sp>PATH`` (what write_manifest
+    emits) and ``HASH<sp>*PATH`` / ``HASH<sp>PATH`` (binary-mode marker or a
+    single separator). For the two-space form the path is kept verbatim so
+    names with leading or trailing spaces round-trip with write_manifest.
+    Blank lines and lines without a digest/path pair are ignored.
+    Returns:
+        The parsed entries, or None when *manifest_path* does not exist.
+    """
+    if not manifest_path.exists():
+        return None
+    out: list[tuple[str, str]] = []
+    # Split on "\n" only: str.splitlines() also breaks on characters such as
+    # form feed or U+2028, which are legal inside file names.
+    for line in manifest_path.read_text(encoding="utf-8").split("\n"):
+        if not line.strip():
+            continue
+        if "  " in line:
+            # standard sha256sum text-mode format: HASH<sp><sp>PATH
+            digest, rel = line.split("  ", 1)
+        elif " " in line.strip():
+            digest, rel = line.strip().split(" ", 1)
+            # sha256sum binary-mode format: HASH<sp>*PATH — the "*" is a
+            # mode marker, not part of the file name.
+            if rel.startswith("*"):
+                rel = rel[1:]
+        else:
+            continue
+        digest = digest.strip()
+        if not digest or not rel:
+            continue
+        out.append((digest, rel))
+    return out
+def verify_against_existing(
+    root: Path, computed: list[tuple[str, str]], existing: list[tuple[str, str]]
+) -> list[Finding]:
+    """Compare freshly computed manifest entries with a previously stored manifest.
+    Three kinds of discrepancy are reported: a file on disk that the stored
+    manifest does not list (HIGH), a file whose digest differs (CRITICAL), and
+    a stored entry with no computed counterpart (HIGH). Findings follow the
+    order of *computed*, then the missing entries in sorted path order, so the
+    report is identical from run to run.
+    Args:
+        root: Engagement directory; used only to build each finding's target.
+        computed: (digest, relpath) pairs for the current on-disk state.
+        existing: (digest, relpath) pairs loaded from the stored manifest.
+    """
+    findings: list[Finding] = []
+    existing_map = {rel: digest for digest, rel in existing}
+    computed_map = {rel: digest for digest, rel in computed}
+    for rel, comp_digest in computed_map.items():
+        if rel not in existing_map:
+            findings.append(
+                _f(
+                    Severity.HIGH,
+                    f"file not in existing manifest: {rel}",
+                    str(root / rel),
+                    "File present on disk but absent from the existing manifest. "
+                    "Either the file was added after manifest creation, or the "
+                    "manifest is stale.",
+                    "Re-generate the manifest to include the new file (acceptable "
+                    "for in-progress engagements; never acceptable post-closeout).",
+                )
+            )
+        elif existing_map[rel] != comp_digest:
+            findings.append(
+                _f(
+                    Severity.CRITICAL,
+                    f"hash mismatch: {rel}",
+                    str(root / rel),
+                    f"Manifest says {existing_map[rel]}, computed {comp_digest}. "
+                    f"The file has been modified since the manifest was created.",
+                    "Investigate the modification. If legitimate (post-engagement "
+                    "edit), the original archive's integrity claim is broken; "
+                    "create a NEW archive with a fresh manifest and document the "
+                    "modification reason.",
+                )
+            )
+    # Set difference has no stable iteration order; sort so the report does
+    # not reshuffle between runs over the same inputs.
+    for rel in sorted(existing_map.keys() - computed_map.keys()):
+        findings.append(
+            _f(
+                Severity.HIGH,
+                f"manifest entry for missing file: {rel}",
+                str(root / rel),
+                "Manifest lists a file that doesn't exist on disk.",
+                "Either restore the file or generate a fresh manifest without it.",
+            )
+        )
+    return findings
+# --- Findings sanity (cross-reference) --------------------------------------
+def scan_findings_for_external_refs(root: Path) -> list[Finding]:
+    """Flag absolute paths in findings-JSON evidence that lie outside *root*.
+    Every ``*.json`` under ``<root>/findings`` is read. A document may be a
+    list of records or a dict with a ``"findings"`` list; each record's
+    ``"evidence"`` dict is inspected, and any string value starting with "/"
+    that does not resolve to *root* or a path beneath it yields a MEDIUM
+    Finding. Containment is decided component-wise on the normalised path, so
+    sibling directories sharing a name prefix and ``..`` escapes are caught.
+    Files that cannot be read or parsed are skipped (best-effort scan).
+    """
+    out: list[Finding] = []
+    findings_dir = root / "findings"
+    if not findings_dir.is_dir():
+        return out
+    for f in sorted(findings_dir.glob("**/*.json")):
+        try:
+            data = json.loads(f.read_text(encoding="utf-8"))
+        except (OSError, ValueError):
+            # ValueError covers json.JSONDecodeError and UnicodeDecodeError;
+            # an unparseable file is deliberately ignored, not fatal.
+            continue
+        if isinstance(data, list):
+            records = data
+        elif isinstance(data, dict):
+            records = data.get("findings", [])
+        else:
+            records = []
+        if not isinstance(records, (list, dict)):
+            # e.g. "findings": 3 — nothing iterable to inspect.
+            continue
+        for rec in records:
+            if not isinstance(rec, dict):
+                continue
+            evidence = rec.get("evidence", {})
+            if not isinstance(evidence, dict):
+                continue
+            for k, v in evidence.items():
+                if not isinstance(v, str) or not v.startswith("/"):
+                    continue
+                try:
+                    # normpath collapses "..", so /root/../etc is seen as /etc.
+                    Path(os.path.normpath(v)).relative_to(root)
+                except ValueError:
+                    out.append(
+                        _f(
+                            Severity.MEDIUM,
+                            f"finding references out-of-tree path: {v}",
+                            str(f),
+                            f"Finding evidence['{k}'] = {v} points outside the "
+                            f"engagement directory tree at {root}.",
+                            "Either copy the referenced file into the engagement "
+                            "tree (so it's archived) or change the finding to "
+                            "reference an in-tree path.",
+                        )
+                    )
+    return out
+# --- GPG signing ------------------------------------------------------------
+def sign_manifest(manifest_path: Path, signer: str | None) -> tuple[bool, str]:
+    """Create an ASCII-armored detached GPG signature for *manifest_path*.
+    The signature is written to ``<manifest_path>.asc``, replacing any
+    signature left there by an earlier run.
+    Args:
+        manifest_path: Manifest file to sign.
+        signer: Key passed to gpg as --local-user; gpg's default key when None.
+    Returns:
+        (True, "") on success, otherwise (False, reason).
+    """
+    if not shutil.which("gpg"):
+        return False, "gpg not installed"
+    # --yes: without it gpg stops to ask before overwriting an existing .asc,
+    # which fails or stalls when the script is re-run on the same directory.
+    cmd = ["gpg", "--yes", "--armor", "--detach-sign"]
+    if signer:
+        cmd.extend(["--local-user", signer])
+    cmd.extend(["--output", str(manifest_path) + ".asc", str(manifest_path)])
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, timeout=60, check=False)  # noqa: S603
+    except (subprocess.TimeoutExpired, OSError) as e:
+        # OSError covers FileNotFoundError plus other launch failures such as
+        # a gpg binary that is present but not executable.
+        return False, f"gpg invocation failed: {e}"
+    if proc.returncode != 0:
+        return False, f"gpg exited {proc.returncode}: {proc.stderr.strip()[:200]}"
+    return True, ""
+# --- Tar packaging ----------------------------------------------------------
+def create_tar(root: Path, tar_path: Path) -> tuple[bool, str]:
+    """Pack *root* into a gzip-compressed tarball at *tar_path*.
+    Parent directories of *tar_path* are created as needed, and archive
+    members are stored under the directory's own name.
+    Returns:
+        (True, "") on success, otherwise (False, reason).
+    """
+    outcome = (True, "")
+    try:
+        tar_path.parent.mkdir(parents=True, exist_ok=True)
+        archive = tarfile.open(tar_path, "w:gz")
+        with archive:
+            archive.add(root, arcname=root.name)
+    except (OSError, tarfile.TarError) as exc:
+        outcome = (False, f"tar creation failed: {exc}")
+    return outcome
+# --- CLI ---------------------------------------------------------------------
+def _build_arg_parser() -> argparse.ArgumentParser:
+    """Build the command-line parser for this script.
+    One positional argument (the engagement directory) plus options for the
+    report (--output, --format, --min-severity), the manifest location
+    (--manifest), packaging (--tar), signing (--sign, --signer) and
+    repeatable exclusion globs (--exclude).
+    """
+    # __doc__ is None when docstrings are stripped (python -OO); fall back to
+    # no description instead of crashing on None.split().
+    summary = (__doc__ or "").split("\n")[0] or None
+    p = argparse.ArgumentParser(description=summary)
+    p.add_argument("path", help="Engagement directory")
+    p.add_argument("--output", default=None)
+    p.add_argument("--format", default="markdown", choices=["json", "jsonl", "markdown"])
+    p.add_argument(
+        "--min-severity",
+        default="info",
+        choices=["info", "low", "medium", "high", "critical"],
+    )
+    p.add_argument("--manifest", default=None)
+    p.add_argument("--tar", default=None)
+    p.add_argument("--sign", action="store_true")
+    p.add_argument("--signer", default=None)
+    p.add_argument("--exclude", action="append", default=[])
+    return p
+def _filter_min_severity(findings: list[Finding], min_sev: str) -> list[Finding]:
+    """Keep the findings whose severity's numeric value is at least that of *min_sev*."""
+    threshold = Severity(min_sev).numeric
+    kept: list[Finding] = []
+    for finding in findings:
+        if finding.severity.numeric >= threshold:
+            kept.append(finding)
+    return kept
+def main(argv: list[str] | None = None) -> int:
+    """Run the packager end to end and return the process exit code.
+    Steps: walk the engagement directory, hash the files, compare against an
+    existing manifest when one is present, rewrite the manifest, scan findings
+    JSON for out-of-tree references, optionally sign the manifest and build a
+    tarball, then emit the collected findings filtered by --min-severity.
+    Files this run itself produces (the manifest, its .asc signature, the
+    --tar archive) are never hashed into the manifest, even when they are
+    written inside the engagement tree under non-default names.
+    Args:
+        argv: Argument list; None lets argparse read sys.argv.
+    Returns:
+        1 when the walk produced findings but no files; otherwise whatever
+        report.exit_code() returns for the emitted findings.
+    """
+    args = _build_arg_parser().parse_args(argv)
+    root = Path(args.path).resolve()
+    excludes = list(DEFAULT_EXCLUDES) + list(args.exclude)
+    manifest_path = Path(args.manifest).resolve() if args.manifest else (root / "manifest.sha256")
+    tar_path = Path(args.tar).resolve() if args.tar else None
+    files, walk_findings = walk_files(root, excludes)
+    if not files and walk_findings:
+        report.emit(walk_findings, args.output, args.format, scan_target=str(root))
+        return 1
+    # DEFAULT_EXCLUDES only covers the default artefact names. A custom
+    # --manifest or --tar inside the tree would otherwise be hashed on the
+    # next run and reported as an unlisted or modified file.
+    own_outputs = {manifest_path, Path(str(manifest_path) + ".asc")}
+    if tar_path is not None:
+        own_outputs.add(tar_path)
+    files = [p for p in files if p not in own_outputs]
+    entries, hash_findings = compute_manifest(root, files)
+    existing = load_existing_manifest(manifest_path)
+    verify_findings: list[Finding] = []
+    if existing is not None:
+        verify_findings = verify_against_existing(root, entries, existing)
+    # Always rewrite the manifest to current state (it's authoritative now)
+    manifest_written = True
+    try:
+        write_manifest(entries, manifest_path)
+    except OSError as e:
+        manifest_written = False
+        walk_findings.append(
+            _f(
+                Severity.HIGH,
+                f"cannot write manifest at {manifest_path}",
+                str(manifest_path),
+                f"OSError: {e}",
+                "Resolve permissions; re-run.",
+            )
+        )
+    ref_findings = scan_findings_for_external_refs(root)
+    sign_findings: list[Finding] = []
+    if args.sign:
+        if manifest_written:
+            ok, msg = sign_manifest(manifest_path, args.signer)
+        else:
+            # Signing now would attest to a stale or missing manifest.
+            ok, msg = False, "manifest was not written; nothing current to sign"
+        if ok:
+            sign_findings.append(
+                _f(
+                    Severity.INFO,
+                    "manifest signed",
+                    str(manifest_path) + ".asc",
+                    "Manifest signed with GPG detached signature.",
+                    "Distribute the .asc alongside the manifest; verify with "
+                    "`gpg --verify manifest.sha256.asc manifest.sha256`.",
+                )
+            )
+        else:
+            sign_findings.append(
+                _f(
+                    Severity.HIGH,
+                    "manifest signing failed",
+                    str(manifest_path),
+                    msg,
+                    "Sign the manifest manually after resolving the GPG issue, or skip --sign.",
+                )
+            )
+    tar_findings: list[Finding] = []
+    if tar_path is not None:
+        ok, msg = create_tar(root, tar_path)
+        if ok:
+            tar_findings.append(
+                _f(
+                    Severity.INFO,
+                    "archive created",
+                    args.tar,
+                    f"Archive written: {args.tar}",
+                    "Hand off to legal / archive / customer per the engagement closeout protocol.",
+                )
+            )
+        else:
+            tar_findings.append(
+                _f(
+                    Severity.HIGH,
+                    "archive creation failed",
+                    args.tar,
+                    msg,
+                    "Resolve the tar error and re-run with --tar.",
+                )
+            )
+    all_findings = walk_findings + hash_findings + verify_findings + ref_findings + sign_findings + tar_findings
+    if not all_findings:
+        # Nothing to report: emit one INFO record so the report is not empty.
+        all_findings = [
+            _f(
+                Severity.INFO,
+                "engagement archive is internally consistent",
+                str(root),
+                f"Manifest covers {len(entries)} files. No integrity issues detected.",
+                "No action required. Distribute the manifest (and optionally "
+                ".tar.gz) per the engagement closeout protocol.",
+                evidence=(
+                    ("file_count", len(entries)),
+                    ("manifest", str(manifest_path)),
+                    ("recorded_at", datetime.now(timezone.utc).isoformat()),
+                ),
+            )
+        ]
+    all_findings = _filter_min_severity(all_findings, args.min_severity)
+    report.emit(all_findings, args.output, args.format, scan_target=str(root))
+    return report.exit_code(all_findings)
+# Script entry point: run main() and use its return value as the exit status.
+if __name__ == "__main__":
+    raise SystemExit(main())

package/skills/scanning-for-hardcoded-secrets/SKILL.md ADDED Viewed

@@ -0,0 +1,215 @@
+---
+name: scanning-for-hardcoded-secrets
+description: |
+  Scan a source-code tree for hardcoded credentials embedded in source
+  files: AWS access keys, GitHub tokens, Stripe keys, Slack tokens,
+  Anthropic API keys, OpenAI keys, JWT signing secrets, generic
+  base64-encoded passwords, RSA / SSH private keys, and high-entropy
+  string literals that pattern-match common credential shapes.
+  Use when: pre-commit gate before pushing a feature branch, audit
+  before SOC2, post-incident scan after a leak, or inheriting a
+  codebase you didn't write.
+  Threshold: any source file contains a string that matches a
+  canonical credential regex (AWS AKIA prefix, GitHub ghp_ prefix,
+  etc.) OR a string with Shannon entropy above 4.5 in a field
+  context (key=, token:, secret=).
+  Trigger with: "scan secrets", "credential scan", "find hardcoded
+  keys", "leak check".
+allowed-tools:
+  - Read
+  - Bash(python3:*)
+  - Glob
+  - Grep
+disallowed-tools:
+  - Bash(rm:*)
+  - Bash(curl:*)
+  - Bash(wget:*)
+  - Write(.env)
+  - Edit(.env)
+version: 3.0.0-dev
+author: Jeremy Longshore <jeremy@intentsolutions.io>
+license: MIT
+compatibility: Designed for Claude Code
+tags:
+  - security
+  - static-analysis
+  - secrets
+  - pentest
+---
+# Scanning for Hardcoded Secrets
+## Overview
+The single most common cause of credential breach in 2026 remains
+hardcoded secrets in source code. Engineers paste an API key into a
+config file "just for testing," forget to remove it, commit the
+file. The credential is now in the repository's history forever
+(`git rebase` doesn't help if anyone cloned in between) and
+extractable by anyone who reaches the repo: contractors,
+ex-employees, attackers via `.git/` directory exposure (see skill
+six), GitHub bot scrapers crawling public repos.
+The cost of detection-after-commit is near-zero (free tools exist:
+gitleaks, trufflehog, this skill). The cost of detection-before-commit
+is also near-zero (pre-commit hooks). The cost of remediation after
+the fact is rotating every credential exposed + auditing for
+exploitation + potentially notifying customers of breach. The
+asymmetry is severe; discipline is the only constraint.
+This skill scans a filesystem tree, matching against a canonical
+regex library covering the credential shapes attackers and bots
+search for first.
+## When the skill produces findings
+| Finding | Severity | Threshold | Affected control |
+|---|---|---|---|
+| AWS access key (AKIA-prefix) | **CRITICAL** | Literal `AKIA[0-9A-Z]{16}` in any file | CWE-798 |
+| AWS secret access key | **CRITICAL** | 40-char base64 in `aws_secret_access_key` field context | CWE-798 |
+| GitHub personal access token | **CRITICAL** | `ghp_[A-Za-z0-9]{36}` or `gho_`, `ghu_`, `ghs_`, `ghr_` | CWE-798 |
+| GitHub app installation token | **CRITICAL** | `ghs_[A-Za-z0-9]{36}` | CWE-798 |
+| Stripe live key | **CRITICAL** | `sk_live_[A-Za-z0-9]{24,}` | CWE-798 |
+| Stripe test key | **MEDIUM** | `sk_test_[A-Za-z0-9]{24,}` | CWE-798 |
+| Anthropic API key | **CRITICAL** | `sk-ant-api03-[A-Za-z0-9_-]{93}` or similar | CWE-798 |
+| OpenAI API key | **CRITICAL** | `sk-(proj-)?[A-Za-z0-9_-]{40,}` | CWE-798 |
+| Slack bot token | **CRITICAL** | `xoxb-[A-Za-z0-9-]+` | CWE-798 |
+| Slack user token | **CRITICAL** | `xoxp-[A-Za-z0-9-]+` | CWE-798 |
+| Google API key | **HIGH** | `AIza[A-Za-z0-9_-]{35}` | CWE-798 |
+| RSA / OpenSSH private key | **CRITICAL** | BEGIN PRIVATE KEY header (RSA, OPENSSH, EC, DSA variants) | CWE-321 |
+| JWT secret | **HIGH** | Long string in `jwt_secret`, `JWT_SECRET`, `signing_secret` field | CWE-321 |
+| Generic password literal | **HIGH** | `password = "..."` with non-placeholder value | CWE-798 |
+| High-entropy string in key/token field | **MEDIUM** | Shannon entropy ≥ 4.5 in `key:`/`token:` field context | CWE-798 |
+| `.env`-shaped KEY=VALUE in non-`.env` file | **HIGH** | Multiple `[A-Z_]+=` lines in `.py`/`.js`/`.md` files | CWE-200 |
+## Prerequisites
+- Python 3.9+
+- Target source-code tree on local filesystem
+## Instructions
+### Step 1 — Identify the scan target
+This skill scans a filesystem path. No authorization gate (it
+operates on local source code, not network targets).
+### Step 2 — Run the scanner
+```bash
+python3 ${CLAUDE_PLUGIN_ROOT}/skills/scanning-for-hardcoded-secrets/scripts/scan_secrets.py /path/to/repo
+```
+Options:
+```
+Usage: scan_secrets.py PATH [OPTIONS]
+Options:
+  --output FILE      Write findings to FILE (default: stdout)
+  --format FMT       json | jsonl | markdown (default: markdown)
+  --min-severity SEV (default: info)
+  --include-tests    Include files under tests/, test/, __tests__/, spec/
+                     (default: excluded to reduce false positives)
+  --git-history N    Also scan the last N git commits' diffs (default: 0
+                     = working tree only)
+  --exclude GLOB     Skip files matching glob (repeatable)
+  --entropy-only     Only flag entropy-based findings (skip regex)
+```
+The scanner walks the tree, applies the regex library to every
+file's contents, and emits a Finding per match with file path, line
+number, severity, and the redacted matched text.
+### Step 3 — Interpret findings
+CRITICAL = the matched string is a real credential shape that
+upstream tools auto-extract. Rotate the credential immediately.
+Audit logs for any API call against that credential since the
+commit landed.
+HIGH = pattern strongly suggests credential but requires manual
+verification (the literal might be a placeholder or test fixture).
+MEDIUM / LOW = entropy-based heuristic that needs human review.
+### Step 4 — Remediation
+For any confirmed real credential:
+1. **Rotate immediately.** Don't wait to refactor; the leak window
+   is between when the commit landed and when you rotate.
+2. **Audit usage.** Check provider's API logs for any unfamiliar
+   calls against that credential since the leak commit timestamp.
+3. **Remove from source.** Move to environment variables, secrets
+   manager, or a runtime-provisioned secret. See
+   `references/PLAYBOOK.md` for per-language patterns.
+4. **Scrub history if reasonable.** `git filter-repo` or `BFG
+   Repo-Cleaner` can purge the secret from history, but only if you
+   can force-push and coordinate with every clone-holder. For
+   public repos, history-scrub is often not worth the disruption
+   compared to just rotating.
+## Examples
+### Example 1 — Pre-commit gate
+```bash
+# .git/hooks/pre-commit (or via pre-commit framework)
+python3 plugins/security/penetration-tester/skills/scanning-for-hardcoded-secrets/scripts/scan_secrets.py \
+    --min-severity high --format json . | jq -e 'length == 0' \
+    || { echo "Secrets detected. Fix before commit."; exit 1; }
+```
+### Example 2 — CI scan on every push
+```yaml
+- name: Hardcoded-secrets scan
+  run: |
+    python3 plugins/security/penetration-tester/skills/scanning-for-hardcoded-secrets/scripts/scan_secrets.py \
+        . --min-severity high --format json --output secrets-scan.json
+- run: |
+    if jq 'length > 0' secrets-scan.json | grep -q true; then
+      echo "::error::Hardcoded secret detected"
+      exit 1
+    fi
+```
+### Example 3 — Audit inherited codebase
+```bash
+python3 ${CLAUDE_PLUGIN_ROOT}/skills/scanning-for-hardcoded-secrets/scripts/scan_secrets.py \
+    /path/to/acquired-repo --include-tests --min-severity medium
+```
+`--include-tests` is important here because legacy test fixtures
+often contain real credentials someone forgot to redact.
+## Output
+JSON / JSONL / Markdown per `lib/report.py`. Exit codes: 0 clean, 1
+high/critical, 2 error.
+Matched strings are partially redacted in output (first 4 + last 4
+chars visible, middle redacted) to avoid the scanner output itself
+becoming a leak surface.
+## Error Handling
+- **False positive on placeholder strings** like `<YOUR_KEY_HERE>` →
+  the scanner skips strings containing `<`, `>`, `EXAMPLE`,
+  `PLACEHOLDER`, `YOUR_`, `XXXX` (configurable).
+- **Binary file in tree** → skipped (the scanner reads only text
+  files by content-type sniffing).
+- **Large file** → files >5 MB are skipped (avoids scanning compiled
+  artifacts and lockfiles).
+## Resources
+- `references/THEORY.md` — Per-credential-family threat model, why
+  each provider's keys are extracted by bots first, history-scrub
+  decision framework, entropy-detection theory
+- `references/PLAYBOOK.md` — Per-language migration patterns
+  (Python dotenv, Node .env+dotenv, Ruby Rails credentials, Go
+  envconfig), provider rotation procedures, GitHub secret-scanning
+  integration