PyPI - plainmarker - Versions diffs - 0.49.0__py3-none-any.whl - Mend

plainmarker 0.49.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

keeper_core/__init__.py +12 -0
keeper_core/__main__.py +11 -0
keeper_core/accept_baseline.py +327 -0
keeper_core/auditor.py +565 -0
keeper_core/baseline.py +125 -0
keeper_core/calibrate.py +150 -0
keeper_core/calibrate_auditor.py +320 -0
keeper_core/checks.py +995 -0
keeper_core/cli.py +267 -0
keeper_core/config.py +188 -0
keeper_core/doctor.py +80 -0
keeper_core/interrogation.py +210 -0
keeper_core/models.py +155 -0
keeper_core/onboarding/__init__.py +6 -0
keeper_core/onboarding/builtin_provider.py +311 -0
keeper_core/onboarding/onboard.py +136 -0
keeper_core/onboarding/provider.py +58 -0
keeper_core/onboarding/risk.py +61 -0
keeper_core/onboarding/summary.py +52 -0
keeper_core/ranking.py +144 -0
keeper_core/redact.py +66 -0
keeper_core/report.py +704 -0
keeper_core/sast_rules/javascript.yaml +43 -0
keeper_core/sast_rules/python.yaml +71 -0
keeper_core/seatbelt.py +270 -0
keeper_core/session_verify.py +975 -0
keeper_core/shell_audit.py +1058 -0
keeper_core/templates/keeper/config.local-ollama.yaml +36 -0
keeper_core/templates/keeper/config.openrouter-deepseek.yaml +55 -0
keeper_core/templates/keeper/config.openrouter-free.yaml +58 -0
keeper_core/templates/keeper/config.yaml +30 -0
keeper_core/templates/keeper/decision-ledger.md +24 -0
keeper_core/templates/keeper/failure-library.md +18 -0
keeper_core/templates/keeper/hard-truths.yaml +18 -0
keeper_core/templates/keeper/project-state.md +28 -0
keeper_core/witness.py +128 -0
keeper_core/witness_store.py +360 -0
plainmarker-0.49.0.dist-info/METADATA +147 -0
plainmarker-0.49.0.dist-info/RECORD +43 -0
plainmarker-0.49.0.dist-info/WHEEL +5 -0
plainmarker-0.49.0.dist-info/entry_points.txt +2 -0
plainmarker-0.49.0.dist-info/licenses/LICENSE +21 -0
plainmarker-0.49.0.dist-info/top_level.txt +1 -0

keeper_core/__init__.py ADDED Viewed

@@ -0,0 +1,12 @@
+"""plainmarker Core — the standalone engine.
+All of plainmarker's real logic lives here. This package assumes NOTHING about Claude
+Code (or any other host) and is designed to run on its own. Host-specific surfaces
+(the Claude Code plugin, a future standalone CLI service, etc.) live under
+``adapters/`` and only call into this package.
+Milestone 0, Step 1: this is the skeleton. The feature modules described in
+``docs/ARCHITECTURE.md`` are added one per later build-kit step.
+"""
+__version__ = "0.49.0"

keeper_core/__main__.py ADDED Viewed

@@ -0,0 +1,11 @@
+"""Run plainmarker Core as a module: ``python -m keeper_core ...``.
+This is the invocation path the Claude Code plugin's MCP server will use later
+(Step 5): it launches Core as a subprocess instead of importing it, so the plugin
+never has to carry a copy of Core's logic.
+"""
+from keeper_core.cli import main
+# Script entry point: when executed as the main module, exit with main()'s return value as the status.
+if __name__ == "__main__":
+    raise SystemExit(main())

keeper_core/accept_baseline.py ADDED Viewed

@@ -0,0 +1,327 @@
+"""Accept-baseline (D-079) + per-finding acknowledge (D-082): the OWNER's noise filter for `plainmarker check`.
+`plainmarker accept <path>` records the current findings' content-bound IDs to `.keeper/accepted.json`;
+`plainmarker check` then headlines only NEW findings (IDs not in the store) and quietly counts the accepted
+ones. `plainmarker accept <path> --only <selector> --reason "..."` acknowledges ONE finding and records WHY,
+so `plainmarker check` keeps just that one quiet and shows the reason (the decision is remembered) until the
+code at that spot changes — the content-bound id then changes and the finding re-surfaces on its own.
+NOT A SECURITY BOUNDARY. `plainmarker check` is the ADVISORY narrator and is fully silenceable by an agent
+with file-write (this store, `.keeperignore`, or editing `.keeper/`). The SIGNED GATE (`plainmarker baseline`
+/`sign`/`audit`) IGNORES this store and reports everything — it is the only trust boundary. Therefore this
+module is imported ONLY by the keeper-check path; `baseline.py`/`auditor.py`/`witness_store.py` MUST NOT
+import it (enforced mechanically by tests/test_accept_baseline.py::test_gate_modules_do_not_import_accept).
+Content-bound IDs (a rotated secret / changed source line gets a new id and re-surfaces) are an
+honest-user RE-REVIEW aid, NOT an adversary defense.
+"""
+from __future__ import annotations
+import hashlib
+import json
+import re
+from datetime import datetime, timezone
+from pathlib import Path
+# File name of the accept store; _accept_path() places it inside the project's `.keeper/` directory.
+_STORE = "accepted.json"
+_SCHEMA = 2                                   # 1 = {schema,recorded,accepted}; 2 adds {reasons:{id:{...}}}
+# Only these two checks are acceptable in v1. Shell HARD findings (download-and-run / exfil) are
+# deliberately NEVER acceptable, and shell has no receipt + a 20-cap, so it is excluded entirely.
+_ACCEPTABLE = ("secrets", "code_vulnerabilities")
+# A selector is FILE:LINE[:TAG]. The file part has NO colon (receipt paths are relative posix paths), so
+# the first ':' ends the file and the optional TAG (a detect-secrets type / semgrep rule) may itself
+# contain ':'. Resolution is fail-CLOSED at the call site (exactly one id, or acknowledge nothing).
+_SELECTOR_RE = re.compile(r"^(?P<file>[^:]+):(?P<line>\d+)(?::(?P<tag>.+))?$")
+def _accept_path(project_path) -> Path:
+    """Return the accept-store location: `<project_path>/.keeper/<_STORE>`."""
+    keeper_dir = Path(project_path) / ".keeper"
+    return keeper_dir / _STORE
+def _today() -> str:
+    """Return the current UTC calendar date as an ISO string (YYYY-MM-DD)."""
+    now_utc = datetime.now(tz=timezone.utc)
+    return now_utc.date().isoformat()
+def _clean_reason(s) -> str:
+    """Neutralize a string for single-line display.
+    `s` is converted with str(); every non-printable character is dropped (control characters,
+    newlines, and the ESC character that starts an ANSI sequence — the printable remainder of such a
+    sequence is kept), surrounding whitespace is stripped, and the result is capped at 200 characters.
+    Used both when storing a fresh reason and when loading a possibly forged store, so an
+    agent-writable store cannot inject plainmarker-looking lines into the report."""
+    printable = [ch for ch in str(s) if ch.isprintable()]
+    return "".join(printable).strip()[:200]
+def _line_at(path: Path, line) -> str:
+    """Return the stripped text of 1-based line number `line` in `path`.
+    Fail-safe: yields "" when `line` is not an int >= 1, when the file cannot be opened or read
+    (OSError), or when the file has fewer than `line` lines. Undecodable bytes are replaced rather
+    than raising."""
+    if not isinstance(line, int) or line < 1:
+        return ""
+    wanted = ""
+    try:
+        with path.open(encoding="utf-8", errors="replace") as handle:
+            for number, raw_text in enumerate(handle, start=1):
+                if number == line:
+                    wanted = raw_text.strip()
+                    break
+    except OSError:
+        return ""
+    return wanted
+def _secret_rows(receipt: dict) -> list[dict]:
+    """Build one row per secrets finding in `receipt`, joined to its raw hashed value.
+    Each entry of receipt["findings"] is matched to the first not-yet-used entry of
+    receipt["raw"]["results"][file] with the same line_number and type, so N distinct secrets on the
+    SAME (line, type) map to N distinct hashes rather than all to the first. The id is built from the
+    file and that entry's hashed_secret, so a rotated value gets a new id and re-surfaces. When no
+    usable hash is found the id falls back to file + line + type, so the finding never VANISHES from
+    the count; such a row is marked content_bound=False (NOT acknowledgeable).
+    Returns a list of {id, file, line, type, content_bound} dicts; this is the single source of truth
+    for secret ids (finding_ids derives from it)."""
+    raw_section = receipt.get("raw")
+    raw = raw_section.get("results") if isinstance(raw_section, dict) else None
+    if not isinstance(raw, dict):             # malformed/missing raw section -> every id falls back
+        raw = {}
+    rows: list[dict] = []
+    consumed: dict[str, set[int]] = {}        # file -> raw indices already mapped to a finding
+    for f in receipt.get("findings") or []:
+        file, line, typ = f.get("file"), f.get("line"), f.get("type")
+        used = consumed.setdefault(file, set())
+        hashed = None
+        candidates = raw.get(file)
+        for i, entry in enumerate(candidates if isinstance(candidates, list) else []):
+            if i in used or not isinstance(entry, dict):
+                continue
+            if entry.get("line_number") != line or entry.get("type") != typ:
+                continue
+            used.add(i)
+            hashed = entry.get("hashed_secret")
+            break
+        # The flag must follow the SAME test that picks the id form: an empty hashed_secret yields the
+        # fallback (line+type) id, which is not value-binding and so must not be acknowledgeable.
+        content_bound = bool(hashed)
+        rid = (f"secrets\0{file}\0{hashed}" if content_bound
+               else f"secrets\0{file}\0L{line}\0{typ}")
+        rows.append({"id": rid, "file": file, "line": line, "type": typ,
+                     "content_bound": content_bound})
+    return rows
+def _sast_rows(receipt: dict, root: Path) -> list[dict]:
+    """Build one row per code-vulnerability finding in `receipt`.
+    The id is keyed on (file, rule, sha256 of the stripped source line CONTENT) — NOT the line number —
+    so a line shift keeps the acceptance while an in-place edit at that location re-surfaces it for
+    re-review. A finding without a file or rule gets a fallback id on (file, rule, line number) so it
+    never vanishes; that row, and any row whose source line is unreadable or empty (sha256("") is not
+    value-binding), is marked content_bound=False.
+    Returns a list of {id, file, rule, line, content_bound} dicts; `root` is the directory the receipt's
+    file paths are joined to."""
+    out: list[dict] = []
+    for finding in receipt.get("findings") or []:
+        file = finding.get("file")
+        rule = finding.get("rule")
+        line = finding.get("line")
+        if file and rule is not None:
+            source_line = _line_at(root / file, line)
+            fingerprint = hashlib.sha256(source_line.encode("utf-8", "replace")).hexdigest()
+            rid = f"code_vulnerabilities\0{file}\0{rule}\0{fingerprint}"
+            content_bound = bool(source_line)
+        else:
+            # Malformed finding: fall back to a line-number id so it is still counted.
+            rid = f"code_vulnerabilities\0{file or '?'}\0{rule}\0L{line}"
+            content_bound = False
+        out.append({"id": rid, "file": file, "rule": rule, "line": line, "content_bound": content_bound})
+    return out
+def finding_rows(check_result, root) -> list[dict]:
+    """Per-finding rows for a fail CheckResult (secrets + code_vulnerabilities only).
+    Each row is {id, file, line, type|rule, content_bound}. Fail-closed: a missing / non-fail /
+    non-acceptable / receiptless check yields [], and so does a receipt that cannot be read, is not
+    valid JSON, or is valid JSON but not an object. `root` is the project directory used to read source
+    lines for code-vulnerability ids."""
+    if (not check_result or check_result.status != "fail"
+            or check_result.check not in _ACCEPTABLE or not check_result.receipt_path):
+        return []
+    try:
+        receipt = json.loads(Path(check_result.receipt_path).read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        return []
+    if not isinstance(receipt, dict):         # e.g. `[]` or `null`: parseable, but not a receipt
+        return []
+    return _secret_rows(receipt) if check_result.check == "secrets" else _sast_rows(receipt, Path(root))
+def finding_ids(check_result, root) -> set[str]:
+    """Return the set of finding ids for a fail CheckResult (secrets + code_vulnerabilities only).
+    Delegates to finding_rows so the id logic has one home; whenever finding_rows yields no rows the
+    result is an EMPTY set, so such a check can never be marked "all accepted"."""
+    ids: set[str] = set()
+    for row in finding_rows(check_result, root):
+        ids.add(row["id"])
+    return ids
+def finding_label(row: dict) -> str:
+    """Return the human-readable, SANITIZED label for one finding row: `FILE:LINE (TAG)`.
+    TAG is the row's type, or its rule when there is no type; the parenthesised part is omitted when
+    neither is set. The whole label passes through _clean_reason because a receipt's file path or type
+    string can carry a newline / ANSI escape, making this the single chokepoint against label
+    injection into the report."""
+    tag = row.get("type") or row.get("rule")
+    location = f"{row.get('file')}:{row.get('line')}"
+    label = f"{location} ({tag})" if tag else location
+    return _clean_reason(label)
+def resolve_selector(rows: list[dict], selector: str) -> set[str]:
+    """Return the set of finding ids that a FILE:LINE[:TAG] selector matches.
+    A row matches when its file and line equal the selector's and, if a TAG was given, its type (or
+    its rule when it has no type) equals the TAG. An unparsable or empty selector matches nothing.
+    The CALLER fail-CLOSES on len != 1 — a 0/ambiguous match must acknowledge nothing."""
+    parsed = _SELECTOR_RE.match((selector or "").strip())
+    if parsed is None:
+        return set()
+    want_file = parsed.group("file")
+    want_line = int(parsed.group("line"))
+    want_tag = parsed.group("tag")
+    return {
+        row["id"]
+        for row in rows
+        if row.get("file") == want_file
+        and row.get("line") == want_line
+        and (want_tag is None or (row.get("type") or row.get("rule")) == want_tag)
+    }
+def load_store(project_path) -> dict:
+    """Load the full accept store, sanitized + fail-OPEN.
+    Returns {schema, recorded, accepted:list, reasons:{id:{reason,recorded}}}. Any problem degrades
+    instead of raising, so a broken store never HIDES a finding: a missing / unreadable / non-JSON /
+    non-object file yields an empty store, a non-list `accepted` is treated as empty, non-string
+    accepted entries and malformed reason entries are dropped, and a non-string `recorded` becomes "".
+    Reason texts are passed through _clean_reason so a forged store can't inject output. Schema-1
+    stores (no `reasons`) load as reasons={}."""
+    empty = {"schema": _SCHEMA, "recorded": "", "accepted": [], "reasons": {}}
+    try:
+        obj = json.loads(_accept_path(project_path).read_text(encoding="utf-8"))
+    except (OSError, ValueError):
+        return empty
+    if not isinstance(obj, dict):
+        return empty
+    # Only a real list counts: iterating a forged int would raise, and iterating a str/dict would
+    # silently turn its characters/keys into "accepted ids".
+    raw_accepted = obj.get("accepted")
+    if not isinstance(raw_accepted, list):
+        raw_accepted = []
+    accepted = [x for x in raw_accepted if isinstance(x, str)]
+    reasons: dict = {}
+    src = obj.get("reasons")
+    if isinstance(src, dict):
+        for k, v in src.items():
+            if isinstance(k, str) and isinstance(v, dict) and isinstance(v.get("reason"), str):
+                rec = v.get("recorded")
+                reasons[k] = {"reason": _clean_reason(v["reason"]),       # forged store can't inject output
+                              "recorded": _clean_reason(rec) if isinstance(rec, str) else ""}
+    recorded = obj.get("recorded")
+    return {"schema": _SCHEMA, "recorded": recorded if isinstance(recorded, str) else "",
+            "accepted": accepted, "reasons": reasons}
+def save_store(project_path, store: dict) -> Path:
+    """Write `store` to the accept-store file as schema-2 JSON, deterministically.
+    Accepted ids are de-duplicated and sorted. GC: a reason is kept ONLY if its id is still in
+    `accepted`, so an orphaned reason (its finding rotated/edited away) never lingers. Creates the
+    parent directory when needed and returns the path written."""
+    accepted_ids = sorted(set(store.get("accepted", [])))
+    live = set(accepted_ids)
+    kept_reasons = {}
+    for rid, meta in (store.get("reasons") or {}).items():
+        if rid in live:
+            kept_reasons[rid] = meta
+    target = _accept_path(project_path)
+    target.parent.mkdir(parents=True, exist_ok=True)
+    payload = {"schema": _SCHEMA, "recorded": store.get("recorded", ""),
+               "accepted": accepted_ids, "reasons": kept_reasons}
+    target.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
+    return target
+def load_accepted(project_path) -> set[str]:
+    """Return the accepted-finding ids as a set; EMPTY on any store problem (fail-OPEN via load_store)."""
+    store = load_store(project_path)
+    return set(store["accepted"])
+def load_reasons(project_path) -> dict:
+    """Return {id: {reason, recorded}} for findings acknowledged with a reason; {} on any store
+    problem (fail-open via load_store)."""
+    store = load_store(project_path)
+    return store["reasons"]
+def save_accepted(project_path, ids, recorded: str = "") -> Path:
+    """Record `ids` as the accepted set and `recorded` as the store's date, returning the path written.
+    MERGES into the existing store rather than overwriting it, so a per-finding reason is never
+    clobbered: a surviving id keeps its reason, and a vanished id's reason is GC'd by save_store."""
+    merged = load_store(project_path)
+    merged.update(accepted=sorted(set(ids)), recorded=recorded)
+    return save_store(project_path, merged)
+def _scan_rows(path: Path) -> tuple[list[dict], list[str]]:
+    """Run the secrets and sast checks over `path` and collect their finding rows.
+    Uses project_excludes(path) as the exclude set — it MUST match `plainmarker check`'s scan set
+    (.keeperignore), else the baseline would record findings that `plainmarker check` never sees.
+    Returns (all finding rows, names of checks whose status was "unknown"): a timed-out / unavailable
+    scanner accepted NOTHING, so the caller is told which ones could not be scanned."""
+    from keeper_core.checks import sast_check, secrets_check
+    from keeper_core.onboarding.builtin_provider import project_excludes
+    evidence_dir = path / ".keeper" / "evidence"
+    excludes = project_excludes(path)
+    found: list[dict] = []
+    skipped: list[str] = []
+    for run_check in (secrets_check, sast_check):
+        result = run_check(path, evidence_dir, excludes)
+        if result.status == "unknown":
+            skipped.append(result.check)
+        found.extend(finding_rows(result, path))
+    return found, skipped
+def _acknowledge_one(path: Path, rows: list[dict], selector: str, reason: str,
+                     recorded: str, as_json: bool) -> int:
+    """Acknowledge exactly ONE finding (fail-CLOSED) and record the owner's reason for it.
+    Refuses — printing why and returning 1 — when the reason is empty after sanitizing, when the
+    selector matches no finding or more than one id, when the matched id also backs other findings,
+    or when the finding's id is not content-bound. On success the id and reason are written to the
+    accept store, the acknowledgement is echoed (informed consent) and 0 is returned.
+    `rows` are the current finding rows, `selector` is FILE:LINE[:TAG], `recorded` is the date to
+    store (today's UTC date when empty) and `as_json` switches the success echo to JSON."""
+    # Sanitize BEFORE the emptiness check: a reason made only of control characters survives
+    # str.strip() but cleans to "", which would otherwise be stored as an empty reason.
+    reason = _clean_reason(reason or "")
+    if not reason:
+        print("--only needs --reason \"why this is OK\" — the reason is recorded and shown on later "
+              "`plainmarker check`. Nothing acknowledged.")
+        return 1
+    matches = resolve_selector(rows, selector)
+    if len(matches) != 1:
+        if not matches:
+            print(f"No current finding matches {selector!r}. Run `plainmarker check` to see the exact "
+                  "FILE:LINE to use. Nothing acknowledged.")
+        else:
+            cands = sorted({finding_label(r) for r in rows if r["id"] in matches})
+            if len(cands) == 1:                       # N indistinguishable findings at one spot (same type)
+                print(f"{selector!r} matches {len(matches)} findings at the same spot ({cands[0]}) that "
+                      "cannot be told apart by selector — fix them, or use `plainmarker accept` to accept all "
+                      "current findings. Nothing acknowledged.")
+            else:
+                print(f"{selector!r} matches {len(matches)} findings — add the type/rule to pick one "
+                      f"(e.g. {cands[0]!r}). Candidates:")
+                for c in cands:
+                    print(f"  - {c}")
+                print("Nothing acknowledged.")
+        return 1
+    rid = next(iter(matches))
+    backing = [r for r in rows if r["id"] == rid]
+    if len(backing) > 1:                              # one content-bound id can back N byte-identical lines
+        # "Others" are the rows sharing this id that the selector does NOT point at. Comparing a label
+        # ("FILE:LINE (TAG)") with the selector text ("FILE:LINE[:TAG]") almost never matches, which
+        # would list the selected finding among its own "others" — so re-use the selector matcher.
+        others = sorted({finding_label(r) for r in backing if not resolve_selector([r], selector)})
+        print(f"{selector!r} has the same content as {len(backing) - 1} other finding(s) "
+              f"({', '.join(others) or 'elsewhere'}), so acknowledging it would silence those too — and a "
+              "future identical line would inherit your reason. Fix them, or use `plainmarker accept` to accept "
+              "all. Nothing acknowledged.")
+        return 1
+    row = backing[0]
+    if not row.get("content_bound", True):           # a fallback (line+type) id would hide a ROTATED value
+        print(f"plainmarker could not fingerprint the content of {finding_label(row)}, so it can't safely "
+              "remember this one — a changed value would silently ride the acknowledgement. Fix it instead. "
+              "Nothing acknowledged.")
+        return 1
+    store = load_store(path)
+    store["accepted"] = sorted(set(store["accepted"]) | {rid})
+    store["reasons"][rid] = {"reason": reason, "recorded": recorded or _today()}
+    save_store(path, store)
+    if as_json:
+        print(json.dumps({"acknowledged": finding_label(row), "reason": reason}, indent=2))
+        return 0
+    print(f"Acknowledged {finding_label(row)} — \"{reason}\".")
+    print("Future `plainmarker check` keeps this one quiet (and shows your reason) until the code at that spot "
+          "changes. A noise filter, NOT a safety guarantee — `plainmarker baseline` still reports it.")
+    return 0
+def run_accept_cli(project_path: str, recorded: str = "", as_json: bool = False,
+                   only: str | None = None, reason: str = "") -> int:
+    """`plainmarker accept <path>`: re-scan (secrets + code-vulns) and record the findings as reviewed.
+    Prints exactly what is accepted (informed consent), as JSON when `as_json` is set. With `only`
+    (a FILE:LINE[:TAG] selector) plus `reason`, a SINGLE finding is acknowledged instead and the work
+    is delegated to _acknowledge_one (fail-closed on an ambiguous/absent selector).
+    Returns 0 on success and 1 when `project_path` is not a directory (or when the single-finding
+    acknowledgement is refused)."""
+    root = Path(project_path).expanduser().resolve()
+    if not root.is_dir():
+        print(f"Not a folder: {root}")
+        return 1
+    from keeper_core.baseline import _protect_keeper_dir
+    _protect_keeper_dir(root)
+    rows, unscanned = _scan_rows(root)
+    if only is not None:
+        return _acknowledge_one(root, rows, only, reason, recorded, as_json)
+    ids = {row["id"] for row in rows}
+    # A check that could NOT be scanned this run (timed out / unavailable) must not silently wipe its
+    # earlier acknowledgements + reasons: keep every stored id carrying that check's "<check>\0" prefix
+    # so a transient scanner failure never destroys the owner's recorded decisions (disclosed below).
+    preserved: set[str] = set()
+    if unscanned:
+        prefixes = tuple(f"{name}\0" for name in unscanned)
+        preserved = {rid for rid in load_store(root)["accepted"] if rid.startswith(prefixes)}
+    by_check = {
+        name: sum(1 for rid in ids if rid.startswith(f"{name}\0"))
+        for name in ("secrets", "code_vulnerabilities")
+    }
+    # MERGE: reasons survive for ids that are still accepted; the rest are GC'd on save.
+    save_accepted(root, ids | preserved, recorded)
+    if as_json:
+        report = {"accepted": len(ids), "by_check": by_check, "unscanned": unscanned,
+                  "preserved_unscanned": len(preserved)}
+        print(json.dumps(report, indent=2))
+        return 0
+    pieces = []
+    for name, count in by_check.items():
+        if count:
+            pieces.append(f"{count} {'secret' if name == 'secrets' else 'code-vuln'}(s)")
+    parts = ", ".join(pieces)
+    print(f"Accepted {len(ids)} finding(s)" + (f" ({parts})" if parts else "") + ".")
+    for name in unscanned:
+        print(f"⚠ {name}: could not scan (timed out / unavailable) — kept your earlier acknowledgements for it.")
+    print("Future `plainmarker check` will flag only NEW findings. This is a noise filter, NOT a safety "
+          "guarantee — `plainmarker baseline` always scans everything.")
+    return 0