plainmarker 0.49.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. keeper_core/__init__.py +12 -0
  2. keeper_core/__main__.py +11 -0
  3. keeper_core/accept_baseline.py +327 -0
  4. keeper_core/auditor.py +565 -0
  5. keeper_core/baseline.py +125 -0
  6. keeper_core/calibrate.py +150 -0
  7. keeper_core/calibrate_auditor.py +320 -0
  8. keeper_core/checks.py +995 -0
  9. keeper_core/cli.py +267 -0
  10. keeper_core/config.py +188 -0
  11. keeper_core/doctor.py +80 -0
  12. keeper_core/interrogation.py +210 -0
  13. keeper_core/models.py +155 -0
  14. keeper_core/onboarding/__init__.py +6 -0
  15. keeper_core/onboarding/builtin_provider.py +311 -0
  16. keeper_core/onboarding/onboard.py +136 -0
  17. keeper_core/onboarding/provider.py +58 -0
  18. keeper_core/onboarding/risk.py +61 -0
  19. keeper_core/onboarding/summary.py +52 -0
  20. keeper_core/ranking.py +144 -0
  21. keeper_core/redact.py +66 -0
  22. keeper_core/report.py +704 -0
  23. keeper_core/sast_rules/javascript.yaml +43 -0
  24. keeper_core/sast_rules/python.yaml +71 -0
  25. keeper_core/seatbelt.py +270 -0
  26. keeper_core/session_verify.py +975 -0
  27. keeper_core/shell_audit.py +1058 -0
  28. keeper_core/templates/keeper/config.local-ollama.yaml +36 -0
  29. keeper_core/templates/keeper/config.openrouter-deepseek.yaml +55 -0
  30. keeper_core/templates/keeper/config.openrouter-free.yaml +58 -0
  31. keeper_core/templates/keeper/config.yaml +30 -0
  32. keeper_core/templates/keeper/decision-ledger.md +24 -0
  33. keeper_core/templates/keeper/failure-library.md +18 -0
  34. keeper_core/templates/keeper/hard-truths.yaml +18 -0
  35. keeper_core/templates/keeper/project-state.md +28 -0
  36. keeper_core/witness.py +128 -0
  37. keeper_core/witness_store.py +360 -0
  38. plainmarker-0.49.0.dist-info/METADATA +147 -0
  39. plainmarker-0.49.0.dist-info/RECORD +43 -0
  40. plainmarker-0.49.0.dist-info/WHEEL +5 -0
  41. plainmarker-0.49.0.dist-info/entry_points.txt +2 -0
  42. plainmarker-0.49.0.dist-info/licenses/LICENSE +21 -0
  43. plainmarker-0.49.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,12 @@
1
+ """plainmarker Core — the standalone engine.
2
+
3
+ All of plainmarker's real logic lives here. This package assumes NOTHING about Claude
4
+ Code (or any other host) and is designed to run on its own. Host-specific surfaces
5
+ (the Claude Code plugin, a future standalone CLI service, etc.) live under
6
+ ``adapters/`` and only call into this package.
7
+
8
+ Milestone 0, Step 1: this is the skeleton. The feature modules described in
9
+ ``docs/ARCHITECTURE.md`` are added one per later build-kit step.
10
+ """
11
+
12
# Version string of the keeper_core package.
__version__ = "0.49.0"
@@ -0,0 +1,11 @@
1
+ """Run plainmarker Core as a module: ``python -m keeper_core ...``.
2
+
3
+ This is the invocation path the Claude Code plugin's MCP server will use later
4
+ (Step 5): it launches Core as a subprocess instead of importing it, so the plugin
5
+ never has to carry a copy of Core's logic.
6
+ """
7
+
8
+ from keeper_core.cli import main
9
+
10
# When run as a script/module, hand main()'s return value to SystemExit so it becomes
# the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1,327 @@
1
+ """Accept-baseline (D-079) + per-finding acknowledge (D-082): the OWNER's noise filter for `plainmarker check`.
2
+
3
+ `plainmarker accept <path>` records the current findings' content-bound IDs to `.keeper/accepted.json`;
4
+ `plainmarker check` then headlines only NEW findings (IDs not in the store) and quietly counts the accepted
5
+ ones. `plainmarker accept <path> --only <selector> --reason "..."` acknowledges ONE finding and records WHY,
6
+ so `plainmarker check` keeps just that one quiet and shows the reason (the decision is remembered) until the
7
+ code at that spot changes — the content-bound id then changes and the finding re-surfaces on its own.
8
+
9
+ NOT A SECURITY BOUNDARY. `plainmarker check` is the ADVISORY narrator and is fully silenceable by an agent
10
+ with file-write (this store, `.keeperignore`, or editing `.keeper/`). The SIGNED GATE (`plainmarker baseline`
11
+ /`sign`/`audit`) IGNORES this store and reports everything — it is the only trust boundary. Therefore this
12
+ module is imported ONLY by the keeper-check path; `baseline.py`/`auditor.py`/`witness_store.py` MUST NOT
13
+ import it (enforced mechanically by tests/test_accept_baseline.py::test_gate_modules_do_not_import_accept).
14
+
15
+ Content-bound IDs (a rotated secret / changed source line gets a new id and re-surfaces) are an
16
+ honest-user RE-REVIEW aid, NOT an adversary defense.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import hashlib
22
+ import json
23
+ import re
24
+ from datetime import datetime, timezone
25
+ from pathlib import Path
26
+
27
# File name of the accept store; _accept_path places it under the project's `.keeper/` folder.
_STORE = "accepted.json"
# Schema number written into every saved store.
_SCHEMA = 2  # 1 = {schema,recorded,accepted}; 2 adds {reasons:{id:{...}}}

# Only these two checks are acceptable in v1. Shell HARD findings (download-and-run / exfil) are
# deliberately NEVER acceptable, and shell has no receipt + a 20-cap, so it is excluded entirely.
_ACCEPTABLE = ("secrets", "code_vulnerabilities")

# A selector is FILE:LINE[:TAG]. The file part has NO colon (receipt paths are relative posix paths), so
# the first ':' ends the file and the optional TAG (a detect-secrets type / semgrep rule) may itself
# contain ':'. Resolution is fail-CLOSED at the call site (exactly one id, or acknowledge nothing).
# Named groups: `file` (no colon), `line` (digits only), `tag` (optional, everything after the second ':').
_SELECTOR_RE = re.compile(r"^(?P<file>[^:]+):(?P<line>\d+)(?::(?P<tag>.+))?$")
38
+
39
+
40
def _accept_path(project_path) -> Path:
    """Return where the accept store lives: ``<project_path>/.keeper/<_STORE>``."""
    keeper_dir = Path(project_path) / ".keeper"
    return keeper_dir / _STORE
42
+
43
+
44
def _today() -> str:
    """Return the current UTC calendar date as an ISO string (``YYYY-MM-DD``)."""
    now_utc = datetime.now(tz=timezone.utc)
    return now_utc.date().isoformat()
46
+
47
+
48
def _clean_reason(s) -> str:
    """Make a stored reason safe to display.

    Every non-printable character (control chars, newlines, the ESC that starts an ANSI
    sequence) is dropped so an agent-writable store cannot inject plainmarker-looking lines
    into the report; the result is then stripped and capped at 200 characters. Used both
    when storing a fresh reason and when loading a possibly forged store.
    """
    text = str(s)
    visible = "".join(filter(str.isprintable, text))
    return visible.strip()[:200]
53
+
54
+
55
def _line_at(path: Path, line) -> str:
    """Return the stripped text of 1-based ``line`` in ``path``.

    Fail-safe: a non-int or < 1 line number, an unreadable file, or a line past the end of
    the file all give "". Undecodable bytes are replaced rather than raising.
    """
    if not (isinstance(line, int) and line >= 1):
        return ""
    try:
        with path.open(encoding="utf-8", errors="replace") as handle:
            # Stop reading as soon as the wanted line number is reached.
            wanted = next((raw for number, raw in enumerate(handle, 1) if number == line), None)
    except OSError:
        return ""
    return "" if wanted is None else wanted.strip()
67
+
68
+
69
def _secret_rows(receipt: dict) -> list[dict]:
    """Build one row per finding in a secrets receipt.

    Each entry of ``receipt["findings"]`` is joined to ``receipt["raw"]["results"][file]``
    on (line_number, type) to pick up its ``hashed_secret``. A raw entry is consumed at most
    once, so N findings on the SAME (line, type) map to N distinct raw entries rather than
    all to the first one. This is the single source of truth for secret ids.

    Returns rows of ``{id, file, line, type, content_bound}``:
      * with a usable hash the id is ``secrets\\0<file>\\0<hash>`` and ``content_bound`` is
        True (a rotated value gets a new id and re-surfaces);
      * otherwise the id falls back to ``secrets\\0<file>\\0L<line>\\0<type>`` so the finding
        never vanishes from the count, and ``content_bound`` is False (NOT acknowledgeable).
    """
    raw = ((receipt.get("raw") or {}).get("results")) or {}
    rows: list[dict] = []
    consumed: dict[str, set[int]] = {}  # file -> raw indices already mapped to a finding
    for f in receipt.get("findings") or []:
        file, line, typ = f.get("file"), f.get("line"), f.get("type")
        used = consumed.setdefault(file, set())
        hashed = None
        for i, entry in enumerate(raw.get(file, [])):
            if i in used or entry.get("line_number") != line or entry.get("type") != typ:
                continue
            used.add(i)
            hashed = entry.get("hashed_secret")
            break
        # content_bound must mirror the id actually minted: an empty/falsy hash takes the
        # line+type fallback id, so it must not be reported as content-bound.
        bound = bool(hashed)
        rid = f"secrets\0{file}\0{hashed}" if bound else f"secrets\0{file}\0L{line}\0{typ}"
        rows.append({"id": rid, "file": file, "line": line, "type": typ, "content_bound": bound})
    return rows
91
+
92
+
93
def _sast_rows(receipt: dict, root: Path) -> list[dict]:
    """Build one row per finding in a code-vulnerabilities receipt.

    The id is keyed on (file, rule, sha256 of the source line's CONTENT), not on the line
    number, so a line shift keeps the acceptance while an in-place edit re-surfaces the
    finding. A finding without a file or rule gets a line-number fallback id so it is
    still counted. ``content_bound`` is True only when a non-empty source line was hashed
    (the hash of "" binds to nothing).
    """
    out: list[dict] = []
    for finding in receipt.get("findings") or []:
        file = finding.get("file")
        rule = finding.get("rule")
        line = finding.get("line")
        if file and rule is not None:
            source = _line_at(root / file, line)
            fingerprint = hashlib.sha256(source.encode("utf-8", "replace")).hexdigest()
            rid = f"code_vulnerabilities\0{file}\0{rule}\0{fingerprint}"
            bound = bool(source)  # unreadable/empty line -> refuse to call it content-bound
        else:
            # Malformed finding: fall back to a line-bound id so it never vanishes.
            rid = f"code_vulnerabilities\0{file or '?'}\0{rule}\0L{line}"
            bound = False
        out.append({"id": rid, "file": file, "rule": rule, "line": line, "content_bound": bound})
    return out
110
+
111
+
112
def finding_rows(check_result, root) -> list[dict]:
    """Per-finding rows for a fail CheckResult (secrets + code_vulnerabilities only).

    Each row is ``{id, file, line, type|rule, content_bound}``. Fail-closed: a missing,
    non-fail, non-acceptable or receiptless check yields ``[]``, and so does a receipt that
    cannot be read, is not valid JSON, or whose top level is not a JSON object.

    ``root`` is the project folder that receipt file paths are resolved against when the
    source line of a code-vulnerability finding is fingerprinted.
    """
    if (not check_result or check_result.status != "fail"
            or check_result.check not in _ACCEPTABLE or not check_result.receipt_path):
        return []
    try:
        receipt = json.loads(Path(check_result.receipt_path).read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return []
    if not isinstance(receipt, dict):
        # Valid JSON but not an object (list, string, number, null): the row builders call
        # .get() on it, so treat it like an unreadable receipt instead of crashing.
        return []
    if check_result.check == "secrets":
        return _secret_rows(receipt)
    return _sast_rows(receipt, Path(root))
124
+
125
+
126
def finding_ids(check_result, root) -> set[str]:
    """Return the set of finding ids for a fail CheckResult (secrets + code_vulnerabilities only).

    Built on top of finding_rows so the id logic lives in one place. Anything finding_rows
    rejects (non-fail / unknown / receiptless check) gives an EMPTY set, so such a check can
    never be marked "all accepted".
    """
    ids: set[str] = set()
    for row in finding_rows(check_result, root):
        ids.add(row["id"])
    return ids
132
+
133
+
134
def finding_label(row: dict) -> str:
    """Return the display label for one finding row: ``FILE:LINE (TAG)``.

    TAG is the row's ``type`` or, failing that, its ``rule``; it is omitted when neither is
    set. The assembled label goes through _clean_reason because a receipt's file path or
    type string can carry a newline / ANSI sequence (an adversarial agent can name a file
    `evil\\nkeeper ...`), which must not reach the report unsanitized.
    """
    label = f"{row.get('file')}:{row.get('line')}"
    tag = row.get("type") or row.get("rule")
    if tag:
        label += f" ({tag})"
    return _clean_reason(label)
141
+
142
+
143
def resolve_selector(rows: list[dict], selector: str) -> set[str]:
    """Return the ids of every row matched by a ``FILE:LINE[:TAG]`` selector.

    A selector that does not parse matches nothing. Without a TAG, every row at FILE:LINE
    matches; with a TAG, the row's ``type`` (or ``rule``) must equal it as well. The CALLER
    fail-CLOSES on ``len(result) != 1`` — a 0/ambiguous match must acknowledge nothing.
    """
    parsed = _SELECTOR_RE.match((selector or "").strip())
    if parsed is None:
        return set()
    want_file = parsed.group("file")
    want_line = int(parsed.group("line"))
    want_tag = parsed.group("tag")

    def _hits(row: dict) -> bool:
        if row.get("file") != want_file or row.get("line") != want_line:
            return False
        return want_tag is None or (row.get("type") or row.get("rule")) == want_tag

    return {row["id"] for row in rows if _hits(row)}
158
+
159
+
160
def load_store(project_path) -> dict:
    """Load the accept store, sanitized and fail-OPEN.

    Returns ``{schema, recorded, accepted: list[str], reasons: {id: {reason, recorded}}}``.
    Any problem degrades toward an EMPTY store so a broken store never HIDES a finding:
      * missing / unreadable / non-JSON / non-object file -> empty store;
      * ``accepted`` that is not a list -> no accepted ids (a bare number would otherwise
        crash the load, and a string would be read as one id per character);
      * non-string ids and malformed reason entries are dropped.
    Every displayed string (reasons, their dates, the top-level ``recorded``) goes through
    _clean_reason, so a forged store cannot inject output. Schema-1 stores (no ``reasons``)
    load with ``reasons == {}``.
    """
    empty = {"schema": _SCHEMA, "recorded": "", "accepted": [], "reasons": {}}
    try:
        obj = json.loads(_accept_path(project_path).read_text(encoding="utf-8"))
    except (OSError, ValueError):
        return empty
    if not isinstance(obj, dict):
        return empty

    raw_accepted = obj.get("accepted")
    accepted = ([x for x in raw_accepted if isinstance(x, str)]
                if isinstance(raw_accepted, list) else [])

    reasons: dict = {}
    src = obj.get("reasons")
    if isinstance(src, dict):
        for k, v in src.items():
            if isinstance(k, str) and isinstance(v, dict) and isinstance(v.get("reason"), str):
                rec = v.get("recorded")
                reasons[k] = {"reason": _clean_reason(v["reason"]),
                              "recorded": _clean_reason(rec) if isinstance(rec, str) else ""}

    recorded = obj.get("recorded")
    return {"schema": _SCHEMA,
            # Same neutralization as the per-reason date: this value also comes from the file.
            "recorded": _clean_reason(recorded) if isinstance(recorded, str) else "",
            "accepted": accepted, "reasons": reasons}
183
+
184
+
185
def save_store(project_path, store: dict) -> Path:
    """Write ``store`` to disk as a schema-2 document and return the file's path.

    Output is deterministic: accepted ids are de-duplicated and sorted. Reasons are garbage
    collected — one is kept ONLY if its id is still in ``accepted``, so an orphaned reason
    (its finding rotated/edited away) never lingers. The `.keeper/` folder is created if
    it does not exist.
    """
    accepted = sorted(set(store.get("accepted", [])))
    live = set(accepted)
    reasons = {}
    for rid, info in (store.get("reasons") or {}).items():
        if rid in live:
            reasons[rid] = info
    payload = {"schema": _SCHEMA, "recorded": store.get("recorded", ""),
               "accepted": accepted, "reasons": reasons}
    target = _accept_path(project_path)
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(payload, indent=2) + "\n", encoding="utf-8")
    return target
196
+
197
+
198
def load_accepted(project_path) -> set[str]:
    """Return the accepted finding ids as a set; EMPTY on any store problem (fail-OPEN)."""
    store = load_store(project_path)
    return set(store["accepted"])
201
+
202
+
203
def load_reasons(project_path) -> dict:
    """Return ``{id: {reason, recorded}}`` for findings acknowledged with a reason.

    Fail-open: any store problem gives ``{}``.
    """
    store = load_store(project_path)
    return store["reasons"]
206
+
207
+
208
def save_accepted(project_path, ids, recorded: str = "") -> Path:
    """Replace the accepted set with ``ids`` and return the store's path.

    The existing store is loaded first and only its ``accepted`` / ``recorded`` fields are
    overwritten, so per-finding reasons are MERGED rather than clobbered: an id that
    survives keeps its reason, while the reason of an id that is no longer accepted is
    dropped by save_store.
    """
    current = load_store(project_path)
    current.update(accepted=sorted(set(ids)), recorded=recorded)
    return save_store(project_path, current)
215
+
216
+
217
def _scan_rows(path: Path) -> tuple[list[dict], list[str]]:
    """Run the secrets and SAST checks and collect their finding rows.

    Both checks receive the project's exclude set from project_excludes — it MUST match
    what `plainmarker check` scans, otherwise the baseline would record findings that
    `plainmarker check` never sees. Returns ``(rows, unscanned)`` where ``unscanned`` names
    every check whose status came back "unknown" (timed out / unavailable): such a check
    accepted NOTHING, and the caller is expected to say so.
    """
    from keeper_core.checks import sast_check, secrets_check
    from keeper_core.onboarding.builtin_provider import project_excludes

    evidence_dir = path / ".keeper" / "evidence"
    skip = project_excludes(path)
    collected: list[dict] = []
    failed: list[str] = []
    for run_check in (secrets_check, sast_check):
        result = run_check(path, evidence_dir, skip)
        if result.status == "unknown":
            failed.append(result.check)
        collected += finding_rows(result, path)
    return collected, failed
232
+
233
+
234
def _acknowledge_one(path: Path, rows: list[dict], selector: str, reason: str,
                     recorded: str, as_json: bool) -> int:
    """Acknowledge exactly ONE finding and record why (fail-CLOSED).

    Args:
        path: project folder holding the accept store.
        rows: current finding rows from the scan.
        selector: ``FILE:LINE[:TAG]`` naming the finding to acknowledge.
        reason: the owner's justification; stored sanitized and shown on later checks.
        recorded: date to store with the reason; today's UTC date when empty.
        as_json: print the success result as JSON instead of prose.

    Returns:
        0 when the finding was acknowledged and saved, 1 on every refusal. Refusals change
        nothing on disk: no usable reason, no match, an ambiguous selector, an id shared by
        several findings, or a finding whose content could not be fingerprinted.
    """
    # A reason made only of non-printable characters would be stored as "" after
    # sanitizing, so treat it exactly like a missing reason.
    if not reason.strip() or not _clean_reason(reason):
        print("--only needs --reason \"why this is OK\" — the reason is recorded and shown on later "
              "`plainmarker check`. Nothing acknowledged.")
        return 1
    matches = resolve_selector(rows, selector)
    if len(matches) != 1:
        if not matches:
            print(f"No current finding matches {selector!r}. Run `plainmarker check` to see the exact "
                  "FILE:LINE to use. Nothing acknowledged.")
        else:
            cands = sorted({finding_label(r) for r in rows if r["id"] in matches})
            if len(cands) == 1:  # N indistinguishable findings at one spot (same type)
                print(f"{selector!r} matches {len(matches)} findings at the same spot ({cands[0]}) that "
                      "cannot be told apart by selector — fix them, or use `plainmarker accept` to accept all "
                      "current findings. Nothing acknowledged.")
            else:
                print(f"{selector!r} matches {len(matches)} findings — add the type/rule to pick one "
                      f"(e.g. {cands[0]!r}). Candidates:")
                for c in cands:
                    print(f" - {c}")
                print("Nothing acknowledged.")
        return 1
    rid = next(iter(matches))
    backing = [r for r in rows if r["id"] == rid]
    if len(backing) > 1:  # one content-bound id can back N byte-identical lines
        # "Others" are the backing rows the selector does NOT point at. Comparing labels to
        # the raw selector never matches ("FILE:LINE (TAG)" vs "FILE:LINE[:TAG]"), which
        # would list the selected finding among its own "others".
        others = sorted({finding_label(r) for r in backing if not resolve_selector([r], selector)})
        print(f"{selector!r} has the same content as {len(backing) - 1} other finding(s) "
              f"({', '.join(others) or 'elsewhere'}), so acknowledging it would silence those too — and a "
              "future identical line would inherit your reason. Fix them, or use `plainmarker accept` to accept "
              "all. Nothing acknowledged.")
        return 1
    row = backing[0]
    if not row.get("content_bound", True):  # a fallback (line+type) id would hide a ROTATED value
        print(f"plainmarker could not fingerprint the content of {finding_label(row)}, so it can't safely "
              "remember this one — a changed value would silently ride the acknowledgement. Fix it instead. "
              "Nothing acknowledged.")
        return 1
    reason = _clean_reason(reason)
    store = load_store(path)
    store["accepted"] = sorted(set(store["accepted"]) | {rid})
    store["reasons"][rid] = {"reason": reason, "recorded": recorded or _today()}
    save_store(path, store)
    if as_json:
        print(json.dumps({"acknowledged": finding_label(row), "reason": reason}, indent=2))
        return 0
    print(f"Acknowledged {finding_label(row)} — \"{reason}\".")
    print("Future `plainmarker check` keeps this one quiet (and shows your reason) until the code at that spot "
          "changes. A noise filter, NOT a safety guarantee — `plainmarker baseline` still reports it.")
    return 0
287
+
288
+
289
def run_accept_cli(project_path: str, recorded: str = "", as_json: bool = False,
                   only: str | None = None, reason: str = "") -> int:
    """Entry point for `plainmarker accept <path>`.

    Re-scans the project (secrets + code-vulns) and records the current findings as
    reviewed, printing what was accepted (informed consent). With ``only`` set
    (`--only <selector> --reason "..."`), a SINGLE finding is acknowledged instead and the
    reason is recorded; that path is fail-closed on an ambiguous/absent selector.

    Returns 0 on success, 1 when ``project_path`` is not a folder or the single-finding
    acknowledgement was refused.
    """
    path = Path(project_path).expanduser().resolve()
    if not path.is_dir():
        print(f"Not a folder: {path}")
        return 1
    from keeper_core.baseline import _protect_keeper_dir
    _protect_keeper_dir(path)
    rows, unscanned = _scan_rows(path)

    if only is not None:
        return _acknowledge_one(path, rows, only, reason, recorded, as_json)

    ids = {row["id"] for row in rows}
    # A check that could NOT be scanned this run (timed out / unavailable) must not wipe its
    # earlier acknowledgements + reasons: ids start with "<check>\0", so the stored ids of
    # every unscanned check are carried over. (Disclosed to the user below.)
    preserved: set[str] = set()
    if unscanned:
        prefixes = tuple(f"{name}\0" for name in unscanned)
        preserved = {sid for sid in load_store(path)["accepted"] if sid.startswith(prefixes)}
    by_check = {name: sum(1 for fid in ids if fid.startswith(f"{name}\0"))
                for name in ("secrets", "code_vulnerabilities")}
    # MERGE: reasons of surviving ids are kept, the rest are garbage collected.
    save_accepted(path, ids | preserved, recorded)

    if as_json:
        summary = {"accepted": len(ids), "by_check": by_check, "unscanned": unscanned,
                   "preserved_unscanned": len(preserved)}
        print(json.dumps(summary, indent=2))
        return 0

    pieces = []
    for name, count in by_check.items():
        if count:
            noun = "secret" if name == "secrets" else "code-vuln"
            pieces.append(f"{count} {noun}(s)")
    parts = ", ".join(pieces)
    print(f"Accepted {len(ids)} finding(s)" + (f" ({parts})" if parts else "") + ".")
    for name in unscanned:
        print(f"⚠ {name}: could not scan (timed out / unavailable) — kept your earlier acknowledgements for it.")
    print("Future `plainmarker check` will flag only NEW findings. This is a noise filter, NOT a safety "
          "guarantee — `plainmarker baseline` always scans everything.")
    return 0