problem-frame-gate 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,210 @@
1
+ """Finite proof-carrying records from the audit calculus."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from typing import Any
7
+
8
+ from .digest import digest_json
9
+ from .model import Envelope, Horizon
10
+ from .result import CheckBuilder, CheckResult
11
+ from .verifier import EnvelopeVerifier, canonical_order, digest_log
12
+
13
+
14
+ @dataclass(frozen=True, slots=True)
15
+ class SwapCover:
16
+ """Finite set of adjacent pairs certified as component-preserving."""
17
+
18
+ independent_pairs: tuple[tuple[str, str], ...] = ()
19
+ component_equalities: tuple[str, ...] = ()
20
+
21
+ def permits(self, left: str, right: str) -> bool:
22
+ return (left, right) in self.independent_pairs or (right, left) in self.independent_pairs
23
+
24
+ def to_json(self) -> dict[str, Any]:
25
+ return {
26
+ "independent_pairs": [list(pair) for pair in self.independent_pairs],
27
+ "component_equalities": list(self.component_equalities),
28
+ }
29
+
30
+
31
+ @dataclass(frozen=True, slots=True)
32
+ class ReplayCertificate:
33
+ """Certificate that a non-canonical replay word reaches the canonical word."""
34
+
35
+ word: tuple[str, ...]
36
+ swaps: tuple[tuple[int, str, str], ...]
37
+ cover: SwapCover
38
+ target_digest: str
39
+
40
+ def to_json(self) -> dict[str, Any]:
41
+ return {
42
+ "word": list(self.word),
43
+ "swaps": [[index, left, right] for index, left, right in self.swaps],
44
+ "cover": self.cover.to_json(),
45
+ "target_digest": self.target_digest,
46
+ }
47
+
48
+
49
def check_replay_certificate(
    horizon: Horizon, envelopes: tuple[Envelope, ...], certificate: ReplayCertificate
) -> CheckResult:
    """Check that a certificate's swap trace rewrites its word into canonical order.

    The log is first run through ``EnvelopeVerifier``; when that fails its result
    is returned (merged with an empty builder result) and the certificate is not
    examined.  Otherwise the replay word must list every envelope id exactly
    once, the target digest must equal ``digest_log`` of the log, each swap row
    must name the adjacent pair found at its index, each swap must be permitted
    by the cover, and the word left after the trace must equal the ids returned
    by ``canonical_order``.
    """
    builder = CheckBuilder(footprint={"TraceChecker", "FoldKernel", "EnvelopeVerifier"})
    legality = EnvelopeVerifier().verify(horizon, envelopes)
    if not legality.ok:
        return legality.merge(builder.result())

    log_ids = {envelope.eid for envelope in envelopes}
    word = certificate.word
    if not (set(word) == log_ids and len(word) == len(log_ids)):
        # Without a well-formed word the swap trace cannot be replayed at all.
        builder.error("replay-word", "replay word must be a permutation of the log envelope ids")
        return builder.result(
            digest=digest_log(envelopes),
            transcript_digest=digest_json(certificate.to_json()),
        )
    if certificate.target_digest != digest_log(envelopes):
        builder.error("replay-target-digest", "replay certificate target digest does not match the log")

    working = list(word)
    for step, (index, left, right) in enumerate(certificate.swaps):
        if not (0 <= index and index + 1 < len(working)):
            builder.error("replay-swap-index", "swap index is outside the replay word", details={"step": step})
            continue
        found = [working[index], working[index + 1]]
        if found != [left, right]:
            builder.error(
                "replay-swap-pair",
                "swap row does not match the current adjacent pair",
                details={"step": step, "expected": found, "actual": [left, right]},
            )
            continue
        if not certificate.cover.permits(left, right):
            # Reported, but the swap below is still applied so that later rows
            # are checked against the word the trace claims to have produced.
            builder.error(
                "replay-swap-cover",
                "adjacent swap is not covered by an independence certificate",
                details={"step": step, "pair": [left, right]},
            )
        working[index : index + 2] = found[::-1]

    canonical = [envelope.eid for envelope in canonical_order(horizon, envelopes)]
    if working != canonical:
        builder.error(
            "replay-not-canonical",
            "certified swap trace does not reach the canonical replay word",
            details={"actual": working, "canonical": canonical},
        )
    return builder.result(
        digest=digest_log(envelopes),
        transcript_digest=digest_json(certificate.to_json()),
    )
92
+
93
+
94
+ @dataclass(frozen=True, slots=True)
95
+ class SourceCut:
96
+ """Finite consistent-cut record for a source prefix."""
97
+
98
+ cut_id: str
99
+ source_time: int
100
+ included_eids: tuple[str, ...]
101
+ excluded_frontier_eids: tuple[str, ...]
102
+ digest: str
103
+ clock_rows: tuple[str, ...] = ()
104
+ watermark_rows: tuple[str, ...] = ()
105
+
106
+ def to_json(self) -> dict[str, Any]:
107
+ return {
108
+ "cut_id": self.cut_id,
109
+ "source_time": self.source_time,
110
+ "included_eids": list(self.included_eids),
111
+ "excluded_frontier_eids": list(self.excluded_frontier_eids),
112
+ "digest": self.digest,
113
+ "clock_rows": list(self.clock_rows),
114
+ "watermark_rows": list(self.watermark_rows),
115
+ }
116
+
117
+
118
def check_source_cut(horizon: Horizon, envelopes: tuple[Envelope, ...], cut: SourceCut) -> CheckResult:
    """Check a source cut against the finite log it claims to be a prefix of.

    The log is first run through ``EnvelopeVerifier``; when that fails its result
    is returned (merged with an empty builder result) and the cut is not
    examined.  Otherwise the following are checked, each reported as its own
    error code:

    * ``source-cut-included`` - the included ids, as a sorted list, must equal
      the ids of envelopes whose ``commit_time`` is at most ``cut.source_time``.
    * ``source-cut-frontier`` - the excluded frontier must list every envelope
      with a later ``commit_time``.
    * ``source-cut-digest`` - ``cut.digest`` must equal ``digest_log`` of the
      included envelopes, taken in the order the cut lists them.
    * ``source-cut-dependency`` - every dependency that names an envelope id
      must point inside the cut.
    * ``source-cut-commit-group`` - every commit group touched by the cut must
      be wholly inside it (reported once per included member of the group).

    The returned result carries the cut's own digest and the digest of its JSON
    form as the transcript digest.
    """
    builder = CheckBuilder(footprint={"ClockWatermark", "EnvelopeVerifier"})
    legal = EnvelopeVerifier().verify(horizon, envelopes)
    if not legal.ok:
        return legal.merge(builder.result())

    by_id = {env.eid: env for env in envelopes}
    expected_included = tuple(sorted(env.eid for env in envelopes if env.commit_time <= cut.source_time))
    expected_frontier = tuple(sorted(env.eid for env in envelopes if env.commit_time > cut.source_time))
    if tuple(sorted(cut.included_eids)) != expected_included:
        builder.error(
            "source-cut-included",
            "source cut included set does not match commit-time prefix",
            details={"expected": list(expected_included), "actual": sorted(cut.included_eids)},
        )
    if tuple(sorted(cut.excluded_frontier_eids)) != expected_frontier:
        builder.error(
            "source-cut-frontier",
            "source cut frontier must list all later envelopes in the finite log",
            details={"expected": list(expected_frontier), "actual": sorted(cut.excluded_frontier_eids)},
        )

    # Ids the log does not know are skipped here; the included-set check above
    # has already reported them.
    included = tuple(by_id[eid] for eid in cut.included_eids if eid in by_id)
    if cut.digest != digest_log(included):
        builder.error("source-cut-digest", "source cut digest does not match included envelopes")

    # Index commit-group membership once, instead of rescanning the whole log
    # for every included envelope.
    group_members: dict[Any, set[str]] = {}
    for row in envelopes:
        if row.commit_group:
            group_members.setdefault(row.commit_group, set()).add(row.eid)

    included_ids = set(cut.included_eids)
    for env in included:
        for dep in env.dependencies:
            if dep.eid is not None and dep.eid not in included_ids:
                builder.error(
                    "source-cut-dependency",
                    "source cut is not closed under dependencies",
                    location=env.eid,
                    details=dep.to_json(),
                )
        if env.commit_group and not group_members[env.commit_group].issubset(included_ids):
            builder.error(
                "source-cut-commit-group",
                "source cut is not closed under commit groups",
                location=env.commit_group,
            )
    return builder.result(digest=cut.digest, transcript_digest=digest_json(cut.to_json()))
163
+
164
+
165
+ @dataclass(frozen=True, slots=True)
166
+ class TransitionRecord:
167
+ source_digest: str
168
+ target_digest: str
169
+ kind: str
170
+ transcript_digest: str
171
+ capacity_class: str = "normal"
172
+
173
+ def to_json(self) -> dict[str, str]:
174
+ return {
175
+ "source_digest": self.source_digest,
176
+ "target_digest": self.target_digest,
177
+ "kind": self.kind,
178
+ "transcript_digest": self.transcript_digest,
179
+ "capacity_class": self.capacity_class,
180
+ }
181
+
182
+
183
+ @dataclass(frozen=True, slots=True)
184
+ class ReachabilityTranscript:
185
+ transitions: tuple[TransitionRecord, ...]
186
+ assumptions: tuple[str, ...] = field(default_factory=tuple)
187
+
188
+ def to_json(self) -> dict[str, Any]:
189
+ return {
190
+ "transitions": [transition.to_json() for transition in self.transitions],
191
+ "assumptions": list(self.assumptions),
192
+ }
193
+
194
+
195
def check_reachability(transcript: ReachabilityTranscript) -> CheckResult:
    """Check that a reachability transcript is a well-formed digest chain.

    Every transition must carry non-empty source, target and transcript
    digests (``reach-record``), and from the second transition onwards each
    source digest must equal the previous transition's target digest
    (``reach-chain``).  Both errors report the offending position under
    ``details["index"]``.  The transcript's assumptions are copied onto the
    result, whose transcript digest is the digest of the transcript's JSON form.
    """
    builder = CheckBuilder(footprint={"ReachTranscript"})
    previous_target: str | None = None
    for index, transition in enumerate(transcript.transitions):
        if not transition.source_digest or not transition.target_digest or not transition.transcript_digest:
            builder.error(
                "reach-record",
                "transition record must bind source, target, and premise transcript",
                # Identify the malformed record, matching the chain error below.
                details={"index": index},
            )
        if previous_target is not None and transition.source_digest != previous_target:
            builder.error(
                "reach-chain",
                "transition source digest must match previous target digest",
                details={"index": index, "expected": previous_target, "actual": transition.source_digest},
            )
        # The chain always advances, even past a malformed record.
        previous_target = transition.target_digest
    for assumption in transcript.assumptions:
        builder.add_assumption(assumption)
    return builder.result(transcript_digest=digest_json(transcript.to_json()))
@@ -0,0 +1,148 @@
1
+ """Small result objects shared by all checkers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Mapping
6
+ from dataclasses import dataclass, field, replace
7
+ from typing import Any, Literal
8
+
9
# Closed set of severity labels; used as the type of Issue.severity.
+ Severity = Literal["error", "warning"]
10
+
11
+
12
+ @dataclass(frozen=True, slots=True)
13
+ class Issue:
14
+ """One checker finding."""
15
+
16
+ code: str
17
+ message: str
18
+ location: str = ""
19
+ severity: Severity = "error"
20
+ details: Mapping[str, Any] = field(default_factory=dict)
21
+
22
+ def to_json(self) -> dict[str, Any]:
23
+ data: dict[str, Any] = {
24
+ "code": self.code,
25
+ "message": self.message,
26
+ "severity": self.severity,
27
+ }
28
+ if self.location:
29
+ data["location"] = self.location
30
+ if self.details:
31
+ data["details"] = dict(self.details)
32
+ return data
33
+
34
+
35
+ @dataclass(frozen=True, slots=True)
36
+ class CheckResult:
37
+ """A finite checker result with an explicit trusted-footprint label set."""
38
+
39
+ ok: bool
40
+ issues: tuple[Issue, ...] = ()
41
+ footprint: frozenset[str] = frozenset()
42
+ digest: str | None = None
43
+ transcript_digest: str | None = None
44
+ assumptions: tuple[str, ...] = ()
45
+
46
+ def __bool__(self) -> bool:
47
+ return self.ok
48
+
49
+ @classmethod
50
+ def success(
51
+ cls,
52
+ *,
53
+ footprint: set[str] | frozenset[str] | None = None,
54
+ digest: str | None = None,
55
+ transcript_digest: str | None = None,
56
+ assumptions: tuple[str, ...] = (),
57
+ ) -> CheckResult:
58
+ return cls(True, (), frozenset(footprint or ()), digest, transcript_digest, assumptions)
59
+
60
+ @classmethod
61
+ def fail(
62
+ cls,
63
+ *issues: Issue,
64
+ footprint: set[str] | frozenset[str] | None = None,
65
+ digest: str | None = None,
66
+ transcript_digest: str | None = None,
67
+ assumptions: tuple[str, ...] = (),
68
+ ) -> CheckResult:
69
+ return cls(False, tuple(issues), frozenset(footprint or ()), digest, transcript_digest, assumptions)
70
+
71
+ def with_digest(self, digest: str) -> CheckResult:
72
+ return replace(self, digest=digest)
73
+
74
+ def with_transcript_digest(self, transcript_digest: str) -> CheckResult:
75
+ return replace(self, transcript_digest=transcript_digest)
76
+
77
+ def merge(self, *others: CheckResult) -> CheckResult:
78
+ ok = self.ok and all(other.ok for other in others)
79
+ issues = self.issues + tuple(issue for other in others for issue in other.issues)
80
+ footprint = set(self.footprint)
81
+ assumptions = list(self.assumptions)
82
+ for other in others:
83
+ footprint.update(other.footprint)
84
+ assumptions.extend(item for item in other.assumptions if item not in assumptions)
85
+ transcript_digest = self.transcript_digest
86
+ if transcript_digest is None:
87
+ transcript_digest = next((other.transcript_digest for other in others if other.transcript_digest), None)
88
+ return CheckResult(ok, issues, frozenset(footprint), self.digest, transcript_digest, tuple(assumptions))
89
+
90
+ def to_json(self) -> dict[str, Any]:
91
+ data: dict[str, Any] = {
92
+ "ok": self.ok,
93
+ "issues": [issue.to_json() for issue in self.issues],
94
+ "footprint": sorted(self.footprint),
95
+ }
96
+ if self.digest:
97
+ data["digest"] = self.digest
98
+ if self.transcript_digest:
99
+ data["transcript_digest"] = self.transcript_digest
100
+ if self.assumptions:
101
+ data["assumptions"] = list(self.assumptions)
102
+ return data
103
+
104
+
105
class CheckBuilder:
    """Mutable helper for deterministic finite checkers.

    Collects issues, footprint labels and assumption labels while a checker
    runs, then freezes them into a ``CheckResult`` via ``result``.
    """

    def __init__(self, *, footprint: set[str] | None = None) -> None:
        self._issues: list[Issue] = []
        # Copied so later add_footprint calls never mutate the caller's set.
        self._footprint = set(footprint or ())
        self._assumptions: list[str] = []

    def error(
        self,
        code: str,
        message: str,
        *,
        location: str = "",
        details: Mapping[str, Any] | None = None,
    ) -> None:
        """Record an error-severity issue; any error makes the result not ok."""
        self._issues.append(Issue(code, message, location=location, severity="error", details=details or {}))

    def warning(
        self,
        code: str,
        message: str,
        *,
        location: str = "",
        details: Mapping[str, Any] | None = None,
    ) -> None:
        """Record a warning-severity issue; warnings do not affect ``ok``."""
        self._issues.append(Issue(code, message, location=location, severity="warning", details=details or {}))

    def add_footprint(self, *labels: str) -> None:
        """Add labels to the footprint set."""
        self._footprint.update(labels)

    def add_assumption(self, *labels: str) -> None:
        """Append assumption labels in order, skipping ones already recorded."""
        # Explicit loop: duplicates inside one call must be caught as well,
        # which a generator fed to list.extend only does as a CPython
        # implementation detail.
        for label in labels:
            if label not in self._assumptions:
                self._assumptions.append(label)

    def result(self, *, digest: str | None = None, transcript_digest: str | None = None) -> CheckResult:
        """Freeze the collected state into a ``CheckResult``.

        The result is ok when no recorded issue has severity ``"error"``.
        """
        ok = not any(issue.severity == "error" for issue in self._issues)
        return CheckResult(
            ok,
            tuple(self._issues),
            frozenset(self._footprint),
            digest,
            transcript_digest,
            tuple(self._assumptions),
        )
@@ -0,0 +1,203 @@
1
+ """Finite risk-ledger helpers."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from fractions import Fraction
7
+ from typing import Any
8
+
9
+ from .certificates import check_certificate_live
10
+ from .fold import FoldState
11
+ from .model import Horizon
12
+ from .result import CheckBuilder, CheckResult
13
+
14
+
15
+ @dataclass(frozen=True, slots=True)
16
+ class RiskLedgerSummary:
17
+ total_spend: Fraction
18
+ spend_ids: tuple[str, ...]
19
+
20
+ def to_json(self) -> dict[str, Any]:
21
+ return {
22
+ "total_spend": f"{self.total_spend.numerator}/{self.total_spend.denominator}",
23
+ "spend_ids": list(self.spend_ids),
24
+ }
25
+
26
+
27
+ @dataclass(frozen=True, slots=True)
28
+ class RiskClaimRecord:
29
+ """Finite risk-claim record used by the risk theorem."""
30
+
31
+ claim_id: str
32
+ risk_id: str
33
+ hypothesis_id: str
34
+ mode: str
35
+ cert_id: str
36
+ eta: str
37
+ event_id: str
38
+ standardized_event_id: str
39
+ selection_event_id: str | None = None
40
+ stopping_time_id: str | None = None
41
+ selection_time: int | None = None
42
+ ledger_digest: str | None = None
43
+ route_check: bool = True
44
+ assumption: str = "StatisticalModel"
45
+
46
+ def to_json(self) -> dict[str, Any]:
47
+ return {
48
+ "claim_id": self.claim_id,
49
+ "risk_id": self.risk_id,
50
+ "hypothesis_id": self.hypothesis_id,
51
+ "mode": self.mode,
52
+ "cert_id": self.cert_id,
53
+ "eta": self.eta,
54
+ "event_id": self.event_id,
55
+ "standardized_event_id": self.standardized_event_id,
56
+ "selection_event_id": self.selection_event_id,
57
+ "stopping_time_id": self.stopping_time_id,
58
+ "selection_time": self.selection_time,
59
+ "ledger_digest": self.ledger_digest,
60
+ "route_check": self.route_check,
61
+ "assumption": self.assumption,
62
+ }
63
+
64
+
65
def summarize_risk_ledger(state: FoldState) -> RiskLedgerSummary:
    """Add up the ``eta`` of every spend in the ``risk`` component of ``state``.

    A spend without an ``eta`` entry counts as zero.  The summary lists the
    spend ids in sorted order.
    """
    spends: dict[str, dict[str, Any]] = state.component("risk").get("spends", {})
    running = Fraction(0)
    for entry in spends.values():
        # Going through str() lets the stored eta be any value whose text
        # form Fraction can parse.
        running = running + Fraction(str(entry.get("eta", "0")))
    ordered_ids = tuple(sorted(spends))
    return RiskLedgerSummary(running, ordered_ids)
69
+
70
+
71
def check_risk_ledger(state: FoldState, *, alpha: Fraction | str | None = None) -> CheckResult:
    """Check finite spend accounting and an optional global risk bound.

    Every spend must have a reserve under the same risk id, must not be dated
    before that reserve, and must have a non-negative ``eta``.  When ``alpha``
    is given, the summed spend must not exceed it.
    """

    builder = CheckBuilder(footprint={"RiskLedger", "RiskTranscript"})
    ledger = state.component("risk")
    reserves: dict[str, dict[str, Any]] = ledger.get("reserves", {})
    spends: dict[str, dict[str, Any]] = ledger.get("spends", {})

    for risk_id in spends:
        spend = spends[risk_id]
        reserve = reserves.get(risk_id)
        if reserve is None:
            # Nothing to compare against, so the remaining checks are skipped.
            builder.error("risk-spend-without-reserve", "risk spend has no reserve", location=risk_id)
            continue
        reserved_at = int(reserve.get("reserved_at", 0))
        spent_at = int(spend.get("spent_at", 0))
        if reserved_at > spent_at:
            builder.error("risk-order", "risk spend precedes its reserve", location=risk_id)
        eta = Fraction(str(spend.get("eta", "0")))
        if eta < 0:
            builder.error("negative-risk-spend", "risk spend must be non-negative", location=risk_id)

    # The total is computed even when no bound is supplied.
    total_spend = summarize_risk_ledger(state).total_spend
    if alpha is None:
        return builder.result()
    if total_spend > Fraction(str(alpha)):
        builder.error(
            "risk-bound-exceeded",
            "finite risk spend exceeds the declared bound",
            details={"total": str(total_spend), "alpha": str(alpha)},
        )
    return builder.result()
97
+
98
+
99
def check_risk_spend_live(
    state: FoldState,
    *,
    risk_id: str,
    hypothesis_id: str,
    mode: str,
    cert_id: str,
    at_time: int,
    ledger_digest: str | None = None,
    horizon: Horizon | None = None,
) -> CheckResult:
    """Before-use check for one risk spend.

    The spend stored under ``risk_id`` must exist, must cite the given
    hypothesis, mode and certificate (and ledger digest, when one is supplied),
    must not be dated after ``at_time`` and must not already be closed at
    ``at_time``.  When a horizon is supplied, the mode must be among its
    ``risk_modes``.  The outcome is merged with ``check_certificate_live`` for
    the cited certificate; a missing spend returns early without that merge.
    """

    builder = CheckBuilder(footprint={"RiskLedger", "RiskTranscript", "ClockWatermark"})

    def flag(code: str, message: str) -> None:
        # Every finding of this check is located at the risk id.
        builder.error(code, message, location=risk_id)

    spend = state.component("risk").get("spends", {}).get(risk_id)
    if spend is None:
        flag("risk-spend-missing", "risk spend is not installed")
        return builder.result()

    if spend.get("hypothesis_id") != hypothesis_id:
        flag("risk-hypothesis-mismatch", "risk spend cites a different hypothesis")
    if spend.get("mode") != mode:
        flag("risk-mode-mismatch", "risk spend cites a different statistical mode")
    if horizon is not None and mode not in horizon.risk_modes:
        flag("risk-mode-undeclared", "risk mode is not declared by the manifest")
    if spend.get("cert_id") != cert_id:
        flag("risk-cert-mismatch", "risk spend cites a different certificate")
    if ledger_digest is not None and spend.get("ledger_digest") != ledger_digest:
        flag("risk-ledger-mismatch", "risk spend cites a different ledger digest")

    spent_at = int(spend.get("spent_at", 0))
    if spent_at > at_time:
        flag("risk-spend-after-use", "risk spend is committed after use")
    closed_at = spend.get("closed_at")
    if closed_at is not None and int(closed_at) <= at_time:
        flag("risk-spend-closed", "risk spend is already closed at use time")

    own_result = builder.result()
    return own_result.merge(check_certificate_live(state, cert_id, at_time, horizon=horizon))
134
+
135
+
136
def check_risk_claims(
    state: FoldState,
    records: tuple[RiskClaimRecord, ...],
    *,
    alpha: Fraction | str,
    at_time: int,
    horizon: Horizon,
) -> CheckResult:
    """Check finite installed risk claims and their union-bound spend.

    For every record this verifies that:

    * its risk id is not used by an earlier record (``risk-claim-duplicate``);
    * its ``eta`` is non-negative (``negative-risk-spend``), so that no claim
      can lower the summed spend;
    * the cited spend passes ``check_risk_spend_live``, whose errors are copied
      into this result;
    * the field required by its mode is present (failure event, selection
      event, accepted route check or stopping time);
    * when a selection time is given, a reserve with a ``reserved_at`` strictly
      before that time exists (``risk-post-selection-reserve``).

    Finally the sum of all claimed ``eta`` values must not exceed ``alpha``
    (``risk-alpha-bound``).  Each record's non-empty ``assumption`` label is
    added to the result.
    """

    builder = CheckBuilder(footprint={"RiskLedger", "RiskTranscript"})
    seen_risks: set[str] = set()
    total = Fraction(0)
    for record in records:
        if record.risk_id in seen_risks:
            builder.error("risk-claim-duplicate", "risk id appears in more than one claim", location=record.risk_id)
        seen_risks.add(record.risk_id)

        # Parsed through str() like the rest of the module.  A negative eta
        # would offset other claims against the alpha bound, so reject it.
        eta = Fraction(str(record.eta))
        if eta < 0:
            builder.error("negative-risk-spend", "risk spend must be non-negative", location=record.claim_id)
        total += eta

        spend = check_risk_spend_live(
            state,
            risk_id=record.risk_id,
            hypothesis_id=record.hypothesis_id,
            mode=record.mode,
            cert_id=record.cert_id,
            at_time=at_time,
            ledger_digest=record.ledger_digest,
            horizon=horizon,
        )
        if not spend.ok:
            for issue in spend.issues:
                builder.error(issue.code, issue.message, location=issue.location, details=issue.details)
        if record.mode == "fixed" and not record.event_id:
            builder.error("risk-fixed-event", "fixed mode requires a failure event", location=record.claim_id)
        if record.mode == "selectedEvent" and not record.selection_event_id:
            builder.error(
                "risk-selection-event",
                "selected-event mode requires a selection event",
                location=record.claim_id,
            )
        if record.mode == "conditionalSelective" and not record.route_check:
            builder.error(
                "risk-conditional-route",
                "conditional-selective mode requires an accepted route check",
                location=record.claim_id,
            )
        if record.mode == "anytime" and not record.stopping_time_id:
            builder.error(
                "risk-stopping-time",
                "anytime mode requires a certified stopping time",
                location=record.claim_id,
            )
        if record.selection_time is not None:
            reserve = state.component("risk").get("reserves", {}).get(record.risk_id, {})
            reserved_at = reserve.get("reserved_at")
            # A missing reserve (or missing timestamp) can never precede the
            # selection, whatever the selection time is.  Substituting
            # ``at_time + 1`` would let a selection time later than that pass.
            if reserved_at is None or int(reserved_at) >= record.selection_time:
                builder.error(
                    "risk-post-selection-reserve",
                    "risk reserve must precede the selection or stopping decision",
                    location=record.claim_id,
                )
        if record.assumption:
            builder.add_assumption(record.assumption)
    if total > Fraction(str(alpha)):
        builder.error(
            "risk-alpha-bound",
            "finite risk spend exceeds the declared alpha bound",
            details={"total": str(total), "alpha": str(alpha)},
        )
    return builder.result()
@@ -0,0 +1,88 @@
1
+ """Sensitive-data guardrails for audit-log payloads."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import re
6
+ from collections.abc import Mapping
7
+ from dataclasses import dataclass
8
+ from typing import Any
9
+
10
# Case-insensitive pattern for mapping keys whose name suggests a credential.
# _walk applies it with search(), so a match anywhere inside the key counts.
+ SECRET_KEY_NAME = re.compile(
11
+ r"(?i)(api[_-]?key|access[_-]?key|secret|private[_-]?key|password|credential|client[_-]?secret)"
12
+ )
13
+
14
# (reason, pattern) pairs for recognisable credential formats; _walk tries
# each one against every string value and reports the reason on a match.
+ SECRET_VALUE_PATTERNS: tuple[tuple[str, re.Pattern[str]], ...] = (
15
+ ("aws access key", re.compile(r"\bAKIA[0-9A-Z]{16}\b")),
16
+ ("GitHub token", re.compile(r"\bgh[pousr]_[A-Za-z0-9_]{30,}\b")),
17
+ ("OpenAI API key", re.compile(r"\bsk-[A-Za-z0-9_\-]{20,}\b")),
18
+ ("private key block", re.compile(r"-----BEGIN [A-Z ]*PRIVATE KEY-----")),
19
+ (
20
+ "JWT-like token",
21
+ re.compile(r"\beyJ[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}\.[A-Za-z0-9_\-]{10,}\b"),
22
+ ),
23
+ )
24
+
25
# (reason, pattern) pairs for machine-local and credential-directory paths;
# _walk skips this table when allow_local_paths is true.
+ LOCAL_PATH_PATTERNS: tuple[tuple[str, re.Pattern[str]], ...] = (
26
+ ("Windows user path", re.compile(r"(?i)\b[A-Z]:\\Users\\[^\\\s]+")),
27
+ ("Unix home path", re.compile(r"(?i)(^|\s)/home/[^/\s]+")),
28
+ ("SSH material path", re.compile(r"(?i)(^|[/\\])\.ssh([/\\]|$)")),
29
+ ("cloud credentials path", re.compile(r"(?i)(^|[/\\])\.(aws|config/gcloud)([/\\]|$)")),
30
+ )
31
+
32
+
33
+ @dataclass(frozen=True, slots=True)
34
+ class SensitiveDataIssue:
35
+ path: str
36
+ reason: str
37
+ preview: str
38
+
39
+ def to_json(self) -> dict[str, str]:
40
+ return {"path": self.path, "reason": self.reason, "preview": self.preview}
41
+
42
+
43
def scan_for_sensitive_data(value: Any, *, allow_local_paths: bool = False) -> tuple[SensitiveDataIssue, ...]:
    """Find common secrets and machine-local paths in a JSON-like value.

    The walk starts at the root path ``$``.  Local-path findings are left out
    when ``allow_local_paths`` is true.
    """

    found: list[SensitiveDataIssue] = []
    _walk(value, "$", found, allow_local_paths=allow_local_paths)
    return (*found,)
49
+
50
+
51
def _walk(value: Any, path: str, issues: list[SensitiveDataIssue], *, allow_local_paths: bool) -> None:
    """Recursively scan ``value``, appending every finding to ``issues``.

    Mappings are visited key by key (a secret-looking key holding a non-empty
    scalar is reported before its value is scanned), lists and tuples item by
    item, and strings are matched against the value patterns.  Any other value
    is ignored.
    """
    if isinstance(value, Mapping):
        for key, child in value.items():
            name = str(key)
            child_path = f"{path}.{name}"
            looks_secret = SECRET_KEY_NAME.search(name)
            if looks_secret and _non_empty_scalar(child):
                issues.append(SensitiveDataIssue(child_path, "secret-looking field name", _preview(child)))
            _walk(child, child_path, issues, allow_local_paths=allow_local_paths)
        return

    if isinstance(value, (tuple, list)):
        for position, child in enumerate(value):
            _walk(child, f"{path}[{position}]", issues, allow_local_paths=allow_local_paths)
        return

    if isinstance(value, str):
        # Secret formats are always checked; the path table comes second and
        # only when local paths are not allowed.
        tables = SECRET_VALUE_PATTERNS if allow_local_paths else SECRET_VALUE_PATTERNS + LOCAL_PATH_PATTERNS
        issues.extend(
            SensitiveDataIssue(path, reason, _preview(value))
            for reason, pattern in tables
            if pattern.search(value)
        )
76
+
77
+
78
+ def _non_empty_scalar(value: Any) -> bool:
79
+ if isinstance(value, str):
80
+ return bool(value.strip())
81
+ return value is not None and not isinstance(value, Mapping | list | tuple)
82
+
83
+
84
+ def _preview(value: Any) -> str:
85
+ text = str(value).replace("\n", "\\n")
86
+ if len(text) <= 16:
87
+ return "***"
88
+ return f"{text[:4]}...{text[-4:]}"