clean-room-skill 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/marketplace.json +19 -0
- package/.claude-plugin/plugin.json +20 -0
- package/.codex-plugin/plugin.json +36 -0
- package/LICENSE +21 -0
- package/README.md +376 -0
- package/agents/clean-architect.md +27 -0
- package/agents/clean-qa-editor.md +27 -0
- package/agents/contaminated-manager-verifier.md +35 -0
- package/agents/contaminated-source-analyst.md +26 -0
- package/bin/install.js +535 -0
- package/examples/codex/.codex/agents/clean-architect.toml +17 -0
- package/examples/codex/.codex/agents/clean-qa-editor.toml +17 -0
- package/examples/codex/.codex/agents/contaminated-manager-verifier.toml +21 -0
- package/examples/codex/.codex/agents/contaminated-source-analyst.toml +17 -0
- package/hooks/check-artifact-leakage.py +317 -0
- package/hooks/clean-room-hook.py +88 -0
- package/hooks/clean_room_paths.py +130 -0
- package/hooks/deny-clean-room-shell.py +30 -0
- package/hooks/deny-clean-source-read.py +104 -0
- package/hooks/deny-contaminated-clean-write.py +134 -0
- package/hooks/hooks.json +44 -0
- package/hooks/require-clean-room-env.py +127 -0
- package/hooks/validate-handoff-package.py +140 -0
- package/hooks/validate-json-schema.py +283 -0
- package/lib/fs-utils.cjs +123 -0
- package/lib/hooks.cjs +214 -0
- package/package.json +49 -0
- package/plugin.json +20 -0
- package/skills/attended/SKILL.md +25 -0
- package/skills/clean-room/SKILL.md +134 -0
- package/skills/clean-room/assets/behavior-spec.schema.json +367 -0
- package/skills/clean-room/assets/contamination-incident.schema.json +60 -0
- package/skills/clean-room/assets/coverage-ledger.schema.json +139 -0
- package/skills/clean-room/assets/evidence-ledger.schema.json +80 -0
- package/skills/clean-room/assets/handoff-package.schema.json +114 -0
- package/skills/clean-room/assets/qc-report.schema.json +248 -0
- package/skills/clean-room/assets/skeleton-manifest.schema.json +239 -0
- package/skills/clean-room/assets/source-index.schema.json +622 -0
- package/skills/clean-room/assets/task-manifest.schema.json +593 -0
- package/skills/clean-room/examples/README.md +18 -0
- package/skills/clean-room/examples/minimal-spec-package/behavior-spec.json +61 -0
- package/skills/clean-room/examples/minimal-spec-package/coverage-ledger.json +27 -0
- package/skills/clean-room/examples/minimal-spec-package/evidence-ledger.json +17 -0
- package/skills/clean-room/examples/minimal-spec-package/handoff-package.json +26 -0
- package/skills/clean-room/examples/minimal-spec-package/qc-report.json +25 -0
- package/skills/clean-room/examples/minimal-spec-package/skeleton-manifest.json +45 -0
- package/skills/clean-room/examples/minimal-spec-package/source-index.json +156 -0
- package/skills/clean-room/examples/minimal-spec-package/task-manifest.json +220 -0
- package/skills/clean-room/references/LEAKAGE-RULES.md +92 -0
- package/skills/clean-room/references/PROCESS.md +185 -0
- package/skills/clean-room/references/SPEC-SCHEMA.md +185 -0
- package/skills/clean-room/references/TARGET-LANGUAGE-GUIDE.md +43 -0
- package/skills/clean-room/scripts/build_source_index.py +1253 -0
- package/skills/clean-room/scripts/clean_room_tool_manager.py +199 -0
- package/skills/clean-room/scripts/clean_room_tooling.py +370 -0
- package/skills/unattended/SKILL.md +26 -0
|
@@ -0,0 +1,317 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Scan clean-room artifacts for high-risk leakage markers."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
from clean_room_paths import checked_write_paths, load_payload
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Largest artifact, in bytes, that main() will scan; larger files fail the scan.
MAX_SCAN_BYTES = 1_000_000
# Per-file size cap, in bytes, checked before a denylist file is parsed.
MAX_DENYLIST_BYTES = 1_000_000
# Upper bound on the number of denylist terms accumulated across all files.
MAX_DENYLIST_TERMS = 20_000
# Longest accepted denylist term, in characters; a longer term aborts loading.
MAX_DENYLIST_TERM_LENGTH = 512
# Environment variable holding os.pathsep-separated denylist file paths.
PRIVATE_IDENTIFIER_DENYLIST_ENV = "CLEAN_ROOM_PRIVATE_IDENTIFIER_DENYLIST"
# Keys a public_surface/public_contracts record must carry before public_names()
# accepts its "name" as an allowed public name.
PUBLIC_NAME_KEYS = {"name", "kind", "compatibility_reason", "visibility"}
# "visibility" values under which public_names() treats a record's name as allowed.
PUBLIC_NAME_VISIBILITIES = {"public", "destination", "protocol", "user-required"}
|
|
22
|
+
# JSON string values whose nearest enclosing object key is listed here are
# skipped entirely by json_scan_strings().
NEVER_SCAN_JSON_STRING_KEYS = {
    "$schema",
    "allowed_artifacts",
    "artifact_type",
    "blocked_material_type",
    "category",
    "compatibility_level",
    "confidence",
    "coverage",
    "coverage_status",
    "created_at",
    "created_by_role",
    "domain",
    "evidence_status",
    "final_status",
    "from_domain",
    "kind",
    "leakage_risk",
    "leakage_status",
    "producer_role",
    "reviewed_at",
    "reviewer_role",
    "role",
    "schema_status",
    "schema_validator_version",
    "selection_basis",
    "severity",
    "status",
    "target_profile",
    "to_domain",
    "trust_domain",
    "visibility",
}
# JSON string values with ANY of these keys on their path are checked against
# the private-identifier denylist only; the generic identifier patterns are not
# applied to them (see json_scan_strings() and main()).
DENYLIST_ONLY_JSON_STRING_KEYS = {
    "affected_artifacts",
    "artifact",
    "artifact_hashes",
    "artifact_id",
    "artifact_paths",
    "audit_log_refs",
    "behavior_spec_refs",
    "contaminated_artifact_roots",
    "contaminated_artifacts",
    "contract_id",
    "decision_id",
    "evidence_location_ref",
    "evidence_refs",
    "expected_artifacts",
    "incident_id",
    "manifest_id",
    "native_artifacts",
    "owner",
    "package_id",
    "path",
    "profile_id",
    "report_id",
    "reviewed_artifacts",
    "scenario_id",
    "sha256",
    "source_hash",
    "source_index_ref",
    "source_index_refs",
    "source_spec_id",
    "source_unit_refs",
    "spec_id",
    "task_id",
    "test_id",
    "ticket_id",
    "unit_id",
    "workspace_id",
}
# JSON string values whose nearest enclosing key is listed here get the "light"
# scan: main() runs the identifier patterns with "source_like_call" skipped.
SCAN_LIGHT_JSON_STRING_KEYS = {
    "action",
    "formatting_rules",
}
|
|
97
|
+
# Patterns main() applies to the whole decoded artifact text; any match is
# reported under the pattern's key and fails the scan.
BLOCKED_PATTERNS = {
    # A "diff --git" line or a unified-diff hunk header at the start of a line.
    "raw_diff": re.compile(r"(?m)^(diff --git|@@ -\d+(?:,\d+)? \+\d+(?:,\d+)? @@)"),
    # A Markdown code fence that is bare or tagged with one of the listed languages.
    "source_fence": re.compile(
        r"(?m)^```\s*(?:$|(?:c|cc|cpp|go|java|javascript|js|jsx|kotlin|kt|m|mm|objective-c|py|python|rs|rust|swift|ts|tsx|typescript)\b)",
        re.I,
    ),
    # Words naming decompilation/extraction tooling or source excerpts.
    "decompiled_marker": re.compile(r"\b(decompiled|jadx|apktool|asar extraction|source excerpt)\b", re.I),
    # Either `File "...", line N` or `at name(...:N:N)` stack-frame shapes.
    "stack_source_line": re.compile(r"\bFile \"[^\"]+\", line \d+|\bat [\w.$<>]+\([^)]*:\d+:\d+\)"),
}
# Patterns scan_identifier_patterns() applies to individual text fragments after
# URLs and allowed public names have been stripped.
IDENTIFIER_PATTERNS = {
    # Lowercase dotted name with at least three segments, e.g. a.b.c.
    "package_or_module_identifier": re.compile(r"\b[a-z][a-z0-9_]*(?:\.[a-z][a-z0-9_]*){2,}\b"),
    # An identifier of three or more characters followed by "(".
    "source_like_call": re.compile(r"\b[A-Za-z_][A-Za-z0-9_]{2,}\s*\("),
    # Identifiers joined by "->", "::" or "#", or a dotted chain of three or more parts.
    "source_like_scoped_identifier": re.compile(
        r"\b[A-Za-z_][A-Za-z0-9_]{1,}(?:(?:->|::|#)[A-Za-z_][A-Za-z0-9_]{1,}|(?:\.[A-Za-z_][A-Za-z0-9_]{1,}){2,})\b"
    ),
}
# scheme://... up to whitespace, a quote or ")"; strip_allowed_text() blanks
# these out before identifier scanning.
URL_PATTERN = re.compile(r"\b[a-z][a-z0-9+.-]*://[^\s\"')]+", re.I)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def is_clean_artifact(path: Path) -> bool:
    """Return True when *path* is a scannable text artifact.

    When ``CLEAN_ROOM_CLEAN_ROOTS`` lists one or more roots (os.pathsep
    separated), *path* must equal one of them or sit beneath one; with no
    roots configured only the suffix check applies. The suffix must be one
    of .json, .md, .yaml, .yml or .txt (case-insensitive).
    """
    configured = os.environ.get("CLEAN_ROOM_CLEAN_ROOTS", "")
    clean_roots = []
    for entry in configured.split(os.pathsep):
        if entry:
            clean_roots.append(Path(entry).expanduser().resolve())
    if clean_roots:
        # The path itself plus every ancestor: membership means "equal or under".
        lineage = (path, *path.parents)
        if all(root not in lineage for root in clean_roots):
            return False
    scannable_suffixes = (".json", ".md", ".yaml", ".yml", ".txt")
    return path.suffix.lower() in scannable_suffixes
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def load_private_identifier_terms() -> tuple[list[str], list[str]]:
    """Load private-identifier terms from the configured denylist files.

    The environment variable named by ``PRIVATE_IDENTIFIER_DENYLIST_ENV``
    holds os.pathsep-separated file paths. Each file is read as UTF-8
    (undecodable bytes are replaced), one term per line.

    Returns:
        ``(terms, errors)``. Unreadable or oversize files add an error and
        are skipped. Hitting the term-count cap or an overlong term adds an
        error and stops loading immediately, returning what was collected.
    """
    collected: list[str] = []
    problems: list[str] = []
    configured = os.environ.get(PRIVATE_IDENTIFIER_DENYLIST_ENV, "")
    for entry in filter(None, configured.split(os.pathsep)):
        denylist_file = Path(entry).expanduser()
        try:
            blob = denylist_file.read_bytes()
        except OSError as exc:
            problems.append(f"could not read {PRIVATE_IDENTIFIER_DENYLIST_ENV} file {denylist_file}: {exc}")
            continue
        if len(blob) > MAX_DENYLIST_BYTES:
            problems.append(f"{PRIVATE_IDENTIFIER_DENYLIST_ENV} file {denylist_file} exceeds {MAX_DENYLIST_BYTES} bytes")
            continue
        for line in blob.decode("utf-8", errors="replace").splitlines():
            # The cap is checked per line, before blank/comment filtering.
            if len(collected) >= MAX_DENYLIST_TERMS:
                problems.append(f"{PRIVATE_IDENTIFIER_DENYLIST_ENV} exceeds {MAX_DENYLIST_TERMS} terms")
                return collected, problems
            candidate = line.strip()
            # Skip blanks, "#" comments and terms shorter than three characters.
            if len(candidate) < 3 or candidate.startswith("#"):
                continue
            if len(candidate) > MAX_DENYLIST_TERM_LENGTH:
                problems.append(
                    f"{PRIVATE_IDENTIFIER_DENYLIST_ENV} term exceeds {MAX_DENYLIST_TERM_LENGTH} characters"
                )
                return collected, problems
            collected.append(candidate)
    return collected, problems
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def private_identifier_pattern(term: str) -> re.Pattern[str]:
    """Compile the regex used to find one denylisted *term* in text.

    A term made only of word characters is matched between word boundaries.
    Any other term is matched literally, provided it is not preceded by a
    word character or ".", and not followed by a word character, "-", or a
    "." that begins another identifier segment.
    """
    literal = re.escape(term)
    is_single_word = re.fullmatch(r"\w+", term) is not None
    if not is_single_word:
        return re.compile(rf"(?<![\w.]){literal}(?![\w-]|\.[A-Za-z_])")
    return re.compile(rf"\b{literal}\b")
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def public_names(value: object, path: tuple[str | int, ...] = ()) -> set[str]:
    """Collect names declared public in a parsed JSON document.

    A dict counts as a public record only when it is an element of a
    top-level ``public_surface`` or ``public_contracts`` list, carries every
    key in ``PUBLIC_NAME_KEYS``, has a ``visibility`` listed in
    ``PUBLIC_NAME_VISIBILITIES`` and a non-blank string ``name``.

    Args:
        value: Parsed JSON value (dict, list or scalar).
        path: Keys and indexes leading from the document root to *value*.

    Returns:
        The set of ``name`` strings from every qualifying record.
    """
    names: set[str] = set()
    if isinstance(value, dict):
        visibility = value.get("visibility")
        name = value.get("name")
        is_public_record = (
            len(path) == 2
            and path[0] in {"public_surface", "public_contracts"}
            and isinstance(path[1], int)
            and PUBLIC_NAME_KEYS <= set(value)
            # JSON may hold a list or dict here; those are unhashable and would
            # make the set-membership test raise TypeError, so require a string.
            and isinstance(visibility, str)
            and visibility in PUBLIC_NAME_VISIBILITIES
            and isinstance(name, str)
            and bool(name.strip())
        )
        if is_public_record:
            names.add(name)
        for key, item in value.items():
            names.update(public_names(item, path + (key,)))
    elif isinstance(value, list):
        for index, item in enumerate(value):
            names.update(public_names(item, path + (index,)))
    return names
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def strip_allowed_text(text: str, allowed_names: set[str]) -> str:
    """Blank out URLs and allowed public names so they are not flagged.

    Every ``URL_PATTERN`` match is replaced by a space, then each allowed
    name (longest first, so a short name cannot split a longer one) is
    replaced by a space wherever it appears as a standalone identifier.
    """
    result = URL_PATTERN.sub(" ", text)
    longest_first = sorted(allowed_names, key=len, reverse=True)
    for allowed in filter(None, longest_first):
        name_pattern = rf"(?<![\w.]){re.escape(allowed)}(?![\w]|\.[A-Za-z_])"
        result = re.sub(name_pattern, " ", result)
    return result
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def json_scan_strings(
    value: object,
    allowed_names: set[str],
    path: tuple[str | int, ...] = (),
) -> tuple[list[str], list[str], list[str]]:
    """Sort every string in a parsed JSON value into scan buckets.

    Args:
        value: Parsed JSON value to walk.
        allowed_names: Public names stripped from each string before bucketing.
        path: Keys and indexes leading from the document root to *value*.

    Returns:
        ``(full_scan, light_scan, denylist_scan)`` lists of stripped strings.
        Strings whose nearest key is in ``NEVER_SCAN_JSON_STRING_KEYS`` are
        dropped. Strings with any path key in
        ``DENYLIST_ONLY_JSON_STRING_KEYS`` go to the denylist bucket. Strings
        whose nearest key is in ``SCAN_LIGHT_JSON_STRING_KEYS`` go to the
        light bucket. Everything else goes to the full bucket.
    """
    full: list[str] = []
    light: list[str] = []
    denylist: list[str] = []

    if isinstance(value, str):
        named_steps = [step for step in path if isinstance(step, str)]
        nearest_key = named_steps[-1] if named_steps else None
        if nearest_key not in NEVER_SCAN_JSON_STRING_KEYS:
            cleaned = strip_allowed_text(value, allowed_names)
            if DENYLIST_ONLY_JSON_STRING_KEYS.intersection(named_steps):
                bucket = denylist
            elif nearest_key in SCAN_LIGHT_JSON_STRING_KEYS:
                bucket = light
            else:
                bucket = full
            bucket.append(cleaned)
        return full, light, denylist

    if isinstance(value, dict):
        children = value.items()
    elif isinstance(value, list):
        children = enumerate(value)
    else:
        # Numbers, booleans and null carry no text to scan.
        children = ()
    for step, child in children:
        nested = json_scan_strings(child, allowed_names, path + (step,))
        for bucket, part in zip((full, light, denylist), nested):
            bucket.extend(part)
    return full, light, denylist
|
|
227
|
+
|
|
228
|
+
|
|
229
|
+
def scan_private_identifier_denylist(texts: list[str], private_terms: list[str]) -> list[str]:
    """Report whether any text contains any denylisted private term.

    Returns:
        ``["private_identifier_denylist"]`` when at least one term matches at
        least one text, otherwise an empty list.
    """
    matched = any(
        private_identifier_pattern(term).search(text)
        for text in texts
        for term in private_terms
    )
    return ["private_identifier_denylist"] if matched else []
|
|
237
|
+
|
|
238
|
+
|
|
239
|
+
def scan_identifier_patterns(
    texts: list[str],
    private_terms: list[str],
    skipped_patterns: set[str] | None = None,
) -> list[str]:
    """Scan texts against the denylist and the generic identifier patterns.

    Args:
        texts: Already-stripped text fragments to inspect.
        private_terms: Denylisted terms; a hit is reported as
            ``"private_identifier_denylist"``.
        skipped_patterns: Names of ``IDENTIFIER_PATTERNS`` entries to ignore.

    Returns:
        Sorted, de-duplicated finding names.
    """
    ignored = skipped_patterns if skipped_patterns else set()
    active = [(label, regex) for label, regex in IDENTIFIER_PATTERNS.items() if label not in ignored]
    hits: set[str] = set()
    for text in texts:
        if any(private_identifier_pattern(term).search(text) for term in private_terms):
            hits.add("private_identifier_denylist")
        hits.update(label for label, regex in active if regex.search(text))
    return sorted(hits)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def identifier_scan_texts(path: Path, text: str) -> tuple[list[str], list[str], list[str]]:
    """Split an artifact's text into full, light and denylist-only scan inputs.

    A ``.json`` file that parses is walked value by value via
    ``json_scan_strings``, with its declared public names allowed. Any other
    file, or JSON that fails to parse, is scanned as one full-scan fragment
    with only URLs stripped.
    """
    if path.suffix.lower() == ".json":
        try:
            document = json.loads(text)
        except json.JSONDecodeError:
            # Unparseable JSON falls through to the whole-text scan below.
            pass
        else:
            return json_scan_strings(document, public_names(document))
    return [strip_allowed_text(text, set())], [], []
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def main() -> int:
    """Scan the files named in the hook payload for leakage markers.

    Reads the hook payload from stdin, resolves the written paths, loads the
    private-identifier denylist, then checks every clean artifact against
    ``BLOCKED_PATTERNS`` and the identifier scans.

    Returns:
        0 when nothing is flagged; 1 on any payload, path, denylist or read
        error, on an oversize artifact, or on the first artifact with findings.
        Every failure is explained on stderr.
    """
    payload, payload_error = load_payload()
    if payload_error:
        print(f"clean-room leakage scan failed: {payload_error}", file=sys.stderr)
        return 1
    paths, path_errors = checked_write_paths(payload, "clean-room leakage scan")
    if path_errors:
        for error in path_errors:
            print(f"clean-room leakage scan failed: {error}", file=sys.stderr)
        return 1
    private_terms, load_errors = load_private_identifier_terms()
    if load_errors:
        for error in load_errors:
            print(f"clean-room leakage scan failed: {error}", file=sys.stderr)
        return 1
    for path in paths:
        if not is_clean_artifact(path):
            continue
        try:
            with path.open("rb") as handle:
                # Read one byte past the cap: oversize files are detected from
                # the same read, with no separate stat() that could race with
                # a concurrent writer.
                data = handle.read(MAX_SCAN_BYTES + 1)
        except OSError as exc:
            # Missing file, directory, permissions, ...: fail closed with a
            # message instead of an unhandled traceback.
            print(
                f"clean-room leakage scan failed for {path}: could not read artifact: {exc}",
                file=sys.stderr,
            )
            return 1
        if len(data) > MAX_SCAN_BYTES:
            print(
                f"clean-room leakage scan failed for {path}: artifact exceeds scan cap of {MAX_SCAN_BYTES} bytes",
                file=sys.stderr,
            )
            return 1
        text = data.decode("utf-8", errors="replace")
        # Whole-file structural markers (diffs, code fences, stack frames).
        findings = [name for name, pattern in BLOCKED_PATTERNS.items() if pattern.search(text)]
        full_scan_texts, light_scan_texts, denylist_scan_texts = identifier_scan_texts(path, text)
        findings.extend(scan_identifier_patterns(full_scan_texts, private_terms))
        findings.extend(
            scan_identifier_patterns(
                light_scan_texts,
                private_terms,
                # Light-scan fields skip the call-like pattern.
                skipped_patterns={"source_like_call"},
            )
        )
        findings.extend(scan_private_identifier_denylist(denylist_scan_texts, private_terms))
        if findings:
            print(
                f"clean-room leakage scan failed for {path}: {', '.join(sorted(set(findings)))}",
                file=sys.stderr,
            )
            return 1
    return 0
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
if __name__ == "__main__":
|
|
317
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Dispatch clean-room hook checks behind safe opt-in enforcement."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import argparse
|
|
7
|
+
import os
|
|
8
|
+
import subprocess
|
|
9
|
+
import sys
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Lower-cased CLEAN_ROOM_HOOK_ENFORCE values that switch enforcement on.
TRUTHY = {"1", "true", "yes", "on", "strict"}
# Environment variables whose presence (any non-empty value) makes
# should_enforce() turn enforcement on even in "safe" mode.
CLEAN_ROOM_ENV_NAMES = {
    "CLEAN_ROOM_ROLE",
    "CLEAN_ROOM_SOURCE_ROOTS",
    "CLEAN_ROOM_CONTAMINATED_ARTIFACT_ROOTS",
    "CLEAN_ROOM_CLEAN_ROOTS",
    "CLEAN_ROOM_SCHEMA_DIR",
    "CLEAN_ROOM_ALLOWED_READ_ROOTS",
    "CLEAN_ROOM_PRIVATE_IDENTIFIER_DENYLIST",
}
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse_args() -> argparse.Namespace:
    """Parse the hook wrapper's command line.

    ``--mode`` defaults to the ``CLEAN_ROOM_HOOK_MODE`` environment variable
    (trimmed and lower-cased; unset or empty means ``"safe"``). ``--check``
    may be repeated and collects hook script basenames.

    Returns:
        Namespace with ``mode`` (``"safe"`` or ``"strict"``) and ``check``
        (list of str).

    Raises:
        SystemExit: on invalid arguments, or when ``CLEAN_ROOM_HOOK_MODE``
            holds a value other than ``safe``/``strict``.
    """
    modes = ("safe", "strict")
    # argparse does not validate a default against ``choices``, so an
    # environment value such as "STRICT" or a typo would otherwise pass through
    # unchecked and silently behave like safe mode. Normalize here and
    # validate after parsing.
    env_mode = os.environ.get("CLEAN_ROOM_HOOK_MODE", "").strip().lower() or "safe"
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--mode",
        choices=modes,
        default=env_mode,
    )
    parser.add_argument(
        "--check",
        action="append",
        default=[],
        help="Hook script basename in the hooks directory. Repeat for multiple checks.",
    )
    args = parser.parse_args()
    if args.mode not in modes:
        parser.error(
            f"invalid CLEAN_ROOM_HOOK_MODE value {args.mode!r} (choose from 'safe', 'strict')"
        )
    return args
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def should_enforce(mode: str) -> bool:
    """Decide whether the configured checks should run at all.

    Enforcement is on when *mode* is ``"strict"``, when
    ``CLEAN_ROOM_HOOK_ENFORCE`` holds a truthy value, or when any variable
    named in ``CLEAN_ROOM_ENV_NAMES`` is set to a non-empty value.
    """
    opted_in = os.environ.get("CLEAN_ROOM_HOOK_ENFORCE", "").lower() in TRUTHY
    if mode == "strict" or opted_in:
        return True
    for env_name in CLEAN_ROOM_ENV_NAMES:
        if os.environ.get(env_name):
            return True
    return False
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def resolve_check(script_dir: Path, check: str) -> Path:
    """Map a check name to a script inside *script_dir*.

    Args:
        script_dir: Directory holding the hook scripts.
        check: Bare file name of the script; no directories, not absolute,
            and not "", "." or "..".

    Returns:
        ``script_dir / check``.

    Raises:
        ValueError: if *check* is not a plain basename.
        FileNotFoundError: if the resulting path is not an existing file.
    """
    requested = Path(check)
    is_plain_basename = (
        check not in {"", ".", ".."}
        and not requested.is_absolute()
        and requested.name == check
    )
    if not is_plain_basename:
        raise ValueError(f"invalid hook check name: {check!r}")
    candidate = script_dir / check
    if candidate.is_file():
        return candidate
    raise FileNotFoundError(f"hook check does not exist: {candidate}")
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def main() -> int:
    """Run each configured check script against the hook payload.

    Does nothing (returns 0) when enforcement is off. Otherwise the stdin
    payload is read once as bytes and fed to every ``--check`` script in
    order, each run with the current interpreter and sharing this process's
    stdout and stderr.

    Returns:
        0 when every check exits 0; 1 when no checks are configured or a
        check name cannot be resolved; otherwise the first non-zero exit
        status from a check.
    """
    options = parse_args()
    if not should_enforce(options.mode):
        return 0
    if not options.check:
        print("clean-room hook wrapper has no checks configured", file=sys.stderr)
        return 1

    # Read stdin once so the same bytes can be replayed to every check.
    stdin_bytes = sys.stdin.buffer.read()
    hooks_dir = Path(__file__).resolve().parent
    for check_name in options.check:
        try:
            check_script = resolve_check(hooks_dir, check_name)
        except (FileNotFoundError, ValueError) as exc:
            print(f"clean-room hook configuration failed: {exc}", file=sys.stderr)
            return 1
        completed = subprocess.run(
            [sys.executable, str(check_script)],
            input=stdin_bytes,
            stdout=sys.stdout,
            stderr=sys.stderr,
            check=False,
        )
        exit_status = completed.returncode
        if exit_status != 0:
            return exit_status
    return 0
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
if __name__ == "__main__":
|
|
88
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
"""Shared path and payload helpers for clean-room hook scripts."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
from typing import Any
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# Role names accepted from CLEAN_ROOM_ROLE; active_clean_room_role() treats any
# other value as "no role".
ROLES = {
    "contaminated-manager-verifier",
    "contaminated-source-analyst",
    "clean-architect",
    "clean-qa-editor",
}
# Subset of ROLES on the clean side of the process.
CLEAN_ROLES = {"clean-architect", "clean-qa-editor"}
# Tool names that should_fail_closed_for_write() treats as file writes.
WRITE_TOOL_NAMES = {"Write", "Edit", "MultiEdit"}
# Payload keys whose string values _path_values() collects as candidate paths.
PATH_KEYS = {
    "file_path",
    "filePath",
    "path",
    "output_path",
    "outputPath",
}
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def path_is_under(path: Path, root: Path) -> bool:
    """Return True when *path* is *root* itself or has *root* as an ancestor.

    The comparison is purely lexical; callers are expected to pass paths
    that are already resolved.
    """
    if path == root:
        return True
    return any(ancestor == root for ancestor in path.parents)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def paths_overlap(left: Path, right: Path) -> bool:
    """Return True when either path equals or contains the other."""
    for inner, outer in ((left, right), (right, left)):
        if path_is_under(inner, outer):
            return True
    return False
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def split_paths(value: str) -> list[Path]:
    """Turn an os.pathsep-separated string into resolved paths.

    Empty entries are ignored; each remaining entry has ``~`` expanded and is
    resolved to an absolute path.
    """
    return [
        Path(entry).expanduser().resolve()
        for entry in value.split(os.pathsep)
        if entry
    ]
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def env_roots(name: str) -> list[Path]:
    """Return the resolved roots listed in environment variable *name*.

    An unset variable yields an empty list, as does an ``OSError`` raised
    while resolving the entries.
    """
    raw = os.environ.get(name, "")
    try:
        roots = split_paths(raw)
    except OSError:
        roots = []
    return roots
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def path_under_env(path: Path, name: str) -> bool:
    """Return True when *path* lies under any root listed in env var *name*.

    With no roots configured the answer is always False.
    """
    for root in env_roots(name):
        if path_is_under(path, root):
            return True
    return False
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def active_clean_room_role() -> str:
    """Return ``CLEAN_ROOM_ROLE`` when it names a known role, else ``""``."""
    role = os.environ.get("CLEAN_ROOM_ROLE", "")
    if role not in ROLES:
        return ""
    return role
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def load_payload() -> tuple[dict[str, Any], str | None]:
    """Read the hook payload from stdin.

    Returns:
        ``(payload, error)``. Blank input gives ``({}, None)``. Malformed
        JSON or a non-object top-level value gives ``({}, message)``.
        Otherwise the parsed object is returned with ``None``.
    """
    raw = sys.stdin.read()
    if raw.strip() == "":
        return {}, None
    try:
        parsed = json.loads(raw)
    except json.JSONDecodeError as exc:
        return {}, f"malformed hook JSON payload: {exc}"
    if isinstance(parsed, dict):
        return parsed, None
    return {}, "hook payload must be a JSON object"
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def tool_name(payload: dict[str, Any]) -> str:
    """Return the tool name recorded in the hook payload.

    The keys ``tool_name``, ``tool`` and ``name`` are tried in that order and
    the first string value wins; ``""`` is returned when none holds a string.
    """
    candidates = (payload.get(key) for key in ("tool_name", "tool", "name"))
    return next((value for value in candidates if isinstance(value, str)), "")
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def should_fail_closed_for_write(payload: dict[str, Any]) -> bool:
    """Decide whether an unverifiable write must be treated as an error.

    True only when a known clean-room role is active and the payload's tool
    name is either missing or one of ``WRITE_TOOL_NAMES``.
    """
    if active_clean_room_role():
        name = tool_name(payload)
        return not name or name in WRITE_TOOL_NAMES
    return False
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _path_values(value: Any) -> list[str]:
    """Recursively collect string values stored under path-like keys.

    Dicts contribute every string whose key is in ``PATH_KEYS``; nested dicts
    and lists are searched in order. Other values contribute nothing.
    """
    found: list[str] = []
    if isinstance(value, list):
        for element in value:
            found += _path_values(element)
    elif isinstance(value, dict):
        for key, item in value.items():
            if isinstance(item, str):
                if key in PATH_KEYS:
                    found.append(item)
            elif isinstance(item, (dict, list)):
                found += _path_values(item)
    return found
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def candidate_paths(payload: dict[str, Any]) -> tuple[list[Path], list[str]]:
    """Resolve every path mentioned in the hook payload.

    Returns:
        ``(paths, errors)``: the distinct resolved paths in first-seen order,
        and one message for each value that raised ``OSError`` while being
        resolved.
    """
    # A dict keeps insertion order, so it doubles as an ordered "seen" set.
    resolved: dict[Path, None] = {}
    errors: list[str] = []
    for raw in _path_values(payload):
        try:
            target = Path(raw).expanduser().resolve()
        except OSError as exc:
            errors.append(f"invalid hook path {raw!r}: {exc}")
        else:
            resolved.setdefault(target, None)
    return list(resolved), errors
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
def checked_write_paths(payload: dict[str, Any], hook_name: str) -> tuple[list[Path], list[str]]:
    """Resolve the payload's paths and, for role-bound writes, verify them.

    Args:
        payload: Parsed hook payload.
        hook_name: Label used as the prefix of every error message.

    Returns:
        ``(paths, errors)``. When ``should_fail_closed_for_write`` is true,
        an error is added if no path was found, plus one for every path that
        is not an existing file.
    """
    paths, errors = candidate_paths(payload)
    if not should_fail_closed_for_write(payload):
        return paths, errors
    if not paths:
        errors.append(f"{hook_name} could not determine the written path from the hook payload")
    errors.extend(
        f"{hook_name} could not read written file: {target}"
        for target in paths
        if not target.is_file()
    )
    return paths, errors
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Block shell-style tools for clean-room role sessions."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
import sys
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
# CLEAN_ROOM_ROLE values for which main() denies shell tool use.
ROLES = {
    "contaminated-manager-verifier",
    "contaminated-source-analyst",
    "clean-architect",
    "clean-qa-editor",
}
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def main() -> int:
    """Deny shell tools whenever a known clean-room role is active.

    Returns:
        1, with a message on stderr, when ``CLEAN_ROOM_ROLE`` names a role in
        ``ROLES``; 0 otherwise.
    """
    role = os.environ.get("CLEAN_ROOM_ROLE", "")
    if role not in ROLES:
        return 0
    print(
        f"clean-room policy denied shell tool use for role {role}",
        file=sys.stderr,
    )
    return 1
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
if __name__ == "__main__":
|
|
30
|
+
raise SystemExit(main())
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Deny clean-role reads outside explicitly configured clean roots."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import json
|
|
7
|
+
import os
|
|
8
|
+
import sys
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
# CLEAN_ROOM_ROLE values whose reads main() restricts; other roles pass through.
CLEAN_ROLES = {"clean-architect", "clean-qa-editor"}
# Environment variable listing extra read roots allowed in addition to
# CLEAN_ROOM_CLEAN_ROOTS.
ADDITIONAL_CLEAN_READ_ROOTS = "CLEAN_ROOM_ALLOWED_READ_ROOTS"
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def load_payload() -> dict:
    """Read the hook payload from stdin, tolerating bad input.

    Returns:
        The parsed JSON object, or ``{}`` when stdin is blank, is not valid
        JSON, or does not hold a JSON object.
    """
    text = sys.stdin.read()
    if text.strip():
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            parsed = None
        if isinstance(parsed, dict):
            return parsed
    return {}
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def configured_roots(name: str) -> list[Path]:
    """Return the resolved roots listed in environment variable *name*.

    The value is split on os.pathsep; empty entries are ignored and each
    remaining entry has ``~`` expanded before being resolved.
    """
    raw = os.environ.get(name, "")
    return [
        Path(entry).expanduser().resolve()
        for entry in raw.split(os.pathsep)
        if entry
    ]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def append_path_value(paths: list[Path], value: str) -> None:
    """Resolve *value* and append it to *paths* in place.

    A value that raises ``OSError`` while being resolved is silently skipped.
    """
    try:
        resolved = Path(value).expanduser().resolve()
    except OSError:
        return
    paths.append(resolved)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def candidate_paths(payload: dict) -> list[Path]:
    """Collect the filesystem locations a read-style tool call would touch.

    Each key is looked up in ``tool_input`` first, then at the payload's top
    level. ``file_path``, ``path`` and ``cwd`` are always taken when they
    hold a string. ``glob`` is taken only when it looks like a path (contains
    "/" or starts with "~"). ``pattern`` is taken under the same condition,
    and only when the tool name contains "glob".
    """
    tool_input = payload.get("tool_input")
    if not isinstance(tool_input, dict):
        tool_input = {}
    tool_label = str(payload.get("tool_name") or payload.get("tool") or "").lower()

    def lookup(key: str) -> object:
        # tool_input takes precedence; a falsy value falls back to the payload.
        return tool_input.get(key) or payload.get(key)

    def looks_like_path(text: object) -> bool:
        return isinstance(text, str) and ("/" in text or text.startswith("~"))

    found: list[Path] = []
    for key in ("file_path", "path", "cwd"):
        direct = lookup(key)
        if isinstance(direct, str):
            append_path_value(found, direct)

    glob_value = lookup("glob")
    if looks_like_path(glob_value):
        append_path_value(found, glob_value)

    pattern_value = lookup("pattern")
    if "glob" in tool_label and looks_like_path(pattern_value):
        append_path_value(found, pattern_value)
    return found
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def is_under(path: Path, root: Path) -> bool:
    """Return True when *path* is *root* itself or has *root* as an ancestor."""
    if path == root:
        return True
    return any(ancestor == root for ancestor in path.parents)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def main() -> int:
    """Enforce the read policy for clean roles.

    Roles outside ``CLEAN_ROLES`` are not restricted. For a clean role, every
    path in the payload must avoid the source roots and fall under a root
    from ``CLEAN_ROOM_CLEAN_ROOTS`` or ``CLEAN_ROOM_ALLOWED_READ_ROOTS``.

    Returns:
        0 when the read is allowed; 1, with the reason on stderr, when no
        path could be resolved, a path is under a source root, no read roots
        are configured, or a path is outside the allowed roots.
    """
    role = os.environ.get("CLEAN_ROOM_ROLE", "")
    if role not in CLEAN_ROLES:
        return 0

    def deny(message: str) -> int:
        print(message, file=sys.stderr)
        return 1

    source_roots = configured_roots("CLEAN_ROOM_SOURCE_ROOTS")
    allowed_roots = configured_roots("CLEAN_ROOM_CLEAN_ROOTS") + configured_roots(ADDITIONAL_CLEAN_READ_ROOTS)
    requested = candidate_paths(load_payload())
    if not requested:
        # Nothing to verify means nothing can be proven safe: fail closed.
        return deny(f"clean-room policy denied clean role {role} read with no resolved path")
    for path in requested:
        # Source roots are checked first so they win over any overlapping allowed root.
        if any(is_under(path, root) for root in source_roots):
            return deny(f"clean-room policy denied clean role {role} reading source path {path}")
        if not allowed_roots:
            return deny(f"clean-room policy denied clean role {role} reading {path}: no clean read roots configured")
        if all(not is_under(path, root) for root in allowed_roots):
            return deny(f"clean-room policy denied clean role {role} reading outside allowed clean roots: {path}")
    return 0
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
if __name__ == "__main__":
|
|
104
|
+
raise SystemExit(main())
|