@openthread/claude-code-plugin 0.1.5 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude-plugin/plugin.json +2 -2
- package/README.md +111 -17
- package/bin/__tests__/settings-writer.test.js +122 -0
- package/bin/cli.sh +5 -28
- package/bin/lib/settings-writer.js +108 -0
- package/bin/postinstall.js +59 -25
- package/commands/export.md +22 -0
- package/commands/import.md +26 -0
- package/commands/search.md +15 -0
- package/commands/share.md +24 -3
- package/package.json +23 -5
- package/scripts/auth.sh +21 -3
- package/scripts/lib/__init__.py +1 -0
- package/scripts/lib/export_client.py +666 -0
- package/scripts/lib/import_client.py +510 -0
- package/scripts/lib/jsonl.py +88 -0
- package/scripts/lib/keychain.js +59 -0
- package/scripts/lib/mask.py +669 -0
- package/scripts/lib/sanitize.py +92 -0
- package/scripts/lib/search_client.py +218 -0
- package/scripts/lib/thread_to_md.py +156 -0
- package/scripts/share.sh +230 -47
- package/scripts/token.sh +215 -23
- package/skills/export-thread/SKILL.md +166 -0
- package/skills/import-thread/SKILL.md +171 -0
- package/skills/search-threads/SKILL.md +103 -0
- package/skills/share-thread/SKILL.md +25 -43
|
@@ -0,0 +1,669 @@
|
|
|
1
|
+
"""Privacy masking library for Claude Code session content.
|
|
2
|
+
|
|
3
|
+
This is a Python port of ``apps/api/src/lib/privacy-mask.ts``. The
|
|
4
|
+
pipeline, regex patterns, and replacement values are kept in lock-step
|
|
5
|
+
with the TypeScript source so that client-side masked content matches
|
|
6
|
+
the server's expectations.
|
|
7
|
+
|
|
8
|
+
Pipeline:
|
|
9
|
+
0. sanitize.normalize(text)
|
|
10
|
+
A. rewrite_project(text, cwds) -> cwd-relative [project] rewrite
|
|
11
|
+
B. paths -> home directories across OSes
|
|
12
|
+
C. secrets -> known prefixes, JWTs, etc.
|
|
13
|
+
D. entropy fallback -> high-entropy near sensitive labels
|
|
14
|
+
E. PII -> email / IP / MAC / shell prompts
|
|
15
|
+
F. usernames -> back-reference bare usernames
|
|
16
|
+
|
|
17
|
+
Python regex caveats vs JavaScript (documented here because they affect
|
|
18
|
+
fidelity with the TS source):
|
|
19
|
+
|
|
20
|
+
* Python's ``re`` does not support ``\p{L}`` / ``\p{N}``. We emulate
|
|
21
|
+
with a custom character class that matches unicode letters and
|
|
22
|
+
digits where needed. ``re.UNICODE`` is on by default in Python 3.
|
|
23
|
+
* Python does not need the TS "lastIndex" reset dance — we use
|
|
24
|
+
``finditer`` and module-level compiled patterns.
|
|
25
|
+
* The TS ``^`` / ``$`` with ``/m`` is the same as Python's
|
|
26
|
+
``re.MULTILINE``.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
from __future__ import annotations
|
|
30
|
+
|
|
31
|
+
import math
|
|
32
|
+
import re
|
|
33
|
+
from typing import Any, Iterable
|
|
34
|
+
|
|
35
|
+
try:
|
|
36
|
+
from . import sanitize # package-style import (preferred)
|
|
37
|
+
except ImportError: # pragma: no cover - fallback when lib is on sys.path
|
|
38
|
+
import sanitize # type: ignore[no-redef]
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
# ---------------------------------------------------------------------------
|
|
42
|
+
# Unicode character-class helpers
|
|
43
|
+
# ---------------------------------------------------------------------------
|
|
44
|
+
#
|
|
45
|
+
# Python's stdlib re does not support \p{L} / \p{N}. We approximate the TS
|
|
46
|
+
# "username segment" class by matching the Latin-compatible character set:
|
|
47
|
+
# all letters/digits that Python treats as word characters except ``_`` and
|
|
48
|
+
# the additional punctuation the TS class allows. Because ``\w`` under
|
|
49
|
+
# ``re.UNICODE`` already spans ``\p{L}\p{N}_``, we widen it with
|
|
50
|
+
# ``._+-`` and explicitly strip the leading underscore behaviour when we
|
|
51
|
+
# need the stricter variant.
|
|
52
|
+
|
|
53
|
+
# Matches \p{L}\p{N}._+-
# NOTE: this is the *body* of a character class (no surrounding brackets); it
# is spliced into ``[...]`` by USER_SEG below and by the username extraction
# patterns later in this module.
USER_SEG_CHARS = r"\w.+\-"  # \w already covers letters/digits/underscore

# Bounded username segment: 1..64 of the above characters.
USER_SEG = r"[" + USER_SEG_CHARS + r"]{1,64}"

# A stricter "letter or digit" class used for word-boundary assertions in
# the username back-reference pass. Matches Python's \w minus underscore
# (the double negation reads: "not a non-word character and not ``_``").
LETTER_OR_DIGIT = r"[^\W_]"
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
# ---------------------------------------------------------------------------
|
|
65
|
+
# A. Project (cwd) rewrite
|
|
66
|
+
# ---------------------------------------------------------------------------
|
|
67
|
+
|
|
68
|
+
# Chars that end a "path-like" run. The look-ahead keeps the cwd rewriter
# from consuming trailing punctuation (comma, colon, etc.) and from matching
# a cwd that is only a prefix of a longer path segment (``/code`` vs
# ``/codex``).
_PROJECT_BOUNDARY = r"(?=/|\s|$|[,;:\)\]\}\"'`>])"


def rewrite_project(text: str, cwds: Iterable[str] | None) -> str:
    """Replace occurrences of the user's cwd with the literal token
    ``[project]``. The cwd rewrite runs BEFORE the generic home-dir rules
    so that deeper paths collapse to ``[project]/sub/path`` instead of
    ``[user-home]/...``.

    * ``cwds`` is de-duplicated and sorted longest-first so nested working
      directories (e.g. ``/Users/a/code/app`` and ``/Users/a/code``) are
      matched in the most-specific-first order.
    * Matching is case-insensitive to handle macOS case-preserving
      filesystems.
    * Trailing slashes are stripped before matching. Any entry that is
      empty after stripping (``""``, ``/``, ``//`` ...) is skipped:
      an empty prefix would leave only the boundary look-ahead, which
      matches at every boundary position and would litter the document
      with ``[project]`` tokens.
    * Non-string entries are ignored.

    Returns ``text`` unchanged when it is empty or no usable cwd remains.
    """
    if not text or not cwds:
        return text
    # De-duplicate; keep only entries that still have content once the
    # trailing slashes are removed.
    unique = {
        c.rstrip("/")
        for c in cwds
        if isinstance(c, str) and c.rstrip("/")
    }
    if not unique:
        return text
    out = text
    # Longest-first so the most specific directory wins.
    for cwd in sorted(unique, key=len, reverse=True):
        pattern = re.compile(re.escape(cwd) + _PROJECT_BOUNDARY, re.IGNORECASE)
        out = pattern.sub("[project]", out)
    return out
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
# ---------------------------------------------------------------------------
|
|
102
|
+
# B. Path rules
|
|
103
|
+
# ---------------------------------------------------------------------------
|
|
104
|
+
|
|
105
|
+
# Order matters: longest / most specific first. Each tuple is
# ``(compiled_regex, replacement)``. Replacements are passed to
# ``Pattern.sub``, so backslashes in them are escape sequences (``\\`` emits
# a single backslash).
_PATH_RULES: list[tuple[re.Pattern[str], str]] = [
    # Windows UNC: \\server\share\... -> \\[server]\[share]
    (
        re.compile(r"\\\\[A-Za-z0-9._\-$]{1,64}\\[A-Za-z0-9._\-$]{1,64}"),
        r"\\\\[server]\\[share]",
    ),
    # Windows drive: C:\Users\<name>
    # The look-ahead stops the username at a backslash, end of text, or any
    # character outside the username class. The drive letter is always
    # rewritten to ``C:``.
    (
        re.compile(
            r"[A-Za-z]:\\[Uu][Ss][Ee][Rr][Ss]\\" + USER_SEG + r"(?=\\|$|[^\w.+\-])",
        ),
        r"C:\\Users\\[user]",
    ),
    # WSL: /mnt/<drive>/Users/<name>
    (
        re.compile(r"/mnt/[a-zA-Z]/[Uu][Ss][Ee][Rr][Ss]/" + USER_SEG),
        "/mnt/[drive]/Users/[user]",
    ),
    # macOS Volumes: /Volumes/<vol>
    # The class includes a literal space, so volume names with spaces are
    # consumed (up to 64 characters).
    (
        re.compile(r"/Volumes/[\w.+\- ]{1,64}"),
        "/Volumes/[volume]",
    ),
    # Linux mounted media: /mnt/<name> or /media/<name>
    # Runs after the WSL rule; the WSL output ``/mnt/[drive]`` is not
    # re-matched because ``[`` is outside the username class.
    (
        re.compile(r"/(?:mnt|media)/" + USER_SEG),
        "/[mount]/[user]",
    ),
    # POSIX /Users/<name>
    (
        re.compile(r"/[Uu][Ss][Ee][Rr][Ss]/" + USER_SEG),
        "/Users/[user]",
    ),
    # Linux /home/<name>
    (
        re.compile(r"/home/" + USER_SEG),
        "/home/[user]",
    ),
    # /root
    # NOTE(review): the replacement equals the matched text, so this rule
    # currently leaves the input unchanged -- confirm against the TS source
    # whether that is intentional.
    (
        re.compile(r"/root(?=/|$|[^\w])"),
        "/root",
    ),
    # ~/... tilde paths. Exclude shell metacharacters so we don't eat a
    # shell prompt terminator and don't re-match an already redacted
    # "~/[path]". Mirrors the TS exclude set.
    (
        re.compile(r"~/[^\s\"'`\)\]\}>$#\[\]]{1,512}"),
        "~/[path]",
    ),
]
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def _apply_path_rules(text: str) -> str:
    """Run every entry of ``_PATH_RULES`` over ``text`` in table order.

    Each rule sees the output of the previous one, so the ordering of the
    table determines which placeholder wins for overlapping paths.
    """
    masked = text
    for rule in _PATH_RULES:
        regex, replacement = rule
        masked = regex.sub(replacement, masked)
    return masked
|
|
165
|
+
|
|
166
|
+
|
|
167
|
+
# Post-processing for the path rules. Once ``_apply_path_rules`` has run,
# home directories appear as ``/Users/[user]``, ``/home/[user]``,
# ``C:\Users\[user]`` or ``/mnt/[drive]/Users/[user]``. This pattern finds
# those already-redacted prefixes so they can be shortened to the single
# ``[user-home]`` placeholder. Paths that were rewritten to ``[project]``
# earlier no longer contain any of these prefixes and are left alone.
_HOME_COLLAPSE_RE = re.compile(
    r"(?:/Users/\[user\]|/home/\[user\]|C:\\Users\\\[user\]|/mnt/\[drive\]/Users/\[user\])"
)


def _collapse_home(text: str) -> str:
    """Shorten every redacted home-directory prefix to ``[user-home]``."""
    return re.sub(_HOME_COLLAPSE_RE, "[user-home]", text)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
# ---------------------------------------------------------------------------
|
|
184
|
+
# C. Secret rules
|
|
185
|
+
# ---------------------------------------------------------------------------
|
|
186
|
+
|
|
187
|
+
# Ordered table of ``(compiled_regex, replacement)`` pairs applied by
# ``_apply_secret_rules``. Rules run top to bottom and each one sees the
# output of the previous rule, so multi-line / structured secrets (key
# blocks, JWTs, headers) come before the single-token provider prefixes and
# the catch-all ``KEY=value`` rule runs last. Quantifiers are bounded
# throughout, which limits how much text a single match can consume.
_SECRET_RULES: list[tuple[re.Pattern[str], str]] = [
    # PEM private keys (generic)
    (
        re.compile(
            r"-----BEGIN[ \t]+(?:[A-Z0-9]{1,12}[ \t]){0,4}PRIVATE KEY-----"
            r"[\s\S]{1,16384}?"
            r"-----END[ \t]+(?:[A-Z0-9]{1,12}[ \t]){0,4}PRIVATE KEY-----"
        ),
        "[REDACTED_PRIVATE_KEY]",
    ),
    # PGP private key block
    (
        re.compile(
            r"-----BEGIN PGP PRIVATE KEY BLOCK-----"
            r"[\s\S]{1,16384}?"
            r"-----END PGP PRIVATE KEY BLOCK-----"
        ),
        "[REDACTED_PRIVATE_KEY]",
    ),
    # OpenSSH private key
    (
        re.compile(
            r"-----BEGIN OPENSSH PRIVATE KEY-----"
            r"[\s\S]{1,16384}?"
            r"-----END OPENSSH PRIVATE KEY-----"
        ),
        "[REDACTED_PRIVATE_KEY]",
    ),
    # JWT (header.payload.signature); the signature segment is optional.
    (
        re.compile(
            r"\beyJ[A-Za-z0-9_-]{10,4096}\.[A-Za-z0-9_-]{10,4096}"
            r"(?:\.[A-Za-z0-9_-]{0,4096})?"
        ),
        "[REDACTED_JWT]",
    ),
    # Authorization: <anything> -- the rest of the line is dropped.
    (
        re.compile(r"Authorization[ \t]*:[ \t]*[^\r\n]{1,4096}", re.IGNORECASE),
        "Authorization: [REDACTED]",
    ),
    # Bearer tokens
    (
        re.compile(r"\bBearer[ \t]+[A-Za-z0-9_\-.~+/]{8,4096}=*"),
        "Bearer [REDACTED_TOKEN]",
    ),
    # Basic auth embedded in URL: scheme://user:pass@host
    # Group 1 captures the scheme so it survives in the replacement.
    (
        re.compile(
            r"\b([a-zA-Z][a-zA-Z0-9+.\-]{1,16}):"
            r"//[^\s/:@]{1,256}:[^\s/@]{1,256}@"
        ),
        r"\1://[REDACTED_BASIC_AUTH]@",
    ),
    # DB connection strings (runs after basic-auth scrub above)
    (
        re.compile(
            r"\b(?:postgres(?:ql)?|mysql|mongodb(?:\+srv)?|redis|amqp|amqps)"
            r"[+\w]{0,16}://[^\s\"'`\)\]\}>]{1,2048}",
            re.IGNORECASE,
        ),
        "[REDACTED_CONNECTION_STRING]",
    ),
    # Anthropic
    (re.compile(r"\bsk-ant-[A-Za-z0-9_\-]{20,200}"), "[REDACTED_KEY]"),
    (re.compile(r"\banthropic-api03-[A-Za-z0-9_\-]{20,200}"), "[REDACTED_KEY]"),
    # OpenAI
    (re.compile(r"\bsk-(?:proj-|svcacct-)?[A-Za-z0-9_\-]{20,200}"), "[REDACTED_KEY]"),
    # Google API key
    (re.compile(r"\bAIza[0-9A-Za-z_\-]{35}"), "[REDACTED_KEY]"),
    # AWS access keys
    (re.compile(r"\b(?:AKIA|ASIA)[0-9A-Z]{16}"), "[REDACTED_KEY]"),
    # GitHub
    (re.compile(r"\bgh[pousr]_[A-Za-z0-9]{20,255}"), "[REDACTED_KEY]"),
    (re.compile(r"\bgithub_pat_[A-Za-z0-9_]{20,255}"), "[REDACTED_KEY]"),
    # GitLab
    (re.compile(r"\bglpat-[A-Za-z0-9_\-]{20,255}"), "[REDACTED_KEY]"),
    # Slack
    (re.compile(r"\bxox[bpars]-[A-Za-z0-9\-]{10,255}"), "[REDACTED_KEY]"),
    (re.compile(r"\bxapp-[A-Za-z0-9\-]{10,255}"), "[REDACTED_KEY]"),
    # Stripe live/test secret/restricted/publishable
    (re.compile(r"\b(?:sk|rk|pk)_live_[A-Za-z0-9]{20,255}"), "[REDACTED_KEY]"),
    (re.compile(r"\b(?:sk|rk|pk)_test_[A-Za-z0-9]{20,255}"), "[REDACTED_KEY]"),
    # Stripe legacy SK<32> / AC<32>
    (re.compile(r"\b(?:SK|AC)[0-9a-fA-F]{32}\b"), "[REDACTED_KEY]"),
    # Hugging Face
    (re.compile(r"\bhf_[A-Za-z0-9]{20,255}"), "[REDACTED_KEY]"),
    # xAI
    (re.compile(r"\bxai-[A-Za-z0-9]{20,255}"), "[REDACTED_KEY]"),
    # DigitalOcean PAT
    (re.compile(r"\bdop_v1_[a-f0-9]{40,128}"), "[REDACTED_KEY]"),
    # Generic env-var assignment lines: KEY=value
    # Anchored per line (MULTILINE). The negative look-ahead skips values
    # that open with ``{`` or ``(``; group 1 keeps the variable name.
    (
        re.compile(
            r"^([A-Z][A-Z0-9_]{2,64})[ \t]*=[ \t]*(?![ \t]*[{(])[^\r\n]{1,2048}$",
            re.MULTILINE,
        ),
        r"\1=[REDACTED]",
    ),
]
|
|
287
|
+
|
|
288
|
+
|
|
289
|
+
def _apply_secret_rules(text: str) -> str:
    """Apply each ``_SECRET_RULES`` entry to ``text`` in table order.

    Later rules operate on the already-redacted output of earlier ones.
    """
    scrubbed = text
    for rule in _SECRET_RULES:
        regex, replacement = rule
        scrubbed = regex.sub(replacement, scrubbed)
    return scrubbed
|
|
294
|
+
|
|
295
|
+
|
|
296
|
+
# ---------------------------------------------------------------------------
|
|
297
|
+
# D. Entropy fallback
|
|
298
|
+
# ---------------------------------------------------------------------------
|
|
299
|
+
|
|
300
|
+
# Words that suggest the text right after them may be a credential.
_SECRET_LABELS_RE = re.compile(
    r"(api[_\-]?key|api[_\-]?secret|auth(?:orization)?|access[_\-]?token|"
    r"refresh[_\-]?token|secret|password|passwd|passphrase|token|bearer|"
    r"credential|client[_\-]?secret|private[_\-]?key)",
    re.IGNORECASE,
)

# Candidate secret value: 24..512 characters from a base64/URL-safe alphabet.
_ADJACENT_TOKEN_RE = re.compile(r"[A-Za-z0-9+/=._\-]{24,512}")


def _shannon_entropy(s: str) -> float:
    """Return the Shannon entropy of ``s`` in bits per character.

    The empty string has entropy ``0.0``.
    """
    if not s:
        return 0.0
    counts: dict[str, int] = {}
    for ch in s:
        counts[ch] = counts.get(ch, 0) + 1
    length = len(s)
    h = 0.0
    for c in counts.values():
        p = c / length
        h -= p * math.log2(p)
    return h


def _apply_entropy_rule(text: str) -> str:
    """Redact high-entropy tokens that appear shortly after a secret label.

    For every label match, the following 256 characters are scanned and the
    first candidate token with entropy >= 4.0 bits/char is replaced by
    ``[REDACTED_KEY]``.

    Several labels can point at the same token (``auth token: <value>``) or
    at overlapping tokens (``auth_token=<value>``). The collected spans are
    therefore merged before the text is rebuilt; splicing each span
    independently would reuse indices that are stale after the first
    replacement and delete text that follows the token.
    """
    spans: list[tuple[int, int]] = []
    for label in _SECRET_LABELS_RE.finditer(text):
        label_end = label.end()
        window = text[label_end : label_end + 256]
        for tok in _ADJACENT_TOKEN_RE.finditer(window):
            value = tok.group(0)
            # Defensive guard kept from the original implementation.
            if value.startswith("[REDACTED"):
                continue
            if _shannon_entropy(value) >= 4.0:
                spans.append((label_end + tok.start(), label_end + tok.end()))
                # Mask only the first qualifying token per label.
                break
    if not spans:
        return text

    # Merge duplicate / overlapping spans so each character is replaced once.
    spans.sort()
    merged: list[tuple[int, int]] = []
    for start, end in spans:
        if merged and start < merged[-1][1]:
            prev_start, prev_end = merged[-1]
            merged[-1] = (prev_start, max(prev_end, end))
        else:
            merged.append((start, end))

    # Rebuild left-to-right from the untouched slices of the input.
    pieces: list[str] = []
    cursor = 0
    for start, end in merged:
        pieces.append(text[cursor:start])
        pieces.append("[REDACTED_KEY]")
        cursor = end
    pieces.append(text[cursor:])
    return "".join(pieces)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
# ---------------------------------------------------------------------------
|
|
349
|
+
# E. PII rules
|
|
350
|
+
# ---------------------------------------------------------------------------
|
|
351
|
+
|
|
352
|
+
# IPv4 with RFC1918 + loopback + link-local exemptions.
# The negative look-aheads skip, in order: 127.x (loopback), 10.x,
# 192.168.x, 172.16-31.x (private ranges), 169.254.x (link-local) and the
# literal 0.0.0.0. Everything else that parses as four 0-255 octets matches.
_IPV4_RE = re.compile(
    r"\b"
    r"(?!127\.)"
    r"(?!10\.)"
    r"(?!192\.168\.)"
    r"(?!172\.(?:1[6-9]|2\d|3[01])\.)"
    r"(?!169\.254\.)"
    r"(?!0\.0\.0\.0\b)"
    r"(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)"
    r"(?:\.(?:25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}"
    r"\b"
)

# Three alternatives: colon-separated hex groups, a run of groups ending in
# ``::``, and a form that starts with ``::``. Matches are post-filtered by
# ``_mask_ipv6``.
_IPV6_RE = re.compile(
    r"\b(?:[0-9A-Fa-f]{1,4}:){2,7}[0-9A-Fa-f]{0,4}\b"
    r"|\b(?:[0-9A-Fa-f]{1,4}:){1,7}:"
    r"|\b::(?:[0-9A-Fa-f]{1,4}:){0,6}[0-9A-Fa-f]{1,4}\b"
)

# Six two-digit hex octets separated by ``:`` or ``-``.
_MAC_RE = re.compile(r"\b(?:[0-9A-Fa-f]{2}[:\-]){5}[0-9A-Fa-f]{2}\b")

# local-part@domain.tld with bounded lengths on every component.
_EMAIL_RE = re.compile(
    r"[A-Za-z0-9._%+\-]{1,64}@[A-Za-z0-9.\-]{1,253}\.[A-Za-z]{2,24}"
)

# Shell prompt: user@host:path[$#]. We accept unicode letters/digits via
# \w (which includes _) and extend with the TS punctuation class.
_SHELL_PROMPT_RE = re.compile(
    r"\b[\w.\-]{1,64}@[\w.\-]{1,253}:[^\s$#]{0,256}[$#]"
)
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def _mask_ipv6(match: "re.Match[str]") -> str:
    """Substitution callback for ``_IPV6_RE``.

    Returns the matched text unchanged when it has fewer than two colons or
    is the loopback address ``::1``; otherwise returns ``[ipv6]``.
    """
    candidate = match.group(0)
    keep_verbatim = candidate.count(":") < 2 or candidate == "::1"
    return candidate if keep_verbatim else "[ipv6]"
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
def _apply_pii_rules(text: str) -> str:
    """Mask shell prompts, e-mail addresses, MAC, IPv6 and IPv4 addresses.

    The stage order is significant:

    * shell prompts run first so ``user@host`` isn't chewed by the e-mail
      rule;
    * MAC addresses run before IPv6 because the IPv6 regex would otherwise
      eat MAC sequences.
    """
    stages = (
        (_SHELL_PROMPT_RE, "[shell-prompt]"),
        (_EMAIL_RE, "[email]"),
        (_MAC_RE, "[mac]"),
        (_IPV6_RE, _mask_ipv6),
        (_IPV4_RE, "[ip_address]"),
    )
    masked = text
    for regex, replacement in stages:
        masked = regex.sub(replacement, masked)
    return masked
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
# ---------------------------------------------------------------------------
|
|
408
|
+
# F. Username back-references
|
|
409
|
+
# ---------------------------------------------------------------------------
|
|
410
|
+
|
|
411
|
+
# Patterns whose group 1 captures the username segment of a home-directory
# path (POSIX /Users, Linux /home, Windows drive, WSL). Unlike USER_SEG,
# these require at least 2 characters.
_USERNAME_EXTRACT_PATTERNS: list[re.Pattern[str]] = [
    re.compile(r"/[Uu][Ss][Ee][Rr][Ss]/([" + USER_SEG_CHARS + r"]{2,64})"),
    re.compile(r"/home/([" + USER_SEG_CHARS + r"]{2,64})"),
    re.compile(
        r"[A-Za-z]:\\[Uu][Ss][Ee][Rr][Ss]\\([" + USER_SEG_CHARS + r"]{2,64})"
    ),
    re.compile(
        r"/mnt/[a-zA-Z]/[Uu][Ss][Ee][Rr][Ss]/([" + USER_SEG_CHARS + r"]{2,64})"
    ),
]

# Captured names that must never be treated as a real username. The
# comparison in ``_extract_usernames`` is an exact, case-sensitive set lookup.
# NOTE(review): entries containing ``[`` or a space cannot be produced by the
# capture class above; presumably kept for parity with the TS source.
_RESERVED_USERNAMES = frozenset(
    {
        "[user]",
        "[path]",
        "Public",
        "Shared",
        "Default",
        "All Users",
        "root",
        "admin",
    }
)
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
def _extract_usernames(text: str) -> set[str]:
    """Collect usernames that appear as the owner segment of a home path.

    Every pattern in ``_USERNAME_EXTRACT_PATTERNS`` is scanned over ``text``.
    A captured name is kept when it is 2..64 characters long and is not
    listed in ``_RESERVED_USERNAMES``.
    """
    candidates = (
        hit.group(1)
        for regex in _USERNAME_EXTRACT_PATTERNS
        for hit in regex.finditer(text)
    )
    return {
        name
        for name in candidates
        if name and 2 <= len(name) <= 64 and name not in _RESERVED_USERNAMES
    }
|
|
449
|
+
|
|
450
|
+
|
|
451
|
+
def _mask_bare_usernames(text: str, usernames: set[str]) -> str:
    """Mask stand-alone mentions of known usernames in ``text``.

    For each username of at least 3 characters, two substitutions run:

    1. hostname-style references such as ``alice-macbook`` or ``alice.dev``
       (case-insensitive, optional plural ``s``) become ``[hostname]``;
    2. bare references delimited by non-word characters (or the string
       edges) become ``[user]``.

    Usernames are processed longest-first (ties broken alphabetically).
    Iterating the set directly made the result depend on hash order when one
    name is a prefix of another: masking ``alice`` before ``alice.smith``
    turns ``alice.smith`` into ``[user].smith`` and leaves part of the longer
    name exposed.
    """
    result = text
    for username in sorted(usernames, key=lambda name: (-len(name), name)):
        if len(username) < 3:
            # Very short names would match too many ordinary words.
            continue
        escaped = re.escape(username)
        # Hostname-style: alice-macbook, alice.dev
        result = re.sub(
            r"\b" + escaped
            + r"s?[-.](?:macbook|laptop|desktop|pc|dev|server|local)[\w.\-]{0,64}",
            "[hostname]",
            result,
            flags=re.IGNORECASE,
        )
        # Bare standalone reference. Approximate JS \p{L}\p{N}_ boundaries
        # using Python's \w (which in Python 3 covers unicode letters +
        # digits + underscore). Group 1 re-emits the delimiter that was
        # consumed in front of the name.
        result = re.sub(
            r"(^|[^\w])" + escaped + r"(?=$|[^\w])",
            r"\1[user]",
            result,
        )
    return result
|
|
474
|
+
|
|
475
|
+
|
|
476
|
+
# ---------------------------------------------------------------------------
|
|
477
|
+
# Public API
|
|
478
|
+
# ---------------------------------------------------------------------------
|
|
479
|
+
|
|
480
|
+
|
|
481
|
+
def mask(
    text: str,
    cwds: list[str] | None = None,
    home: str | None = None,
) -> str:
    """Mask sensitive data in a single string.

    ``cwds`` — optional list of working directories to rewrite to
    ``[project]``. Provide this for Claude Code session content so that
    the actual project root becomes a stable placeholder regardless of
    where the user cloned the repo.

    ``home`` — optional explicit home-directory path to rewrite to
    ``[user-home]`` early. If omitted, falls back to the path-rule
    pipeline which still collapses home dirs via pattern matching. A
    value that is blank once trailing slashes are removed (``"/"``,
    ``"//"``, whitespace) is ignored: an empty prefix would leave only the
    boundary look-ahead, which matches at every boundary position.

    Non-string or empty ``text`` is returned unchanged.
    """
    if not isinstance(text, str) or not text:
        return text

    # Step 0. Normalize once; the same value is reused for the username
    # scan in step F.
    normalized = sanitize.normalize(text)
    out = normalized

    # Shell prompts first (see TS: before path rules so prompt "$" doesn't
    # get consumed).
    out = _SHELL_PROMPT_RE.sub("[shell-prompt]", out)

    # A. cwd -> [project]
    if cwds:
        out = rewrite_project(out, cwds)

    # A'. explicit home rewrite (optional). Done after cwd so cwd wins
    # when it is a subpath of home.
    home_clean = home.rstrip("/") if isinstance(home, str) else ""
    if home_clean.strip():
        out = re.sub(
            re.escape(home_clean) + _PROJECT_BOUNDARY,
            "[user-home]",
            out,
            flags=re.IGNORECASE,
        )

    # B. generic path rules
    out = _apply_path_rules(out)

    # B'. Collapse residual /Users/[user], C:\Users\[user], etc. to
    # the shorter [user-home] placeholder for a nicer UX.
    out = _collapse_home(out)

    # C. secrets
    out = _apply_secret_rules(out)

    # D. entropy fallback
    out = _apply_entropy_rule(out)

    # E. PII
    out = _apply_pii_rules(out)

    # F. username back-references. Extract from the normalized original so
    # the path rules haven't erased the usernames yet.
    usernames = _extract_usernames(normalized)
    if usernames:
        out = _mask_bare_usernames(out, usernames)

    return out
|
|
545
|
+
|
|
546
|
+
|
|
547
|
+
def _mask_value_deep(value: Any, usernames: set[str], cwds: list[str] | None, home: str | None) -> Any:
    """Recursively mask every string inside a JSON-like value.

    Lists and dicts are rebuilt with their elements / values masked (dict
    keys are kept as they are). Strings go through ``mask`` and then, when
    ``usernames`` is non-empty, through ``_mask_bare_usernames``. Any other
    value is returned unchanged.
    """
    if isinstance(value, dict):
        return {
            key: _mask_value_deep(item, usernames, cwds, home)
            for key, item in value.items()
        }
    if isinstance(value, list):
        return [_mask_value_deep(item, usernames, cwds, home) for item in value]
    if not isinstance(value, str):
        return value
    redacted = mask(value, cwds=cwds, home=home)
    return _mask_bare_usernames(redacted, usernames) if usernames else redacted
|
|
558
|
+
|
|
559
|
+
|
|
560
|
+
def _collect_block_strings(block: dict) -> list[str]:
    """Return every text-bearing string in a Claude Code content block so
    the two-pass username scan can see them.

    For ``text`` / ``thinking`` / ``tool_result`` blocks the ``content``
    field is used, falling back to ``text`` when ``content`` is missing or
    empty. Structured payloads (lists / dicts) are walked recursively rather
    than dropped, and when both fields are populated both are collected, so
    a home path in either one still contributes its username.
    """
    parts: list[str] = []

    def visit(v: Any) -> None:
        # Depth-first walk that records every string leaf.
        if isinstance(v, str):
            parts.append(v)
        elif isinstance(v, list):
            for x in v:
                visit(x)
        elif isinstance(v, dict):
            for x in v.values():
                visit(x)

    btype = block.get("type")
    if btype in ("text", "thinking", "tool_result"):
        content = block.get("content")
        text_field = block.get("text")
        visit(content or text_field or "")
        if content and text_field:
            visit(text_field)
    elif btype == "code":
        parts.append(str(block.get("content", "")))
        if block.get("filename"):
            parts.append(str(block["filename"]))
    elif btype == "file":
        parts.append(str(block.get("content", "")))
        parts.append(str(block.get("filename", "")))
    elif btype == "error":
        parts.append(str(block.get("message", "")))
    elif btype == "artifact":
        parts.append(str(block.get("content", "")))
        parts.append(str(block.get("title", "")))
        parts.append(str(block.get("identifier", "")))
    elif btype == "tool_use":
        visit(block.get("input"))
    return parts
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
def mask_block_content(
    block: dict,
    usernames: set[str],
    *,
    cwds: list[str] | None = None,
    home: str | None = None,
) -> dict:
    """Mask a single content block. ``usernames`` is the pre-gathered
    set from a two-pass scan so cross-block bare references are caught.

    A shallow copy of ``block`` is returned with its text-bearing fields
    masked; the input dict is not modified. Field values that are lists or
    dicts are masked recursively instead of being passed through, and for
    ``text`` / ``thinking`` / ``tool_result`` blocks a ``text`` field is
    masked as well as ``content`` (the username collector reads both, so
    both can carry sensitive text). Block types without a rule here (image,
    math, unknown) are returned as-is.
    """
    btype = block.get("type")

    def _mask_field(value: Any) -> Any:
        # Strings are masked; lists/dicts are walked; other values are
        # returned unchanged.
        return _mask_value_deep(value, usernames, cwds, home)

    if btype in ("text", "thinking", "tool_result"):
        out = {**block, "content": _mask_field(block.get("content", ""))}
        if "text" in block:
            out["text"] = _mask_field(block["text"])
        return out
    if btype == "code":
        out = {**block, "content": _mask_field(block.get("content", ""))}
        if block.get("filename"):
            out["filename"] = _mask_field(block["filename"])
        return out
    if btype == "file":
        return {
            **block,
            "content": _mask_field(block.get("content", "")),
            "filename": _mask_field(block.get("filename", "")),
        }
    if btype == "error":
        return {**block, "message": _mask_field(block.get("message", ""))}
    if btype == "artifact":
        # NOTE(review): ``identifier`` is scanned for usernames elsewhere in
        # this module but is not masked here -- confirm whether it can carry
        # user paths.
        return {
            **block,
            "content": _mask_field(block.get("content", "")),
            "title": _mask_field(block.get("title", "")),
        }
    if btype == "tool_use":
        return {
            **block,
            "input": _mask_field(block.get("input")),
        }
    # image, math, unknown: pass through.
    return block
|
|
646
|
+
|
|
647
|
+
|
|
648
|
+
def mask_thread_blocks(
    blocks: list[dict],
    *,
    cwds: list[str] | None = None,
    home: str | None = None,
) -> list[dict]:
    """Mask a list of Claude Code content blocks in two passes.

    The first pass gathers every text-bearing string from all blocks,
    normalizes the newline-joined result and extracts usernames from it.
    The second pass masks each block with that shared username set, so a
    bare reference in one block is masked even when the revealing path only
    appeared in another block.
    """
    collected = [
        fragment
        for blk in blocks
        for fragment in _collect_block_strings(blk)
    ]
    known_usernames = _extract_usernames(sanitize.normalize("\n".join(collected)))

    masked_blocks: list[dict] = []
    for blk in blocks:
        masked_blocks.append(
            mask_block_content(blk, known_usernames, cwds=cwds, home=home)
        )
    return masked_blocks
|