@ictechgy/context-guard 0.4.8 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/README.ko.md +92 -37
- package/README.md +111 -37
- package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
- package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
- package/docs/distribution.md +10 -7
- package/docs/experimental-benchmark-fixtures.md +8 -1
- package/package.json +3 -6
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +9 -6
- package/plugins/context-guard/README.md +27 -12
- package/plugins/context-guard/bin/context-guard +113 -26
- package/plugins/context-guard/bin/context-guard-artifact +542 -46
- package/plugins/context-guard/bin/context-guard-cache-score +380 -0
- package/plugins/context-guard/bin/context-guard-compress +146 -1
- package/plugins/context-guard/bin/context-guard-cost +783 -4
- package/plugins/context-guard/bin/context-guard-experiments +2211 -121
- package/plugins/context-guard/bin/context-guard-failed-nudge +3 -0
- package/plugins/context-guard/bin/context-guard-filter +163 -7
- package/plugins/context-guard/bin/context-guard-guard-read +3 -0
- package/plugins/context-guard/bin/context-guard-pack +602 -43
- package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
- package/plugins/context-guard/bin/context-guard-setup +165 -31
- package/plugins/context-guard/bin/context-guard-statusline +490 -283
- package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +241 -1
- package/plugins/context-guard/lib/context_guard_commands.py +206 -0
- package/plugins/context-guard/skills/setup/SKILL.md +1 -0
- package/context-guard-kit/README.md +0 -91
- package/context-guard-kit/benchmark_runner.py +0 -2401
- package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
- package/context-guard-kit/context_compress.py +0 -695
- package/context-guard-kit/context_escrow.py +0 -935
- package/context-guard-kit/context_filter.py +0 -637
- package/context-guard-kit/context_guard_cli.py +0 -325
- package/context-guard-kit/context_guard_diet.py +0 -1711
- package/context-guard-kit/context_pack.py +0 -2713
- package/context-guard-kit/cost_guard.py +0 -2349
- package/context-guard-kit/experimental_registry.py +0 -2339
- package/context-guard-kit/failed_attempt_nudge.py +0 -567
- package/context-guard-kit/guard_large_read.py +0 -690
- package/context-guard-kit/hook_secret_patterns.py +0 -43
- package/context-guard-kit/read_symbol.py +0 -483
- package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
- package/context-guard-kit/sanitize_output.py +0 -725
- package/context-guard-kit/settings.example.json +0 -67
- package/context-guard-kit/setup_wizard.py +0 -2515
- package/context-guard-kit/statusline.sh +0 -362
- package/context-guard-kit/statusline_merged.sh +0 -157
- package/context-guard-kit/tool_schema_pruner.py +0 -837
- package/context-guard-kit/trim_command_output.py +0 -1449
|
@@ -1,725 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Sanitize grep/diff/log output before it enters Claude context.
|
|
3
|
-
|
|
4
|
-
The helper can wrap a command while preserving its exit code, or sanitize stdin.
|
|
5
|
-
It redacts common credential patterns, anonymizes absolute paths by default, and
|
|
6
|
-
keeps only bounded head/anchor/tail context when output is too large.
|
|
7
|
-
"""
|
|
8
|
-
from __future__ import annotations
|
|
9
|
-
|
|
10
|
-
import argparse
|
|
11
|
-
import collections
|
|
12
|
-
import hashlib
|
|
13
|
-
import os
|
|
14
|
-
from pathlib import PurePosixPath
|
|
15
|
-
import queue
|
|
16
|
-
import re
|
|
17
|
-
import signal
|
|
18
|
-
import subprocess
|
|
19
|
-
import sys
|
|
20
|
-
import threading
|
|
21
|
-
import time
|
|
22
|
-
from typing import Iterable, Iterator, TextIO
|
|
23
|
-
|
|
24
|
-
TERMINAL_CONTROL_RE = re.compile(
|
|
25
|
-
r"(?:"
|
|
26
|
-
r"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)|" # OSC title/clipboard controls
|
|
27
|
-
r"\x1b[@-_][0-?]*[ -/]*[@-~]|" # CSI and other ESC sequences
|
|
28
|
-
r"[\x00-\x08\x0b\x0c\x0d\x0e-\x1f\x7f-\x9f]"
|
|
29
|
-
r")"
|
|
30
|
-
)
|
|
31
|
-
# Match plausible absolute file paths without treating operators (`//`) or
|
|
32
|
-
# tiny string literals (`"/"`) as paths. Requiring at least one directory plus
|
|
33
|
-
# one leaf keeps the sanitizer from corrupting code while still anonymizing
|
|
34
|
-
# common grep/test output like /Users/me/project/app.py:12.
|
|
35
|
-
PATH_SEGMENT = r"[A-Za-z0-9._~+\-]+"
|
|
36
|
-
ABSOLUTE_PATH_RE = re.compile(
|
|
37
|
-
rf"(?P<prefix>^|[\s('\"=])(?P<path>/(?:{PATH_SEGMENT}/)+{PATH_SEGMENT})"
|
|
38
|
-
)
|
|
39
|
-
WINDOWS_PATH_RE = re.compile(
|
|
40
|
-
rf"(?P<prefix>^|[\s('\"=])(?P<path>[A-Za-z]:\\(?:{PATH_SEGMENT}\\)+{PATH_SEGMENT})"
|
|
41
|
-
)
|
|
42
|
-
PRIVATE_KEY_BEGIN_RE = re.compile(
|
|
43
|
-
r"-----BEGIN (?:[A-Z0-9 ]*PRIVATE KEY|OPENSSH PRIVATE KEY|PGP PRIVATE KEY BLOCK)-----"
|
|
44
|
-
)
|
|
45
|
-
PRIVATE_KEY_END_RE = re.compile(
|
|
46
|
-
r"-----END (?:[A-Z0-9 ]*PRIVATE KEY|OPENSSH PRIVATE KEY|PGP PRIVATE KEY BLOCK)-----"
|
|
47
|
-
)
|
|
48
|
-
AUTH_HEADER_RE = re.compile(
|
|
49
|
-
r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Proxy-)?Authorization\s*:\s*).+$"
|
|
50
|
-
)
|
|
51
|
-
SECRET_KEY = (
|
|
52
|
-
r"[A-Za-z0-9_.-]*(?:api[_-]?key|apikey|token|secret|password|passwd|pwd|"
|
|
53
|
-
r"private[_-]?key|access[_-]?key|client[_-]?secret)[A-Za-z0-9_.-]*"
|
|
54
|
-
r"|AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|"
|
|
55
|
-
r"GOOGLE_APPLICATION_CREDENTIALS|AZURE_CLIENT_SECRET"
|
|
56
|
-
)
|
|
57
|
-
INLINE_QUOTED_SECRET_ASSIGNMENT_RE = re.compile(
|
|
58
|
-
rf"(?i)(?P<lead>^|[\s;{{\[,])"
|
|
59
|
-
rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
60
|
-
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
|
|
61
|
-
rf"(?P<quote>[\"'])(?P<value>(?:\\.|(?!(?P=quote)).)*)(?P=quote)(?P<tail>[^\s,;}}\]]*)"
|
|
62
|
-
)
|
|
63
|
-
INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE = re.compile(
|
|
64
|
-
rf"(?i)(?P<lead>^|[\s;{{\[,])"
|
|
65
|
-
rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
66
|
-
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
|
|
67
|
-
rf"(?P<value>[^\s,;}}\]]+)"
|
|
68
|
-
)
|
|
69
|
-
URL_LIKE_RE = re.compile(r"\b[A-Za-z][A-Za-z0-9+.-]*://[^\s]+")
|
|
70
|
-
URL_SECRET_PARAM_RE = re.compile(rf"(?i)([?&#;](?:{SECRET_KEY})=)[^\s?&#;]+")
|
|
71
|
-
SAFE_UNQUOTED_VALUES = {
|
|
72
|
-
"[redacted]",
|
|
73
|
-
"false",
|
|
74
|
-
"none",
|
|
75
|
-
"null",
|
|
76
|
-
"os.getenv",
|
|
77
|
-
"process.env",
|
|
78
|
-
"true",
|
|
79
|
-
"undefined",
|
|
80
|
-
}
|
|
81
|
-
IDENTIFIER_CHAIN_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)+$")
|
|
82
|
-
INLINE_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
|
|
83
|
-
(re.compile(r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
|
|
84
|
-
(re.compile(r"(?i)\bBasic\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
|
|
85
|
-
(re.compile(rf"(?i)([?&#](?:{SECRET_KEY})=)[^\s&#;]+"), r"\1[REDACTED]"),
|
|
86
|
-
(re.compile(r"(?i)(--(?:api[_-]?key|token|secret|password|client[_-]?secret)\s+)\S+"), r"\1[REDACTED]"),
|
|
87
|
-
(re.compile(r"(?i)(--(?:api[_-]?key|token|secret|password|client[_-]?secret)=)\S+"), r"\1[REDACTED]"),
|
|
88
|
-
(re.compile(r"(?i)((?:-p|-u|--user)\s+)\S+:\S+"), r"\1[REDACTED]"),
|
|
89
|
-
(re.compile(r"gh[pousr]_[A-Za-z0-9_]{20,}"), "[REDACTED]"),
|
|
90
|
-
(re.compile(r"github_pat_[A-Za-z0-9_]{20,}"), "[REDACTED]"),
|
|
91
|
-
(re.compile(r"glpat-[A-Za-z0-9_-]{12,}"), "[REDACTED]"),
|
|
92
|
-
(re.compile(r"xox[abprs]-[A-Za-z0-9-]{10,}"), "[REDACTED]"),
|
|
93
|
-
(re.compile(r"(?:AKIA|ASIA)[0-9A-Z]{16}"), "[REDACTED]"),
|
|
94
|
-
(re.compile(r"(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{16,}"), "[REDACTED]"),
|
|
95
|
-
(re.compile(r"sk-(?:ant|proj)-[A-Za-z0-9_-]{12,}"), "[REDACTED]"),
|
|
96
|
-
(re.compile(r"sk-[A-Za-z0-9][A-Za-z0-9_-]{20,}"), "[REDACTED]"),
|
|
97
|
-
(re.compile(r"npm_[A-Za-z0-9]{20,}"), "[REDACTED]"),
|
|
98
|
-
(re.compile(r"AIza[0-9A-Za-z_\-]{20,}"), "[REDACTED]"),
|
|
99
|
-
(re.compile(r"SG\.[A-Za-z0-9_-]{16,}\.[A-Za-z0-9_-]{16,}"), "[REDACTED]"),
|
|
100
|
-
(re.compile(r"eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"), "[REDACTED]"),
|
|
101
|
-
(re.compile(r"([a-z][a-z0-9+.-]*://)[^/\s:@]+:[^/\s@]+@", re.IGNORECASE), r"\1[REDACTED]@"),
|
|
102
|
-
)
|
|
103
|
-
ANCHOR_RE = re.compile(
|
|
104
|
-
r"^(?:diff --git |index [0-9a-f]|--- |\+\+\+ |@@ |Binary files |(?:[^:\n]+):\d+(?::\d+)?:)",
|
|
105
|
-
re.IGNORECASE,
|
|
106
|
-
)
|
|
107
|
-
SECRET_WORD_RE = re.compile(r"(?i)\b(api[_-]?key|token|secret|password|private[_-]?key|client[_-]?secret)\b")
|
|
108
|
-
MAX_LINES_LIMIT = 5_000
|
|
109
|
-
MAX_CHARS_LIMIT = 1_000_000
|
|
110
|
-
MAX_LINE_CHARS_LIMIT = 100_000
|
|
111
|
-
MAX_SECTION_LINES_LIMIT = 2_000
|
|
112
|
-
DEFAULT_TIMEOUT_SECONDS = 600
|
|
113
|
-
MAX_TIMEOUT_SECONDS = 86_400
|
|
114
|
-
TIMEOUT_EXIT_CODE = 124
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
    """Coerce *value* to an int and clamp it into ``[minimum, maximum]``.

    When ``int(value)`` raises ``TypeError``, ``ValueError`` or
    ``OverflowError`` the *default* is returned as given (it is not clamped).
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    # Apply the lower bound first, then the upper bound, so the upper bound
    # wins if the two bounds are ever inconsistent.
    if parsed < minimum:
        parsed = minimum
    if parsed > maximum:
        parsed = maximum
    return parsed
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
def normalize_budgets(args: argparse.Namespace) -> None:
    """Clamp every numeric budget on *args* in place.

    Each attribute is replaced by ``bounded_int(current, fallback, low, high)``,
    so unparsable values fall back to the listed default and out-of-range
    values are pulled to the nearest bound.
    """
    # (attribute, fallback, lowest allowed, highest allowed)
    budget_table = (
        ("max_lines", 240, 1, MAX_LINES_LIMIT),
        ("max_chars", 24000, 1, MAX_CHARS_LIMIT),
        ("max_line_chars", 3000, 1, MAX_LINE_CHARS_LIMIT),
        ("head_lines", 50, 0, MAX_SECTION_LINES_LIMIT),
        ("tail_lines", 90, 0, MAX_SECTION_LINES_LIMIT),
        ("anchor_lines", 80, 0, MAX_SECTION_LINES_LIMIT),
        ("timeout_seconds", DEFAULT_TIMEOUT_SECONDS, 1, MAX_TIMEOUT_SECONDS),
    )
    for attribute, fallback, lowest, highest in budget_table:
        current = getattr(args, attribute)
        setattr(args, attribute, bounded_int(current, fallback, lowest, highest))
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
def strip_ansi(text: str) -> str:
    """Return *text* with every ``TERMINAL_CONTROL_RE`` match removed."""
    cleaned = TERMINAL_CONTROL_RE.sub("", text)
    return cleaned
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
def stable_hash(value: str, length: int = 12) -> str:
    """Return the first *length* hex characters of the SHA-256 of *value*.

    The text is encoded as UTF-8 with ``errors="replace"`` so unencodable
    characters cannot raise.
    """
    encoded = value.encode("utf-8", errors="replace")
    digest = hashlib.sha256(encoded).hexdigest()
    return digest[:length]
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
def anonymize_absolute_paths(text: str) -> str:
    """Replace absolute paths in *text* with ``<basename>#path:<hash>``.

    POSIX-style matches (``ABSOLUTE_PATH_RE``) are rewritten first, then
    Windows-style matches (``WINDOWS_PATH_RE``). The character captured as
    ``prefix`` in front of each path is kept.
    """

    def _mask(found: re.Match[str]) -> str:
        raw_path = found.group("path")
        # Backslashes are normalised only to extract the leaf name; the hash
        # is computed over the path exactly as it appeared.
        leaf = PurePosixPath(raw_path.replace("\\", "/")).name
        if not leaf:
            leaf = "path"
        return "{}{}#path:{}".format(found.group("prefix"), leaf, stable_hash(raw_path))

    for pattern in (ABSOLUTE_PATH_RE, WINDOWS_PATH_RE):
        text = pattern.sub(_mask, text)
    return text
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
def cap_line(line: str, max_line_chars: int) -> tuple[str, bool]:
    """Shorten *line* to roughly *max_line_chars* characters.

    Returns ``(text, was_capped)``. A non-positive budget disables capping.
    When capping, the kept prefix is followed by a marker carrying the
    original body length, and a trailing newline is preserved. The marker is
    always emitted in full, so for very small budgets the result can be
    longer than *max_line_chars*.
    """
    within_budget = max_line_chars <= 0 or len(line) <= max_line_chars
    if within_budget:
        return line, False

    if line.endswith("\n"):
        terminator, content = "\n", line[:-1]
    else:
        terminator, content = "", line

    notice = "...[line trimmed: {} chars]".format(len(content))
    room = max_line_chars - len(notice) - len(terminator)
    if room < 0:
        room = 0
    return content[:room] + notice + terminator, True
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
def should_redact_unquoted_secret_value(line: str, match: re.Match[str]) -> bool:
    """Decide whether the unquoted ``value`` group of *match* looks like a literal secret.

    Returns False for empty values, values listed in ``SAFE_UNQUOTED_VALUES``
    (compared case-insensitively), dotted identifier chains, and anything that
    looks like a call or expression (brackets inside the value or directly
    after it in *line*). Everything else is treated as a secret.
    """
    candidate = match.group("value").strip()
    if not candidate or candidate.lower() in SAFE_UNQUOTED_VALUES:
        return False
    if IDENTIFIER_CHAIN_RE.match(candidate) is not None:
        return False

    value_end = match.end("value")
    followed_by_bracket = value_end < len(line) and line[value_end] in "([{"
    if followed_by_bracket:
        # Likely a function call or expression (`api_key = os.getenv(...)`);
        # keep it so the code flow stays readable.
        return False

    return not any(bracket in candidate for bracket in "()[]{}")
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
def redact_url_like_secret_params(line: str) -> tuple[str, bool]:
    """Redact secret-named parameters inside URL-like tokens of *line*.

    Every ``URL_LIKE_RE`` match is passed through ``URL_SECRET_PARAM_RE``,
    replacing the parameter value with ``[REDACTED]``. Returns the rewritten
    line and whether at least one value was replaced.
    """
    replacements = 0

    def _scrub(url_match: re.Match[str]) -> str:
        nonlocal replacements
        cleaned, replaced = URL_SECRET_PARAM_RE.subn(r"\1[REDACTED]", url_match.group(0))
        replacements += replaced
        return cleaned

    rewritten = URL_LIKE_RE.sub(_scrub, line)
    return rewritten, replacements > 0
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
def redact_secret_assignments(line: str) -> tuple[str, bool]:
    """Redact values of secret-named assignments found on a single line.

    Runs three passes in order: URL parameters, quoted assignments, then
    unquoted assignments. Unquoted values are only replaced when
    ``should_redact_unquoted_secret_value`` approves. Returns the rewritten
    line and whether any pass redacted something.
    """
    line, url_redacted = redact_url_like_secret_params(line)
    state = {"redacted": url_redacted}

    def _mask_quoted(found: re.Match[str]) -> str:
        state["redacted"] = True
        quote = found.group("quote")
        # Anything captured as ``tail`` after the closing quote is dropped.
        return "".join((found.group("lead"), found.group("prefix"), quote, "[REDACTED]", quote))

    def _mask_unquoted(found: re.Match[str]) -> str:
        # ``found.string`` is the text this pass is scanning, i.e. the line
        # after the quoted pass, so the match offsets line up with it.
        if should_redact_unquoted_secret_value(found.string, found):
            state["redacted"] = True
            return found.group("lead") + found.group("prefix") + "[REDACTED]"
        return found.group(0)

    after_quoted = INLINE_QUOTED_SECRET_ASSIGNMENT_RE.sub(_mask_quoted, line)
    after_unquoted = INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE.sub(_mask_unquoted, after_quoted)
    return after_unquoted, state["redacted"]
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
MULTILINE_SECRET_ASSIGNMENT_RE = re.compile(
|
|
222
|
-
rf"(?i)(?:^|[\s;{{\[,])(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
223
|
-
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*(?P<quote>[\"'])"
|
|
224
|
-
)
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
def find_unescaped_quote_end(text: str, quote: str, start: int = 0) -> int | None:
    """Return the index just past the first unescaped *quote* at or after *start*.

    A backslash escapes the single character that follows it. Returns None
    when no unescaped delimiter is found.
    """
    skip_next = False
    for offset, char in enumerate(text[start:]):
        if skip_next:
            # This character was escaped by the preceding backslash.
            skip_next = False
        elif char == "\\":
            skip_next = True
        elif char == quote:
            return start + offset + 1
    return None
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
def has_unescaped_quote(text: str, quote: str, start: int = 0) -> bool:
    """Report whether *text* holds an unescaped *quote* at or after *start*."""
    closing = find_unescaped_quote_end(text, quote, start)
    return closing is not None
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
def detect_multiline_secret_assignment(line: str) -> str | None:
    """Return the opening quote of a secret assignment left unterminated on *line*.

    Each ``MULTILINE_SECRET_ASSIGNMENT_RE`` match is checked in order; the
    first one whose quote has no unescaped closing partner later on the line
    wins. Returns None when every quoted secret value closes on this line.
    """
    openings = (
        (found.group("quote"), found.end("quote"))
        for found in MULTILINE_SECRET_ASSIGNMENT_RE.finditer(line)
    )
    return next(
        (quote for quote, after in openings if not has_unescaped_quote(line, quote, after)),
        None,
    )
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def private_key_state_after_line(line: str) -> bool | None:
    """Compute the private-key block state implied by *line*.

    Returns True when a BEGIN marker appears without an END marker on the
    same line, False when an END marker appears (with or without BEGIN), and
    None when the line contains neither marker.
    """
    has_end = PRIVATE_KEY_END_RE.search(line) is not None
    if PRIVATE_KEY_BEGIN_RE.search(line) is not None:
        return not has_end
    return False if has_end else None
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
def secret_or_private_key_redaction_label(line: str) -> str:
    """Pick the placeholder for a fully redacted line.

    Lines carrying a private-key BEGIN or END marker get the private-key
    label; every other line gets the multiline-secret label.
    """
    markers = (PRIVATE_KEY_BEGIN_RE, PRIVATE_KEY_END_RE)
    if any(marker.search(line) for marker in markers):
        return "[REDACTED PRIVATE KEY BLOCK]\n"
    return "[REDACTED MULTILINE SECRET]\n"
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
class LineSanitizer:
    """Stateful line-by-line redactor.

    State carried between lines:
      * ``in_private_key_block`` - a private-key BEGIN marker was seen and
        its END marker has not been seen yet.
      * ``multiline_secret_quote`` - quote character of a secret assignment
        whose value has not been closed yet, or None.
      * ``redactions`` - number of lines reported as redacted so far.
    """

    def __init__(self, *, show_paths: bool = False) -> None:
        self.show_paths = show_paths
        self.in_private_key_block = False
        self.multiline_secret_quote: str | None = None
        self.redactions = 0

    def sanitize(self, raw_line: str) -> tuple[str, bool]:
        """Sanitize one line and return ``(sanitized_line, was_redacted)``.

        Lines inside an open multiline secret or private-key block are
        replaced entirely by a placeholder; a leading ``+``/``-`` (first
        non-blank character) is kept in front of the placeholder. All other
        lines go through the inline redaction passes.
        """
        line = strip_ansi(raw_line)
        first_char = line.lstrip()[:1]
        diff_prefix = first_char if first_char in ("+", "-") else ""

        if self.multiline_secret_quote is not None:
            return self._finish(diff_prefix + self._consume_multiline_secret_line(line), True)

        if self.in_private_key_block:
            return self._finish(diff_prefix + self._consume_private_key_line(line), True)

        opening_quote = detect_multiline_secret_assignment(line)
        if opening_quote is not None:
            # A quoted secret value starts here and continues on later lines.
            self.multiline_secret_quote = opening_quote
            key_state = private_key_state_after_line(line)
            if key_state is not None:
                self.in_private_key_block = key_state
            return self._finish(diff_prefix + secret_or_private_key_redaction_label(line), True)

        if PRIVATE_KEY_BEGIN_RE.search(line):
            if PRIVATE_KEY_END_RE.search(line) is None:
                self.in_private_key_block = True
            return self._finish(diff_prefix + "[REDACTED PRIVATE KEY BLOCK]\n", True)

        # Inline passes: auth headers, assignments, then token patterns.
        line, hits = AUTH_HEADER_RE.subn(r"\g<prefix>[REDACTED]", line)
        redacted = hits > 0

        line, assignment_redacted = redact_secret_assignments(line)
        redacted = redacted or assignment_redacted

        for pattern, replacement in INLINE_PATTERNS:
            line, hits = pattern.subn(replacement, line)
            redacted = redacted or hits > 0

        return self._finish(line, redacted)

    def _consume_multiline_secret_line(self, line: str) -> str:
        """Advance state for a line inside an open multiline secret; return its placeholder."""
        # The label is chosen from the state *before* this line updates it.
        inside_key = (
            self.in_private_key_block
            or PRIVATE_KEY_BEGIN_RE.search(line) is not None
            or PRIVATE_KEY_END_RE.search(line) is not None
        )
        label = "[REDACTED PRIVATE KEY BLOCK]\n" if inside_key else "[REDACTED MULTILINE SECRET]\n"

        key_state = private_key_state_after_line(line)
        if key_state is not None:
            self.in_private_key_block = key_state

        closing_index = find_unescaped_quote_end(line, self.multiline_secret_quote)
        if closing_index is not None:
            # The value closed on this line; the remainder may open another one.
            self.multiline_secret_quote = detect_multiline_secret_assignment(line[closing_index:])
        return label

    def _consume_private_key_line(self, line: str) -> str:
        """Advance state for a line inside a private-key block; return its placeholder."""
        opening_quote = detect_multiline_secret_assignment(line)
        if opening_quote is not None:
            self.multiline_secret_quote = opening_quote
        if PRIVATE_KEY_END_RE.search(line):
            self.in_private_key_block = False
        return "[REDACTED PRIVATE KEY BLOCK]\n"

    def _finish(self, line: str, redacted: bool) -> tuple[str, bool]:
        """Count the redaction and anonymize paths unless ``show_paths`` is set."""
        self.redactions += 1 if redacted else 0
        output = line if self.show_paths else anonymize_absolute_paths(line)
        return output, redacted
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
class BoundedOutput:
    """Accumulate sanitized lines and render them within line/char budgets.

    While the stream stays within ``max_lines`` and ``max_chars`` the full
    text is kept and rendered verbatim. Once either budget is exceeded the
    output switches to a summary built from three bounded buffers: the first
    lines (head), de-duplicated "anchor" lines, and the most recent lines
    (tail).
    """

    def __init__(
        self,
        *,
        max_lines: int,
        max_chars: int,
        max_line_chars: int,
        head_lines: int,
        tail_lines: int,
        anchor_lines: int,
    ) -> None:
        self.max_lines = max_lines
        self.max_chars = max_chars
        self.max_line_chars = max_line_chars
        self.head_limit = max(0, head_lines)
        # deque(maxlen=...) silently drops the oldest line once full.
        self.tail = collections.deque(maxlen=max(0, tail_lines))
        self.anchor_limit = max(0, anchor_lines)
        self.head: list[str] = []
        self.anchors: list[str] = []
        self.anchor_seen: set[str] = set()
        self.full: list[str] = []
        self.line_count = 0
        self.raw_chars = 0
        self.visible_chars = 0
        self.line_caps = 0
        self.trimmed = False

    def add(self, raw_line: str, sanitized_line: str, *, redacted: bool) -> None:
        """Record one line: update counters and the head/tail/anchor buffers."""
        self.line_count += 1
        self.raw_chars += len(raw_line)
        capped, was_capped = cap_line(sanitized_line, self.max_line_chars)
        if was_capped:
            self.line_caps += 1
        self.visible_chars += len(capped)

        if len(self.head) < self.head_limit:
            self.head.append(capped)
        self.tail.append(capped)
        if self._is_anchor(capped, redacted):
            key = capped.rstrip("\n")
            if key not in self.anchor_seen and len(self.anchors) < self.anchor_limit:
                self.anchor_seen.add(key)
                self.anchors.append(capped)

        if not self.trimmed:
            self.full.append(capped)
            over_lines = self.max_lines > 0 and self.line_count > self.max_lines
            over_chars = self.max_chars > 0 and self.visible_chars > self.max_chars
            if over_lines or over_chars:
                self.trimmed = True

    def _is_anchor(self, line: str, redacted: bool) -> bool:
        """A line is an anchor if it was redacted or matches the anchor/secret-word patterns."""
        return redacted or bool(ANCHOR_RE.search(line)) or bool(SECRET_WORD_RE.search(line))

    @staticmethod
    def _terminated(line: str) -> str:
        """Return *line* with a trailing newline, adding one only if missing."""
        return line if line.endswith("\n") else line + "\n"

    def render(self, redactions: int) -> str:
        """Return the full text, or a head/anchors/tail summary once trimmed.

        The summary starts with a statistics header that reports *redactions*
        as ``redacted_lines``. If the summary itself exceeds ``max_chars`` it
        is cut and a cap marker is appended.
        """
        if not self.trimmed:
            return "".join(self.full)

        lines_budget = self.max_lines if self.max_lines > 0 else 240
        # Reserve a few lines for the statistics and section headers.
        remaining = max(0, lines_budget - 8)
        head_n = min(len(self.head), max(1, remaining // 3) if remaining else 0)
        anchor_n = min(len(self.anchors), max(0, remaining // 3))
        # The head shows stream lines [0, head_n) and the tail shows the last
        # tail_n lines, so cap tail_n at the lines the head has not already
        # shown. Without this, a short stream trimmed by the character budget
        # is rendered twice.
        tail_n = min(
            len(self.tail),
            max(0, remaining - head_n - anchor_n),
            max(0, self.line_count - head_n),
        )

        rendered: list[str] = [
            (
                "[context-guard-kit] sanitized output trimmed: "
                f"lines={self.line_count} raw_chars={self.raw_chars} "
                f"sanitized_chars={self.visible_chars} redacted_lines={redactions} "
                f"line_caps={self.line_caps}\n"
            )
        ]
        if head_n:
            rendered.append(f"--- head ({head_n} lines) ---\n")
            # The final stream line may lack a newline; terminate it so the
            # next section header starts on its own line.
            rendered.extend(self._terminated(line) for line in self.head[:head_n])
        if anchor_n:
            rendered.append(f"--- grep/diff/security anchors ({anchor_n} lines) ---\n")
            rendered.extend(self._terminated(line) for line in self.anchors[:anchor_n])
        if tail_n:
            rendered.append(f"--- tail ({tail_n} lines) ---\n")
            rendered.extend(list(self.tail)[-tail_n:])
        text = "".join(rendered)
        if self.max_chars > 0 and len(text) > self.max_chars:
            marker = f"\n[context-guard-kit] rendered sanitized summary capped: {len(text)} chars\n"
            keep = max(0, self.max_chars - len(marker))
            text = text[:keep].rstrip() + marker
        return text
|
|
433
|
-
|
|
434
|
-
|
|
435
|
-
def sanitize_stream(stream: Iterable[str], args: argparse.Namespace) -> tuple[str, int, int]:
    """Sanitize every line of *stream* and render the bounded result.

    Returns ``(rendered_text, redacted_line_count, total_line_count)``.
    """
    sanitizer = LineSanitizer(show_paths=args.show_paths)
    budget_names = (
        "max_lines",
        "max_chars",
        "max_line_chars",
        "head_lines",
        "tail_lines",
        "anchor_lines",
    )
    bounded = BoundedOutput(**{name: getattr(args, name) for name in budget_names})

    for raw_line in stream:
        clean_line, was_redacted = sanitizer.sanitize(raw_line)
        bounded.add(raw_line, clean_line, redacted=was_redacted)

    rendered = bounded.render(sanitizer.redactions)
    return rendered, sanitizer.redactions, bounded.line_count
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
_STREAM_END = object()
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
def process_group_exists(pgid: int) -> bool:
    """Probe process group *pgid* with signal 0.

    Returns True when the probe succeeds or is refused with
    ``PermissionError`` (the group exists but may not be signalled), and
    False for ``ProcessLookupError`` or any other ``OSError``.
    """
    try:
        os.killpg(pgid, 0)
    except PermissionError:
        return True
    except OSError:
        # Covers ProcessLookupError (no such group) and other failures.
        return False
    return True
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
def terminate_process_tree(
    proc: subprocess.Popen[str],
    *,
    process_group_id: int | None = None,
    include_exited_group: bool = False,
) -> None:
    """Best-effort termination of *proc* and, off Windows, its process group.

    Off Windows: send SIGTERM to the group, poll for up to about two seconds
    for the group to disappear, then send SIGKILL.
    On Windows: terminate *proc*, wait up to two seconds, then kill it.

    Args:
        proc: The wrapped child process.
        process_group_id: Group to signal; defaults to ``proc.pid``.
        include_exited_group: When True, signal the group even if *proc*
            itself has already exited (descendants may still be running).

    This function never raises for a group that is gone or that may not be
    signalled: both ``ProcessLookupError`` and ``PermissionError`` from
    ``os.killpg`` end the attempt quietly.
    """
    if os.name != "nt":
        pgid = process_group_id if process_group_id is not None else proc.pid
        if proc.poll() is not None and not include_exited_group:
            return
        try:
            os.killpg(pgid, signal.SIGTERM)
        except (ProcessLookupError, PermissionError):
            # Nothing left to signal, or nothing we are allowed to signal.
            # Escalating to SIGKILL could not succeed either.
            return
        deadline = time.monotonic() + 2
        while time.monotonic() < deadline:
            if proc.poll() is None:
                # Waiting on the direct child also reaps it once it exits.
                try:
                    proc.wait(timeout=0.05)
                except subprocess.TimeoutExpired:
                    pass
            if not process_group_exists(pgid):
                return
            time.sleep(0.05)
        try:
            os.killpg(pgid, signal.SIGKILL)
        except (ProcessLookupError, PermissionError):
            pass
        return

    # Windows: no process groups here, act on the direct child only.
    if proc.poll() is not None:
        return
    try:
        proc.terminate()
    except ProcessLookupError:
        return
    except OSError:
        try:
            proc.kill()
        except OSError:
            return
    try:
        proc.wait(timeout=2)
        return
    except subprocess.TimeoutExpired:
        pass
    try:
        proc.kill()
    except OSError:
        # Includes ProcessLookupError: the process is already gone.
        return
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
class TimedCommandStream:
    """Iterate a child's stdout lines while enforcing a wall-clock deadline.

    A daemon thread copies lines from *stdout* into a bounded queue; iteration
    drains the queue and, once the deadline passes while the pipe is still
    open, terminates the process tree and yields a single timeout notice.
    """

    def __init__(
        self,
        proc: subprocess.Popen[str],
        stdout: TextIO,
        *,
        timeout_seconds: int,
        process_group_id: int | None = None,
    ) -> None:
        self.proc = proc
        self.timeout_seconds = timeout_seconds
        self.process_group_id = process_group_id
        self.deadline = time.monotonic() + timeout_seconds
        self.timed_out = False
        self.timeout_reported = False
        self._stream_closed = False
        self._queue: queue.Queue[str | object] = queue.Queue(maxsize=1024)
        self._thread = threading.Thread(target=self._read_stdout, args=(stdout,), daemon=True)
        self._thread.start()

    def _read_stdout(self, stdout: TextIO) -> None:
        """Reader-thread body: forward every line, then signal end of stream."""
        try:
            for line in stdout:
                self._queue.put(line)
        finally:
            # Set the flag before queueing the sentinel so the consumer never
            # mistakes a finished stream for a stalled one.
            self._stream_closed = True
            self._queue.put(_STREAM_END)

    def timeout_message(self) -> str:
        """Return the notice line describing the timeout."""
        return (
            f"[context-guard-kit] command timed out after {self.timeout_seconds}s; "
            "terminated wrapped process\n"
        )

    def _mark_timed_out(self) -> None:
        """Flag the timeout once and terminate the wrapped process tree."""
        if self.timed_out:
            return
        self.timed_out = True
        terminate_process_tree(
            self.proc,
            process_group_id=self.process_group_id,
            include_exited_group=True,
        )

    def _timeout_line(self) -> str:
        """Trigger the timeout handling and return the notice to yield."""
        self._mark_timed_out()
        self.timeout_reported = True
        return self.timeout_message()

    def __iter__(self) -> Iterator[str]:
        while True:
            remaining = self.deadline - time.monotonic()
            if self.proc.poll() is not None or self.timed_out:
                wait_time = 0.05
            else:
                wait_time = min(0.05, max(0.0, remaining))

            try:
                item = self._queue.get(timeout=wait_time)
            except queue.Empty:
                deadline_hit = remaining <= 0 and not self._stream_closed
                if not deadline_hit:
                    continue
                if not self.timeout_reported:
                    yield self._timeout_line()
                return

            if item is _STREAM_END:
                return
            if isinstance(item, str):
                yield item
                # Re-check after handing a line out: the consumer may have
                # taken long enough for the deadline to pass.
                overdue = not self._stream_closed and time.monotonic() >= self.deadline
                if overdue:
                    if not self.timeout_reported:
                        yield self._timeout_line()
                    return

    def returncode(self) -> int:
        """Return the child's exit status, or ``TIMEOUT_EXIT_CODE`` on timeout.

        If the child is still running, wait for it only until the deadline.
        """
        if self.timed_out:
            return TIMEOUT_EXIT_CODE
        time_left = max(0.0, self.deadline - time.monotonic())
        try:
            status = self.proc.wait(timeout=time_left)
        except subprocess.TimeoutExpired:
            self._mark_timed_out()
            return TIMEOUT_EXIT_CODE
        return status
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
def process_group_id_for(proc: subprocess.Popen[str]) -> int | None:
    """Return the process group id of *proc*, or None on Windows."""
    if os.name == "nt":
        return None
    try:
        group_id = os.getpgid(proc.pid)
    except ProcessLookupError:
        # start_new_session=True makes the child the group leader; if it exits
        # before getpgid(), the group id is still the leader pid while inherited
        # stdout descendants remain alive.
        group_id = proc.pid
    return group_id
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
def run_command(
    command: list[str],
    timeout_seconds: int,
) -> tuple[Iterable[str], subprocess.Popen[str] | None, int | None]:
    """Start *command* with stderr merged into stdout and wrap its output.

    Returns ``(stream, proc, early_rc)``:
      * launch failure: ``([], None, 127)`` after reporting on stderr;
      * missing stdout pipe: ``([], proc, 1)`` after reporting on stderr;
      * success: ``(TimedCommandStream, proc, None)``.
    """
    # Off Windows the child gets its own session so the whole group can be
    # signalled on timeout.
    session_kwargs: dict[str, object] = {"start_new_session": True} if os.name != "nt" else {}
    try:
        proc = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
            errors="replace",
            **session_kwargs,
        )
    except OSError as exc:
        print(f"context-guard-sanitize-output: command failed to start: {exc}", file=sys.stderr)
        return [], None, 127

    pipe = proc.stdout
    if pipe is None:
        print("context-guard-sanitize-output: subprocess produced no stdout pipe", file=sys.stderr)
        return [], proc, 1

    timed_stream = TimedCommandStream(
        proc,
        pipe,
        timeout_seconds=timeout_seconds,
        process_group_id=process_group_id_for(proc),
    )
    return timed_stream, proc, None
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
def stdin_has_data(stdin: TextIO) -> bool:
    """Treat *stdin* as carrying data whenever it is not attached to a terminal."""
    if stdin.isatty():
        return False
    return True
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the sanitizer.

    Everything after the recognised options is collected into ``command``.
    """
    parser = argparse.ArgumentParser(
        description="Redact secrets and budget grep/diff/log output before sending it to Claude."
    )

    # Plain integer budgets: (flag, default).
    integer_budgets = (
        ("--max-lines", 240),
        ("--max-chars", 24000),
        ("--max-line-chars", 3000),
        ("--head-lines", 50),
        ("--tail-lines", 90),
        ("--anchor-lines", 80),
    )
    for flag, default in integer_budgets:
        parser.add_argument(flag, type=int, default=default)

    timeout_help = (
        "maximum runtime for wrapped commands before terminating the process group "
        f"(default: {DEFAULT_TIMEOUT_SECONDS}, max: {MAX_TIMEOUT_SECONDS})"
    )
    parser.add_argument(
        "--timeout-seconds",
        type=int,
        default=DEFAULT_TIMEOUT_SECONDS,
        help=timeout_help,
    )
    parser.add_argument(
        "--show-paths",
        action="store_true",
        help="show raw absolute paths instead of basename#path:<hash>; local debugging only because private paths may be exposed",
    )
    parser.add_argument("command", nargs=argparse.REMAINDER)
    return parser
|
|
678
|
-
|
|
679
|
-
|
|
680
|
-
def main() -> int:
    """Entry point: sanitize a wrapped command's output, or piped stdin.

    Returns the process exit status: the wrapped command's status (or
    ``TIMEOUT_EXIT_CODE`` on timeout), an early failure code from
    ``run_command``, 2 when neither a command nor piped stdin is given, and
    0 after sanitizing stdin.
    """
    parser = build_parser()
    args = parser.parse_args()
    normalize_budgets(args)
    command = args.command
    if command and command[0] == "--":
        # argparse.REMAINDER keeps the separator; drop it.
        command = command[1:]

    proc: subprocess.Popen[str] | None = None
    command_stream: TimedCommandStream | None = None
    early_rc: int | None = None
    if command:
        stream, proc, early_rc = run_command(command, args.timeout_seconds)
        if isinstance(stream, TimedCommandStream):
            command_stream = stream
        if early_rc is not None and proc is None:
            return early_rc
    elif stdin_has_data(sys.stdin):
        # Piped logs may contain bytes that are not valid in the stdin
        # encoding. Decode them leniently, as run_command already does for
        # wrapped commands, instead of failing with UnicodeDecodeError.
        reconfigure = getattr(sys.stdin, "reconfigure", None)
        if callable(reconfigure):
            reconfigure(errors="replace")
        stream = sys.stdin
    else:
        print("context-guard-sanitize-output: missing command or stdin", file=sys.stderr)
        return 2

    output, _redactions, _line_count = sanitize_stream(stream, args)
    rc: int | None = None
    if proc is not None:
        rc = command_stream.returncode() if command_stream is not None else proc.wait()
        if command_stream is not None and command_stream.timed_out and not command_stream.timeout_reported:
            # The deadline passed while waiting for the exit status, after the
            # stream was already drained; append the notice now.
            timeout_line, _redacted = LineSanitizer(show_paths=args.show_paths).sanitize(
                command_stream.timeout_message()
            )
            command_stream.timeout_reported = True
            output = output + timeout_line

    if output:
        sys.stdout.write(output)
        if not output.endswith("\n"):
            sys.stdout.write("\n")

    if proc is not None:
        return early_rc if early_rc is not None else rc
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|