@ictechgy/context-guard 0.4.10 → 0.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -1
- package/README.ko.md +46 -28
- package/README.md +42 -33
- package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
- package/docs/benchmark-workflow-examples.md +3 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
- package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
- package/docs/experimental-benchmark-fixtures.md +24 -7
- package/package.json +2 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +14 -11
- package/plugins/context-guard/README.md +15 -14
- package/plugins/context-guard/bin/context-guard +48 -17
- package/plugins/context-guard/bin/context-guard-artifact +342 -33
- package/plugins/context-guard/bin/context-guard-audit +36 -5
- package/plugins/context-guard/bin/context-guard-bench +1675 -44
- package/plugins/context-guard/bin/context-guard-cache-score +347 -35
- package/plugins/context-guard/bin/context-guard-compress +89 -27
- package/plugins/context-guard/bin/context-guard-cost +7 -2
- package/plugins/context-guard/bin/context-guard-experiments +364 -8
- package/plugins/context-guard/bin/context-guard-failed-nudge +6 -2
- package/plugins/context-guard/bin/context-guard-filter +88 -18
- package/plugins/context-guard/bin/context-guard-pack +329 -19
- package/plugins/context-guard/bin/context-guard-read-symbol +27 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +245 -18
- package/plugins/context-guard/bin/context-guard-setup +21 -5
- package/plugins/context-guard/bin/context-guard-tool-prune +287 -62
- package/plugins/context-guard/bin/context-guard-trim-output +394 -90
- package/plugins/context-guard/brief/README.md +5 -5
- package/plugins/context-guard/lib/context_guard_command_manifest_loader.py +123 -0
- package/plugins/context-guard/lib/context_guard_commands.py +217 -190
|
@@ -11,6 +11,7 @@ import argparse
|
|
|
11
11
|
import ast
|
|
12
12
|
import errno
|
|
13
13
|
import hashlib
|
|
14
|
+
import importlib.machinery
|
|
14
15
|
import importlib.util
|
|
15
16
|
import json
|
|
16
17
|
import os
|
|
@@ -39,8 +40,27 @@ def _load_hook_secret_patterns():
|
|
|
39
40
|
raise ImportError("hook_secret_patterns.py not found in " + ", ".join(searched))
|
|
40
41
|
|
|
41
42
|
|
|
43
|
+
def _load_sanitize_output():
    """Load the sanitize-output helper module from ``SCRIPT_DIR``.

    Two candidate files are tried in order: ``sanitize_output.py`` and the
    extensionless ``context-guard-sanitize-output`` script. The first one
    that exists as a regular file and yields a module spec is executed and
    returned as a module object.

    Raises:
        ImportError: if no candidate could be loaded; the message lists
            every path that was tried.
    """
    candidates = (SCRIPT_DIR / "sanitize_output.py", SCRIPT_DIR / "context-guard-sanitize-output")
    attempted = []
    for candidate in candidates:
        attempted.append(str(candidate))
        if candidate.is_file():
            # An explicit SourceFileLoader lets the extensionless script
            # candidate be loaded as Python source.
            source_loader = importlib.machinery.SourceFileLoader("_claude_token_sanitize_output", str(candidate))
            module_spec = importlib.util.spec_from_loader(source_loader.name, source_loader)
            if module_spec is not None:
                loaded = importlib.util.module_from_spec(module_spec)
                source_loader.exec_module(loaded)
                return loaded
    raise ImportError("sanitize_output helper not found in " + ", ".join(attempted))
|
|
57
|
+
|
|
58
|
+
|
|
42
59
|
_hook_secret_patterns = _load_hook_secret_patterns()
|
|
60
|
+
_sanitize_output = _load_sanitize_output()
|
|
43
61
|
hook_label_has_sensitive_evidence = _hook_secret_patterns.hook_label_has_sensitive_evidence
|
|
62
|
+
redact_sensitive_hook_text = _hook_secret_patterns.redact_sensitive_hook_text
|
|
63
|
+
LineSanitizer = _sanitize_output.LineSanitizer
|
|
44
64
|
|
|
45
65
|
DEFAULT_CONTEXT_LINES = 3
|
|
46
66
|
DEFAULT_MAX_CHARS = 16_000
|
|
@@ -391,6 +411,11 @@ def strip_line_for_brace_count(line: str, in_block_comment: bool = False) -> tup
|
|
|
391
411
|
return "".join(output), in_block_comment
|
|
392
412
|
|
|
393
413
|
|
|
414
|
+
def redact_symbol_content(content: str) -> str:
    """Return ``content`` with every line passed through a ``LineSanitizer``.

    One sanitizer instance (created with ``show_paths=True``) handles all
    lines in order, so any state it keeps between lines carries across the
    whole slice. Only the sanitized text of each result is kept; the lines
    are re-joined without adding separators.
    """
    line_sanitizer = LineSanitizer(show_paths=True)
    cleaned = []
    for raw_line in content.splitlines(keepends=True):
        cleaned.append(line_sanitizer.sanitize(raw_line)[0])
    return "".join(cleaned)
|
|
417
|
+
|
|
418
|
+
|
|
394
419
|
def find_symbol_slice(path: Path, symbol: str, context: int, max_chars: int, show_paths: bool) -> SymbolSlice | None:
|
|
395
420
|
text, scan_truncated = read_text_bounded(path)
|
|
396
421
|
lines = text.splitlines(keepends=True)
|
|
@@ -409,6 +434,8 @@ def find_symbol_slice(path: Path, symbol: str, context: int, max_chars: int, sho
|
|
|
409
434
|
start_with_context = max(0, start - max(0, context))
|
|
410
435
|
end_with_context = min(len(lines), end + max(0, context))
|
|
411
436
|
content = "".join(lines[start_with_context:end_with_context])
|
|
437
|
+
content = redact_symbol_content(content)
|
|
438
|
+
content = redact_sensitive_hook_text(content, "[REDACTED]")
|
|
412
439
|
capped = False
|
|
413
440
|
if max_chars > 0 and len(content) > max_chars:
|
|
414
441
|
marker = f"\n[context-guard-kit] symbol slice capped: {len(content)} chars total\n"
|
|
@@ -8,6 +8,7 @@ keeps only bounded head/anchor/tail context when output is too large.
|
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
10
|
import argparse
|
|
11
|
+
import codecs
|
|
11
12
|
import collections
|
|
12
13
|
import hashlib
|
|
13
14
|
import os
|
|
@@ -19,7 +20,7 @@ import subprocess
|
|
|
19
20
|
import sys
|
|
20
21
|
import threading
|
|
21
22
|
import time
|
|
22
|
-
from typing import Iterable, Iterator, TextIO
|
|
23
|
+
from typing import BinaryIO, Iterable, Iterator, TextIO
|
|
23
24
|
|
|
24
25
|
TERMINAL_CONTROL_RE = re.compile(
|
|
25
26
|
r"(?:"
|
|
@@ -48,9 +49,17 @@ PRIVATE_KEY_END_RE = re.compile(
|
|
|
48
49
|
AUTH_HEADER_RE = re.compile(
|
|
49
50
|
r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Proxy-)?Authorization\s*:\s*).+$"
|
|
50
51
|
)
|
|
52
|
+
COOKIE_HEADER_RE = re.compile(
|
|
53
|
+
r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Set-)?Cookie\s*:\s*).+$"
|
|
54
|
+
)
|
|
55
|
+
SESSION_SECRET_KEY = (
|
|
56
|
+
r"(?:session(?:[_-]?(?:id|token))?|sessionid|sid|jsessionid|"
|
|
57
|
+
r"csrf(?:[_-]?token)?|xsrf(?:[_-]?token)?)"
|
|
58
|
+
)
|
|
51
59
|
SECRET_KEY = (
|
|
52
60
|
r"[A-Za-z0-9_.-]*(?:api[_-]?key|apikey|token|secret|password|passwd|pwd|"
|
|
53
61
|
r"private[_-]?key|access[_-]?key|client[_-]?secret)[A-Za-z0-9_.-]*"
|
|
62
|
+
rf"|{SESSION_SECRET_KEY}"
|
|
54
63
|
r"|AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|"
|
|
55
64
|
r"GOOGLE_APPLICATION_CREDENTIALS|AZURE_CLIENT_SECRET"
|
|
56
65
|
)
|
|
@@ -60,11 +69,48 @@ INLINE_QUOTED_SECRET_ASSIGNMENT_RE = re.compile(
|
|
|
60
69
|
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
|
|
61
70
|
rf"(?P<quote>[\"'])(?P<value>(?:\\.|(?!(?P=quote)).)*)(?P=quote)(?P<tail>[^\s,;}}\]]*)"
|
|
62
71
|
)
|
|
72
|
+
CODE_IDENTIFIER = r"[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)*"
|
|
73
|
+
CALL_ARGUMENT_CHUNK = r"(?:[^()\"'\n;]+|\"(?:\\.|[^\"\\])*\"|'(?:\\.|[^'\\])*'|\([^()]*\))*"
|
|
74
|
+
INLINE_UNQUOTED_CALL_SECRET_ASSIGNMENT_RE = re.compile(
|
|
75
|
+
rf"(?i)(?P<lead>^|[\s;{{\[,])"
|
|
76
|
+
rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
77
|
+
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
|
|
78
|
+
rf"(?P<value>(?![\"']){CODE_IDENTIFIER}\({CALL_ARGUMENT_CHUNK}\))"
|
|
79
|
+
)
|
|
80
|
+
SECRET_IDENTIFIER_PART = (
|
|
81
|
+
r"(?:[A-Za-z_$][A-Za-z0-9_$]*(?:api_?key|apikey|token|secret|password|passwd|pwd|"
|
|
82
|
+
r"private_?key|access_?key|client_?secret|sessionid|session_id|session_token|"
|
|
83
|
+
r"csrf_token|xsrf_token)[A-Za-z0-9_$]*|session|sid|csrf|xsrf)"
|
|
84
|
+
)
|
|
85
|
+
FALLBACK_SECRET_OPERAND = rf"(?:[A-Za-z_$][A-Za-z0-9_$]*\.)*{SECRET_IDENTIFIER_PART}"
|
|
86
|
+
INLINE_UNQUOTED_FALLBACK_SECRET_ASSIGNMENT_RE = re.compile(
|
|
87
|
+
rf"(?i)(?P<lead>^|[\s;{{\[,])"
|
|
88
|
+
rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
89
|
+
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
|
|
90
|
+
rf"(?P<value>(?![\"']|\[REDACTED\])"
|
|
91
|
+
rf"[^;\n]*?(?:\bor\b|\|\||\?\?|\belse\b|\?[^:\n;]*:)\s*"
|
|
92
|
+
rf"(?:[\"'](?:\\.|[^\"'\\])*[\"']|{FALLBACK_SECRET_OPERAND})[^;\n]*)"
|
|
93
|
+
)
|
|
94
|
+
INLINE_UNQUOTED_BRACKETED_SECRET_ASSIGNMENT_RE = re.compile(
|
|
95
|
+
rf"(?i)(?P<lead>^|[\s;{{\[,])"
|
|
96
|
+
rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
97
|
+
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
|
|
98
|
+
rf"(?P<value>(?![\"']|\[REDACTED\])"
|
|
99
|
+
rf"[^\s,;}}\]]*(?:\([^;\n]*?\)|\{{[^;\n]*?\}}|\[[^;\n]*?\])[^\s,;}}\]]*)"
|
|
100
|
+
)
|
|
63
101
|
INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE = re.compile(
|
|
64
102
|
rf"(?i)(?P<lead>^|[\s;{{\[,])"
|
|
65
103
|
rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
66
104
|
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
|
|
67
|
-
rf"(?P<value>[^\s,;}}\]]+)"
|
|
105
|
+
rf"(?P<value>(?![\"']|\[REDACTED\])[^\s,;}}\]]+)"
|
|
106
|
+
)
|
|
107
|
+
UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE = re.compile(
|
|
108
|
+
rf"(?i)(?:^|[\s;{{\[,])"
|
|
109
|
+
rf"(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
|
|
110
|
+
rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*(?P<value>(?![\"']).*)$"
|
|
111
|
+
)
|
|
112
|
+
CONTINUATION_OPERATOR_RE = re.compile(
|
|
113
|
+
r"(?i)(?:\\|\|\||&&|\?\?|[+*/%&|^?,]|\?|:|\bor\b|\band\b|\belse\b)\s*(?://.*|#.*)?$"
|
|
68
114
|
)
|
|
69
115
|
URL_LIKE_RE = re.compile(r"\b[A-Za-z][A-Za-z0-9+.-]*://[^\s]+")
|
|
70
116
|
URL_SECRET_PARAM_RE = re.compile(rf"(?i)([?&#;](?:{SECRET_KEY})=)[^\s?&#;]+")
|
|
@@ -79,6 +125,43 @@ SAFE_UNQUOTED_VALUES = {
|
|
|
79
125
|
"undefined",
|
|
80
126
|
}
|
|
81
127
|
IDENTIFIER_CHAIN_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)+$")
|
|
128
|
+
SAFE_ENV_LOOKUP_CALL_RE = re.compile(r"^(?:os\.getenv|os\.environ\.get)\(\s*[\"'][A-Za-z0-9_.-]{1,80}[\"']\s*\)$")
|
|
129
|
+
SAFE_RE_COMPILE_CALL_RE = re.compile(r"^re\.compile\([^;\n]*\)$")
|
|
130
|
+
SAFE_CODE_EXPRESSION_CALL_RE = re.compile(rf"^{CODE_IDENTIFIER}\(\s*(?:{CODE_IDENTIFIER}(?:\s*,\s*{CODE_IDENTIFIER})*)?\s*\)$")
|
|
131
|
+
GETTER_CALL_RE = re.compile(rf"^{CODE_IDENTIFIER}\.get\(\s*[\"'](?P<key>[A-Za-z0-9_.-]{{1,80}})[\"']\s*\)$")
|
|
132
|
+
CAMEL_ACRONYM_BOUNDARY_RE = re.compile(r"(?<=[A-Z])(?=[A-Z][a-z])")
|
|
133
|
+
CAMEL_WORD_BOUNDARY_RE = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")
|
|
134
|
+
SAFE_GETTER_KEY_NAMES = {
|
|
135
|
+
"access_key",
|
|
136
|
+
"access_token",
|
|
137
|
+
"api_key",
|
|
138
|
+
"apikey",
|
|
139
|
+
"auth",
|
|
140
|
+
"authorization",
|
|
141
|
+
"aws_access_key_id",
|
|
142
|
+
"aws_secret_access_key",
|
|
143
|
+
"aws_session_token",
|
|
144
|
+
"azure_client_secret",
|
|
145
|
+
"client_id",
|
|
146
|
+
"client_secret",
|
|
147
|
+
"cookie",
|
|
148
|
+
"credential",
|
|
149
|
+
"credentials",
|
|
150
|
+
"csrf",
|
|
151
|
+
"google_application_credentials",
|
|
152
|
+
"jwt",
|
|
153
|
+
"password",
|
|
154
|
+
"passwd",
|
|
155
|
+
"private_key",
|
|
156
|
+
"pwd",
|
|
157
|
+
"refresh_token",
|
|
158
|
+
"secret",
|
|
159
|
+
"session",
|
|
160
|
+
"session_id",
|
|
161
|
+
"sessionid",
|
|
162
|
+
"sid",
|
|
163
|
+
"token",
|
|
164
|
+
}
|
|
82
165
|
INLINE_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
|
|
83
166
|
(re.compile(r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
|
|
84
167
|
(re.compile(r"(?i)\bBasic\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
|
|
@@ -112,6 +195,9 @@ MAX_SECTION_LINES_LIMIT = 2_000
|
|
|
112
195
|
DEFAULT_TIMEOUT_SECONDS = 600
|
|
113
196
|
MAX_TIMEOUT_SECONDS = 86_400
|
|
114
197
|
TIMEOUT_EXIT_CODE = 124
|
|
198
|
+
COMMAND_READ_CHUNK_BYTES = 64 * 1024
|
|
199
|
+
COMMAND_MAX_UNTERMINATED_LINE_CHARS = 4_096
|
|
200
|
+
RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS = 1_024
|
|
115
201
|
|
|
116
202
|
|
|
117
203
|
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
|
|
@@ -167,20 +253,33 @@ def cap_line(line: str, max_line_chars: int) -> tuple[str, bool]:
|
|
|
167
253
|
return body[:keep] + marker + newline, True
|
|
168
254
|
|
|
169
255
|
|
|
256
|
+
def normalize_getter_key(key: str) -> str:
    """Convert a getter key to lower-case, underscore-separated form.

    Underscores are inserted at the camel-case boundaries matched by
    ``CAMEL_ACRONYM_BOUNDARY_RE`` and ``CAMEL_WORD_BOUNDARY_RE``; every run
    of ``_``, ``.`` or ``-`` then collapses to a single underscore, and
    leading/trailing underscores are stripped before lower-casing.
    """
    with_boundaries = CAMEL_WORD_BOUNDARY_RE.sub("_", CAMEL_ACRONYM_BOUNDARY_RE.sub("_", key))
    # Collapsing runs of "_", "." and "-" in one pass leaves no repeated
    # underscores behind, so no further squeezing is required.
    collapsed = re.sub(r"[_.-]+", "_", with_boundaries)
    return collapsed.strip("_").lower()
|
|
261
|
+
|
|
262
|
+
|
|
263
|
+
def is_safe_getter_key(key: str) -> bool:
    """Report whether the normalized form of ``key`` is in ``SAFE_GETTER_KEY_NAMES``."""
    canonical = normalize_getter_key(key)
    return canonical in SAFE_GETTER_KEY_NAMES
|
|
265
|
+
|
|
266
|
+
|
|
170
267
|
def should_redact_unquoted_secret_value(line: str, match: re.Match[str]) -> bool:
|
|
171
268
|
value = match.group("value").strip()
|
|
269
|
+
prefix = match.group("prefix")
|
|
172
270
|
if not value:
|
|
173
271
|
return False
|
|
174
272
|
if value.lower() in SAFE_UNQUOTED_VALUES:
|
|
175
273
|
return False
|
|
176
274
|
if IDENTIFIER_CHAIN_RE.match(value):
|
|
177
275
|
return False
|
|
178
|
-
|
|
179
|
-
if end < len(line) and line[end] in "([{":
|
|
180
|
-
# Likely a function call or expression (`api_key = os.getenv(...)`);
|
|
181
|
-
# preserve it so Claude can still reason about code flow.
|
|
276
|
+
if SAFE_ENV_LOOKUP_CALL_RE.match(value) or SAFE_RE_COMPILE_CALL_RE.match(value):
|
|
182
277
|
return False
|
|
183
|
-
|
|
278
|
+
getter_match = GETTER_CALL_RE.match(value)
|
|
279
|
+
if re.search(r"\s[:=]\s*$", prefix) and (
|
|
280
|
+
SAFE_CODE_EXPRESSION_CALL_RE.match(value)
|
|
281
|
+
or (getter_match is not None and is_safe_getter_key(getter_match.group("key")))
|
|
282
|
+
):
|
|
184
283
|
return False
|
|
185
284
|
return True
|
|
186
285
|
|
|
@@ -214,6 +313,9 @@ def redact_secret_assignments(line: str) -> tuple[str, bool]:
|
|
|
214
313
|
return f"{match.group('lead')}{match.group('prefix')}[REDACTED]"
|
|
215
314
|
|
|
216
315
|
line = INLINE_QUOTED_SECRET_ASSIGNMENT_RE.sub(quoted_repl, line)
|
|
316
|
+
line = INLINE_UNQUOTED_FALLBACK_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
|
|
317
|
+
line = INLINE_UNQUOTED_CALL_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
|
|
318
|
+
line = INLINE_UNQUOTED_BRACKETED_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
|
|
217
319
|
line = INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
|
|
218
320
|
return line, redacted
|
|
219
321
|
|
|
@@ -253,6 +355,54 @@ def detect_multiline_secret_assignment(line: str) -> str | None:
|
|
|
253
355
|
return None
|
|
254
356
|
|
|
255
357
|
|
|
358
|
+
def expression_bracket_delta(text: str) -> int:
    """Return opening minus closing bracket count for ``text``, ignoring quoted spans.

    ``(``, ``[`` and ``{`` each add one; ``)``, ``]`` and ``}`` each subtract
    one. Characters between matching single or double quotes are skipped, and
    a backslash inside a quoted span escapes the following character. The
    result may be negative.
    """
    openers = "([{"
    closers = ")}]"
    balance = 0
    active_quote = None
    skip_next = False
    for ch in text:
        if active_quote is None:
            # Outside any string literal: track quotes and brackets.
            if ch == "'" or ch == '"':
                active_quote = ch
            elif ch in openers:
                balance += 1
            elif ch in closers:
                balance -= 1
            continue
        # Inside a string literal: only look for the terminating quote.
        if skip_next:
            skip_next = False
        elif ch == "\\":
            skip_next = True
        elif ch == active_quote:
            active_quote = None
    return balance
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def ends_with_continuation_operator(text: str) -> bool:
    """Report whether ``text``, minus trailing whitespace, matches ``CONTINUATION_OPERATOR_RE``."""
    trimmed = text.rstrip()
    return CONTINUATION_OPERATOR_RE.search(trimmed) is not None
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def detect_multiline_secret_expression(line: str) -> int | None:
    """Detect an unquoted secret assignment whose value continues on later lines.

    Returns:
        ``None`` when ``line`` is not matched by
        ``UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE`` or its value looks
        complete on this line; otherwise the bracket depth still open after
        the line. The depth is ``0`` when the value is empty or the line only
        ends with a continuation operator.
    """
    found = UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE.search(line)
    if found is None:
        return None
    expression = found.group("value").strip()
    if expression == "":
        # Nothing after the assignment operator: the value starts on a later line.
        return 0
    open_brackets = expression_bracket_delta(expression)
    if open_brackets > 0:
        return open_brackets
    # Brackets are balanced (or over-closed); only a trailing operator keeps
    # the expression open, and in that case no bracket depth is pending.
    return 0 if ends_with_continuation_operator(expression) else None
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
def update_multiline_secret_expression_state(line: str, depth: int) -> int | None:
    """Advance the open-bracket depth of a multi-line secret expression by one line.

    Returns:
        The new depth (never negative) while the expression is still open,
        i.e. brackets remain unclosed or ``line`` ends with a continuation
        operator; ``None`` once the expression has ended on this line.
    """
    remaining = depth + expression_bracket_delta(line)
    if remaining < 0:
        remaining = 0
    if remaining > 0 or ends_with_continuation_operator(line):
        return remaining
    return None
|
|
404
|
+
|
|
405
|
+
|
|
256
406
|
def private_key_state_after_line(line: str) -> bool | None:
|
|
257
407
|
"""Return updated private-key state for a line, or None when no marker appears."""
|
|
258
408
|
if PRIVATE_KEY_BEGIN_RE.search(line):
|
|
@@ -273,6 +423,7 @@ class LineSanitizer:
|
|
|
273
423
|
self.show_paths = show_paths
|
|
274
424
|
self.in_private_key_block = False
|
|
275
425
|
self.multiline_secret_quote: str | None = None
|
|
426
|
+
self.multiline_secret_expression_depth: int | None = None
|
|
276
427
|
self.redactions = 0
|
|
277
428
|
|
|
278
429
|
def sanitize(self, raw_line: str) -> tuple[str, bool]:
|
|
@@ -305,6 +456,12 @@ class LineSanitizer:
|
|
|
305
456
|
self.in_private_key_block = False
|
|
306
457
|
return self._finish(diff_prefix + "[REDACTED PRIVATE KEY BLOCK]\n", redacted)
|
|
307
458
|
|
|
459
|
+
if self.multiline_secret_expression_depth is not None:
|
|
460
|
+
self.multiline_secret_expression_depth = update_multiline_secret_expression_state(
|
|
461
|
+
line, self.multiline_secret_expression_depth
|
|
462
|
+
)
|
|
463
|
+
return self._finish(diff_prefix + "[REDACTED MULTILINE SECRET]\n", True)
|
|
464
|
+
|
|
308
465
|
multiline_quote = detect_multiline_secret_assignment(line)
|
|
309
466
|
if multiline_quote is not None:
|
|
310
467
|
self.multiline_secret_quote = multiline_quote
|
|
@@ -319,11 +476,21 @@ class LineSanitizer:
|
|
|
319
476
|
self.in_private_key_block = True
|
|
320
477
|
return self._finish(diff_prefix + "[REDACTED PRIVATE KEY BLOCK]\n", redacted)
|
|
321
478
|
|
|
479
|
+
expression_depth = detect_multiline_secret_expression(line)
|
|
480
|
+
if expression_depth is not None:
|
|
481
|
+
self.multiline_secret_expression_depth = expression_depth
|
|
482
|
+
return self._finish(diff_prefix + "[REDACTED MULTILINE SECRET]\n", True)
|
|
483
|
+
|
|
322
484
|
new_line, count = AUTH_HEADER_RE.subn(r"\g<prefix>[REDACTED]", line)
|
|
323
485
|
if count:
|
|
324
486
|
redacted = True
|
|
325
487
|
line = new_line
|
|
326
488
|
|
|
489
|
+
new_line, count = COOKIE_HEADER_RE.subn(r"\g<prefix>[REDACTED]", line)
|
|
490
|
+
if count:
|
|
491
|
+
redacted = True
|
|
492
|
+
line = new_line
|
|
493
|
+
|
|
327
494
|
line, assignment_redacted = redact_secret_assignments(line)
|
|
328
495
|
if assignment_redacted:
|
|
329
496
|
redacted = True
|
|
@@ -520,14 +687,16 @@ def terminate_process_tree(
|
|
|
520
687
|
class TimedCommandStream:
|
|
521
688
|
def __init__(
|
|
522
689
|
self,
|
|
523
|
-
proc: subprocess.Popen[
|
|
524
|
-
stdout:
|
|
690
|
+
proc: subprocess.Popen[bytes],
|
|
691
|
+
stdout: BinaryIO,
|
|
525
692
|
*,
|
|
526
693
|
timeout_seconds: int,
|
|
694
|
+
max_line_chars: int = MAX_LINE_CHARS_LIMIT,
|
|
527
695
|
process_group_id: int | None = None,
|
|
528
696
|
) -> None:
|
|
529
697
|
self.proc = proc
|
|
530
698
|
self.timeout_seconds = timeout_seconds
|
|
699
|
+
self.max_unterminated_line_chars = max(1, max_line_chars)
|
|
531
700
|
self.process_group_id = process_group_id
|
|
532
701
|
self.deadline = time.monotonic() + timeout_seconds
|
|
533
702
|
self.timed_out = False
|
|
@@ -537,10 +706,62 @@ class TimedCommandStream:
|
|
|
537
706
|
self._thread = threading.Thread(target=self._read_stdout, args=(stdout,), daemon=True)
|
|
538
707
|
self._thread.start()
|
|
539
708
|
|
|
540
|
-
def
|
|
709
|
+
def _truncated_raw_line(self, text: str) -> str:
    """Build the replacement emitted for a raw line that exceeds the length cap.

    A holdback of up to ``RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS``
    characters is subtracted from ``self.max_unterminated_line_chars``; the
    remaining prefix of ``text`` is kept and a newline-terminated marker
    naming the cap and the withheld amount is appended.
    """
    limit = self.max_unterminated_line_chars
    withheld = min(RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS, limit)
    kept_prefix = text[: max(0, limit - withheld)]
    notice = (
        "...[context-guard-kit: raw line truncated before newline "
        f"after {limit} chars; "
        f"withheld {withheld} boundary chars for redaction safety]\n"
    )
    return kept_prefix + notice
|
|
720
|
+
|
|
721
|
+
def _read_stdout(self, stdout: BinaryIO) -> None:
|
|
722
|
+
decoder = codecs.getincrementaldecoder("utf-8")("replace")
|
|
723
|
+
pending = ""
|
|
724
|
+
discarding_oversized_line = False
|
|
725
|
+
|
|
726
|
+
def feed(text: str) -> None:
|
|
727
|
+
nonlocal pending, discarding_oversized_line
|
|
728
|
+
if not text:
|
|
729
|
+
return
|
|
730
|
+
pending += text
|
|
731
|
+
while pending:
|
|
732
|
+
if discarding_oversized_line:
|
|
733
|
+
newline_index = pending.find("\n")
|
|
734
|
+
if newline_index == -1:
|
|
735
|
+
pending = ""
|
|
736
|
+
return
|
|
737
|
+
pending = pending[newline_index + 1 :]
|
|
738
|
+
discarding_oversized_line = False
|
|
739
|
+
continue
|
|
740
|
+
|
|
741
|
+
newline_index = pending.find("\n")
|
|
742
|
+
if newline_index != -1:
|
|
743
|
+
if newline_index > self.max_unterminated_line_chars:
|
|
744
|
+
self._queue.put(self._truncated_raw_line(pending))
|
|
745
|
+
else:
|
|
746
|
+
self._queue.put(pending[: newline_index + 1])
|
|
747
|
+
pending = pending[newline_index + 1 :]
|
|
748
|
+
continue
|
|
749
|
+
|
|
750
|
+
if len(pending) > self.max_unterminated_line_chars:
|
|
751
|
+
self._queue.put(self._truncated_raw_line(pending))
|
|
752
|
+
pending = ""
|
|
753
|
+
discarding_oversized_line = True
|
|
754
|
+
return
|
|
755
|
+
|
|
541
756
|
try:
|
|
542
|
-
|
|
543
|
-
|
|
757
|
+
while True:
|
|
758
|
+
chunk = stdout.read(COMMAND_READ_CHUNK_BYTES)
|
|
759
|
+
if not chunk:
|
|
760
|
+
break
|
|
761
|
+
feed(decoder.decode(chunk, final=False))
|
|
762
|
+
feed(decoder.decode(b"", final=True))
|
|
763
|
+
if pending and not discarding_oversized_line:
|
|
764
|
+
self._queue.put(pending)
|
|
544
765
|
finally:
|
|
545
766
|
self._stream_closed = True
|
|
546
767
|
self._queue.put(_STREAM_END)
|
|
@@ -613,7 +834,9 @@ def process_group_id_for(proc: subprocess.Popen[str]) -> int | None:
|
|
|
613
834
|
def run_command(
|
|
614
835
|
command: list[str],
|
|
615
836
|
timeout_seconds: int,
|
|
616
|
-
|
|
837
|
+
*,
|
|
838
|
+
max_line_chars: int = MAX_LINE_CHARS_LIMIT,
|
|
839
|
+
) -> tuple[Iterable[str], subprocess.Popen[bytes] | None, int | None]:
|
|
617
840
|
popen_kwargs: dict[str, object] = {}
|
|
618
841
|
if os.name != "nt":
|
|
619
842
|
popen_kwargs["start_new_session"] = True
|
|
@@ -622,9 +845,8 @@ def run_command(
|
|
|
622
845
|
command,
|
|
623
846
|
stdout=subprocess.PIPE,
|
|
624
847
|
stderr=subprocess.STDOUT,
|
|
625
|
-
text=
|
|
626
|
-
bufsize=
|
|
627
|
-
errors="replace",
|
|
848
|
+
text=False,
|
|
849
|
+
bufsize=0,
|
|
628
850
|
**popen_kwargs,
|
|
629
851
|
)
|
|
630
852
|
except OSError as exc:
|
|
@@ -638,6 +860,7 @@ def run_command(
|
|
|
638
860
|
proc,
|
|
639
861
|
proc.stdout,
|
|
640
862
|
timeout_seconds=timeout_seconds,
|
|
863
|
+
max_line_chars=max_line_chars,
|
|
641
864
|
process_group_id=process_group_id_for(proc),
|
|
642
865
|
),
|
|
643
866
|
proc,
|
|
@@ -685,11 +908,15 @@ def main() -> int:
|
|
|
685
908
|
if command and command[0] == "--":
|
|
686
909
|
command = command[1:]
|
|
687
910
|
|
|
688
|
-
proc: subprocess.Popen[
|
|
911
|
+
proc: subprocess.Popen[bytes] | None = None
|
|
689
912
|
command_stream: TimedCommandStream | None = None
|
|
690
913
|
early_rc: int | None = None
|
|
691
914
|
if command:
|
|
692
|
-
stream, proc, early_rc = run_command(
|
|
915
|
+
stream, proc, early_rc = run_command(
|
|
916
|
+
command,
|
|
917
|
+
args.timeout_seconds,
|
|
918
|
+
max_line_chars=COMMAND_MAX_UNTERMINATED_LINE_CHARS,
|
|
919
|
+
)
|
|
693
920
|
if isinstance(stream, TimedCommandStream):
|
|
694
921
|
command_stream = stream
|
|
695
922
|
if early_rc is not None and proc is None:
|
|
@@ -2210,6 +2210,25 @@ def backup_existing(path: Path) -> Path | None:
|
|
|
2210
2210
|
return backup
|
|
2211
2211
|
|
|
2212
2212
|
|
|
2213
|
+
def rollback_restore_guidance(settings_path: Path, backup_path: Path | None, original_existed: bool) -> str:
|
|
2214
|
+
if backup_path is not None:
|
|
2215
|
+
return (
|
|
2216
|
+
"Restore only with a no-follow, symlink-safe copy that opens the backup and target parent "
|
|
2217
|
+
"without following links, then atomically replaces the target; do not use generic shell "
|
|
2218
|
+
f"copy/delete commands on this mutable target. Backup: {backup_path}. Target: {settings_path}."
|
|
2219
|
+
)
|
|
2220
|
+
if original_existed:
|
|
2221
|
+
return (
|
|
2222
|
+
"No backup path was recorded; inspect the target with no-follow file operations before any "
|
|
2223
|
+
f"manual recovery. Do not use generic shell copy/delete commands on this mutable target: {settings_path}."
|
|
2224
|
+
)
|
|
2225
|
+
return (
|
|
2226
|
+
"The target did not exist before setup. If cleanup is required, verify the target and every parent "
|
|
2227
|
+
"without following symlinks and remove only the verified regular file; do not use generic shell "
|
|
2228
|
+
f"delete commands on this mutable target: {settings_path}."
|
|
2229
|
+
)
|
|
2230
|
+
|
|
2231
|
+
|
|
2213
2232
|
def write_rollback_record(
|
|
2214
2233
|
*,
|
|
2215
2234
|
root: Path,
|
|
@@ -2237,11 +2256,8 @@ def write_rollback_record(
|
|
|
2237
2256
|
"target_path": str(settings_path),
|
|
2238
2257
|
"backup_path": str(backup_path) if backup_path else None,
|
|
2239
2258
|
"original_existed": original_existed,
|
|
2240
|
-
"restore": (
|
|
2241
|
-
|
|
2242
|
-
if backup_path
|
|
2243
|
-
else f"rm -f {shlex.quote(str(settings_path))}"
|
|
2244
|
-
),
|
|
2259
|
+
"restore": rollback_restore_guidance(settings_path, backup_path, original_existed),
|
|
2260
|
+
"restore_requires_no_follow": True,
|
|
2245
2261
|
}
|
|
2246
2262
|
atomic_write(rollback_path, json.dumps(record, indent=2, sort_keys=True) + "\n", 0o600)
|
|
2247
2263
|
return rollback_id, rollback_path
|