@ictechgy/context-guard 0.4.10 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/CHANGELOG.md +17 -1
  2. package/README.ko.md +46 -28
  3. package/README.md +42 -33
  4. package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
  5. package/docs/benchmark-workflow-examples.md +3 -0
  6. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
  7. package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
  8. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
  9. package/docs/experimental-benchmark-fixtures.md +24 -7
  10. package/package.json +2 -1
  11. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  12. package/plugins/context-guard/README.ko.md +14 -11
  13. package/plugins/context-guard/README.md +15 -14
  14. package/plugins/context-guard/bin/context-guard +48 -17
  15. package/plugins/context-guard/bin/context-guard-artifact +342 -33
  16. package/plugins/context-guard/bin/context-guard-audit +36 -5
  17. package/plugins/context-guard/bin/context-guard-bench +1675 -44
  18. package/plugins/context-guard/bin/context-guard-cache-score +347 -35
  19. package/plugins/context-guard/bin/context-guard-compress +89 -27
  20. package/plugins/context-guard/bin/context-guard-cost +7 -2
  21. package/plugins/context-guard/bin/context-guard-experiments +364 -8
  22. package/plugins/context-guard/bin/context-guard-failed-nudge +6 -2
  23. package/plugins/context-guard/bin/context-guard-filter +88 -18
  24. package/plugins/context-guard/bin/context-guard-pack +329 -19
  25. package/plugins/context-guard/bin/context-guard-read-symbol +27 -0
  26. package/plugins/context-guard/bin/context-guard-sanitize-output +245 -18
  27. package/plugins/context-guard/bin/context-guard-setup +21 -5
  28. package/plugins/context-guard/bin/context-guard-tool-prune +287 -62
  29. package/plugins/context-guard/bin/context-guard-trim-output +394 -90
  30. package/plugins/context-guard/brief/README.md +5 -5
  31. package/plugins/context-guard/lib/context_guard_command_manifest_loader.py +123 -0
  32. package/plugins/context-guard/lib/context_guard_commands.py +217 -190
@@ -11,6 +11,7 @@ import argparse
11
11
  import ast
12
12
  import errno
13
13
  import hashlib
14
+ import importlib.machinery
14
15
  import importlib.util
15
16
  import json
16
17
  import os
@@ -39,8 +40,27 @@ def _load_hook_secret_patterns():
39
40
  raise ImportError("hook_secret_patterns.py not found in " + ", ".join(searched))
40
41
 
41
42
 
43
def _load_sanitize_output():
    """Load the sanitize-output helper module that sits next to this script.

    Two locations under ``SCRIPT_DIR`` are probed in order:
    ``sanitize_output.py`` and ``context-guard-sanitize-output``.  The first
    one that is a regular file is executed as a module named
    ``_claude_token_sanitize_output`` and returned.

    Raises:
        ImportError: when no candidate yields a module; the message lists
            every path that was tried.
    """
    candidates = (
        SCRIPT_DIR / "sanitize_output.py",
        SCRIPT_DIR / "context-guard-sanitize-output",
    )
    tried = []
    for candidate in candidates:
        tried.append(str(candidate))
        if candidate.is_file():
            # An explicit SourceFileLoader is built for the path; note that the
            # second candidate has no ".py" suffix.
            source_loader = importlib.machinery.SourceFileLoader(
                "_claude_token_sanitize_output", str(candidate)
            )
            module_spec = importlib.util.spec_from_loader(source_loader.name, source_loader)
            if module_spec is not None:
                helper = importlib.util.module_from_spec(module_spec)
                source_loader.exec_module(helper)
                return helper
    raise ImportError("sanitize_output helper not found in " + ", ".join(tried))
57
+
58
+
42
59
  _hook_secret_patterns = _load_hook_secret_patterns()
60
+ _sanitize_output = _load_sanitize_output()
43
61
  hook_label_has_sensitive_evidence = _hook_secret_patterns.hook_label_has_sensitive_evidence
62
+ redact_sensitive_hook_text = _hook_secret_patterns.redact_sensitive_hook_text
63
+ LineSanitizer = _sanitize_output.LineSanitizer
44
64
 
45
65
  DEFAULT_CONTEXT_LINES = 3
46
66
  DEFAULT_MAX_CHARS = 16_000
@@ -391,6 +411,11 @@ def strip_line_for_brace_count(line: str, in_block_comment: bool = False) -> tup
391
411
  return "".join(output), in_block_comment
392
412
 
393
413
 
414
def redact_symbol_content(content: str) -> str:
    """Return *content* with each line passed through one shared LineSanitizer.

    Lines are split with their terminators kept, sanitized in order by the
    same sanitizer instance, and concatenated back together.  Only the first
    element of each ``sanitize`` result is used.
    """
    line_sanitizer = LineSanitizer(show_paths=True)
    cleaned = []
    for raw_line in content.splitlines(keepends=True):
        cleaned.append(line_sanitizer.sanitize(raw_line)[0])
    return "".join(cleaned)
417
+
418
+
394
419
  def find_symbol_slice(path: Path, symbol: str, context: int, max_chars: int, show_paths: bool) -> SymbolSlice | None:
395
420
  text, scan_truncated = read_text_bounded(path)
396
421
  lines = text.splitlines(keepends=True)
@@ -409,6 +434,8 @@ def find_symbol_slice(path: Path, symbol: str, context: int, max_chars: int, sho
409
434
  start_with_context = max(0, start - max(0, context))
410
435
  end_with_context = min(len(lines), end + max(0, context))
411
436
  content = "".join(lines[start_with_context:end_with_context])
437
+ content = redact_symbol_content(content)
438
+ content = redact_sensitive_hook_text(content, "[REDACTED]")
412
439
  capped = False
413
440
  if max_chars > 0 and len(content) > max_chars:
414
441
  marker = f"\n[context-guard-kit] symbol slice capped: {len(content)} chars total\n"
@@ -8,6 +8,7 @@ keeps only bounded head/anchor/tail context when output is too large.
8
8
  from __future__ import annotations
9
9
 
10
10
  import argparse
11
+ import codecs
11
12
  import collections
12
13
  import hashlib
13
14
  import os
@@ -19,7 +20,7 @@ import subprocess
19
20
  import sys
20
21
  import threading
21
22
  import time
22
- from typing import Iterable, Iterator, TextIO
23
+ from typing import BinaryIO, Iterable, Iterator, TextIO
23
24
 
24
25
  TERMINAL_CONTROL_RE = re.compile(
25
26
  r"(?:"
@@ -48,9 +49,17 @@ PRIVATE_KEY_END_RE = re.compile(
48
49
  AUTH_HEADER_RE = re.compile(
49
50
  r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Proxy-)?Authorization\s*:\s*).+$"
50
51
  )
52
+ COOKIE_HEADER_RE = re.compile(
53
+ r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Set-)?Cookie\s*:\s*).+$"
54
+ )
55
+ SESSION_SECRET_KEY = (
56
+ r"(?:session(?:[_-]?(?:id|token))?|sessionid|sid|jsessionid|"
57
+ r"csrf(?:[_-]?token)?|xsrf(?:[_-]?token)?)"
58
+ )
51
59
  SECRET_KEY = (
52
60
  r"[A-Za-z0-9_.-]*(?:api[_-]?key|apikey|token|secret|password|passwd|pwd|"
53
61
  r"private[_-]?key|access[_-]?key|client[_-]?secret)[A-Za-z0-9_.-]*"
62
+ rf"|{SESSION_SECRET_KEY}"
54
63
  r"|AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|"
55
64
  r"GOOGLE_APPLICATION_CREDENTIALS|AZURE_CLIENT_SECRET"
56
65
  )
@@ -60,11 +69,48 @@ INLINE_QUOTED_SECRET_ASSIGNMENT_RE = re.compile(
60
69
  rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
61
70
  rf"(?P<quote>[\"'])(?P<value>(?:\\.|(?!(?P=quote)).)*)(?P=quote)(?P<tail>[^\s,;}}\]]*)"
62
71
  )
72
+ CODE_IDENTIFIER = r"[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)*"
73
+ CALL_ARGUMENT_CHUNK = r"(?:[^()\"'\n;]+|\"(?:\\.|[^\"\\])*\"|'(?:\\.|[^'\\])*'|\([^()]*\))*"
74
+ INLINE_UNQUOTED_CALL_SECRET_ASSIGNMENT_RE = re.compile(
75
+ rf"(?i)(?P<lead>^|[\s;{{\[,])"
76
+ rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
77
+ rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
78
+ rf"(?P<value>(?![\"']){CODE_IDENTIFIER}\({CALL_ARGUMENT_CHUNK}\))"
79
+ )
80
+ SECRET_IDENTIFIER_PART = (
81
+ r"(?:[A-Za-z_$][A-Za-z0-9_$]*(?:api_?key|apikey|token|secret|password|passwd|pwd|"
82
+ r"private_?key|access_?key|client_?secret|sessionid|session_id|session_token|"
83
+ r"csrf_token|xsrf_token)[A-Za-z0-9_$]*|session|sid|csrf|xsrf)"
84
+ )
85
+ FALLBACK_SECRET_OPERAND = rf"(?:[A-Za-z_$][A-Za-z0-9_$]*\.)*{SECRET_IDENTIFIER_PART}"
86
+ INLINE_UNQUOTED_FALLBACK_SECRET_ASSIGNMENT_RE = re.compile(
87
+ rf"(?i)(?P<lead>^|[\s;{{\[,])"
88
+ rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
89
+ rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
90
+ rf"(?P<value>(?![\"']|\[REDACTED\])"
91
+ rf"[^;\n]*?(?:\bor\b|\|\||\?\?|\belse\b|\?[^:\n;]*:)\s*"
92
+ rf"(?:[\"'](?:\\.|[^\"'\\])*[\"']|{FALLBACK_SECRET_OPERAND})[^;\n]*)"
93
+ )
94
+ INLINE_UNQUOTED_BRACKETED_SECRET_ASSIGNMENT_RE = re.compile(
95
+ rf"(?i)(?P<lead>^|[\s;{{\[,])"
96
+ rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
97
+ rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
98
+ rf"(?P<value>(?![\"']|\[REDACTED\])"
99
+ rf"[^\s,;}}\]]*(?:\([^;\n]*?\)|\{{[^;\n]*?\}}|\[[^;\n]*?\])[^\s,;}}\]]*)"
100
+ )
63
101
  INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE = re.compile(
64
102
  rf"(?i)(?P<lead>^|[\s;{{\[,])"
65
103
  rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
66
104
  rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
67
- rf"(?P<value>[^\s,;}}\]]+)"
105
+ rf"(?P<value>(?![\"']|\[REDACTED\])[^\s,;}}\]]+)"
106
+ )
107
+ UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE = re.compile(
108
+ rf"(?i)(?:^|[\s;{{\[,])"
109
+ rf"(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
110
+ rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*(?P<value>(?![\"']).*)$"
111
+ )
112
+ CONTINUATION_OPERATOR_RE = re.compile(
113
+ r"(?i)(?:\\|\|\||&&|\?\?|[+*/%&|^?,]|\?|:|\bor\b|\band\b|\belse\b)\s*(?://.*|#.*)?$"
68
114
  )
69
115
  URL_LIKE_RE = re.compile(r"\b[A-Za-z][A-Za-z0-9+.-]*://[^\s]+")
70
116
  URL_SECRET_PARAM_RE = re.compile(rf"(?i)([?&#;](?:{SECRET_KEY})=)[^\s?&#;]+")
@@ -79,6 +125,43 @@ SAFE_UNQUOTED_VALUES = {
79
125
  "undefined",
80
126
  }
81
127
  IDENTIFIER_CHAIN_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)+$")
128
+ SAFE_ENV_LOOKUP_CALL_RE = re.compile(r"^(?:os\.getenv|os\.environ\.get)\(\s*[\"'][A-Za-z0-9_.-]{1,80}[\"']\s*\)$")
129
+ SAFE_RE_COMPILE_CALL_RE = re.compile(r"^re\.compile\([^;\n]*\)$")
130
+ SAFE_CODE_EXPRESSION_CALL_RE = re.compile(rf"^{CODE_IDENTIFIER}\(\s*(?:{CODE_IDENTIFIER}(?:\s*,\s*{CODE_IDENTIFIER})*)?\s*\)$")
131
+ GETTER_CALL_RE = re.compile(rf"^{CODE_IDENTIFIER}\.get\(\s*[\"'](?P<key>[A-Za-z0-9_.-]{{1,80}})[\"']\s*\)$")
132
+ CAMEL_ACRONYM_BOUNDARY_RE = re.compile(r"(?<=[A-Z])(?=[A-Z][a-z])")
133
+ CAMEL_WORD_BOUNDARY_RE = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")
134
+ SAFE_GETTER_KEY_NAMES = {
135
+ "access_key",
136
+ "access_token",
137
+ "api_key",
138
+ "apikey",
139
+ "auth",
140
+ "authorization",
141
+ "aws_access_key_id",
142
+ "aws_secret_access_key",
143
+ "aws_session_token",
144
+ "azure_client_secret",
145
+ "client_id",
146
+ "client_secret",
147
+ "cookie",
148
+ "credential",
149
+ "credentials",
150
+ "csrf",
151
+ "google_application_credentials",
152
+ "jwt",
153
+ "password",
154
+ "passwd",
155
+ "private_key",
156
+ "pwd",
157
+ "refresh_token",
158
+ "secret",
159
+ "session",
160
+ "session_id",
161
+ "sessionid",
162
+ "sid",
163
+ "token",
164
+ }
82
165
  INLINE_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
83
166
  (re.compile(r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
84
167
  (re.compile(r"(?i)\bBasic\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
@@ -112,6 +195,9 @@ MAX_SECTION_LINES_LIMIT = 2_000
112
195
  DEFAULT_TIMEOUT_SECONDS = 600
113
196
  MAX_TIMEOUT_SECONDS = 86_400
114
197
  TIMEOUT_EXIT_CODE = 124
198
+ COMMAND_READ_CHUNK_BYTES = 64 * 1024
199
+ COMMAND_MAX_UNTERMINATED_LINE_CHARS = 4_096
200
+ RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS = 1_024
115
201
 
116
202
 
117
203
  def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
@@ -167,20 +253,33 @@ def cap_line(line: str, max_line_chars: int) -> tuple[str, bool]:
167
253
  return body[:keep] + marker + newline, True
168
254
 
169
255
 
256
def normalize_getter_key(key: str) -> str:
    """Fold a getter key into lowercase, underscore-separated form.

    Underscores are inserted at the two camel-case boundaries described by
    ``CAMEL_ACRONYM_BOUNDARY_RE`` and ``CAMEL_WORD_BOUNDARY_RE``; runs of
    ``_``, ``.`` and ``-`` become a single underscore; leading and trailing
    underscores are stripped and the result is lowercased.
    """
    with_acronym_breaks = CAMEL_ACRONYM_BOUNDARY_RE.sub("_", key)
    with_word_breaks = CAMEL_WORD_BOUNDARY_RE.sub("_", with_acronym_breaks)
    unified = re.sub(r"[_.-]+", "_", with_word_breaks)
    collapsed = re.sub(r"_+", "_", unified)
    return collapsed.strip("_").lower()
261
+
262
+
263
def is_safe_getter_key(key: str) -> bool:
    """Report whether *key*, once normalized, is in ``SAFE_GETTER_KEY_NAMES``."""
    normalized = normalize_getter_key(key)
    return normalized in SAFE_GETTER_KEY_NAMES
265
+
266
+
170
267
  def should_redact_unquoted_secret_value(line: str, match: re.Match[str]) -> bool:
171
268
  value = match.group("value").strip()
269
+ prefix = match.group("prefix")
172
270
  if not value:
173
271
  return False
174
272
  if value.lower() in SAFE_UNQUOTED_VALUES:
175
273
  return False
176
274
  if IDENTIFIER_CHAIN_RE.match(value):
177
275
  return False
178
- end = match.end("value")
179
- if end < len(line) and line[end] in "([{":
180
- # Likely a function call or expression (`api_key = os.getenv(...)`);
181
- # preserve it so Claude can still reason about code flow.
276
+ if SAFE_ENV_LOOKUP_CALL_RE.match(value) or SAFE_RE_COMPILE_CALL_RE.match(value):
182
277
  return False
183
- if any(ch in value for ch in "()[]{}"):
278
+ getter_match = GETTER_CALL_RE.match(value)
279
+ if re.search(r"\s[:=]\s*$", prefix) and (
280
+ SAFE_CODE_EXPRESSION_CALL_RE.match(value)
281
+ or (getter_match is not None and is_safe_getter_key(getter_match.group("key")))
282
+ ):
184
283
  return False
185
284
  return True
186
285
 
@@ -214,6 +313,9 @@ def redact_secret_assignments(line: str) -> tuple[str, bool]:
214
313
  return f"{match.group('lead')}{match.group('prefix')}[REDACTED]"
215
314
 
216
315
  line = INLINE_QUOTED_SECRET_ASSIGNMENT_RE.sub(quoted_repl, line)
316
+ line = INLINE_UNQUOTED_FALLBACK_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
317
+ line = INLINE_UNQUOTED_CALL_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
318
+ line = INLINE_UNQUOTED_BRACKETED_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
217
319
  line = INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
218
320
  return line, redacted
219
321
 
@@ -253,6 +355,54 @@ def detect_multiline_secret_assignment(line: str) -> str | None:
253
355
  return None
254
356
 
255
357
 
358
def expression_bracket_delta(text: str) -> int:
    """Return the net count of opening minus closing brackets in *text*.

    ``(``, ``[`` and ``{`` each add one; ``)``, ``]`` and ``}`` each subtract
    one.  Characters inside a single- or double-quoted span are ignored, and a
    backslash inside such a span escapes the following character.  A quote
    that is never closed hides everything after it.
    """
    openers = "([{"
    closers = ")}]"
    balance = 0
    active_quote = None
    skip_next = False
    for ch in text:
        if active_quote is None:
            # Outside any string: only quotes and brackets matter.
            if ch == "'" or ch == '"':
                active_quote = ch
            elif ch in openers:
                balance += 1
            elif ch in closers:
                balance -= 1
            continue
        # Inside a string: track escapes and look for the matching quote.
        if skip_next:
            skip_next = False
        elif ch == "\\":
            skip_next = True
        elif ch == active_quote:
            active_quote = None
    return balance
378
+
379
+
380
def ends_with_continuation_operator(text: str) -> bool:
    """Report whether *text*, minus trailing whitespace, matches ``CONTINUATION_OPERATOR_RE``."""
    trimmed = text.rstrip()
    return CONTINUATION_OPERATOR_RE.search(trimmed) is not None
382
+
383
+
384
def detect_multiline_secret_expression(line: str) -> int | None:
    """Decide whether *line* opens an unquoted secret assignment that continues.

    Returns:
        ``None`` when the line does not match
        ``UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE`` or its value looks complete.
        Otherwise returns the bracket depth still open after the line: ``0``
        when the value is empty or merely ends with a continuation operator,
        and a positive count when brackets remain unclosed.
    """
    found = UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE.search(line)
    if found is None:
        return None
    rhs = found.group("value").strip()
    if rhs == "":
        return 0
    open_brackets = expression_bracket_delta(rhs)
    if open_brackets > 0:
        return open_brackets
    if not ends_with_continuation_operator(rhs):
        return None
    # open_brackets is zero or negative here; never report a negative depth.
    return max(open_brackets, 0)
397
+
398
+
399
def update_multiline_secret_expression_state(line: str, depth: int) -> int | None:
    """Advance the open-bracket depth across one continuation *line*.

    The line's bracket delta is added to *depth* and clamped at zero.

    Returns:
        ``None`` when the depth reaches zero and the line does not end with a
        continuation operator; otherwise the new depth.
    """
    remaining = depth + expression_bracket_delta(line)
    if remaining < 0:
        remaining = 0
    # The operator check only runs once every bracket has been closed.
    still_open = remaining != 0 or ends_with_continuation_operator(line)
    return remaining if still_open else None
404
+
405
+
256
406
  def private_key_state_after_line(line: str) -> bool | None:
257
407
  """Return updated private-key state for a line, or None when no marker appears."""
258
408
  if PRIVATE_KEY_BEGIN_RE.search(line):
@@ -273,6 +423,7 @@ class LineSanitizer:
273
423
  self.show_paths = show_paths
274
424
  self.in_private_key_block = False
275
425
  self.multiline_secret_quote: str | None = None
426
+ self.multiline_secret_expression_depth: int | None = None
276
427
  self.redactions = 0
277
428
 
278
429
  def sanitize(self, raw_line: str) -> tuple[str, bool]:
@@ -305,6 +456,12 @@ class LineSanitizer:
305
456
  self.in_private_key_block = False
306
457
  return self._finish(diff_prefix + "[REDACTED PRIVATE KEY BLOCK]\n", redacted)
307
458
 
459
+ if self.multiline_secret_expression_depth is not None:
460
+ self.multiline_secret_expression_depth = update_multiline_secret_expression_state(
461
+ line, self.multiline_secret_expression_depth
462
+ )
463
+ return self._finish(diff_prefix + "[REDACTED MULTILINE SECRET]\n", True)
464
+
308
465
  multiline_quote = detect_multiline_secret_assignment(line)
309
466
  if multiline_quote is not None:
310
467
  self.multiline_secret_quote = multiline_quote
@@ -319,11 +476,21 @@ class LineSanitizer:
319
476
  self.in_private_key_block = True
320
477
  return self._finish(diff_prefix + "[REDACTED PRIVATE KEY BLOCK]\n", redacted)
321
478
 
479
+ expression_depth = detect_multiline_secret_expression(line)
480
+ if expression_depth is not None:
481
+ self.multiline_secret_expression_depth = expression_depth
482
+ return self._finish(diff_prefix + "[REDACTED MULTILINE SECRET]\n", True)
483
+
322
484
  new_line, count = AUTH_HEADER_RE.subn(r"\g<prefix>[REDACTED]", line)
323
485
  if count:
324
486
  redacted = True
325
487
  line = new_line
326
488
 
489
+ new_line, count = COOKIE_HEADER_RE.subn(r"\g<prefix>[REDACTED]", line)
490
+ if count:
491
+ redacted = True
492
+ line = new_line
493
+
327
494
  line, assignment_redacted = redact_secret_assignments(line)
328
495
  if assignment_redacted:
329
496
  redacted = True
@@ -520,14 +687,16 @@ def terminate_process_tree(
520
687
  class TimedCommandStream:
521
688
  def __init__(
522
689
  self,
523
- proc: subprocess.Popen[str],
524
- stdout: TextIO,
690
+ proc: subprocess.Popen[bytes],
691
+ stdout: BinaryIO,
525
692
  *,
526
693
  timeout_seconds: int,
694
+ max_line_chars: int = MAX_LINE_CHARS_LIMIT,
527
695
  process_group_id: int | None = None,
528
696
  ) -> None:
529
697
  self.proc = proc
530
698
  self.timeout_seconds = timeout_seconds
699
+ self.max_unterminated_line_chars = max(1, max_line_chars)
531
700
  self.process_group_id = process_group_id
532
701
  self.deadline = time.monotonic() + timeout_seconds
533
702
  self.timed_out = False
@@ -537,10 +706,62 @@ class TimedCommandStream:
537
706
  self._thread = threading.Thread(target=self._read_stdout, args=(stdout,), daemon=True)
538
707
  self._thread.start()
539
708
 
540
- def _read_stdout(self, stdout: TextIO) -> None:
709
def _truncated_raw_line(self, text: str) -> str:
    """Build the replacement emitted for a raw line that exceeded the length cap.

    The result is a prefix of *text* followed by a marker line.  The prefix is
    shorter than the cap by a holdback of up to
    ``RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS`` characters, so it is empty
    when the cap does not exceed the holdback.
    """
    limit = self.max_unterminated_line_chars
    withheld = min(RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS, limit)
    visible_chars = max(0, limit - withheld)
    notice = (
        "...[context-guard-kit: raw line truncated before newline "
        f"after {limit} chars; "
        f"withheld {withheld} boundary chars for redaction safety]\n"
    )
    return text[:visible_chars] + notice
720
+
721
+ def _read_stdout(self, stdout: BinaryIO) -> None:
722
+ decoder = codecs.getincrementaldecoder("utf-8")("replace")
723
+ pending = ""
724
+ discarding_oversized_line = False
725
+
726
+ def feed(text: str) -> None:
727
+ nonlocal pending, discarding_oversized_line
728
+ if not text:
729
+ return
730
+ pending += text
731
+ while pending:
732
+ if discarding_oversized_line:
733
+ newline_index = pending.find("\n")
734
+ if newline_index == -1:
735
+ pending = ""
736
+ return
737
+ pending = pending[newline_index + 1 :]
738
+ discarding_oversized_line = False
739
+ continue
740
+
741
+ newline_index = pending.find("\n")
742
+ if newline_index != -1:
743
+ if newline_index > self.max_unterminated_line_chars:
744
+ self._queue.put(self._truncated_raw_line(pending))
745
+ else:
746
+ self._queue.put(pending[: newline_index + 1])
747
+ pending = pending[newline_index + 1 :]
748
+ continue
749
+
750
+ if len(pending) > self.max_unterminated_line_chars:
751
+ self._queue.put(self._truncated_raw_line(pending))
752
+ pending = ""
753
+ discarding_oversized_line = True
754
+ return
755
+
541
756
  try:
542
- for line in stdout:
543
- self._queue.put(line)
757
+ while True:
758
+ chunk = stdout.read(COMMAND_READ_CHUNK_BYTES)
759
+ if not chunk:
760
+ break
761
+ feed(decoder.decode(chunk, final=False))
762
+ feed(decoder.decode(b"", final=True))
763
+ if pending and not discarding_oversized_line:
764
+ self._queue.put(pending)
544
765
  finally:
545
766
  self._stream_closed = True
546
767
  self._queue.put(_STREAM_END)
@@ -613,7 +834,9 @@ def process_group_id_for(proc: subprocess.Popen[str]) -> int | None:
613
834
  def run_command(
614
835
  command: list[str],
615
836
  timeout_seconds: int,
616
- ) -> tuple[Iterable[str], subprocess.Popen[str] | None, int | None]:
837
+ *,
838
+ max_line_chars: int = MAX_LINE_CHARS_LIMIT,
839
+ ) -> tuple[Iterable[str], subprocess.Popen[bytes] | None, int | None]:
617
840
  popen_kwargs: dict[str, object] = {}
618
841
  if os.name != "nt":
619
842
  popen_kwargs["start_new_session"] = True
@@ -622,9 +845,8 @@ def run_command(
622
845
  command,
623
846
  stdout=subprocess.PIPE,
624
847
  stderr=subprocess.STDOUT,
625
- text=True,
626
- bufsize=1,
627
- errors="replace",
848
+ text=False,
849
+ bufsize=0,
628
850
  **popen_kwargs,
629
851
  )
630
852
  except OSError as exc:
@@ -638,6 +860,7 @@ def run_command(
638
860
  proc,
639
861
  proc.stdout,
640
862
  timeout_seconds=timeout_seconds,
863
+ max_line_chars=max_line_chars,
641
864
  process_group_id=process_group_id_for(proc),
642
865
  ),
643
866
  proc,
@@ -685,11 +908,15 @@ def main() -> int:
685
908
  if command and command[0] == "--":
686
909
  command = command[1:]
687
910
 
688
- proc: subprocess.Popen[str] | None = None
911
+ proc: subprocess.Popen[bytes] | None = None
689
912
  command_stream: TimedCommandStream | None = None
690
913
  early_rc: int | None = None
691
914
  if command:
692
- stream, proc, early_rc = run_command(command, args.timeout_seconds)
915
+ stream, proc, early_rc = run_command(
916
+ command,
917
+ args.timeout_seconds,
918
+ max_line_chars=COMMAND_MAX_UNTERMINATED_LINE_CHARS,
919
+ )
693
920
  if isinstance(stream, TimedCommandStream):
694
921
  command_stream = stream
695
922
  if early_rc is not None and proc is None:
@@ -2210,6 +2210,25 @@ def backup_existing(path: Path) -> Path | None:
2210
2210
  return backup
2211
2211
 
2212
2212
 
2213
+ def rollback_restore_guidance(settings_path: Path, backup_path: Path | None, original_existed: bool) -> str:
2214
+ if backup_path is not None:
2215
+ return (
2216
+ "Restore only with a no-follow, symlink-safe copy that opens the backup and target parent "
2217
+ "without following links, then atomically replaces the target; do not use generic shell "
2218
+ f"copy/delete commands on this mutable target. Backup: {backup_path}. Target: {settings_path}."
2219
+ )
2220
+ if original_existed:
2221
+ return (
2222
+ "No backup path was recorded; inspect the target with no-follow file operations before any "
2223
+ f"manual recovery. Do not use generic shell copy/delete commands on this mutable target: {settings_path}."
2224
+ )
2225
+ return (
2226
+ "The target did not exist before setup. If cleanup is required, verify the target and every parent "
2227
+ "without following symlinks and remove only the verified regular file; do not use generic shell "
2228
+ f"delete commands on this mutable target: {settings_path}."
2229
+ )
2230
+
2231
+
2213
2232
  def write_rollback_record(
2214
2233
  *,
2215
2234
  root: Path,
@@ -2237,11 +2256,8 @@ def write_rollback_record(
2237
2256
  "target_path": str(settings_path),
2238
2257
  "backup_path": str(backup_path) if backup_path else None,
2239
2258
  "original_existed": original_existed,
2240
- "restore": (
2241
- f"cp {shlex.quote(str(backup_path))} {shlex.quote(str(settings_path))}"
2242
- if backup_path
2243
- else f"rm -f {shlex.quote(str(settings_path))}"
2244
- ),
2259
+ "restore": rollback_restore_guidance(settings_path, backup_path, original_existed),
2260
+ "restore_requires_no_follow": True,
2245
2261
  }
2246
2262
  atomic_write(rollback_path, json.dumps(record, indent=2, sort_keys=True) + "\n", 0o600)
2247
2263
  return rollback_id, rollback_path