@ictechgy/context-guard 0.4.11 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -49,9 +49,17 @@ PRIVATE_KEY_END_RE = re.compile(
49
49
  AUTH_HEADER_RE = re.compile(
50
50
  r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Proxy-)?Authorization\s*:\s*).+$"
51
51
  )
52
+ COOKIE_HEADER_RE = re.compile(
53
+ r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Set-)?Cookie\s*:\s*).+$"
54
+ )
55
+ SESSION_SECRET_KEY = (
56
+ r"(?:session(?:[_-]?(?:id|token))?|sessionid|sid|jsessionid|"
57
+ r"csrf(?:[_-]?token)?|xsrf(?:[_-]?token)?)"
58
+ )
52
59
  SECRET_KEY = (
53
60
  r"[A-Za-z0-9_.-]*(?:api[_-]?key|apikey|token|secret|password|passwd|pwd|"
54
61
  r"private[_-]?key|access[_-]?key|client[_-]?secret)[A-Za-z0-9_.-]*"
62
+ rf"|{SESSION_SECRET_KEY}"
55
63
  r"|AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|"
56
64
  r"GOOGLE_APPLICATION_CREDENTIALS|AZURE_CLIENT_SECRET"
57
65
  )
@@ -61,11 +69,48 @@ INLINE_QUOTED_SECRET_ASSIGNMENT_RE = re.compile(
61
69
  rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
62
70
  rf"(?P<quote>[\"'])(?P<value>(?:\\.|(?!(?P=quote)).)*)(?P=quote)(?P<tail>[^\s,;}}\]]*)"
63
71
  )
72
+ CODE_IDENTIFIER = r"[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)*"
73
+ CALL_ARGUMENT_CHUNK = r"(?:[^()\"'\n;]+|\"(?:\\.|[^\"\\])*\"|'(?:\\.|[^'\\])*'|\([^()]*\))*"
74
+ INLINE_UNQUOTED_CALL_SECRET_ASSIGNMENT_RE = re.compile(
75
+ rf"(?i)(?P<lead>^|[\s;{{\[,])"
76
+ rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
77
+ rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
78
+ rf"(?P<value>(?![\"']){CODE_IDENTIFIER}\({CALL_ARGUMENT_CHUNK}\))"
79
+ )
80
+ SECRET_IDENTIFIER_PART = (
81
+ r"(?:[A-Za-z_$][A-Za-z0-9_$]*(?:api_?key|apikey|token|secret|password|passwd|pwd|"
82
+ r"private_?key|access_?key|client_?secret|sessionid|session_id|session_token|"
83
+ r"csrf_token|xsrf_token)[A-Za-z0-9_$]*|session|sid|csrf|xsrf)"
84
+ )
85
+ FALLBACK_SECRET_OPERAND = rf"(?:[A-Za-z_$][A-Za-z0-9_$]*\.)*{SECRET_IDENTIFIER_PART}"
86
+ INLINE_UNQUOTED_FALLBACK_SECRET_ASSIGNMENT_RE = re.compile(
87
+ rf"(?i)(?P<lead>^|[\s;{{\[,])"
88
+ rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
89
+ rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
90
+ rf"(?P<value>(?![\"']|\[REDACTED\])"
91
+ rf"[^;\n]*?(?:\bor\b|\|\||\?\?|\belse\b|\?[^:\n;]*:)\s*"
92
+ rf"(?:[\"'](?:\\.|[^\"'\\])*[\"']|{FALLBACK_SECRET_OPERAND})[^;\n]*)"
93
+ )
94
+ INLINE_UNQUOTED_BRACKETED_SECRET_ASSIGNMENT_RE = re.compile(
95
+ rf"(?i)(?P<lead>^|[\s;{{\[,])"
96
+ rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
97
+ rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
98
+ rf"(?P<value>(?![\"']|\[REDACTED\])"
99
+ rf"[^\s,;}}\]]*(?:\([^;\n]*?\)|\{{[^;\n]*?\}}|\[[^;\n]*?\])[^\s,;}}\]]*)"
100
+ )
64
101
  INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE = re.compile(
65
102
  rf"(?i)(?P<lead>^|[\s;{{\[,])"
66
103
  rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
67
104
  rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
68
- rf"(?P<value>[^\s,;}}\]]+)"
105
+ rf"(?P<value>(?![\"']|\[REDACTED\])[^\s,;}}\]]+)"
106
+ )
107
+ UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE = re.compile(
108
+ rf"(?i)(?:^|[\s;{{\[,])"
109
+ rf"(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
110
+ rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*(?P<value>(?![\"']).*)$"
111
+ )
112
+ CONTINUATION_OPERATOR_RE = re.compile(
113
+ r"(?i)(?:\\|\|\||&&|\?\?|[+*/%&|^?,]|\?|:|\bor\b|\band\b|\belse\b)\s*(?://.*|#.*)?$"
69
114
  )
70
115
  URL_LIKE_RE = re.compile(r"\b[A-Za-z][A-Za-z0-9+.-]*://[^\s]+")
71
116
  URL_SECRET_PARAM_RE = re.compile(rf"(?i)([?&#;](?:{SECRET_KEY})=)[^\s?&#;]+")
@@ -80,6 +125,43 @@ SAFE_UNQUOTED_VALUES = {
80
125
  "undefined",
81
126
  }
82
127
  IDENTIFIER_CHAIN_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)+$")
128
+ SAFE_ENV_LOOKUP_CALL_RE = re.compile(r"^(?:os\.getenv|os\.environ\.get)\(\s*[\"'][A-Za-z0-9_.-]{1,80}[\"']\s*\)$")
129
+ SAFE_RE_COMPILE_CALL_RE = re.compile(r"^re\.compile\([^;\n]*\)$")
130
+ SAFE_CODE_EXPRESSION_CALL_RE = re.compile(rf"^{CODE_IDENTIFIER}\(\s*(?:{CODE_IDENTIFIER}(?:\s*,\s*{CODE_IDENTIFIER})*)?\s*\)$")
131
+ GETTER_CALL_RE = re.compile(rf"^{CODE_IDENTIFIER}\.get\(\s*[\"'](?P<key>[A-Za-z0-9_.-]{{1,80}})[\"']\s*\)$")
132
+ CAMEL_ACRONYM_BOUNDARY_RE = re.compile(r"(?<=[A-Z])(?=[A-Z][a-z])")
133
+ CAMEL_WORD_BOUNDARY_RE = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")
134
+ SAFE_GETTER_KEY_NAMES = {
135
+ "access_key",
136
+ "access_token",
137
+ "api_key",
138
+ "apikey",
139
+ "auth",
140
+ "authorization",
141
+ "aws_access_key_id",
142
+ "aws_secret_access_key",
143
+ "aws_session_token",
144
+ "azure_client_secret",
145
+ "client_id",
146
+ "client_secret",
147
+ "cookie",
148
+ "credential",
149
+ "credentials",
150
+ "csrf",
151
+ "google_application_credentials",
152
+ "jwt",
153
+ "password",
154
+ "passwd",
155
+ "private_key",
156
+ "pwd",
157
+ "refresh_token",
158
+ "secret",
159
+ "session",
160
+ "session_id",
161
+ "sessionid",
162
+ "sid",
163
+ "token",
164
+ }
83
165
  INLINE_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
84
166
  (re.compile(r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
85
167
  (re.compile(r"(?i)\bBasic\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
@@ -171,20 +253,33 @@ def cap_line(line: str, max_line_chars: int) -> tuple[str, bool]:
171
253
  return body[:keep] + marker + newline, True
172
254
 
173
255
 
256
+ def normalize_getter_key(key: str) -> str:
257
+ key = CAMEL_ACRONYM_BOUNDARY_RE.sub("_", key)
258
+ key = CAMEL_WORD_BOUNDARY_RE.sub("_", key)
259
+ key = re.sub(r"[_.-]+", "_", key)
260
+ return re.sub(r"_+", "_", key).strip("_").lower()
261
+
262
+
263
+ def is_safe_getter_key(key: str) -> bool:
264
+ return normalize_getter_key(key) in SAFE_GETTER_KEY_NAMES
265
+
266
+
174
267
  def should_redact_unquoted_secret_value(line: str, match: re.Match[str]) -> bool:
175
268
  value = match.group("value").strip()
269
+ prefix = match.group("prefix")
176
270
  if not value:
177
271
  return False
178
272
  if value.lower() in SAFE_UNQUOTED_VALUES:
179
273
  return False
180
274
  if IDENTIFIER_CHAIN_RE.match(value):
181
275
  return False
182
- end = match.end("value")
183
- if end < len(line) and line[end] in "([{":
184
- # Likely a function call or expression (`api_key = os.getenv(...)`);
185
- # preserve it so Claude can still reason about code flow.
276
+ if SAFE_ENV_LOOKUP_CALL_RE.match(value) or SAFE_RE_COMPILE_CALL_RE.match(value):
186
277
  return False
187
- if any(ch in value for ch in "()[]{}"):
278
+ getter_match = GETTER_CALL_RE.match(value)
279
+ if re.search(r"\s[:=]\s*$", prefix) and (
280
+ SAFE_CODE_EXPRESSION_CALL_RE.match(value)
281
+ or (getter_match is not None and is_safe_getter_key(getter_match.group("key")))
282
+ ):
188
283
  return False
189
284
  return True
190
285
 
@@ -218,6 +313,9 @@ def redact_secret_assignments(line: str) -> tuple[str, bool]:
218
313
  return f"{match.group('lead')}{match.group('prefix')}[REDACTED]"
219
314
 
220
315
  line = INLINE_QUOTED_SECRET_ASSIGNMENT_RE.sub(quoted_repl, line)
316
+ line = INLINE_UNQUOTED_FALLBACK_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
317
+ line = INLINE_UNQUOTED_CALL_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
318
+ line = INLINE_UNQUOTED_BRACKETED_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
221
319
  line = INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
222
320
  return line, redacted
223
321
 
@@ -257,6 +355,54 @@ def detect_multiline_secret_assignment(line: str) -> str | None:
257
355
  return None
258
356
 
259
357
 
358
+ def expression_bracket_delta(text: str) -> int:
359
+ delta = 0
360
+ quote: str | None = None
361
+ escaped = False
362
+ for char in text:
363
+ if quote is not None:
364
+ if escaped:
365
+ escaped = False
366
+ elif char == "\\":
367
+ escaped = True
368
+ elif char == quote:
369
+ quote = None
370
+ continue
371
+ if char in {"'", '"'}:
372
+ quote = char
373
+ elif char in "([{":
374
+ delta += 1
375
+ elif char in ")}]":
376
+ delta -= 1
377
+ return delta
378
+
379
+
380
+ def ends_with_continuation_operator(text: str) -> bool:
381
+ return bool(CONTINUATION_OPERATOR_RE.search(text.rstrip()))
382
+
383
+
384
+ def detect_multiline_secret_expression(line: str) -> int | None:
385
+ marker = UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE.search(line)
386
+ if marker is None:
387
+ return None
388
+ value = marker.group("value").strip()
389
+ if not value:
390
+ return 0
391
+ delta = expression_bracket_delta(value)
392
+ if delta > 0:
393
+ return delta
394
+ if ends_with_continuation_operator(value):
395
+ return max(delta, 0)
396
+ return None
397
+
398
+
399
+ def update_multiline_secret_expression_state(line: str, depth: int) -> int | None:
400
+ next_depth = max(0, depth + expression_bracket_delta(line))
401
+ if next_depth == 0 and not ends_with_continuation_operator(line):
402
+ return None
403
+ return next_depth
404
+
405
+
260
406
  def private_key_state_after_line(line: str) -> bool | None:
261
407
  """Return updated private-key state for a line, or None when no marker appears."""
262
408
  if PRIVATE_KEY_BEGIN_RE.search(line):
@@ -277,6 +423,7 @@ class LineSanitizer:
277
423
  self.show_paths = show_paths
278
424
  self.in_private_key_block = False
279
425
  self.multiline_secret_quote: str | None = None
426
+ self.multiline_secret_expression_depth: int | None = None
280
427
  self.redactions = 0
281
428
 
282
429
  def sanitize(self, raw_line: str) -> tuple[str, bool]:
@@ -309,6 +456,12 @@ class LineSanitizer:
309
456
  self.in_private_key_block = False
310
457
  return self._finish(diff_prefix + "[REDACTED PRIVATE KEY BLOCK]\n", redacted)
311
458
 
459
+ if self.multiline_secret_expression_depth is not None:
460
+ self.multiline_secret_expression_depth = update_multiline_secret_expression_state(
461
+ line, self.multiline_secret_expression_depth
462
+ )
463
+ return self._finish(diff_prefix + "[REDACTED MULTILINE SECRET]\n", True)
464
+
312
465
  multiline_quote = detect_multiline_secret_assignment(line)
313
466
  if multiline_quote is not None:
314
467
  self.multiline_secret_quote = multiline_quote
@@ -323,11 +476,21 @@ class LineSanitizer:
323
476
  self.in_private_key_block = True
324
477
  return self._finish(diff_prefix + "[REDACTED PRIVATE KEY BLOCK]\n", redacted)
325
478
 
479
+ expression_depth = detect_multiline_secret_expression(line)
480
+ if expression_depth is not None:
481
+ self.multiline_secret_expression_depth = expression_depth
482
+ return self._finish(diff_prefix + "[REDACTED MULTILINE SECRET]\n", True)
483
+
326
484
  new_line, count = AUTH_HEADER_RE.subn(r"\g<prefix>[REDACTED]", line)
327
485
  if count:
328
486
  redacted = True
329
487
  line = new_line
330
488
 
489
+ new_line, count = COOKIE_HEADER_RE.subn(r"\g<prefix>[REDACTED]", line)
490
+ if count:
491
+ redacted = True
492
+ line = new_line
493
+
331
494
  line, assignment_redacted = redact_secret_assignments(line)
332
495
  if assignment_redacted:
333
496
  redacted = True
@@ -2210,6 +2210,25 @@ def backup_existing(path: Path) -> Path | None:
2210
2210
  return backup
2211
2211
 
2212
2212
 
2213
+ def rollback_restore_guidance(settings_path: Path, backup_path: Path | None, original_existed: bool) -> str:
2214
+ if backup_path is not None:
2215
+ return (
2216
+ "Restore only with a no-follow, symlink-safe copy that opens the backup and target parent "
2217
+ "without following links, then atomically replaces the target; do not use generic shell "
2218
+ f"copy/delete commands on this mutable target. Backup: {backup_path}. Target: {settings_path}."
2219
+ )
2220
+ if original_existed:
2221
+ return (
2222
+ "No backup path was recorded; inspect the target with no-follow file operations before any "
2223
+ f"manual recovery. Do not use generic shell copy/delete commands on this mutable target: {settings_path}."
2224
+ )
2225
+ return (
2226
+ "The target did not exist before setup. If cleanup is required, verify the target and every parent "
2227
+ "without following symlinks and remove only the verified regular file; do not use generic shell "
2228
+ f"delete commands on this mutable target: {settings_path}."
2229
+ )
2230
+
2231
+
2213
2232
  def write_rollback_record(
2214
2233
  *,
2215
2234
  root: Path,
@@ -2237,11 +2256,8 @@ def write_rollback_record(
2237
2256
  "target_path": str(settings_path),
2238
2257
  "backup_path": str(backup_path) if backup_path else None,
2239
2258
  "original_existed": original_existed,
2240
- "restore": (
2241
- f"cp {shlex.quote(str(backup_path))} {shlex.quote(str(settings_path))}"
2242
- if backup_path
2243
- else f"rm -f {shlex.quote(str(settings_path))}"
2244
- ),
2259
+ "restore": rollback_restore_guidance(settings_path, backup_path, original_existed),
2260
+ "restore_requires_no_follow": True,
2245
2261
  }
2246
2262
  atomic_write(rollback_path, json.dumps(record, indent=2, sort_keys=True) + "\n", 0o600)
2247
2263
  return rollback_id, rollback_path
@@ -87,6 +87,8 @@ class Candidate:
87
87
  index: int
88
88
  score: float = 0.0
89
89
  rank: int = 0
90
+ schema_bytes: int = 0
91
+ parameter_terms: frozenset[str] | None = None
90
92
 
91
93
 
92
94
  def fail(message: str) -> NoReturn:
@@ -276,7 +278,15 @@ def tool_schema_from_dict(raw: dict[str, Any], *, fallback_name: str | None = No
276
278
  schema["description"] = description
277
279
  if server and "server" not in schema:
278
280
  schema["server"] = server
279
- return Candidate(name=name, server=cap_text(server, MAX_LABEL_CHARS) if server else None, description=description, schema=schema, index=index)
281
+ return Candidate(
282
+ name=name,
283
+ server=cap_text(server, MAX_LABEL_CHARS) if server else None,
284
+ description=description,
285
+ schema=schema,
286
+ index=index,
287
+ schema_bytes=byte_len_json(schema),
288
+ parameter_terms=frozenset(terms(" ".join(collect_parameter_text(schema)))),
289
+ )
280
290
 
281
291
 
282
292
  def normalize_catalog(raw: Any) -> list[Candidate]:
@@ -362,7 +372,11 @@ def score_candidate(candidate: Candidate, query_terms: set[str]) -> float:
362
372
  return 0.0
363
373
  name_terms = terms(candidate.name)
364
374
  desc_terms = terms(candidate.description)
365
- parameter_terms = terms(" ".join(collect_parameter_text(candidate.schema)))
375
+ parameter_terms = (
376
+ set(candidate.parameter_terms)
377
+ if candidate.parameter_terms is not None
378
+ else terms(" ".join(collect_parameter_text(candidate.schema)))
379
+ )
366
380
  score = 0.0
367
381
  score += 4.0 * len(query_terms & name_terms)
368
382
  score += 1.5 * len(query_terms & desc_terms)
@@ -379,14 +393,38 @@ def rank_candidates(candidates: list[Candidate], query: str) -> list[Candidate]:
379
393
  query_terms = terms(query)
380
394
  scored: list[Candidate] = []
381
395
  for cand in candidates:
382
- scored.append(Candidate(cand.name, cand.server, cand.description, cand.schema, cand.index, score_candidate(cand, query_terms), 0))
396
+ scored.append(Candidate(
397
+ cand.name,
398
+ cand.server,
399
+ cand.description,
400
+ cand.schema,
401
+ cand.index,
402
+ score_candidate(cand, query_terms),
403
+ 0,
404
+ schema_bytes=cand.schema_bytes,
405
+ parameter_terms=cand.parameter_terms,
406
+ ))
383
407
  scored.sort(key=lambda item: (-item.score, item.index))
384
408
  ranked: list[Candidate] = []
385
409
  for rank, cand in enumerate(scored, start=1):
386
- ranked.append(Candidate(cand.name, cand.server, cand.description, cand.schema, cand.index, cand.score, rank))
410
+ ranked.append(Candidate(
411
+ cand.name,
412
+ cand.server,
413
+ cand.description,
414
+ cand.schema,
415
+ cand.index,
416
+ cand.score,
417
+ rank,
418
+ schema_bytes=cand.schema_bytes,
419
+ parameter_terms=cand.parameter_terms,
420
+ ))
387
421
  return ranked
388
422
 
389
423
 
424
+ def candidate_schema_bytes(cand: Candidate) -> int:
425
+ return cand.schema_bytes if cand.schema_bytes > 0 else byte_len_json(cand.schema)
426
+
427
+
390
428
  def normalized_link_target(parent: Path, raw_target: str) -> Path:
391
429
  target = Path(raw_target)
392
430
  if not target.is_absolute():
@@ -707,7 +745,7 @@ def build_payload(receipt_id: str, ranked: list[Candidate], query: str, redactio
707
745
  "description": cand.description,
708
746
  "score": cand.score,
709
747
  "rank": cand.rank,
710
- "schema_bytes": byte_len_json(cand.schema),
748
+ "schema_bytes": candidate_schema_bytes(cand),
711
749
  "schema": cand.schema,
712
750
  }
713
751
  for cand in ranked
@@ -739,7 +777,7 @@ def retrieval_command(receipt_id: str, *, store_dir: str, tool_name: str | None
739
777
 
740
778
 
741
779
  def selected_tool_record(cand: Candidate, receipt_id: str, budget_left: int, *, store_dir: str) -> tuple[dict[str, Any], int]:
742
- schema_size = byte_len_json(cand.schema)
780
+ schema_size = candidate_schema_bytes(cand)
743
781
  record: dict[str, Any] = {
744
782
  "name": cand.name,
745
783
  "server": cand.server,
@@ -765,7 +803,7 @@ def deferred_tool_record(cand: Candidate, receipt_id: str, *, store_dir: str) ->
765
803
  "score": cand.score,
766
804
  "rank": cand.rank,
767
805
  "description": cand.description,
768
- "schema_bytes": byte_len_json(cand.schema),
806
+ "schema_bytes": candidate_schema_bytes(cand),
769
807
  "reason": "deferred_after_core_top",
770
808
  "retrieval": retrieval_command(receipt_id, store_dir=store_dir, tool_name=cand.name),
771
809
  }
@@ -1008,9 +1046,9 @@ def defer_report(args: argparse.Namespace) -> str:
1008
1046
  store_dir=args.store_dir,
1009
1047
  namespace_top=namespace_top,
1010
1048
  )
1011
- all_schema_bytes = sum(byte_len_json(cand.schema) for cand in ranked)
1012
- listed_deferred_schema_bytes = sum(byte_len_json(cand.schema) for cand in deferred_candidates)
1013
- total_deferred_schema_bytes = sum(byte_len_json(cand.schema) for cand in ranked[core_top:])
1049
+ all_schema_bytes = sum(candidate_schema_bytes(cand) for cand in ranked)
1050
+ listed_deferred_schema_bytes = sum(candidate_schema_bytes(cand) for cand in deferred_candidates)
1051
+ total_deferred_schema_bytes = sum(candidate_schema_bytes(cand) for cand in ranked[core_top:])
1014
1052
  tool_stub_report_bytes = byte_len_json(core_tools) + byte_len_json(deferred_tools)
1015
1053
  all_schema_tokens = proxy_tokens(all_schema_bytes)
1016
1054
  inline_core_schema_tokens = proxy_tokens(core_schema_bytes)
@@ -20,6 +20,7 @@ import signal
20
20
  import stat
21
21
  import subprocess
22
22
  import sys
23
+ import tempfile
23
24
  import threading
24
25
  import time
25
26
  import types
@@ -398,23 +399,75 @@ def store_sanitized_artifact_receipt(
398
399
  return receipt
399
400
 
400
401
 
401
- def capture_sanitized_artifact_line(
402
- *,
403
- capture_enabled: bool,
404
- sanitized_line: str,
405
- artifact_lines: list[str],
406
- capture_bytes: int,
407
- capture_overflow: bool,
408
- max_bytes: int,
409
- ) -> tuple[int, bool]:
410
- if not capture_enabled or capture_overflow:
411
- return capture_bytes, capture_overflow
412
- source_bytes = len(sanitized_line.encode("utf-8", errors="replace"))
413
- if capture_bytes + source_bytes <= max_bytes:
414
- artifact_lines.append(sanitized_line)
415
- return capture_bytes + source_bytes, False
416
- artifact_lines.clear()
417
- return capture_bytes, True
402
+ class SanitizedArtifactCapture:
403
+ def __init__(self, *, enabled: bool, max_bytes: int) -> None:
404
+ self.enabled = enabled
405
+ self.max_bytes = max_bytes
406
+ self.bytes = 0
407
+ self.overflow = False
408
+ self.error: str | None = None
409
+ self._file: BinaryIO | None = None
410
+
411
+ def _ensure_file(self) -> BinaryIO | None:
412
+ if self._file is not None:
413
+ return self._file
414
+ try:
415
+ self._file = tempfile.TemporaryFile("w+b")
416
+ except OSError as exc:
417
+ self._record_error(exc)
418
+ return None
419
+ return self._file
420
+
421
+ def _record_error(self, exc: OSError) -> None:
422
+ if self.error is None:
423
+ self.error = f"{exc.__class__.__name__}: {exc}"
424
+
425
+ def add(self, sanitized_line: str) -> None:
426
+ if not self.enabled or self.overflow or self.error:
427
+ return
428
+ encoded = sanitized_line.encode("utf-8", errors="replace")
429
+ source_bytes = len(encoded)
430
+ if self.bytes + source_bytes > self.max_bytes:
431
+ self.overflow = True
432
+ self.close()
433
+ return
434
+ target = self._ensure_file()
435
+ if target is None:
436
+ return
437
+ try:
438
+ target.write(encoded)
439
+ except OSError as exc:
440
+ self._record_error(exc)
441
+ self.close()
442
+ return
443
+ self.bytes += source_bytes
444
+
445
+ def text(self) -> str:
446
+ if self._file is None:
447
+ return ""
448
+ try:
449
+ self._file.flush()
450
+ self._file.seek(0)
451
+ return self._file.read().decode("utf-8", errors="replace")
452
+ except OSError as exc:
453
+ self._record_error(exc)
454
+ self.close()
455
+ return ""
456
+
457
+ def close(self) -> None:
458
+ target = self._file
459
+ self._file = None
460
+ if target is not None:
461
+ try:
462
+ target.close()
463
+ except OSError as exc:
464
+ self._record_error(exc)
465
+
466
+ def __enter__(self) -> "SanitizedArtifactCapture":
467
+ return self
468
+
469
+ def __exit__(self, *exc: object) -> None:
470
+ self.close()
418
471
 
419
472
 
420
473
  def unique_keep_order(lines: Iterable[str]) -> list[str]:
@@ -1512,11 +1565,10 @@ def main() -> int:
1512
1565
  runner_summary = RunnerFailureSummary(args.runner_summary_items, show_paths=args.show_paths)
1513
1566
  duplicate_tracker = DuplicateLineTracker()
1514
1567
  redacted_lines = 0
1515
- artifact_lines: list[str] = []
1516
- artifact_capture_bytes = 0
1517
- artifact_capture_overflow = False
1568
+ artifact_capture = SanitizedArtifactCapture(enabled=args.artifact_receipt, max_bytes=args.artifact_max_bytes)
1518
1569
 
1519
1570
  if proc.stdout is None:
1571
+ artifact_capture.close()
1520
1572
  print("trim_command_output.py: subprocess produced no stdout pipe", file=sys.stderr)
1521
1573
  return 1
1522
1574
  command_stream = TimedCommandStream(
@@ -1532,14 +1584,7 @@ def main() -> int:
1532
1584
  visible_source, redacted = line_sanitizer.sanitize(line) # type: ignore[attr-defined]
1533
1585
  if redacted:
1534
1586
  redacted_lines += 1
1535
- artifact_capture_bytes, artifact_capture_overflow = capture_sanitized_artifact_line(
1536
- capture_enabled=args.artifact_receipt,
1537
- sanitized_line=visible_source,
1538
- artifact_lines=artifact_lines,
1539
- capture_bytes=artifact_capture_bytes,
1540
- capture_overflow=artifact_capture_overflow,
1541
- max_bytes=args.artifact_max_bytes,
1542
- )
1587
+ artifact_capture.add(visible_source)
1543
1588
  visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
1544
1589
  any_line_capped = any_line_capped or line_capped
1545
1590
  visible_chars += len(visible_line)
@@ -1562,14 +1607,7 @@ def main() -> int:
1562
1607
  visible_source, redacted = line_sanitizer.sanitize(line) # type: ignore[attr-defined]
1563
1608
  if redacted:
1564
1609
  redacted_lines += 1
1565
- artifact_capture_bytes, artifact_capture_overflow = capture_sanitized_artifact_line(
1566
- capture_enabled=args.artifact_receipt,
1567
- sanitized_line=visible_source,
1568
- artifact_lines=artifact_lines,
1569
- capture_bytes=artifact_capture_bytes,
1570
- capture_overflow=artifact_capture_overflow,
1571
- max_bytes=args.artifact_max_bytes,
1572
- )
1610
+ artifact_capture.add(visible_source)
1573
1611
  visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
1574
1612
  any_line_capped = any_line_capped or line_capped
1575
1613
  visible_chars += len(visible_line)
@@ -1602,32 +1640,49 @@ def main() -> int:
1602
1640
  duplicate_line_groups=duplicate_tracker.as_list(),
1603
1641
  )
1604
1642
  if args.artifact_receipt:
1605
- if artifact_capture_overflow:
1643
+ if artifact_capture.overflow:
1606
1644
  payload["artifact_receipt"] = {
1607
1645
  "stored": False,
1608
1646
  "error": "sanitized_output_exceeds_artifact_max_bytes",
1609
1647
  "max_bytes": args.artifact_max_bytes,
1610
1648
  "exact_reexpand": {"available": False, "reason": "artifact size cap exceeded"},
1611
1649
  }
1650
+ elif artifact_capture.error:
1651
+ payload["artifact_receipt"] = {
1652
+ "stored": False,
1653
+ "error": "artifact_receipt_capture_unavailable",
1654
+ "reason": artifact_capture.error,
1655
+ "exact_reexpand": {"available": False, "reason": "artifact receipt capture unavailable"},
1656
+ }
1612
1657
  else:
1613
- try:
1614
- payload["artifact_receipt"] = store_sanitized_artifact_receipt(
1615
- sanitized_text="".join(artifact_lines),
1616
- command=command,
1617
- args=args,
1618
- line_sanitizer=line_sanitizer,
1619
- redacted_lines=redacted_lines,
1620
- )
1621
- except UnsafeAdjacentModuleError as exc:
1622
- print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
1623
- return 2
1624
- except Exception as exc:
1658
+ sanitized_artifact_text = artifact_capture.text()
1659
+ if artifact_capture.error:
1625
1660
  payload["artifact_receipt"] = {
1626
1661
  "stored": False,
1627
- "error": "artifact_receipt_unavailable",
1628
- "reason": f"{exc.__class__.__name__}: {exc}",
1629
- "exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
1662
+ "error": "artifact_receipt_capture_unavailable",
1663
+ "reason": artifact_capture.error,
1664
+ "exact_reexpand": {"available": False, "reason": "artifact receipt capture unavailable"},
1630
1665
  }
1666
+ else:
1667
+ try:
1668
+ payload["artifact_receipt"] = store_sanitized_artifact_receipt(
1669
+ sanitized_text=sanitized_artifact_text,
1670
+ command=command,
1671
+ args=args,
1672
+ line_sanitizer=line_sanitizer,
1673
+ redacted_lines=redacted_lines,
1674
+ )
1675
+ except UnsafeAdjacentModuleError as exc:
1676
+ artifact_capture.close()
1677
+ print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
1678
+ return 2
1679
+ except Exception as exc:
1680
+ payload["artifact_receipt"] = {
1681
+ "stored": False,
1682
+ "error": "artifact_receipt_unavailable",
1683
+ "reason": f"{exc.__class__.__name__}: {exc}",
1684
+ "exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
1685
+ }
1631
1686
  artifact_receipt = payload.get("artifact_receipt")
1632
1687
  if isinstance(artifact_receipt, dict) and artifact_receipt.get("stored"):
1633
1688
  next_queries = payload.setdefault("next_queries", [])
@@ -1642,6 +1697,7 @@ def main() -> int:
1642
1697
  sys.stdout.write(render_digest_json(payload, args.max_chars))
1643
1698
  else:
1644
1699
  sys.stdout.write(render_digest_markdown(payload, args.max_chars))
1700
+ artifact_capture.close()
1645
1701
  return rc
1646
1702
 
1647
1703
  if total <= args.max_lines and visible_chars <= args.max_chars and not any_line_capped:
@@ -1689,6 +1745,7 @@ def main() -> int:
1689
1745
  output += "[context-guard-kit] final summary was capped by --max-chars.\n"
1690
1746
  sys.stdout.write(output)
1691
1747
 
1748
+ artifact_capture.close()
1692
1749
  return rc
1693
1750
 
1694
1751