@ictechgy/context-guard 0.4.10 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32)
  1. package/CHANGELOG.md +17 -1
  2. package/README.ko.md +46 -28
  3. package/README.md +42 -33
  4. package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
  5. package/docs/benchmark-workflow-examples.md +3 -0
  6. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
  7. package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
  8. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
  9. package/docs/experimental-benchmark-fixtures.md +24 -7
  10. package/package.json +2 -1
  11. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  12. package/plugins/context-guard/README.ko.md +14 -11
  13. package/plugins/context-guard/README.md +15 -14
  14. package/plugins/context-guard/bin/context-guard +48 -17
  15. package/plugins/context-guard/bin/context-guard-artifact +342 -33
  16. package/plugins/context-guard/bin/context-guard-audit +36 -5
  17. package/plugins/context-guard/bin/context-guard-bench +1675 -44
  18. package/plugins/context-guard/bin/context-guard-cache-score +347 -35
  19. package/plugins/context-guard/bin/context-guard-compress +89 -27
  20. package/plugins/context-guard/bin/context-guard-cost +7 -2
  21. package/plugins/context-guard/bin/context-guard-experiments +364 -8
  22. package/plugins/context-guard/bin/context-guard-failed-nudge +6 -2
  23. package/plugins/context-guard/bin/context-guard-filter +88 -18
  24. package/plugins/context-guard/bin/context-guard-pack +329 -19
  25. package/plugins/context-guard/bin/context-guard-read-symbol +27 -0
  26. package/plugins/context-guard/bin/context-guard-sanitize-output +245 -18
  27. package/plugins/context-guard/bin/context-guard-setup +21 -5
  28. package/plugins/context-guard/bin/context-guard-tool-prune +287 -62
  29. package/plugins/context-guard/bin/context-guard-trim-output +394 -90
  30. package/plugins/context-guard/brief/README.md +5 -5
  31. package/plugins/context-guard/lib/context_guard_command_manifest_loader.py +123 -0
  32. package/plugins/context-guard/lib/context_guard_commands.py +217 -190
@@ -7,10 +7,9 @@ lines into the conversation while preserving the lines most likely to be useful.
7
7
  from __future__ import annotations
8
8
 
9
9
  import argparse
10
+ import codecs
10
11
  import collections
11
12
  import hashlib
12
- import importlib.machinery
13
- import importlib.util
14
13
  import json
15
14
  import os
16
15
  from pathlib import Path, PurePosixPath
@@ -18,11 +17,14 @@ import queue
18
17
  import re
19
18
  import shlex
20
19
  import signal
20
+ import stat
21
21
  import subprocess
22
22
  import sys
23
+ import tempfile
23
24
  import threading
24
25
  import time
25
- from typing import Iterable, Iterator
26
+ import types
27
+ from typing import BinaryIO, Iterable, Iterator
26
28
 
27
29
  MAX_SUMMARY_ITEM_CHARS = 500
28
30
  MAX_LINES_LIMIT = 5_000
@@ -35,6 +37,10 @@ MAX_TIMEOUT_SECONDS = 86_400
35
37
  TIMEOUT_EXIT_CODE = 124
36
38
  DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES = 10_000_000
37
39
  MAX_ARTIFACT_RECEIPT_MAX_BYTES = 100_000_000
40
+ COMMAND_READ_CHUNK_BYTES = 64 * 1024
41
+ COMMAND_MAX_UNTERMINATED_LINE_CHARS = 4_096
42
+ RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS = 1_024
43
+ MAX_DYNAMIC_SIBLING_MODULE_BYTES = 2_000_000
38
44
 
39
45
 
40
46
  def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
@@ -132,6 +138,10 @@ def anonymize_absolute_paths(text: str) -> str:
132
138
  return ABSOLUTE_PATH_RE.sub(repl, text)
133
139
 
134
140
 
141
class UnsafeAdjacentModuleError(RuntimeError):
    """Signal that a helper beside this script exists but must not be loaded.

    Raised by the adjacent-helper loading code when the helper cannot be
    opened without following symlinks, is not a regular file, exceeds the
    size cap, or when the platform lacks ``O_NOFOLLOW``.
    """
143
+
144
+
135
145
  class FallbackLineSanitizer:
136
146
  def __init__(self, *, show_paths: bool = False, diagnostic: str | None = None) -> None:
137
147
  self.show_paths = show_paths
@@ -159,24 +169,82 @@ class FallbackLineSanitizer:
159
169
  return line, redacted
160
170
 
161
171
 
172
def no_follow_file_flags() -> int:
    """Return ``os.open`` flags for reading a helper without following symlinks.

    The result always contains ``O_RDONLY | O_NOFOLLOW``.  ``O_CLOEXEC`` and
    ``O_NONBLOCK`` are added when the platform defines them.

    ``O_NONBLOCK`` matters because callers validate the file type only
    *after* opening: without it, opening a FIFO planted at the helper path
    would block until a writer appears, so the regular-file check would
    never get the chance to reject it.  The flag has no effect on regular
    files or directories.

    Raises:
        UnsafeAdjacentModuleError: if the platform has no ``O_NOFOLLOW``.
    """
    if not hasattr(os, "O_NOFOLLOW"):
        raise UnsafeAdjacentModuleError("O_NOFOLLOW is required for adjacent helper loads")
    flags = os.O_RDONLY | os.O_NOFOLLOW
    # Optional hardening flags; a missing attribute contributes nothing.
    for optional_flag in ("O_CLOEXEC", "O_NONBLOCK"):
        flags |= getattr(os, optional_flag, 0)
    return flags
179
+
180
+
181
def no_follow_dir_flags() -> int:
    """Return the no-follow open flags, restricted to directories when possible.

    Starts from ``no_follow_file_flags()`` and ORs in ``O_DIRECTORY`` on
    platforms that define it; elsewhere the file flags are returned as is.
    """
    return no_follow_file_flags() | getattr(os, "O_DIRECTORY", 0)
186
+
187
+
188
def read_adjacent_module_source(script_dir: Path, name: str, *, max_bytes: int) -> str | None:
    """Read the source text of a helper file located directly in *script_dir*.

    The directory is opened first and the helper is opened relative to that
    descriptor, both without following symlinks, so the bytes read come from
    the same file that was type- and size-checked via ``fstat``.

    Args:
        script_dir: Directory expected to contain the helper.
        name: Bare file name of the helper (no path separators).
        max_bytes: Largest helper size, in bytes, that will be accepted.

    Returns:
        The helper contents decoded as UTF-8 (undecodable bytes replaced),
        or ``None`` when no file called *name* exists in *script_dir*.

    Raises:
        RuntimeError: if *name* is empty, ``.``/``..``, or contains a
            path separator.
        UnsafeAdjacentModuleError: if the directory or helper cannot be
            opened safely, the helper is not a regular file, or it exceeds
            *max_bytes*.
    """
    if name in {"", ".", ".."} or "/" in name or os.sep in name:
        raise RuntimeError(f"invalid adjacent helper name: {name!r}")
    try:
        dir_fd = os.open(str(script_dir), no_follow_dir_flags())
    except OSError as exc:
        raise UnsafeAdjacentModuleError(f"could not inspect helper directory: {exc}") from exc
    try:
        try:
            fd = os.open(name, no_follow_file_flags(), dir_fd=dir_fd)
        except FileNotFoundError:
            return None
        except OSError as exc:
            raise UnsafeAdjacentModuleError(f"{name} could not be opened without following symlinks: {exc}") from exc
        try:
            st = os.fstat(fd)
            if not stat.S_ISREG(st.st_mode):
                raise UnsafeAdjacentModuleError(f"{name} is not a regular helper file")
            if st.st_size > max_bytes:
                raise UnsafeAdjacentModuleError(f"{name} exceeds helper size cap: {st.st_size} > {max_bytes}")
            # os.read may legally return fewer bytes than requested, so keep
            # reading until EOF.  One byte beyond the cap is requested so a
            # file that grew after the fstat check is still detected below.
            chunks: list[bytes] = []
            remaining = max_bytes + 1
            while remaining > 0:
                chunk = os.read(fd, remaining)
                if not chunk:
                    break
                chunks.append(chunk)
                remaining -= len(chunk)
            data = b"".join(chunks)
        finally:
            os.close(fd)
    finally:
        os.close(dir_fd)
    if len(data) > max_bytes:
        raise UnsafeAdjacentModuleError(f"{name} exceeds helper size cap: > {max_bytes}")
    return data.decode("utf-8", errors="replace")
216
+
217
+
218
def load_adjacent_python_module(script_dir: Path, name: str, *, module_prefix: str) -> object | None:
    """Execute a helper file from *script_dir* as a fresh module object.

    The source is obtained through ``read_adjacent_module_source`` (capped at
    ``MAX_DYNAMIC_SIBLING_MODULE_BYTES``), compiled, and executed in the
    namespace of a new ``types.ModuleType``.  The module name combines
    *module_prefix*, the current PID, and a short SHA-256 digest of *name*.

    Returns:
        The populated module, or ``None`` when the helper file is absent.
    """
    source = read_adjacent_module_source(script_dir, name, max_bytes=MAX_DYNAMIC_SIBLING_MODULE_BYTES)
    if source is None:
        return None

    helper_path = str(script_dir / name)
    name_digest = hashlib.sha256(name.encode("utf-8")).hexdigest()[:12]

    module = types.ModuleType(f"{module_prefix}_{os.getpid()}_{name_digest}")
    module.__file__ = helper_path
    module.__package__ = ""

    code = compile(source, helper_path, "exec")
    exec(code, module.__dict__)
    return module
228
+
229
+
162
230
  def load_line_sanitizer(show_paths: bool) -> object:
163
231
  """Reuse the stronger sanitizer when it is shipped next to this wrapper."""
164
- script_dir = os.path.dirname(os.path.abspath(__file__))
232
+ script_dir = Path(__file__).resolve().parent
165
233
  load_errors: list[str] = []
166
234
  for name in ("sanitize_output.py", "context-guard-sanitize-output"):
167
- candidate = os.path.join(script_dir, name)
168
- if not os.path.exists(candidate):
169
- continue
170
235
  try:
171
- loader = importlib.machinery.SourceFileLoader(f"_claude_token_sanitize_{os.getpid()}", candidate)
172
- spec = importlib.util.spec_from_loader(loader.name, loader)
173
- if spec is None:
236
+ module = load_adjacent_python_module(
237
+ script_dir,
238
+ name,
239
+ module_prefix="_claude_token_sanitize",
240
+ )
241
+ if module is None:
174
242
  continue
175
- module = importlib.util.module_from_spec(spec)
176
- loader.exec_module(module)
177
243
  return module.LineSanitizer(show_paths=show_paths)
244
+ except UnsafeAdjacentModuleError:
245
+ raise
178
246
  except Exception as exc:
179
- load_errors.append(f"{os.path.basename(candidate)} failed to load: {exc.__class__.__name__}: {exc}")
247
+ load_errors.append(f"{name} failed to load: {exc.__class__.__name__}: {exc}")
180
248
  continue
181
249
  diagnostic = "; ".join(load_errors) if load_errors else "strong sanitizer not found next to trim wrapper"
182
250
  return FallbackLineSanitizer(show_paths=show_paths, diagnostic=diagnostic)
@@ -189,22 +257,22 @@ def load_artifact_store_module() -> object:
189
257
  wrapper must resolve both source-tree (`context_escrow.py`) and packaged
190
258
  (`context-guard-artifact`) names.
191
259
  """
192
- script_dir = os.path.dirname(os.path.abspath(__file__))
260
+ script_dir = Path(__file__).resolve().parent
193
261
  load_errors: list[str] = []
194
262
  for name in ("context_escrow.py", "context-guard-artifact", "claude-token-artifact"):
195
- candidate = os.path.join(script_dir, name)
196
- if not os.path.exists(candidate):
197
- continue
198
263
  try:
199
- loader = importlib.machinery.SourceFileLoader(f"_context_guard_artifact_{os.getpid()}", candidate)
200
- spec = importlib.util.spec_from_loader(loader.name, loader)
201
- if spec is None:
264
+ module = load_adjacent_python_module(
265
+ script_dir,
266
+ name,
267
+ module_prefix="_context_guard_artifact",
268
+ )
269
+ if module is None:
202
270
  continue
203
- module = importlib.util.module_from_spec(spec)
204
- loader.exec_module(module)
205
271
  return module
272
+ except UnsafeAdjacentModuleError:
273
+ raise
206
274
  except Exception as exc:
207
- load_errors.append(f"{os.path.basename(candidate)} failed to load: {exc.__class__.__name__}: {exc}")
275
+ load_errors.append(f"{name} failed to load: {exc.__class__.__name__}: {exc}")
208
276
  continue
209
277
  diagnostic = "; ".join(load_errors) if load_errors else "artifact store not found next to trim wrapper"
210
278
  raise RuntimeError(diagnostic)
@@ -272,7 +340,13 @@ def store_sanitized_artifact_receipt(
272
340
  "metadata_file": meta_path.name,
273
341
  "scope": "sanitized_full_output",
274
342
  },
275
- "digest": artifact.build_digest(sanitized_text, artifact_id=artifact_id, redacted_lines=redacted_lines),
343
+ "digest": artifact.build_digest(
344
+ sanitized_text,
345
+ artifact_id=artifact_id,
346
+ redacted_lines=redacted_lines,
347
+ raw_dir=str(getattr(args, "artifact_dir", ".context-guard/artifacts")),
348
+ show_paths=bool(getattr(args, "show_paths", False)),
349
+ ),
276
350
  "retrieval": {
277
351
  "strategy": strategy,
278
352
  "deterministic": True,
@@ -282,13 +356,17 @@ def store_sanitized_artifact_receipt(
282
356
  content_type=content_type,
283
357
  strategy=strategy,
284
358
  total_lines=total_lines,
359
+ raw_dir=str(getattr(args, "artifact_dir", ".context-guard/artifacts")),
360
+ show_paths=bool(getattr(args, "show_paths", False)),
285
361
  ),
286
362
  },
287
363
  }
288
364
  artifact.shrink_digest_for_metadata_cap(metadata)
289
365
  artifact.write_private_text(content_path, sanitized_text)
290
366
  artifact.write_private_text(meta_path, artifact.metadata_json_text(metadata))
291
- receipt = artifact.receipt_for(metadata)
367
+ raw_artifact_dir = str(getattr(args, "artifact_dir", ".context-guard/artifacts"))
368
+ show_artifact_paths = bool(getattr(args, "show_paths", False))
369
+ receipt = artifact.receipt_for(metadata, raw_dir=raw_artifact_dir, show_paths=show_artifact_paths)
292
370
  query_line_cap = int(getattr(artifact, "MAX_QUERY_LINES", 5_000))
293
371
  query_char_cap = 1_000_000
294
372
  content_chars = len(sanitized_text)
@@ -301,21 +379,19 @@ def store_sanitized_artifact_receipt(
301
379
  "reason": "artifact query cap exceeded; use retrieval hints for exact slices",
302
380
  }
303
381
  if total_lines <= query_line_cap and content_chars <= query_char_cap:
304
- raw_artifact_dir = str(getattr(args, "artifact_dir", ".context-guard/artifacts"))
305
- dir_flags = ""
306
- if raw_artifact_dir != ".context-guard/artifacts":
307
- dir_flags = f" --dir {shlex.quote(raw_artifact_dir)}"
308
382
  line_flags = ""
309
383
  if total_lines > 0:
310
384
  line_flags = f" --lines 1:{total_lines} --max-lines {max(1, total_lines)}"
385
+ prefix = artifact.artifact_dir_cli_prefix(raw_artifact_dir, show_paths=show_artifact_paths)
311
386
  exact_reexpand = {
312
387
  "available": True,
313
388
  "scope": "sanitized_full_output",
314
389
  "sha256": content_sha,
315
390
  "bytes": content_bytes,
316
391
  "lines": total_lines,
392
+ "exact": artifact.artifact_dir_cli_is_exact(raw_artifact_dir, show_paths=show_artifact_paths),
317
393
  "cli": (
318
- f"context-guard-artifact{dir_flags} get {artifact_id}{line_flags} "
394
+ f"{prefix} get {artifact_id}{line_flags} "
319
395
  f"--max-chars {max(1, content_chars)}"
320
396
  ),
321
397
  }
@@ -323,23 +399,75 @@ def store_sanitized_artifact_receipt(
323
399
  return receipt
324
400
 
325
401
 
326
- def capture_sanitized_artifact_line(
327
- *,
328
- capture_enabled: bool,
329
- sanitized_line: str,
330
- artifact_lines: list[str],
331
- capture_bytes: int,
332
- capture_overflow: bool,
333
- max_bytes: int,
334
- ) -> tuple[int, bool]:
335
- if not capture_enabled or capture_overflow:
336
- return capture_bytes, capture_overflow
337
- source_bytes = len(sanitized_line.encode("utf-8", errors="replace"))
338
- if capture_bytes + source_bytes <= max_bytes:
339
- artifact_lines.append(sanitized_line)
340
- return capture_bytes + source_bytes, False
341
- artifact_lines.clear()
342
- return capture_bytes, True
402
class SanitizedArtifactCapture:
    """Spool sanitized output lines into a temporary file, up to a byte cap.

    Lines are UTF-8 encoded and appended to a lazily created
    ``tempfile.TemporaryFile``.  Once the running total would exceed
    ``max_bytes`` the capture flips ``overflow`` and drops the spool; the
    first ``OSError`` seen is remembered in ``error`` and later additions
    are ignored.  Usable as a context manager, which closes the spool on exit.
    """

    def __init__(self, *, enabled: bool, max_bytes: int) -> None:
        self.enabled = enabled
        self.max_bytes = max_bytes
        self.bytes = 0  # encoded bytes written so far
        self.overflow = False  # set once the cap would be exceeded
        self.error: str | None = None  # first OSError, as "ClassName: message"
        self._file: BinaryIO | None = None  # created on first successful add()

    def _ensure_file(self) -> BinaryIO | None:
        """Return the spool file, creating it on first use; ``None`` on failure."""
        if self._file is None:
            try:
                self._file = tempfile.TemporaryFile("w+b")
            except OSError as exc:
                self._record_error(exc)
                return None
        return self._file

    def _record_error(self, exc: OSError) -> None:
        """Remember *exc* unless an earlier error is already recorded."""
        if self.error is not None:
            return
        self.error = f"{exc.__class__.__name__}: {exc}"

    def add(self, sanitized_line: str) -> None:
        """Append one sanitized line, honouring the enabled/overflow/error state."""
        if self.overflow or self.error or not self.enabled:
            return
        encoded_line = sanitized_line.encode("utf-8", errors="replace")
        line_size = len(encoded_line)
        if self.bytes + line_size > self.max_bytes:
            # Over the cap: mark overflow and discard what was captured.
            self.overflow = True
            self.close()
            return
        spool = self._ensure_file()
        if spool is None:
            return
        try:
            spool.write(encoded_line)
        except OSError as exc:
            self._record_error(exc)
            self.close()
        else:
            self.bytes += line_size

    def text(self) -> str:
        """Return everything captured so far, or ``""`` if nothing is spooled."""
        spool = self._file
        if spool is None:
            return ""
        try:
            spool.flush()
            spool.seek(0)
            raw = spool.read()
        except OSError as exc:
            self._record_error(exc)
            self.close()
            return ""
        return raw.decode("utf-8", errors="replace")

    def close(self) -> None:
        """Close and forget the spool file; safe to call repeatedly."""
        spool, self._file = self._file, None
        if spool is None:
            return
        try:
            spool.close()
        except OSError as exc:
            self._record_error(exc)

    def __enter__(self) -> "SanitizedArtifactCapture":
        return self

    def __exit__(self, *exc: object) -> None:
        self.close()
343
471
 
344
472
 
345
473
  def unique_keep_order(lines: Iterable[str]) -> list[str]:
@@ -720,11 +848,33 @@ def build_digest_payload(
720
848
 
721
849
 
722
850
  def markdown_artifact_receipt_lines(artifact_receipt: dict[str, object]) -> list[str]:
851
+ sandbox = artifact_receipt.get("output_sandbox")
852
+ handle = None
853
+ rehydrate = None
854
+ if isinstance(sandbox, dict):
855
+ raw_handle = sandbox.get("handle")
856
+ if isinstance(raw_handle, str):
857
+ handle = raw_handle
858
+ rehydration = sandbox.get("rehydration")
859
+ commands = rehydration.get("commands") if isinstance(rehydration, dict) else None
860
+ if isinstance(commands, list):
861
+ for command in commands:
862
+ if isinstance(command, dict) and command.get("type") != "metadata" and isinstance(command.get("cli"), str):
863
+ rehydrate = command["cli"]
864
+ break
865
+ if rehydrate is None:
866
+ for command in commands:
867
+ if isinstance(command, dict) and isinstance(command.get("cli"), str):
868
+ rehydrate = command["cli"]
869
+ break
723
870
  lines = [
724
871
  "- artifact_receipt: "
725
872
  f"stored={str(artifact_receipt.get('stored')).lower()} "
726
- f"id={artifact_receipt.get('artifact_id') or artifact_receipt.get('error')}\n"
873
+ f"id={artifact_receipt.get('artifact_id') or artifact_receipt.get('error')}"
874
+ f"{(' handle=' + handle) if handle else ''}\n"
727
875
  ]
876
+ if rehydrate:
877
+ lines.append(f"- rehydrate: `{rehydrate}`\n")
728
878
  exact = artifact_receipt.get("exact_reexpand")
729
879
  if isinstance(exact, dict) and exact.get("cli"):
730
880
  lines.append(f"- exact_reexpand: `{exact.get('cli')}`\n")
@@ -742,12 +892,16 @@ def compact_markdown_artifact_receipt(payload: dict[str, object], max_chars: int
742
892
 
743
893
  artifact_id = artifact_receipt.get("artifact_id") or artifact_receipt.get("error")
744
894
  stored = str(artifact_receipt.get("stored")).lower()
895
+ sandbox = artifact_receipt.get("output_sandbox")
896
+ handle = sandbox.get("handle") if isinstance(sandbox, dict) and isinstance(sandbox.get("handle"), str) else None
745
897
  exact = artifact_receipt.get("exact_reexpand")
746
898
  exact_available = ""
747
899
  if isinstance(exact, dict) and "available" in exact:
748
900
  exact_available = f" exact_available={str(exact.get('available')).lower()}"
749
901
 
750
902
  candidates = [
903
+ f"- artifact_receipt: stored={stored} id={artifact_id}{(' handle=' + handle) if handle else ''}{exact_available}; use output_sandbox.rehydration for exact slices\n",
904
+ f"- artifact_receipt: stored={stored} id={artifact_id}{(' handle=' + handle) if handle else ''}{exact_available}\n",
751
905
  f"- artifact_receipt: stored={stored} id={artifact_id}{exact_available}; raise --max-chars for full exact_reexpand\n",
752
906
  f"- artifact_receipt: stored={stored} id={artifact_id}{exact_available}\n",
753
907
  f"- artifact_receipt: id={artifact_id}\n",
@@ -905,15 +1059,67 @@ def render_digest_json(payload: dict[str, object], max_chars: int) -> str:
905
1059
  for key in ("scope", "bytes", "lines", "sha256")
906
1060
  if key in stored_output
907
1061
  }
1062
+ sandbox = artifact_receipt.get("output_sandbox")
1063
+ if isinstance(sandbox, dict):
1064
+ compact_sandbox: dict[str, object] = {
1065
+ key: sandbox[key]
1066
+ for key in ("schema_version", "mode", "handle", "artifact_id")
1067
+ if key in sandbox
1068
+ }
1069
+ rehydration = sandbox.get("rehydration")
1070
+ if isinstance(rehydration, dict):
1071
+ commands = rehydration.get("commands")
1072
+ if isinstance(commands, list):
1073
+ kept_commands = [
1074
+ command
1075
+ for command in commands
1076
+ if isinstance(command, dict) and isinstance(command.get("cli"), str)
1077
+ ][:2]
1078
+ compact_sandbox["rehydration"] = {
1079
+ "commands": kept_commands,
1080
+ "exact_commands": rehydration.get("exact_commands"),
1081
+ "dir_argument": rehydration.get("dir_argument"),
1082
+ }
1083
+ compact["output_sandbox"] = compact_sandbox
908
1084
  exact = artifact_receipt.get("exact_reexpand")
909
1085
  if include_exact_reexpand and isinstance(exact, dict):
910
1086
  compact["exact_reexpand"] = {
911
1087
  key: exact[key]
912
- for key in ("available", "scope", "sha256", "bytes", "lines", "cli", "reason")
1088
+ for key in ("available", "scope", "sha256", "bytes", "lines", "exact", "cli", "reason")
913
1089
  if key in exact
914
1090
  }
915
1091
  return compact
916
1092
 
1093
def tiny_artifact_receipt() -> dict[str, object] | None:
    """Build the smallest receipt variant from ``payload["artifact_receipt"]``.

    Keeps only ``stored``/``artifact_id``/``error`` plus, when an
    ``output_sandbox`` dict is present, its string ``handle`` and the first
    rehydration command that carries a string ``cli``.  Returns ``None``
    when the payload has no dict-valued artifact receipt.
    """
    receipt = payload.get("artifact_receipt")
    if not isinstance(receipt, dict):
        return None

    compact: dict[str, object] = {
        field: receipt[field]
        for field in ("stored", "artifact_id", "error")
        if field in receipt
    }

    sandbox = receipt.get("output_sandbox")
    if not isinstance(sandbox, dict):
        return compact

    tiny_sandbox: dict[str, object] = {}
    handle = sandbox.get("handle")
    if isinstance(handle, str):
        tiny_sandbox["handle"] = handle

    rehydration = sandbox.get("rehydration")
    commands = rehydration.get("commands") if isinstance(rehydration, dict) else None
    if isinstance(commands, list):
        # Only the first usable command is kept to stay within the size budget.
        first_usable = next(
            (
                entry
                for entry in commands
                if isinstance(entry, dict) and isinstance(entry.get("cli"), str)
            ),
            None,
        )
        if first_usable is not None:
            tiny_sandbox["rehydration"] = {
                "commands": [{
                    "type": first_usable.get("type"),
                    "cli": first_usable.get("cli"),
                }]
            }

    if tiny_sandbox:
        compact["output_sandbox"] = tiny_sandbox
    return compact
1122
+
917
1123
  def attach_artifact_receipt(candidate: dict[str, object], artifact_receipt: dict[str, object] | None) -> dict[str, object]:
918
1124
  if artifact_receipt is not None:
919
1125
  candidate["artifact_receipt"] = artifact_receipt
@@ -955,6 +1161,7 @@ def render_digest_json(payload: dict[str, object], max_chars: int) -> str:
955
1161
  }
956
1162
  compact_receipt = compact_artifact_receipt(include_exact_reexpand=True)
957
1163
  minimal_receipt = compact_artifact_receipt(include_exact_reexpand=False)
1164
+ tiny_receipt = tiny_artifact_receipt()
958
1165
 
959
1166
  return first_fitting(
960
1167
  [
@@ -1004,6 +1211,15 @@ def render_digest_json(payload: dict[str, object], max_chars: int) -> str:
1004
1211
  },
1005
1212
  minimal_receipt,
1006
1213
  ),
1214
+ attach_artifact_receipt(
1215
+ {
1216
+ "digest_capped": True,
1217
+ "status": payload.get("status"),
1218
+ "exit_code": payload.get("exit_code"),
1219
+ "timed_out": payload.get("timed_out"),
1220
+ },
1221
+ tiny_receipt,
1222
+ ),
1007
1223
  {"digest_capped": True},
1008
1224
  ]
1009
1225
  )
@@ -1081,14 +1297,16 @@ def terminate_process_tree(
1081
1297
  class TimedCommandStream:
1082
1298
  def __init__(
1083
1299
  self,
1084
- proc: subprocess.Popen[str],
1085
- stdout: Iterable[str],
1300
+ proc: subprocess.Popen[bytes],
1301
+ stdout: BinaryIO,
1086
1302
  *,
1087
1303
  timeout_seconds: int,
1304
+ max_line_chars: int = MAX_LINE_CHARS_LIMIT,
1088
1305
  process_group_id: int | None = None,
1089
1306
  ) -> None:
1090
1307
  self.proc = proc
1091
1308
  self.timeout_seconds = timeout_seconds
1309
+ self.max_unterminated_line_chars = max(1, max_line_chars)
1092
1310
  self.process_group_id = process_group_id
1093
1311
  self.deadline = time.monotonic() + timeout_seconds
1094
1312
  self.timed_out = False
@@ -1098,10 +1316,62 @@ class TimedCommandStream:
1098
1316
  self._thread = threading.Thread(target=self._read_stdout, args=(stdout,), daemon=True)
1099
1317
  self._thread.start()
1100
1318
 
1101
- def _read_stdout(self, stdout: Iterable[str]) -> None:
1319
def _truncated_raw_line(self, text: str) -> str:
    """Return the kept prefix of an oversized raw line plus a truncation marker.

    The prefix is shortened by a holdback of up to
    ``RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS`` characters (never more than
    the line limit itself); the marker reports both the limit and the
    number of withheld characters and ends with a newline.
    """
    limit = self.max_unterminated_line_chars
    withheld = min(RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS, limit)
    kept_prefix = text[: max(0, limit - withheld)]
    marker = (
        "...[context-guard-kit: raw line truncated before newline "
        f"after {limit} chars; "
        f"withheld {withheld} boundary chars for redaction safety]\n"
    )
    return kept_prefix + marker
1330
+
1331
def _read_stdout(self, stdout: BinaryIO) -> None:
    """Pump the child's byte stream into the line queue (reader-thread target).

    Reads *stdout* in chunks of ``COMMAND_READ_CHUNK_BYTES``, decodes them
    incrementally as UTF-8 (invalid bytes replaced) and queues one string
    per newline-terminated line.  A line longer than
    ``self.max_unterminated_line_chars`` is queued once in truncated form
    and the rest of it is discarded up to the next newline.  A trailing
    partial line is queued at EOF unless it is the tail of a discarded
    oversized line.  The stream is always marked closed and ``_STREAM_END``
    queued, even if reading raises.
    """
    decoder = codecs.getincrementaldecoder("utf-8")("replace")
    limit = self.max_unterminated_line_chars
    pending = ""
    discarding_oversized_line = False

    def feed(text: str) -> None:
        """Split newly decoded text into queued lines, carrying the remainder."""
        nonlocal pending, discarding_oversized_line
        if not text:
            return
        buffer = pending + text
        pending = ""
        start = 0
        end = len(buffer)
        # Scan with a cursor and slice the leftover once, rather than
        # re-slicing the whole buffer after every line.
        while start < end:
            newline_index = buffer.find("\n", start)
            if discarding_oversized_line:
                if newline_index == -1:
                    # Still inside the oversized line: drop everything.
                    return
                start = newline_index + 1
                discarding_oversized_line = False
                continue

            if newline_index != -1:
                line = buffer[start : newline_index + 1]
                if newline_index - start > limit:
                    line = self._truncated_raw_line(line)
                self._queue.put(line)
                start = newline_index + 1
                continue

            if end - start > limit:
                # No newline yet and already too long: emit the truncated
                # form now and skip input until the line finally ends.
                self._queue.put(self._truncated_raw_line(buffer[start:]))
                discarding_oversized_line = True
            else:
                pending = buffer[start:]
            return

    try:
        while True:
            chunk = stdout.read(COMMAND_READ_CHUNK_BYTES)
            if not chunk:
                break
            feed(decoder.decode(chunk, final=False))
        # Flush any bytes the decoder was holding for an incomplete sequence.
        feed(decoder.decode(b"", final=True))
        if pending and not discarding_oversized_line:
            self._queue.put(pending)
    finally:
        self._stream_closed = True
        self._queue.put(_STREAM_END)
@@ -1242,6 +1512,18 @@ def main() -> int:
1242
1512
  if args.artifact_receipt and args.digest == "off":
1243
1513
  print("trim_command_output.py: --artifact-receipt requires --digest markdown or --digest json", file=sys.stderr)
1244
1514
  return 2
1515
+ if args.artifact_receipt:
1516
+ try:
1517
+ load_artifact_store_module()
1518
+ except UnsafeAdjacentModuleError as exc:
1519
+ print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
1520
+ return 2
1521
+ except Exception:
1522
+ # Missing/broken artifact helpers are reported in the digest payload as
1523
+ # artifact_receipt_unavailable for backward compatibility. Integrity
1524
+ # failures above are different: they indicate an adjacent helper exists
1525
+ # but cannot be safely trusted, so they fail closed.
1526
+ pass
1245
1527
 
1246
1528
  command = args.command
1247
1529
  if command and command[0] == "--":
@@ -1250,6 +1532,12 @@ def main() -> int:
1250
1532
  print("trim_command_output.py: missing command", file=sys.stderr)
1251
1533
  return 2
1252
1534
 
1535
+ try:
1536
+ line_sanitizer = load_line_sanitizer(args.show_paths)
1537
+ except UnsafeAdjacentModuleError as exc:
1538
+ print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
1539
+ return 2
1540
+
1253
1541
  popen_kwargs: dict[str, object] = {}
1254
1542
  if os.name != "nt":
1255
1543
  popen_kwargs["start_new_session"] = True
@@ -1258,9 +1546,8 @@ def main() -> int:
1258
1546
  command,
1259
1547
  stdout=subprocess.PIPE,
1260
1548
  stderr=subprocess.STDOUT,
1261
- text=True,
1262
- bufsize=1,
1263
- errors="replace",
1549
+ text=False,
1550
+ bufsize=0,
1264
1551
  **popen_kwargs,
1265
1552
  )
1266
1553
  except OSError as exc:
@@ -1276,20 +1563,19 @@ def main() -> int:
1276
1563
  visible_chars = 0
1277
1564
  any_line_capped = False
1278
1565
  runner_summary = RunnerFailureSummary(args.runner_summary_items, show_paths=args.show_paths)
1279
- line_sanitizer = load_line_sanitizer(args.show_paths)
1280
1566
  duplicate_tracker = DuplicateLineTracker()
1281
1567
  redacted_lines = 0
1282
- artifact_lines: list[str] = []
1283
- artifact_capture_bytes = 0
1284
- artifact_capture_overflow = False
1568
+ artifact_capture = SanitizedArtifactCapture(enabled=args.artifact_receipt, max_bytes=args.artifact_max_bytes)
1285
1569
 
1286
1570
  if proc.stdout is None:
1571
+ artifact_capture.close()
1287
1572
  print("trim_command_output.py: subprocess produced no stdout pipe", file=sys.stderr)
1288
1573
  return 1
1289
1574
  command_stream = TimedCommandStream(
1290
1575
  proc,
1291
1576
  proc.stdout,
1292
1577
  timeout_seconds=args.timeout_seconds,
1578
+ max_line_chars=COMMAND_MAX_UNTERMINATED_LINE_CHARS,
1293
1579
  process_group_id=process_group_id_for(proc),
1294
1580
  )
1295
1581
  for line in command_stream:
@@ -1298,14 +1584,7 @@ def main() -> int:
1298
1584
  visible_source, redacted = line_sanitizer.sanitize(line) # type: ignore[attr-defined]
1299
1585
  if redacted:
1300
1586
  redacted_lines += 1
1301
- artifact_capture_bytes, artifact_capture_overflow = capture_sanitized_artifact_line(
1302
- capture_enabled=args.artifact_receipt,
1303
- sanitized_line=visible_source,
1304
- artifact_lines=artifact_lines,
1305
- capture_bytes=artifact_capture_bytes,
1306
- capture_overflow=artifact_capture_overflow,
1307
- max_bytes=args.artifact_max_bytes,
1308
- )
1587
+ artifact_capture.add(visible_source)
1309
1588
  visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
1310
1589
  any_line_capped = any_line_capped or line_capped
1311
1590
  visible_chars += len(visible_line)
@@ -1328,14 +1607,7 @@ def main() -> int:
1328
1607
  visible_source, redacted = line_sanitizer.sanitize(line) # type: ignore[attr-defined]
1329
1608
  if redacted:
1330
1609
  redacted_lines += 1
1331
- artifact_capture_bytes, artifact_capture_overflow = capture_sanitized_artifact_line(
1332
- capture_enabled=args.artifact_receipt,
1333
- sanitized_line=visible_source,
1334
- artifact_lines=artifact_lines,
1335
- capture_bytes=artifact_capture_bytes,
1336
- capture_overflow=artifact_capture_overflow,
1337
- max_bytes=args.artifact_max_bytes,
1338
- )
1610
+ artifact_capture.add(visible_source)
1339
1611
  visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
1340
1612
  any_line_capped = any_line_capped or line_capped
1341
1613
  visible_chars += len(visible_line)
@@ -1368,33 +1640,64 @@ def main() -> int:
1368
1640
  duplicate_line_groups=duplicate_tracker.as_list(),
1369
1641
  )
1370
1642
  if args.artifact_receipt:
1371
- if artifact_capture_overflow:
1643
+ if artifact_capture.overflow:
1372
1644
  payload["artifact_receipt"] = {
1373
1645
  "stored": False,
1374
1646
  "error": "sanitized_output_exceeds_artifact_max_bytes",
1375
1647
  "max_bytes": args.artifact_max_bytes,
1376
1648
  "exact_reexpand": {"available": False, "reason": "artifact size cap exceeded"},
1377
1649
  }
1650
+ elif artifact_capture.error:
1651
+ payload["artifact_receipt"] = {
1652
+ "stored": False,
1653
+ "error": "artifact_receipt_capture_unavailable",
1654
+ "reason": artifact_capture.error,
1655
+ "exact_reexpand": {"available": False, "reason": "artifact receipt capture unavailable"},
1656
+ }
1378
1657
  else:
1379
- try:
1380
- payload["artifact_receipt"] = store_sanitized_artifact_receipt(
1381
- sanitized_text="".join(artifact_lines),
1382
- command=command,
1383
- args=args,
1384
- line_sanitizer=line_sanitizer,
1385
- redacted_lines=redacted_lines,
1386
- )
1387
- except Exception as exc:
1658
+ sanitized_artifact_text = artifact_capture.text()
1659
+ if artifact_capture.error:
1388
1660
  payload["artifact_receipt"] = {
1389
1661
  "stored": False,
1390
- "error": "artifact_receipt_unavailable",
1391
- "reason": f"{exc.__class__.__name__}: {exc}",
1392
- "exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
1662
+ "error": "artifact_receipt_capture_unavailable",
1663
+ "reason": artifact_capture.error,
1664
+ "exact_reexpand": {"available": False, "reason": "artifact receipt capture unavailable"},
1393
1665
  }
1666
+ else:
1667
+ try:
1668
+ payload["artifact_receipt"] = store_sanitized_artifact_receipt(
1669
+ sanitized_text=sanitized_artifact_text,
1670
+ command=command,
1671
+ args=args,
1672
+ line_sanitizer=line_sanitizer,
1673
+ redacted_lines=redacted_lines,
1674
+ )
1675
+ except UnsafeAdjacentModuleError as exc:
1676
+ artifact_capture.close()
1677
+ print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
1678
+ return 2
1679
+ except Exception as exc:
1680
+ payload["artifact_receipt"] = {
1681
+ "stored": False,
1682
+ "error": "artifact_receipt_unavailable",
1683
+ "reason": f"{exc.__class__.__name__}: {exc}",
1684
+ "exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
1685
+ }
1686
+ artifact_receipt = payload.get("artifact_receipt")
1687
+ if isinstance(artifact_receipt, dict) and artifact_receipt.get("stored"):
1688
+ next_queries = payload.setdefault("next_queries", [])
1689
+ if isinstance(next_queries, list):
1690
+ guidance = (
1691
+ "Use artifact_receipt.output_sandbox.rehydration commands for exact sanitized slices "
1692
+ "before rerunning the broad command or requesting full raw output."
1693
+ )
1694
+ if guidance not in next_queries:
1695
+ next_queries.insert(0, guidance)
1394
1696
  if args.digest == "json":
1395
1697
  sys.stdout.write(render_digest_json(payload, args.max_chars))
1396
1698
  else:
1397
1699
  sys.stdout.write(render_digest_markdown(payload, args.max_chars))
1700
+ artifact_capture.close()
1398
1701
  return rc
1399
1702
 
1400
1703
  if total <= args.max_lines and visible_chars <= args.max_chars and not any_line_capped:
@@ -1442,6 +1745,7 @@ def main() -> int:
1442
1745
  output += "[context-guard-kit] final summary was capped by --max-chars.\n"
1443
1746
  sys.stdout.write(output)
1444
1747
 
1748
+ artifact_capture.close()
1445
1749
  return rc
1446
1750
 
1447
1751