@ictechgy/context-guard 0.4.10 → 0.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -1
- package/README.ko.md +46 -28
- package/README.md +42 -33
- package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
- package/docs/benchmark-workflow-examples.md +3 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
- package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
- package/docs/experimental-benchmark-fixtures.md +24 -7
- package/package.json +2 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +14 -11
- package/plugins/context-guard/README.md +15 -14
- package/plugins/context-guard/bin/context-guard +48 -17
- package/plugins/context-guard/bin/context-guard-artifact +342 -33
- package/plugins/context-guard/bin/context-guard-audit +36 -5
- package/plugins/context-guard/bin/context-guard-bench +1675 -44
- package/plugins/context-guard/bin/context-guard-cache-score +347 -35
- package/plugins/context-guard/bin/context-guard-compress +89 -27
- package/plugins/context-guard/bin/context-guard-cost +7 -2
- package/plugins/context-guard/bin/context-guard-experiments +364 -8
- package/plugins/context-guard/bin/context-guard-failed-nudge +6 -2
- package/plugins/context-guard/bin/context-guard-filter +88 -18
- package/plugins/context-guard/bin/context-guard-pack +329 -19
- package/plugins/context-guard/bin/context-guard-read-symbol +27 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +245 -18
- package/plugins/context-guard/bin/context-guard-setup +21 -5
- package/plugins/context-guard/bin/context-guard-tool-prune +287 -62
- package/plugins/context-guard/bin/context-guard-trim-output +394 -90
- package/plugins/context-guard/brief/README.md +5 -5
- package/plugins/context-guard/lib/context_guard_command_manifest_loader.py +123 -0
- package/plugins/context-guard/lib/context_guard_commands.py +217 -190
|
@@ -7,10 +7,9 @@ lines into the conversation while preserving the lines most likely to be useful.
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
9
|
import argparse
|
|
10
|
+
import codecs
|
|
10
11
|
import collections
|
|
11
12
|
import hashlib
|
|
12
|
-
import importlib.machinery
|
|
13
|
-
import importlib.util
|
|
14
13
|
import json
|
|
15
14
|
import os
|
|
16
15
|
from pathlib import Path, PurePosixPath
|
|
@@ -18,11 +17,14 @@ import queue
|
|
|
18
17
|
import re
|
|
19
18
|
import shlex
|
|
20
19
|
import signal
|
|
20
|
+
import stat
|
|
21
21
|
import subprocess
|
|
22
22
|
import sys
|
|
23
|
+
import tempfile
|
|
23
24
|
import threading
|
|
24
25
|
import time
|
|
25
|
-
|
|
26
|
+
import types
|
|
27
|
+
from typing import BinaryIO, Iterable, Iterator
|
|
26
28
|
|
|
27
29
|
MAX_SUMMARY_ITEM_CHARS = 500
|
|
28
30
|
MAX_LINES_LIMIT = 5_000
|
|
@@ -35,6 +37,10 @@ MAX_TIMEOUT_SECONDS = 86_400
|
|
|
35
37
|
TIMEOUT_EXIT_CODE = 124
|
|
36
38
|
DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES = 10_000_000
|
|
37
39
|
MAX_ARTIFACT_RECEIPT_MAX_BYTES = 100_000_000
|
|
40
|
+
COMMAND_READ_CHUNK_BYTES = 64 * 1024
|
|
41
|
+
COMMAND_MAX_UNTERMINATED_LINE_CHARS = 4_096
|
|
42
|
+
RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS = 1_024
|
|
43
|
+
MAX_DYNAMIC_SIBLING_MODULE_BYTES = 2_000_000
|
|
38
44
|
|
|
39
45
|
|
|
40
46
|
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
|
|
@@ -132,6 +138,10 @@ def anonymize_absolute_paths(text: str) -> str:
|
|
|
132
138
|
return ABSOLUTE_PATH_RE.sub(repl, text)
|
|
133
139
|
|
|
134
140
|
|
|
141
|
+
class UnsafeAdjacentModuleError(RuntimeError):
    """Adjacent helper exists but cannot be trusted for dynamic loading."""
|
|
143
|
+
|
|
144
|
+
|
|
135
145
|
class FallbackLineSanitizer:
|
|
136
146
|
def __init__(self, *, show_paths: bool = False, diagnostic: str | None = None) -> None:
|
|
137
147
|
self.show_paths = show_paths
|
|
@@ -159,24 +169,82 @@ class FallbackLineSanitizer:
|
|
|
159
169
|
return line, redacted
|
|
160
170
|
|
|
161
171
|
|
|
172
|
+
def no_follow_file_flags() -> int:
    """Return ``os.open`` flags for a read-only open that refuses symlinks.

    The result is ``O_RDONLY | O_NOFOLLOW``, with ``O_CLOEXEC`` OR-ed in on
    platforms whose ``os`` module defines it.

    Raises:
        UnsafeAdjacentModuleError: if the ``os`` module does not define
            ``O_NOFOLLOW`` on this platform.
    """
    if not hasattr(os, "O_NOFOLLOW"):
        raise UnsafeAdjacentModuleError("O_NOFOLLOW is required for adjacent helper loads")
    # A missing O_CLOEXEC contributes 0, which leaves the flag set unchanged.
    return os.O_RDONLY | os.O_NOFOLLOW | getattr(os, "O_CLOEXEC", 0)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def no_follow_dir_flags() -> int:
    """Return ``os.open`` flags for opening a directory without following symlinks.

    Starts from ``no_follow_file_flags()`` and adds ``O_DIRECTORY`` on
    platforms whose ``os`` module defines it.

    Raises:
        UnsafeAdjacentModuleError: propagated from ``no_follow_file_flags()``
            when ``O_NOFOLLOW`` is unavailable.
    """
    # A missing O_DIRECTORY contributes 0, which leaves the flag set unchanged.
    return no_follow_file_flags() | getattr(os, "O_DIRECTORY", 0)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def read_adjacent_module_source(script_dir: Path, name: str, *, max_bytes: int) -> str | None:
    """Read the source text of a helper file that sits in ``script_dir``.

    The directory is opened first and the file is then opened relative to
    that descriptor, both with no-follow flags, so neither the final
    directory component nor the helper itself may be a symlink.

    Args:
        script_dir: Directory expected to contain the helper.
        name: Bare file name of the helper (no path separators).
        max_bytes: Largest helper size, in bytes, that will be accepted.

    Returns:
        The helper's contents decoded as UTF-8 (invalid bytes replaced), or
        ``None`` when no file called ``name`` exists in ``script_dir``.

    Raises:
        RuntimeError: if ``name`` is empty, ``.``/``..``, or contains a
            path separator.
        UnsafeAdjacentModuleError: if the directory or file cannot be opened
            without following symlinks, the file is not a regular file, or
            it is larger than ``max_bytes``.
    """
    if name in {"", ".", ".."} or "/" in name or os.sep in name:
        raise RuntimeError(f"invalid adjacent helper name: {name!r}")
    try:
        dir_fd = os.open(str(script_dir), no_follow_dir_flags())
    except OSError as exc:
        raise UnsafeAdjacentModuleError(f"could not inspect helper directory: {exc}") from exc
    try:
        try:
            fd = os.open(name, no_follow_file_flags(), dir_fd=dir_fd)
        except FileNotFoundError:
            return None
        except OSError as exc:
            raise UnsafeAdjacentModuleError(f"{name} could not be opened without following symlinks: {exc}") from exc
        try:
            st = os.fstat(fd)
            if not stat.S_ISREG(st.st_mode):
                raise UnsafeAdjacentModuleError(f"{name} is not a regular helper file")
            if st.st_size > max_bytes:
                raise UnsafeAdjacentModuleError(f"{name} exceeds helper size cap: {st.st_size} > {max_bytes}")
            # os.read may return fewer bytes than requested, so keep reading
            # until EOF or until one byte past the cap has been collected.
            # The extra byte lets the check below notice a file that grew
            # after the fstat() size check.
            chunks: list[bytes] = []
            remaining = max_bytes + 1
            while remaining > 0:
                chunk = os.read(fd, remaining)
                if not chunk:
                    break
                chunks.append(chunk)
                remaining -= len(chunk)
            data = b"".join(chunks)
        finally:
            os.close(fd)
    finally:
        os.close(dir_fd)
    if len(data) > max_bytes:
        raise UnsafeAdjacentModuleError(f"{name} exceeds helper size cap: > {max_bytes}")
    return data.decode("utf-8", errors="replace")
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def load_adjacent_python_module(script_dir: Path, name: str, *, module_prefix: str) -> object | None:
    """Compile and execute a sibling helper file as a fresh module object.

    The source is obtained through ``read_adjacent_module_source`` with the
    ``MAX_DYNAMIC_SIBLING_MODULE_BYTES`` cap. The resulting module is not
    registered in ``sys.modules``; it is only returned to the caller.

    Args:
        script_dir: Directory expected to contain the helper.
        name: Bare file name of the helper.
        module_prefix: Prefix for the synthetic module name, which is then
            suffixed with the current PID and a short hash of ``name``.

    Returns:
        The executed module, or ``None`` when the helper file is absent.

    Raises:
        Whatever ``read_adjacent_module_source`` raises, plus any exception
        raised while compiling or executing the helper's source.
    """
    source = read_adjacent_module_source(script_dir, name, max_bytes=MAX_DYNAMIC_SIBLING_MODULE_BYTES)
    if source is None:
        return None
    name_digest = hashlib.sha256(name.encode("utf-8")).hexdigest()[:12]
    module_name = f"{module_prefix}_{os.getpid()}_{name_digest}"
    module_path = str(script_dir / name)
    module = types.ModuleType(module_name)
    module.__file__ = module_path
    module.__package__ = ""
    # NOTE(review): this executes file contents. The only safeguards are the
    # no-follow open, regular-file check and size cap applied when reading.
    exec(compile(source, module_path, "exec"), module.__dict__)
    return module
|
|
228
|
+
|
|
229
|
+
|
|
162
230
|
def load_line_sanitizer(show_paths: bool) -> object:
|
|
163
231
|
"""Reuse the stronger sanitizer when it is shipped next to this wrapper."""
|
|
164
|
-
script_dir =
|
|
232
|
+
script_dir = Path(__file__).resolve().parent
|
|
165
233
|
load_errors: list[str] = []
|
|
166
234
|
for name in ("sanitize_output.py", "context-guard-sanitize-output"):
|
|
167
|
-
candidate = os.path.join(script_dir, name)
|
|
168
|
-
if not os.path.exists(candidate):
|
|
169
|
-
continue
|
|
170
235
|
try:
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
236
|
+
module = load_adjacent_python_module(
|
|
237
|
+
script_dir,
|
|
238
|
+
name,
|
|
239
|
+
module_prefix="_claude_token_sanitize",
|
|
240
|
+
)
|
|
241
|
+
if module is None:
|
|
174
242
|
continue
|
|
175
|
-
module = importlib.util.module_from_spec(spec)
|
|
176
|
-
loader.exec_module(module)
|
|
177
243
|
return module.LineSanitizer(show_paths=show_paths)
|
|
244
|
+
except UnsafeAdjacentModuleError:
|
|
245
|
+
raise
|
|
178
246
|
except Exception as exc:
|
|
179
|
-
load_errors.append(f"{
|
|
247
|
+
load_errors.append(f"{name} failed to load: {exc.__class__.__name__}: {exc}")
|
|
180
248
|
continue
|
|
181
249
|
diagnostic = "; ".join(load_errors) if load_errors else "strong sanitizer not found next to trim wrapper"
|
|
182
250
|
return FallbackLineSanitizer(show_paths=show_paths, diagnostic=diagnostic)
|
|
@@ -189,22 +257,22 @@ def load_artifact_store_module() -> object:
|
|
|
189
257
|
wrapper must resolve both source-tree (`context_escrow.py`) and packaged
|
|
190
258
|
(`context-guard-artifact`) names.
|
|
191
259
|
"""
|
|
192
|
-
script_dir =
|
|
260
|
+
script_dir = Path(__file__).resolve().parent
|
|
193
261
|
load_errors: list[str] = []
|
|
194
262
|
for name in ("context_escrow.py", "context-guard-artifact", "claude-token-artifact"):
|
|
195
|
-
candidate = os.path.join(script_dir, name)
|
|
196
|
-
if not os.path.exists(candidate):
|
|
197
|
-
continue
|
|
198
263
|
try:
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
264
|
+
module = load_adjacent_python_module(
|
|
265
|
+
script_dir,
|
|
266
|
+
name,
|
|
267
|
+
module_prefix="_context_guard_artifact",
|
|
268
|
+
)
|
|
269
|
+
if module is None:
|
|
202
270
|
continue
|
|
203
|
-
module = importlib.util.module_from_spec(spec)
|
|
204
|
-
loader.exec_module(module)
|
|
205
271
|
return module
|
|
272
|
+
except UnsafeAdjacentModuleError:
|
|
273
|
+
raise
|
|
206
274
|
except Exception as exc:
|
|
207
|
-
load_errors.append(f"{
|
|
275
|
+
load_errors.append(f"{name} failed to load: {exc.__class__.__name__}: {exc}")
|
|
208
276
|
continue
|
|
209
277
|
diagnostic = "; ".join(load_errors) if load_errors else "artifact store not found next to trim wrapper"
|
|
210
278
|
raise RuntimeError(diagnostic)
|
|
@@ -272,7 +340,13 @@ def store_sanitized_artifact_receipt(
|
|
|
272
340
|
"metadata_file": meta_path.name,
|
|
273
341
|
"scope": "sanitized_full_output",
|
|
274
342
|
},
|
|
275
|
-
"digest": artifact.build_digest(
|
|
343
|
+
"digest": artifact.build_digest(
|
|
344
|
+
sanitized_text,
|
|
345
|
+
artifact_id=artifact_id,
|
|
346
|
+
redacted_lines=redacted_lines,
|
|
347
|
+
raw_dir=str(getattr(args, "artifact_dir", ".context-guard/artifacts")),
|
|
348
|
+
show_paths=bool(getattr(args, "show_paths", False)),
|
|
349
|
+
),
|
|
276
350
|
"retrieval": {
|
|
277
351
|
"strategy": strategy,
|
|
278
352
|
"deterministic": True,
|
|
@@ -282,13 +356,17 @@ def store_sanitized_artifact_receipt(
|
|
|
282
356
|
content_type=content_type,
|
|
283
357
|
strategy=strategy,
|
|
284
358
|
total_lines=total_lines,
|
|
359
|
+
raw_dir=str(getattr(args, "artifact_dir", ".context-guard/artifacts")),
|
|
360
|
+
show_paths=bool(getattr(args, "show_paths", False)),
|
|
285
361
|
),
|
|
286
362
|
},
|
|
287
363
|
}
|
|
288
364
|
artifact.shrink_digest_for_metadata_cap(metadata)
|
|
289
365
|
artifact.write_private_text(content_path, sanitized_text)
|
|
290
366
|
artifact.write_private_text(meta_path, artifact.metadata_json_text(metadata))
|
|
291
|
-
|
|
367
|
+
raw_artifact_dir = str(getattr(args, "artifact_dir", ".context-guard/artifacts"))
|
|
368
|
+
show_artifact_paths = bool(getattr(args, "show_paths", False))
|
|
369
|
+
receipt = artifact.receipt_for(metadata, raw_dir=raw_artifact_dir, show_paths=show_artifact_paths)
|
|
292
370
|
query_line_cap = int(getattr(artifact, "MAX_QUERY_LINES", 5_000))
|
|
293
371
|
query_char_cap = 1_000_000
|
|
294
372
|
content_chars = len(sanitized_text)
|
|
@@ -301,21 +379,19 @@ def store_sanitized_artifact_receipt(
|
|
|
301
379
|
"reason": "artifact query cap exceeded; use retrieval hints for exact slices",
|
|
302
380
|
}
|
|
303
381
|
if total_lines <= query_line_cap and content_chars <= query_char_cap:
|
|
304
|
-
raw_artifact_dir = str(getattr(args, "artifact_dir", ".context-guard/artifacts"))
|
|
305
|
-
dir_flags = ""
|
|
306
|
-
if raw_artifact_dir != ".context-guard/artifacts":
|
|
307
|
-
dir_flags = f" --dir {shlex.quote(raw_artifact_dir)}"
|
|
308
382
|
line_flags = ""
|
|
309
383
|
if total_lines > 0:
|
|
310
384
|
line_flags = f" --lines 1:{total_lines} --max-lines {max(1, total_lines)}"
|
|
385
|
+
prefix = artifact.artifact_dir_cli_prefix(raw_artifact_dir, show_paths=show_artifact_paths)
|
|
311
386
|
exact_reexpand = {
|
|
312
387
|
"available": True,
|
|
313
388
|
"scope": "sanitized_full_output",
|
|
314
389
|
"sha256": content_sha,
|
|
315
390
|
"bytes": content_bytes,
|
|
316
391
|
"lines": total_lines,
|
|
392
|
+
"exact": artifact.artifact_dir_cli_is_exact(raw_artifact_dir, show_paths=show_artifact_paths),
|
|
317
393
|
"cli": (
|
|
318
|
-
f"
|
|
394
|
+
f"{prefix} get {artifact_id}{line_flags} "
|
|
319
395
|
f"--max-chars {max(1, content_chars)}"
|
|
320
396
|
),
|
|
321
397
|
}
|
|
@@ -323,23 +399,75 @@ def store_sanitized_artifact_receipt(
|
|
|
323
399
|
return receipt
|
|
324
400
|
|
|
325
401
|
|
|
326
|
-
|
|
327
|
-
*,
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
402
|
+
class SanitizedArtifactCapture:
    """Spool sanitized output lines into a temporary file, up to a byte cap.

    The temporary file is created lazily on the first accepted line. Once
    the cap would be exceeded (``overflow``) or an ``OSError`` occurs
    (``error``), the file is closed and every later ``add`` is ignored.
    Instances can be used as context managers; leaving the ``with`` block
    closes the file.
    """

    def __init__(self, *, enabled: bool, max_bytes: int) -> None:
        """Configure the capture.

        Args:
            enabled: When false, ``add`` is a no-op.
            max_bytes: Maximum total UTF-8 bytes that may be stored.
        """
        self.enabled = enabled
        self.max_bytes = max_bytes
        # Total encoded bytes successfully written so far.
        self.bytes = 0
        # Set once a line would have pushed the total past max_bytes.
        self.overflow = False
        # First OSError seen, formatted as "<ClassName>: <message>".
        self.error: str | None = None
        self._file: BinaryIO | None = None

    def _ensure_file(self) -> BinaryIO | None:
        """Return the backing temp file, creating it if needed.

        Returns ``None`` (after recording the error) if creation fails.
        """
        if self._file is not None:
            return self._file
        try:
            self._file = tempfile.TemporaryFile("w+b")
        except OSError as exc:
            self._record_error(exc)
            return None
        return self._file

    def _record_error(self, exc: OSError) -> None:
        """Remember ``exc`` unless an earlier error is already recorded."""
        if self.error is None:
            self.error = f"{exc.__class__.__name__}: {exc}"

    def add(self, sanitized_line: str) -> None:
        """Append one sanitized line, honoring the byte cap.

        A line that would exceed ``max_bytes`` is not written; instead
        ``overflow`` is set and the backing file is closed.
        """
        if not self.enabled or self.overflow or self.error:
            return
        encoded = sanitized_line.encode("utf-8", errors="replace")
        source_bytes = len(encoded)
        if self.bytes + source_bytes > self.max_bytes:
            self.overflow = True
            self.close()
            return
        target = self._ensure_file()
        if target is None:
            return
        try:
            target.write(encoded)
        except OSError as exc:
            self._record_error(exc)
            self.close()
            return
        # Count the bytes only after the write has succeeded.
        self.bytes += source_bytes

    def text(self) -> str:
        """Return everything captured so far as a string.

        Returns an empty string when nothing is buffered (no file is open)
        or when reading the file back fails; a read failure is recorded in
        ``error``.
        """
        if self._file is None:
            return ""
        try:
            self._file.flush()
            self._file.seek(0)
            return self._file.read().decode("utf-8", errors="replace")
        except OSError as exc:
            self._record_error(exc)
            self.close()
            return ""

    def close(self) -> None:
        """Close and forget the backing file; safe to call repeatedly."""
        target = self._file
        # Drop the reference first so a failing close() cannot be retried.
        self._file = None
        if target is not None:
            try:
                target.close()
            except OSError as exc:
                self._record_error(exc)

    def __enter__(self) -> "SanitizedArtifactCapture":
        return self

    def __exit__(self, *exc: object) -> None:
        self.close()
|
|
343
471
|
|
|
344
472
|
|
|
345
473
|
def unique_keep_order(lines: Iterable[str]) -> list[str]:
|
|
@@ -720,11 +848,33 @@ def build_digest_payload(
|
|
|
720
848
|
|
|
721
849
|
|
|
722
850
|
def markdown_artifact_receipt_lines(artifact_receipt: dict[str, object]) -> list[str]:
|
|
851
|
+
sandbox = artifact_receipt.get("output_sandbox")
|
|
852
|
+
handle = None
|
|
853
|
+
rehydrate = None
|
|
854
|
+
if isinstance(sandbox, dict):
|
|
855
|
+
raw_handle = sandbox.get("handle")
|
|
856
|
+
if isinstance(raw_handle, str):
|
|
857
|
+
handle = raw_handle
|
|
858
|
+
rehydration = sandbox.get("rehydration")
|
|
859
|
+
commands = rehydration.get("commands") if isinstance(rehydration, dict) else None
|
|
860
|
+
if isinstance(commands, list):
|
|
861
|
+
for command in commands:
|
|
862
|
+
if isinstance(command, dict) and command.get("type") != "metadata" and isinstance(command.get("cli"), str):
|
|
863
|
+
rehydrate = command["cli"]
|
|
864
|
+
break
|
|
865
|
+
if rehydrate is None:
|
|
866
|
+
for command in commands:
|
|
867
|
+
if isinstance(command, dict) and isinstance(command.get("cli"), str):
|
|
868
|
+
rehydrate = command["cli"]
|
|
869
|
+
break
|
|
723
870
|
lines = [
|
|
724
871
|
"- artifact_receipt: "
|
|
725
872
|
f"stored={str(artifact_receipt.get('stored')).lower()} "
|
|
726
|
-
f"id={artifact_receipt.get('artifact_id') or artifact_receipt.get('error')}
|
|
873
|
+
f"id={artifact_receipt.get('artifact_id') or artifact_receipt.get('error')}"
|
|
874
|
+
f"{(' handle=' + handle) if handle else ''}\n"
|
|
727
875
|
]
|
|
876
|
+
if rehydrate:
|
|
877
|
+
lines.append(f"- rehydrate: `{rehydrate}`\n")
|
|
728
878
|
exact = artifact_receipt.get("exact_reexpand")
|
|
729
879
|
if isinstance(exact, dict) and exact.get("cli"):
|
|
730
880
|
lines.append(f"- exact_reexpand: `{exact.get('cli')}`\n")
|
|
@@ -742,12 +892,16 @@ def compact_markdown_artifact_receipt(payload: dict[str, object], max_chars: int
|
|
|
742
892
|
|
|
743
893
|
artifact_id = artifact_receipt.get("artifact_id") or artifact_receipt.get("error")
|
|
744
894
|
stored = str(artifact_receipt.get("stored")).lower()
|
|
895
|
+
sandbox = artifact_receipt.get("output_sandbox")
|
|
896
|
+
handle = sandbox.get("handle") if isinstance(sandbox, dict) and isinstance(sandbox.get("handle"), str) else None
|
|
745
897
|
exact = artifact_receipt.get("exact_reexpand")
|
|
746
898
|
exact_available = ""
|
|
747
899
|
if isinstance(exact, dict) and "available" in exact:
|
|
748
900
|
exact_available = f" exact_available={str(exact.get('available')).lower()}"
|
|
749
901
|
|
|
750
902
|
candidates = [
|
|
903
|
+
f"- artifact_receipt: stored={stored} id={artifact_id}{(' handle=' + handle) if handle else ''}{exact_available}; use output_sandbox.rehydration for exact slices\n",
|
|
904
|
+
f"- artifact_receipt: stored={stored} id={artifact_id}{(' handle=' + handle) if handle else ''}{exact_available}\n",
|
|
751
905
|
f"- artifact_receipt: stored={stored} id={artifact_id}{exact_available}; raise --max-chars for full exact_reexpand\n",
|
|
752
906
|
f"- artifact_receipt: stored={stored} id={artifact_id}{exact_available}\n",
|
|
753
907
|
f"- artifact_receipt: id={artifact_id}\n",
|
|
@@ -905,15 +1059,67 @@ def render_digest_json(payload: dict[str, object], max_chars: int) -> str:
|
|
|
905
1059
|
for key in ("scope", "bytes", "lines", "sha256")
|
|
906
1060
|
if key in stored_output
|
|
907
1061
|
}
|
|
1062
|
+
sandbox = artifact_receipt.get("output_sandbox")
|
|
1063
|
+
if isinstance(sandbox, dict):
|
|
1064
|
+
compact_sandbox: dict[str, object] = {
|
|
1065
|
+
key: sandbox[key]
|
|
1066
|
+
for key in ("schema_version", "mode", "handle", "artifact_id")
|
|
1067
|
+
if key in sandbox
|
|
1068
|
+
}
|
|
1069
|
+
rehydration = sandbox.get("rehydration")
|
|
1070
|
+
if isinstance(rehydration, dict):
|
|
1071
|
+
commands = rehydration.get("commands")
|
|
1072
|
+
if isinstance(commands, list):
|
|
1073
|
+
kept_commands = [
|
|
1074
|
+
command
|
|
1075
|
+
for command in commands
|
|
1076
|
+
if isinstance(command, dict) and isinstance(command.get("cli"), str)
|
|
1077
|
+
][:2]
|
|
1078
|
+
compact_sandbox["rehydration"] = {
|
|
1079
|
+
"commands": kept_commands,
|
|
1080
|
+
"exact_commands": rehydration.get("exact_commands"),
|
|
1081
|
+
"dir_argument": rehydration.get("dir_argument"),
|
|
1082
|
+
}
|
|
1083
|
+
compact["output_sandbox"] = compact_sandbox
|
|
908
1084
|
exact = artifact_receipt.get("exact_reexpand")
|
|
909
1085
|
if include_exact_reexpand and isinstance(exact, dict):
|
|
910
1086
|
compact["exact_reexpand"] = {
|
|
911
1087
|
key: exact[key]
|
|
912
|
-
for key in ("available", "scope", "sha256", "bytes", "lines", "cli", "reason")
|
|
1088
|
+
for key in ("available", "scope", "sha256", "bytes", "lines", "exact", "cli", "reason")
|
|
913
1089
|
if key in exact
|
|
914
1090
|
}
|
|
915
1091
|
return compact
|
|
916
1092
|
|
|
1093
|
+
def tiny_artifact_receipt() -> dict[str, object] | None:
|
|
1094
|
+
artifact_receipt = payload.get("artifact_receipt")
|
|
1095
|
+
if not isinstance(artifact_receipt, dict):
|
|
1096
|
+
return None
|
|
1097
|
+
compact: dict[str, object] = {}
|
|
1098
|
+
for key in ("stored", "artifact_id", "error"):
|
|
1099
|
+
if key in artifact_receipt:
|
|
1100
|
+
compact[key] = artifact_receipt[key]
|
|
1101
|
+
sandbox = artifact_receipt.get("output_sandbox")
|
|
1102
|
+
if isinstance(sandbox, dict):
|
|
1103
|
+
tiny_sandbox: dict[str, object] = {}
|
|
1104
|
+
handle = sandbox.get("handle")
|
|
1105
|
+
if isinstance(handle, str):
|
|
1106
|
+
tiny_sandbox["handle"] = handle
|
|
1107
|
+
rehydration = sandbox.get("rehydration")
|
|
1108
|
+
commands = rehydration.get("commands") if isinstance(rehydration, dict) else None
|
|
1109
|
+
if isinstance(commands, list):
|
|
1110
|
+
for command in commands:
|
|
1111
|
+
if isinstance(command, dict) and isinstance(command.get("cli"), str):
|
|
1112
|
+
tiny_sandbox["rehydration"] = {
|
|
1113
|
+
"commands": [{
|
|
1114
|
+
"type": command.get("type"),
|
|
1115
|
+
"cli": command.get("cli"),
|
|
1116
|
+
}]
|
|
1117
|
+
}
|
|
1118
|
+
break
|
|
1119
|
+
if tiny_sandbox:
|
|
1120
|
+
compact["output_sandbox"] = tiny_sandbox
|
|
1121
|
+
return compact
|
|
1122
|
+
|
|
917
1123
|
def attach_artifact_receipt(candidate: dict[str, object], artifact_receipt: dict[str, object] | None) -> dict[str, object]:
|
|
918
1124
|
if artifact_receipt is not None:
|
|
919
1125
|
candidate["artifact_receipt"] = artifact_receipt
|
|
@@ -955,6 +1161,7 @@ def render_digest_json(payload: dict[str, object], max_chars: int) -> str:
|
|
|
955
1161
|
}
|
|
956
1162
|
compact_receipt = compact_artifact_receipt(include_exact_reexpand=True)
|
|
957
1163
|
minimal_receipt = compact_artifact_receipt(include_exact_reexpand=False)
|
|
1164
|
+
tiny_receipt = tiny_artifact_receipt()
|
|
958
1165
|
|
|
959
1166
|
return first_fitting(
|
|
960
1167
|
[
|
|
@@ -1004,6 +1211,15 @@ def render_digest_json(payload: dict[str, object], max_chars: int) -> str:
|
|
|
1004
1211
|
},
|
|
1005
1212
|
minimal_receipt,
|
|
1006
1213
|
),
|
|
1214
|
+
attach_artifact_receipt(
|
|
1215
|
+
{
|
|
1216
|
+
"digest_capped": True,
|
|
1217
|
+
"status": payload.get("status"),
|
|
1218
|
+
"exit_code": payload.get("exit_code"),
|
|
1219
|
+
"timed_out": payload.get("timed_out"),
|
|
1220
|
+
},
|
|
1221
|
+
tiny_receipt,
|
|
1222
|
+
),
|
|
1007
1223
|
{"digest_capped": True},
|
|
1008
1224
|
]
|
|
1009
1225
|
)
|
|
@@ -1081,14 +1297,16 @@ def terminate_process_tree(
|
|
|
1081
1297
|
class TimedCommandStream:
|
|
1082
1298
|
def __init__(
|
|
1083
1299
|
self,
|
|
1084
|
-
proc: subprocess.Popen[
|
|
1085
|
-
stdout:
|
|
1300
|
+
proc: subprocess.Popen[bytes],
|
|
1301
|
+
stdout: BinaryIO,
|
|
1086
1302
|
*,
|
|
1087
1303
|
timeout_seconds: int,
|
|
1304
|
+
max_line_chars: int = MAX_LINE_CHARS_LIMIT,
|
|
1088
1305
|
process_group_id: int | None = None,
|
|
1089
1306
|
) -> None:
|
|
1090
1307
|
self.proc = proc
|
|
1091
1308
|
self.timeout_seconds = timeout_seconds
|
|
1309
|
+
self.max_unterminated_line_chars = max(1, max_line_chars)
|
|
1092
1310
|
self.process_group_id = process_group_id
|
|
1093
1311
|
self.deadline = time.monotonic() + timeout_seconds
|
|
1094
1312
|
self.timed_out = False
|
|
@@ -1098,10 +1316,62 @@ class TimedCommandStream:
|
|
|
1098
1316
|
self._thread = threading.Thread(target=self._read_stdout, args=(stdout,), daemon=True)
|
|
1099
1317
|
self._thread.start()
|
|
1100
1318
|
|
|
1101
|
-
def
|
|
1319
|
+
def _truncated_raw_line(self, text: str) -> str:
|
|
1320
|
+
holdback = min(RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS, self.max_unterminated_line_chars)
|
|
1321
|
+
safe_keep = max(0, self.max_unterminated_line_chars - holdback)
|
|
1322
|
+
return (
|
|
1323
|
+
text[:safe_keep]
|
|
1324
|
+
+ (
|
|
1325
|
+
"...[context-guard-kit: raw line truncated before newline "
|
|
1326
|
+
f"after {self.max_unterminated_line_chars} chars; "
|
|
1327
|
+
f"withheld {holdback} boundary chars for redaction safety]\n"
|
|
1328
|
+
)
|
|
1329
|
+
)
|
|
1330
|
+
|
|
1331
|
+
def _read_stdout(self, stdout: BinaryIO) -> None:
|
|
1332
|
+
decoder = codecs.getincrementaldecoder("utf-8")("replace")
|
|
1333
|
+
pending = ""
|
|
1334
|
+
discarding_oversized_line = False
|
|
1335
|
+
|
|
1336
|
+
def feed(text: str) -> None:
|
|
1337
|
+
nonlocal pending, discarding_oversized_line
|
|
1338
|
+
if not text:
|
|
1339
|
+
return
|
|
1340
|
+
pending += text
|
|
1341
|
+
while pending:
|
|
1342
|
+
if discarding_oversized_line:
|
|
1343
|
+
newline_index = pending.find("\n")
|
|
1344
|
+
if newline_index == -1:
|
|
1345
|
+
pending = ""
|
|
1346
|
+
return
|
|
1347
|
+
pending = pending[newline_index + 1 :]
|
|
1348
|
+
discarding_oversized_line = False
|
|
1349
|
+
continue
|
|
1350
|
+
|
|
1351
|
+
newline_index = pending.find("\n")
|
|
1352
|
+
if newline_index != -1:
|
|
1353
|
+
if newline_index > self.max_unterminated_line_chars:
|
|
1354
|
+
self._queue.put(self._truncated_raw_line(pending))
|
|
1355
|
+
else:
|
|
1356
|
+
self._queue.put(pending[: newline_index + 1])
|
|
1357
|
+
pending = pending[newline_index + 1 :]
|
|
1358
|
+
continue
|
|
1359
|
+
|
|
1360
|
+
if len(pending) > self.max_unterminated_line_chars:
|
|
1361
|
+
self._queue.put(self._truncated_raw_line(pending))
|
|
1362
|
+
pending = ""
|
|
1363
|
+
discarding_oversized_line = True
|
|
1364
|
+
return
|
|
1365
|
+
|
|
1102
1366
|
try:
|
|
1103
|
-
|
|
1104
|
-
|
|
1367
|
+
while True:
|
|
1368
|
+
chunk = stdout.read(COMMAND_READ_CHUNK_BYTES)
|
|
1369
|
+
if not chunk:
|
|
1370
|
+
break
|
|
1371
|
+
feed(decoder.decode(chunk, final=False))
|
|
1372
|
+
feed(decoder.decode(b"", final=True))
|
|
1373
|
+
if pending and not discarding_oversized_line:
|
|
1374
|
+
self._queue.put(pending)
|
|
1105
1375
|
finally:
|
|
1106
1376
|
self._stream_closed = True
|
|
1107
1377
|
self._queue.put(_STREAM_END)
|
|
@@ -1242,6 +1512,18 @@ def main() -> int:
|
|
|
1242
1512
|
if args.artifact_receipt and args.digest == "off":
|
|
1243
1513
|
print("trim_command_output.py: --artifact-receipt requires --digest markdown or --digest json", file=sys.stderr)
|
|
1244
1514
|
return 2
|
|
1515
|
+
if args.artifact_receipt:
|
|
1516
|
+
try:
|
|
1517
|
+
load_artifact_store_module()
|
|
1518
|
+
except UnsafeAdjacentModuleError as exc:
|
|
1519
|
+
print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
|
|
1520
|
+
return 2
|
|
1521
|
+
except Exception:
|
|
1522
|
+
# Missing/broken artifact helpers are reported in the digest payload as
|
|
1523
|
+
# artifact_receipt_unavailable for backward compatibility. Integrity
|
|
1524
|
+
# failures above are different: they indicate an adjacent helper exists
|
|
1525
|
+
# but cannot be safely trusted, so they fail closed.
|
|
1526
|
+
pass
|
|
1245
1527
|
|
|
1246
1528
|
command = args.command
|
|
1247
1529
|
if command and command[0] == "--":
|
|
@@ -1250,6 +1532,12 @@ def main() -> int:
|
|
|
1250
1532
|
print("trim_command_output.py: missing command", file=sys.stderr)
|
|
1251
1533
|
return 2
|
|
1252
1534
|
|
|
1535
|
+
try:
|
|
1536
|
+
line_sanitizer = load_line_sanitizer(args.show_paths)
|
|
1537
|
+
except UnsafeAdjacentModuleError as exc:
|
|
1538
|
+
print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
|
|
1539
|
+
return 2
|
|
1540
|
+
|
|
1253
1541
|
popen_kwargs: dict[str, object] = {}
|
|
1254
1542
|
if os.name != "nt":
|
|
1255
1543
|
popen_kwargs["start_new_session"] = True
|
|
@@ -1258,9 +1546,8 @@ def main() -> int:
|
|
|
1258
1546
|
command,
|
|
1259
1547
|
stdout=subprocess.PIPE,
|
|
1260
1548
|
stderr=subprocess.STDOUT,
|
|
1261
|
-
text=
|
|
1262
|
-
bufsize=
|
|
1263
|
-
errors="replace",
|
|
1549
|
+
text=False,
|
|
1550
|
+
bufsize=0,
|
|
1264
1551
|
**popen_kwargs,
|
|
1265
1552
|
)
|
|
1266
1553
|
except OSError as exc:
|
|
@@ -1276,20 +1563,19 @@ def main() -> int:
|
|
|
1276
1563
|
visible_chars = 0
|
|
1277
1564
|
any_line_capped = False
|
|
1278
1565
|
runner_summary = RunnerFailureSummary(args.runner_summary_items, show_paths=args.show_paths)
|
|
1279
|
-
line_sanitizer = load_line_sanitizer(args.show_paths)
|
|
1280
1566
|
duplicate_tracker = DuplicateLineTracker()
|
|
1281
1567
|
redacted_lines = 0
|
|
1282
|
-
|
|
1283
|
-
artifact_capture_bytes = 0
|
|
1284
|
-
artifact_capture_overflow = False
|
|
1568
|
+
artifact_capture = SanitizedArtifactCapture(enabled=args.artifact_receipt, max_bytes=args.artifact_max_bytes)
|
|
1285
1569
|
|
|
1286
1570
|
if proc.stdout is None:
|
|
1571
|
+
artifact_capture.close()
|
|
1287
1572
|
print("trim_command_output.py: subprocess produced no stdout pipe", file=sys.stderr)
|
|
1288
1573
|
return 1
|
|
1289
1574
|
command_stream = TimedCommandStream(
|
|
1290
1575
|
proc,
|
|
1291
1576
|
proc.stdout,
|
|
1292
1577
|
timeout_seconds=args.timeout_seconds,
|
|
1578
|
+
max_line_chars=COMMAND_MAX_UNTERMINATED_LINE_CHARS,
|
|
1293
1579
|
process_group_id=process_group_id_for(proc),
|
|
1294
1580
|
)
|
|
1295
1581
|
for line in command_stream:
|
|
@@ -1298,14 +1584,7 @@ def main() -> int:
|
|
|
1298
1584
|
visible_source, redacted = line_sanitizer.sanitize(line) # type: ignore[attr-defined]
|
|
1299
1585
|
if redacted:
|
|
1300
1586
|
redacted_lines += 1
|
|
1301
|
-
|
|
1302
|
-
capture_enabled=args.artifact_receipt,
|
|
1303
|
-
sanitized_line=visible_source,
|
|
1304
|
-
artifact_lines=artifact_lines,
|
|
1305
|
-
capture_bytes=artifact_capture_bytes,
|
|
1306
|
-
capture_overflow=artifact_capture_overflow,
|
|
1307
|
-
max_bytes=args.artifact_max_bytes,
|
|
1308
|
-
)
|
|
1587
|
+
artifact_capture.add(visible_source)
|
|
1309
1588
|
visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
|
|
1310
1589
|
any_line_capped = any_line_capped or line_capped
|
|
1311
1590
|
visible_chars += len(visible_line)
|
|
@@ -1328,14 +1607,7 @@ def main() -> int:
|
|
|
1328
1607
|
visible_source, redacted = line_sanitizer.sanitize(line) # type: ignore[attr-defined]
|
|
1329
1608
|
if redacted:
|
|
1330
1609
|
redacted_lines += 1
|
|
1331
|
-
|
|
1332
|
-
capture_enabled=args.artifact_receipt,
|
|
1333
|
-
sanitized_line=visible_source,
|
|
1334
|
-
artifact_lines=artifact_lines,
|
|
1335
|
-
capture_bytes=artifact_capture_bytes,
|
|
1336
|
-
capture_overflow=artifact_capture_overflow,
|
|
1337
|
-
max_bytes=args.artifact_max_bytes,
|
|
1338
|
-
)
|
|
1610
|
+
artifact_capture.add(visible_source)
|
|
1339
1611
|
visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
|
|
1340
1612
|
any_line_capped = any_line_capped or line_capped
|
|
1341
1613
|
visible_chars += len(visible_line)
|
|
@@ -1368,33 +1640,64 @@ def main() -> int:
|
|
|
1368
1640
|
duplicate_line_groups=duplicate_tracker.as_list(),
|
|
1369
1641
|
)
|
|
1370
1642
|
if args.artifact_receipt:
|
|
1371
|
-
if
|
|
1643
|
+
if artifact_capture.overflow:
|
|
1372
1644
|
payload["artifact_receipt"] = {
|
|
1373
1645
|
"stored": False,
|
|
1374
1646
|
"error": "sanitized_output_exceeds_artifact_max_bytes",
|
|
1375
1647
|
"max_bytes": args.artifact_max_bytes,
|
|
1376
1648
|
"exact_reexpand": {"available": False, "reason": "artifact size cap exceeded"},
|
|
1377
1649
|
}
|
|
1650
|
+
elif artifact_capture.error:
|
|
1651
|
+
payload["artifact_receipt"] = {
|
|
1652
|
+
"stored": False,
|
|
1653
|
+
"error": "artifact_receipt_capture_unavailable",
|
|
1654
|
+
"reason": artifact_capture.error,
|
|
1655
|
+
"exact_reexpand": {"available": False, "reason": "artifact receipt capture unavailable"},
|
|
1656
|
+
}
|
|
1378
1657
|
else:
|
|
1379
|
-
|
|
1380
|
-
|
|
1381
|
-
sanitized_text="".join(artifact_lines),
|
|
1382
|
-
command=command,
|
|
1383
|
-
args=args,
|
|
1384
|
-
line_sanitizer=line_sanitizer,
|
|
1385
|
-
redacted_lines=redacted_lines,
|
|
1386
|
-
)
|
|
1387
|
-
except Exception as exc:
|
|
1658
|
+
sanitized_artifact_text = artifact_capture.text()
|
|
1659
|
+
if artifact_capture.error:
|
|
1388
1660
|
payload["artifact_receipt"] = {
|
|
1389
1661
|
"stored": False,
|
|
1390
|
-
"error": "
|
|
1391
|
-
"reason":
|
|
1392
|
-
"exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
|
|
1662
|
+
"error": "artifact_receipt_capture_unavailable",
|
|
1663
|
+
"reason": artifact_capture.error,
|
|
1664
|
+
"exact_reexpand": {"available": False, "reason": "artifact receipt capture unavailable"},
|
|
1393
1665
|
}
|
|
1666
|
+
else:
|
|
1667
|
+
try:
|
|
1668
|
+
payload["artifact_receipt"] = store_sanitized_artifact_receipt(
|
|
1669
|
+
sanitized_text=sanitized_artifact_text,
|
|
1670
|
+
command=command,
|
|
1671
|
+
args=args,
|
|
1672
|
+
line_sanitizer=line_sanitizer,
|
|
1673
|
+
redacted_lines=redacted_lines,
|
|
1674
|
+
)
|
|
1675
|
+
except UnsafeAdjacentModuleError as exc:
|
|
1676
|
+
artifact_capture.close()
|
|
1677
|
+
print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
|
|
1678
|
+
return 2
|
|
1679
|
+
except Exception as exc:
|
|
1680
|
+
payload["artifact_receipt"] = {
|
|
1681
|
+
"stored": False,
|
|
1682
|
+
"error": "artifact_receipt_unavailable",
|
|
1683
|
+
"reason": f"{exc.__class__.__name__}: {exc}",
|
|
1684
|
+
"exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
|
|
1685
|
+
}
|
|
1686
|
+
artifact_receipt = payload.get("artifact_receipt")
|
|
1687
|
+
if isinstance(artifact_receipt, dict) and artifact_receipt.get("stored"):
|
|
1688
|
+
next_queries = payload.setdefault("next_queries", [])
|
|
1689
|
+
if isinstance(next_queries, list):
|
|
1690
|
+
guidance = (
|
|
1691
|
+
"Use artifact_receipt.output_sandbox.rehydration commands for exact sanitized slices "
|
|
1692
|
+
"before rerunning the broad command or requesting full raw output."
|
|
1693
|
+
)
|
|
1694
|
+
if guidance not in next_queries:
|
|
1695
|
+
next_queries.insert(0, guidance)
|
|
1394
1696
|
if args.digest == "json":
|
|
1395
1697
|
sys.stdout.write(render_digest_json(payload, args.max_chars))
|
|
1396
1698
|
else:
|
|
1397
1699
|
sys.stdout.write(render_digest_markdown(payload, args.max_chars))
|
|
1700
|
+
artifact_capture.close()
|
|
1398
1701
|
return rc
|
|
1399
1702
|
|
|
1400
1703
|
if total <= args.max_lines and visible_chars <= args.max_chars and not any_line_capped:
|
|
@@ -1442,6 +1745,7 @@ def main() -> int:
|
|
|
1442
1745
|
output += "[context-guard-kit] final summary was capped by --max-chars.\n"
|
|
1443
1746
|
sys.stdout.write(output)
|
|
1444
1747
|
|
|
1748
|
+
artifact_capture.close()
|
|
1445
1749
|
return rc
|
|
1446
1750
|
|
|
1447
1751
|
|