@ictechgy/context-guard 0.4.10 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -1
- package/README.ko.md +32 -21
- package/README.md +38 -29
- package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
- package/docs/benchmark-workflow-examples.md +3 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
- package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
- package/docs/experimental-benchmark-fixtures.md +24 -7
- package/package.json +2 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +14 -11
- package/plugins/context-guard/README.md +15 -14
- package/plugins/context-guard/bin/context-guard +46 -11
- package/plugins/context-guard/bin/context-guard-artifact +342 -33
- package/plugins/context-guard/bin/context-guard-audit +33 -2
- package/plugins/context-guard/bin/context-guard-bench +1542 -31
- package/plugins/context-guard/bin/context-guard-cache-score +318 -33
- package/plugins/context-guard/bin/context-guard-cost +7 -2
- package/plugins/context-guard/bin/context-guard-experiments +364 -8
- package/plugins/context-guard/bin/context-guard-failed-nudge +6 -2
- package/plugins/context-guard/bin/context-guard-pack +301 -17
- package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
- package/plugins/context-guard/bin/context-guard-tool-prune +241 -54
- package/plugins/context-guard/bin/context-guard-trim-output +288 -41
- package/plugins/context-guard/brief/README.md +5 -5
- package/plugins/context-guard/lib/context_guard_commands.py +214 -190
|
@@ -7,10 +7,9 @@ lines into the conversation while preserving the lines most likely to be useful.
|
|
|
7
7
|
from __future__ import annotations
|
|
8
8
|
|
|
9
9
|
import argparse
|
|
10
|
+
import codecs
|
|
10
11
|
import collections
|
|
11
12
|
import hashlib
|
|
12
|
-
import importlib.machinery
|
|
13
|
-
import importlib.util
|
|
14
13
|
import json
|
|
15
14
|
import os
|
|
16
15
|
from pathlib import Path, PurePosixPath
|
|
@@ -18,11 +17,13 @@ import queue
|
|
|
18
17
|
import re
|
|
19
18
|
import shlex
|
|
20
19
|
import signal
|
|
20
|
+
import stat
|
|
21
21
|
import subprocess
|
|
22
22
|
import sys
|
|
23
23
|
import threading
|
|
24
24
|
import time
|
|
25
|
-
|
|
25
|
+
import types
|
|
26
|
+
from typing import BinaryIO, Iterable, Iterator
|
|
26
27
|
|
|
27
28
|
MAX_SUMMARY_ITEM_CHARS = 500
|
|
28
29
|
MAX_LINES_LIMIT = 5_000
|
|
@@ -35,6 +36,10 @@ MAX_TIMEOUT_SECONDS = 86_400
|
|
|
35
36
|
TIMEOUT_EXIT_CODE = 124
|
|
36
37
|
DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES = 10_000_000
|
|
37
38
|
MAX_ARTIFACT_RECEIPT_MAX_BYTES = 100_000_000
|
|
39
|
+
COMMAND_READ_CHUNK_BYTES = 64 * 1024
|
|
40
|
+
COMMAND_MAX_UNTERMINATED_LINE_CHARS = 4_096
|
|
41
|
+
RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS = 1_024
|
|
42
|
+
MAX_DYNAMIC_SIBLING_MODULE_BYTES = 2_000_000
|
|
38
43
|
|
|
39
44
|
|
|
40
45
|
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
|
|
@@ -132,6 +137,10 @@ def anonymize_absolute_paths(text: str) -> str:
|
|
|
132
137
|
return ABSOLUTE_PATH_RE.sub(repl, text)
|
|
133
138
|
|
|
134
139
|
|
|
140
|
+
class UnsafeAdjacentModuleError(RuntimeError):
    """Signal that a helper next to this script exists but must not be loaded dynamically.

    Kept distinct from ordinary load failures so callers can re-raise it
    instead of falling back to a weaker code path.
    """
|
|
142
|
+
|
|
143
|
+
|
|
135
144
|
class FallbackLineSanitizer:
|
|
136
145
|
def __init__(self, *, show_paths: bool = False, diagnostic: str | None = None) -> None:
|
|
137
146
|
self.show_paths = show_paths
|
|
@@ -159,24 +168,82 @@ class FallbackLineSanitizer:
|
|
|
159
168
|
return line, redacted
|
|
160
169
|
|
|
161
170
|
|
|
171
|
+
def no_follow_file_flags() -> int:
    """Return ``os.open`` flags for a read-only open that refuses to follow symlinks.

    Returns:
        ``O_RDONLY | O_NOFOLLOW``, plus ``O_CLOEXEC`` where the platform defines it.

    Raises:
        UnsafeAdjacentModuleError: if ``os.O_NOFOLLOW`` is not available.
    """
    nofollow = getattr(os, "O_NOFOLLOW", None)
    if nofollow is None:
        raise UnsafeAdjacentModuleError("O_NOFOLLOW is required for adjacent helper loads")
    # O_CLOEXEC is optional: contribute it only where the platform exposes it.
    return os.O_RDONLY | nofollow | getattr(os, "O_CLOEXEC", 0)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
def no_follow_dir_flags() -> int:
    """Return the no-follow open flags, additionally requiring a directory where supported.

    Raises:
        UnsafeAdjacentModuleError: propagated from ``no_follow_file_flags`` when
            ``O_NOFOLLOW`` is unavailable.
    """
    # O_DIRECTORY is OR-ed in only on platforms that define it.
    return no_follow_file_flags() | getattr(os, "O_DIRECTORY", 0)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def read_adjacent_module_source(script_dir: Path, name: str, *, max_bytes: int) -> str | None:
    """Read the source of a helper file located directly inside ``script_dir``.

    The directory and the file are both opened with no-follow flags, and the
    file is opened relative to the directory descriptor, so a symlinked helper
    is rejected rather than followed.

    Args:
        script_dir: Directory expected to contain the helper.
        name: Bare file name of the helper (no path separators).
        max_bytes: Largest helper size, in bytes, that will be accepted.

    Returns:
        The decoded helper source, or ``None`` when no such file exists.

    Raises:
        RuntimeError: if ``name`` is empty, ``.``/``..``, or contains a separator.
        UnsafeAdjacentModuleError: if the directory or file cannot be opened
            without following symlinks, the file is not a regular file, or it
            exceeds ``max_bytes``.
    """
    if name in {"", ".", ".."} or "/" in name or os.sep in name:
        raise RuntimeError(f"invalid adjacent helper name: {name!r}")
    try:
        dir_fd = os.open(str(script_dir), no_follow_dir_flags())
    except OSError as exc:
        raise UnsafeAdjacentModuleError(f"could not inspect helper directory: {exc}") from exc
    try:
        try:
            fd = os.open(name, no_follow_file_flags(), dir_fd=dir_fd)
        except FileNotFoundError:
            # A missing helper is not an integrity problem; the caller tries the next name.
            return None
        except OSError as exc:
            raise UnsafeAdjacentModuleError(f"{name} could not be opened without following symlinks: {exc}") from exc
        try:
            st = os.fstat(fd)
            if not stat.S_ISREG(st.st_mode):
                raise UnsafeAdjacentModuleError(f"{name} is not a regular helper file")
            if st.st_size > max_bytes:
                raise UnsafeAdjacentModuleError(f"{name} exceeds helper size cap: {st.st_size} > {max_bytes}")
            # os.read returns *at most* the requested count, so one call may come
            # back short. Keep reading until EOF or until one byte past the cap
            # has been collected; otherwise a truncated helper could be executed.
            chunks: list[bytes] = []
            remaining = max_bytes + 1
            while remaining > 0:
                chunk = os.read(fd, remaining)
                if not chunk:
                    break
                chunks.append(chunk)
                remaining -= len(chunk)
            data = b"".join(chunks)
        finally:
            os.close(fd)
    finally:
        os.close(dir_fd)
    # Re-check after reading: the file may have grown since fstat.
    if len(data) > max_bytes:
        raise UnsafeAdjacentModuleError(f"{name} exceeds helper size cap: > {max_bytes}")
    # NOTE(review): invalid UTF-8 is replaced rather than rejected, so the text
    # that gets compiled can differ from the bytes on disk — confirm this is intended.
    return data.decode("utf-8", errors="replace")
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def load_adjacent_python_module(script_dir: Path, name: str, *, module_prefix: str) -> object | None:
    """Execute a helper file from ``script_dir`` as a fresh module object.

    Args:
        script_dir: Directory that should contain the helper.
        name: Bare file name of the helper.
        module_prefix: Prefix used when building the synthetic module name.

    Returns:
        The populated module, or ``None`` when the helper file does not exist.

    Raises:
        UnsafeAdjacentModuleError: propagated from ``read_adjacent_module_source``.
        Exception: anything raised while compiling or executing the helper source.
    """
    text = read_adjacent_module_source(script_dir, name, max_bytes=MAX_DYNAMIC_SIBLING_MODULE_BYTES)
    if text is None:
        return None
    helper_path = str(script_dir / name)
    # Synthetic name: prefix + current pid + short digest of the file name.
    name_digest = hashlib.sha256(name.encode('utf-8')).hexdigest()[:12]
    loaded = types.ModuleType(f"{module_prefix}_{os.getpid()}_{name_digest}")
    loaded.__file__ = helper_path
    loaded.__package__ = ""
    code = compile(text, helper_path, "exec")
    # The module is only returned to the caller; it is not added to sys.modules here.
    exec(code, loaded.__dict__)
    return loaded
|
|
227
|
+
|
|
228
|
+
|
|
162
229
|
def load_line_sanitizer(show_paths: bool) -> object:
|
|
163
230
|
"""Reuse the stronger sanitizer when it is shipped next to this wrapper."""
|
|
164
|
-
script_dir =
|
|
231
|
+
script_dir = Path(__file__).resolve().parent
|
|
165
232
|
load_errors: list[str] = []
|
|
166
233
|
for name in ("sanitize_output.py", "context-guard-sanitize-output"):
|
|
167
|
-
candidate = os.path.join(script_dir, name)
|
|
168
|
-
if not os.path.exists(candidate):
|
|
169
|
-
continue
|
|
170
234
|
try:
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
235
|
+
module = load_adjacent_python_module(
|
|
236
|
+
script_dir,
|
|
237
|
+
name,
|
|
238
|
+
module_prefix="_claude_token_sanitize",
|
|
239
|
+
)
|
|
240
|
+
if module is None:
|
|
174
241
|
continue
|
|
175
|
-
module = importlib.util.module_from_spec(spec)
|
|
176
|
-
loader.exec_module(module)
|
|
177
242
|
return module.LineSanitizer(show_paths=show_paths)
|
|
243
|
+
except UnsafeAdjacentModuleError:
|
|
244
|
+
raise
|
|
178
245
|
except Exception as exc:
|
|
179
|
-
load_errors.append(f"{
|
|
246
|
+
load_errors.append(f"{name} failed to load: {exc.__class__.__name__}: {exc}")
|
|
180
247
|
continue
|
|
181
248
|
diagnostic = "; ".join(load_errors) if load_errors else "strong sanitizer not found next to trim wrapper"
|
|
182
249
|
return FallbackLineSanitizer(show_paths=show_paths, diagnostic=diagnostic)
|
|
@@ -189,22 +256,22 @@ def load_artifact_store_module() -> object:
|
|
|
189
256
|
wrapper must resolve both source-tree (`context_escrow.py`) and packaged
|
|
190
257
|
(`context-guard-artifact`) names.
|
|
191
258
|
"""
|
|
192
|
-
script_dir =
|
|
259
|
+
script_dir = Path(__file__).resolve().parent
|
|
193
260
|
load_errors: list[str] = []
|
|
194
261
|
for name in ("context_escrow.py", "context-guard-artifact", "claude-token-artifact"):
|
|
195
|
-
candidate = os.path.join(script_dir, name)
|
|
196
|
-
if not os.path.exists(candidate):
|
|
197
|
-
continue
|
|
198
262
|
try:
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
263
|
+
module = load_adjacent_python_module(
|
|
264
|
+
script_dir,
|
|
265
|
+
name,
|
|
266
|
+
module_prefix="_context_guard_artifact",
|
|
267
|
+
)
|
|
268
|
+
if module is None:
|
|
202
269
|
continue
|
|
203
|
-
module = importlib.util.module_from_spec(spec)
|
|
204
|
-
loader.exec_module(module)
|
|
205
270
|
return module
|
|
271
|
+
except UnsafeAdjacentModuleError:
|
|
272
|
+
raise
|
|
206
273
|
except Exception as exc:
|
|
207
|
-
load_errors.append(f"{
|
|
274
|
+
load_errors.append(f"{name} failed to load: {exc.__class__.__name__}: {exc}")
|
|
208
275
|
continue
|
|
209
276
|
diagnostic = "; ".join(load_errors) if load_errors else "artifact store not found next to trim wrapper"
|
|
210
277
|
raise RuntimeError(diagnostic)
|
|
@@ -272,7 +339,13 @@ def store_sanitized_artifact_receipt(
|
|
|
272
339
|
"metadata_file": meta_path.name,
|
|
273
340
|
"scope": "sanitized_full_output",
|
|
274
341
|
},
|
|
275
|
-
"digest": artifact.build_digest(
|
|
342
|
+
"digest": artifact.build_digest(
|
|
343
|
+
sanitized_text,
|
|
344
|
+
artifact_id=artifact_id,
|
|
345
|
+
redacted_lines=redacted_lines,
|
|
346
|
+
raw_dir=str(getattr(args, "artifact_dir", ".context-guard/artifacts")),
|
|
347
|
+
show_paths=bool(getattr(args, "show_paths", False)),
|
|
348
|
+
),
|
|
276
349
|
"retrieval": {
|
|
277
350
|
"strategy": strategy,
|
|
278
351
|
"deterministic": True,
|
|
@@ -282,13 +355,17 @@ def store_sanitized_artifact_receipt(
|
|
|
282
355
|
content_type=content_type,
|
|
283
356
|
strategy=strategy,
|
|
284
357
|
total_lines=total_lines,
|
|
358
|
+
raw_dir=str(getattr(args, "artifact_dir", ".context-guard/artifacts")),
|
|
359
|
+
show_paths=bool(getattr(args, "show_paths", False)),
|
|
285
360
|
),
|
|
286
361
|
},
|
|
287
362
|
}
|
|
288
363
|
artifact.shrink_digest_for_metadata_cap(metadata)
|
|
289
364
|
artifact.write_private_text(content_path, sanitized_text)
|
|
290
365
|
artifact.write_private_text(meta_path, artifact.metadata_json_text(metadata))
|
|
291
|
-
|
|
366
|
+
raw_artifact_dir = str(getattr(args, "artifact_dir", ".context-guard/artifacts"))
|
|
367
|
+
show_artifact_paths = bool(getattr(args, "show_paths", False))
|
|
368
|
+
receipt = artifact.receipt_for(metadata, raw_dir=raw_artifact_dir, show_paths=show_artifact_paths)
|
|
292
369
|
query_line_cap = int(getattr(artifact, "MAX_QUERY_LINES", 5_000))
|
|
293
370
|
query_char_cap = 1_000_000
|
|
294
371
|
content_chars = len(sanitized_text)
|
|
@@ -301,21 +378,19 @@ def store_sanitized_artifact_receipt(
|
|
|
301
378
|
"reason": "artifact query cap exceeded; use retrieval hints for exact slices",
|
|
302
379
|
}
|
|
303
380
|
if total_lines <= query_line_cap and content_chars <= query_char_cap:
|
|
304
|
-
raw_artifact_dir = str(getattr(args, "artifact_dir", ".context-guard/artifacts"))
|
|
305
|
-
dir_flags = ""
|
|
306
|
-
if raw_artifact_dir != ".context-guard/artifacts":
|
|
307
|
-
dir_flags = f" --dir {shlex.quote(raw_artifact_dir)}"
|
|
308
381
|
line_flags = ""
|
|
309
382
|
if total_lines > 0:
|
|
310
383
|
line_flags = f" --lines 1:{total_lines} --max-lines {max(1, total_lines)}"
|
|
384
|
+
prefix = artifact.artifact_dir_cli_prefix(raw_artifact_dir, show_paths=show_artifact_paths)
|
|
311
385
|
exact_reexpand = {
|
|
312
386
|
"available": True,
|
|
313
387
|
"scope": "sanitized_full_output",
|
|
314
388
|
"sha256": content_sha,
|
|
315
389
|
"bytes": content_bytes,
|
|
316
390
|
"lines": total_lines,
|
|
391
|
+
"exact": artifact.artifact_dir_cli_is_exact(raw_artifact_dir, show_paths=show_artifact_paths),
|
|
317
392
|
"cli": (
|
|
318
|
-
f"
|
|
393
|
+
f"{prefix} get {artifact_id}{line_flags} "
|
|
319
394
|
f"--max-chars {max(1, content_chars)}"
|
|
320
395
|
),
|
|
321
396
|
}
|
|
@@ -720,11 +795,33 @@ def build_digest_payload(
|
|
|
720
795
|
|
|
721
796
|
|
|
722
797
|
def markdown_artifact_receipt_lines(artifact_receipt: dict[str, object]) -> list[str]:
|
|
798
|
+
sandbox = artifact_receipt.get("output_sandbox")
|
|
799
|
+
handle = None
|
|
800
|
+
rehydrate = None
|
|
801
|
+
if isinstance(sandbox, dict):
|
|
802
|
+
raw_handle = sandbox.get("handle")
|
|
803
|
+
if isinstance(raw_handle, str):
|
|
804
|
+
handle = raw_handle
|
|
805
|
+
rehydration = sandbox.get("rehydration")
|
|
806
|
+
commands = rehydration.get("commands") if isinstance(rehydration, dict) else None
|
|
807
|
+
if isinstance(commands, list):
|
|
808
|
+
for command in commands:
|
|
809
|
+
if isinstance(command, dict) and command.get("type") != "metadata" and isinstance(command.get("cli"), str):
|
|
810
|
+
rehydrate = command["cli"]
|
|
811
|
+
break
|
|
812
|
+
if rehydrate is None:
|
|
813
|
+
for command in commands:
|
|
814
|
+
if isinstance(command, dict) and isinstance(command.get("cli"), str):
|
|
815
|
+
rehydrate = command["cli"]
|
|
816
|
+
break
|
|
723
817
|
lines = [
|
|
724
818
|
"- artifact_receipt: "
|
|
725
819
|
f"stored={str(artifact_receipt.get('stored')).lower()} "
|
|
726
|
-
f"id={artifact_receipt.get('artifact_id') or artifact_receipt.get('error')}
|
|
820
|
+
f"id={artifact_receipt.get('artifact_id') or artifact_receipt.get('error')}"
|
|
821
|
+
f"{(' handle=' + handle) if handle else ''}\n"
|
|
727
822
|
]
|
|
823
|
+
if rehydrate:
|
|
824
|
+
lines.append(f"- rehydrate: `{rehydrate}`\n")
|
|
728
825
|
exact = artifact_receipt.get("exact_reexpand")
|
|
729
826
|
if isinstance(exact, dict) and exact.get("cli"):
|
|
730
827
|
lines.append(f"- exact_reexpand: `{exact.get('cli')}`\n")
|
|
@@ -742,12 +839,16 @@ def compact_markdown_artifact_receipt(payload: dict[str, object], max_chars: int
|
|
|
742
839
|
|
|
743
840
|
artifact_id = artifact_receipt.get("artifact_id") or artifact_receipt.get("error")
|
|
744
841
|
stored = str(artifact_receipt.get("stored")).lower()
|
|
842
|
+
sandbox = artifact_receipt.get("output_sandbox")
|
|
843
|
+
handle = sandbox.get("handle") if isinstance(sandbox, dict) and isinstance(sandbox.get("handle"), str) else None
|
|
745
844
|
exact = artifact_receipt.get("exact_reexpand")
|
|
746
845
|
exact_available = ""
|
|
747
846
|
if isinstance(exact, dict) and "available" in exact:
|
|
748
847
|
exact_available = f" exact_available={str(exact.get('available')).lower()}"
|
|
749
848
|
|
|
750
849
|
candidates = [
|
|
850
|
+
f"- artifact_receipt: stored={stored} id={artifact_id}{(' handle=' + handle) if handle else ''}{exact_available}; use output_sandbox.rehydration for exact slices\n",
|
|
851
|
+
f"- artifact_receipt: stored={stored} id={artifact_id}{(' handle=' + handle) if handle else ''}{exact_available}\n",
|
|
751
852
|
f"- artifact_receipt: stored={stored} id={artifact_id}{exact_available}; raise --max-chars for full exact_reexpand\n",
|
|
752
853
|
f"- artifact_receipt: stored={stored} id={artifact_id}{exact_available}\n",
|
|
753
854
|
f"- artifact_receipt: id={artifact_id}\n",
|
|
@@ -905,15 +1006,67 @@ def render_digest_json(payload: dict[str, object], max_chars: int) -> str:
|
|
|
905
1006
|
for key in ("scope", "bytes", "lines", "sha256")
|
|
906
1007
|
if key in stored_output
|
|
907
1008
|
}
|
|
1009
|
+
sandbox = artifact_receipt.get("output_sandbox")
|
|
1010
|
+
if isinstance(sandbox, dict):
|
|
1011
|
+
compact_sandbox: dict[str, object] = {
|
|
1012
|
+
key: sandbox[key]
|
|
1013
|
+
for key in ("schema_version", "mode", "handle", "artifact_id")
|
|
1014
|
+
if key in sandbox
|
|
1015
|
+
}
|
|
1016
|
+
rehydration = sandbox.get("rehydration")
|
|
1017
|
+
if isinstance(rehydration, dict):
|
|
1018
|
+
commands = rehydration.get("commands")
|
|
1019
|
+
if isinstance(commands, list):
|
|
1020
|
+
kept_commands = [
|
|
1021
|
+
command
|
|
1022
|
+
for command in commands
|
|
1023
|
+
if isinstance(command, dict) and isinstance(command.get("cli"), str)
|
|
1024
|
+
][:2]
|
|
1025
|
+
compact_sandbox["rehydration"] = {
|
|
1026
|
+
"commands": kept_commands,
|
|
1027
|
+
"exact_commands": rehydration.get("exact_commands"),
|
|
1028
|
+
"dir_argument": rehydration.get("dir_argument"),
|
|
1029
|
+
}
|
|
1030
|
+
compact["output_sandbox"] = compact_sandbox
|
|
908
1031
|
exact = artifact_receipt.get("exact_reexpand")
|
|
909
1032
|
if include_exact_reexpand and isinstance(exact, dict):
|
|
910
1033
|
compact["exact_reexpand"] = {
|
|
911
1034
|
key: exact[key]
|
|
912
|
-
for key in ("available", "scope", "sha256", "bytes", "lines", "cli", "reason")
|
|
1035
|
+
for key in ("available", "scope", "sha256", "bytes", "lines", "exact", "cli", "reason")
|
|
913
1036
|
if key in exact
|
|
914
1037
|
}
|
|
915
1038
|
return compact
|
|
916
1039
|
|
|
1040
|
+
def tiny_artifact_receipt() -> dict[str, object] | None:
    """Build the smallest artifact-receipt summary from the enclosing ``payload``.

    Keeps only ``stored`` / ``artifact_id`` / ``error`` plus, when present, the
    sandbox handle and the first rehydration command that carries a string ``cli``.

    Returns:
        The reduced receipt, or ``None`` when ``payload["artifact_receipt"]`` is
        not a dict.
    """
    receipt = payload.get("artifact_receipt")
    if not isinstance(receipt, dict):
        return None
    slim: dict[str, object] = {
        field: receipt[field]
        for field in ("stored", "artifact_id", "error")
        if field in receipt
    }
    sandbox = receipt.get("output_sandbox")
    if not isinstance(sandbox, dict):
        return slim

    slim_sandbox: dict[str, object] = {}
    handle = sandbox.get("handle")
    if isinstance(handle, str):
        slim_sandbox["handle"] = handle
    rehydration = sandbox.get("rehydration")
    commands = rehydration.get("commands") if isinstance(rehydration, dict) else None
    if isinstance(commands, list):
        # Only the first usable command is kept.
        first_usable = next(
            (
                entry
                for entry in commands
                if isinstance(entry, dict) and isinstance(entry.get("cli"), str)
            ),
            None,
        )
        if first_usable is not None:
            slim_sandbox["rehydration"] = {
                "commands": [{
                    "type": first_usable.get("type"),
                    "cli": first_usable.get("cli"),
                }]
            }
    # An empty sandbox summary is omitted entirely.
    if slim_sandbox:
        slim["output_sandbox"] = slim_sandbox
    return slim
|
|
1069
|
+
|
|
917
1070
|
def attach_artifact_receipt(candidate: dict[str, object], artifact_receipt: dict[str, object] | None) -> dict[str, object]:
|
|
918
1071
|
if artifact_receipt is not None:
|
|
919
1072
|
candidate["artifact_receipt"] = artifact_receipt
|
|
@@ -955,6 +1108,7 @@ def render_digest_json(payload: dict[str, object], max_chars: int) -> str:
|
|
|
955
1108
|
}
|
|
956
1109
|
compact_receipt = compact_artifact_receipt(include_exact_reexpand=True)
|
|
957
1110
|
minimal_receipt = compact_artifact_receipt(include_exact_reexpand=False)
|
|
1111
|
+
tiny_receipt = tiny_artifact_receipt()
|
|
958
1112
|
|
|
959
1113
|
return first_fitting(
|
|
960
1114
|
[
|
|
@@ -1004,6 +1158,15 @@ def render_digest_json(payload: dict[str, object], max_chars: int) -> str:
|
|
|
1004
1158
|
},
|
|
1005
1159
|
minimal_receipt,
|
|
1006
1160
|
),
|
|
1161
|
+
attach_artifact_receipt(
|
|
1162
|
+
{
|
|
1163
|
+
"digest_capped": True,
|
|
1164
|
+
"status": payload.get("status"),
|
|
1165
|
+
"exit_code": payload.get("exit_code"),
|
|
1166
|
+
"timed_out": payload.get("timed_out"),
|
|
1167
|
+
},
|
|
1168
|
+
tiny_receipt,
|
|
1169
|
+
),
|
|
1007
1170
|
{"digest_capped": True},
|
|
1008
1171
|
]
|
|
1009
1172
|
)
|
|
@@ -1081,14 +1244,16 @@ def terminate_process_tree(
|
|
|
1081
1244
|
class TimedCommandStream:
|
|
1082
1245
|
def __init__(
|
|
1083
1246
|
self,
|
|
1084
|
-
proc: subprocess.Popen[
|
|
1085
|
-
stdout:
|
|
1247
|
+
proc: subprocess.Popen[bytes],
|
|
1248
|
+
stdout: BinaryIO,
|
|
1086
1249
|
*,
|
|
1087
1250
|
timeout_seconds: int,
|
|
1251
|
+
max_line_chars: int = MAX_LINE_CHARS_LIMIT,
|
|
1088
1252
|
process_group_id: int | None = None,
|
|
1089
1253
|
) -> None:
|
|
1090
1254
|
self.proc = proc
|
|
1091
1255
|
self.timeout_seconds = timeout_seconds
|
|
1256
|
+
self.max_unterminated_line_chars = max(1, max_line_chars)
|
|
1092
1257
|
self.process_group_id = process_group_id
|
|
1093
1258
|
self.deadline = time.monotonic() + timeout_seconds
|
|
1094
1259
|
self.timed_out = False
|
|
@@ -1098,10 +1263,62 @@ class TimedCommandStream:
|
|
|
1098
1263
|
self._thread = threading.Thread(target=self._read_stdout, args=(stdout,), daemon=True)
|
|
1099
1264
|
self._thread.start()
|
|
1100
1265
|
|
|
1101
|
-
def
|
|
1266
|
+
def _truncated_raw_line(self, text: str) -> str:
    """Return ``text`` cut down to the unterminated-line budget plus a truncation marker.

    The kept prefix is shorter than the budget by a hold-back margin, and the
    marker (which ends with a newline) reports both the budget and the margin.
    """
    limit = self.max_unterminated_line_chars
    # The hold-back can never exceed the budget itself.
    withheld = min(RAW_TRUNCATION_REDACTION_HOLDBACK_CHARS, limit)
    # NOTE(review): presumably the margin keeps a secret that straddles the cut
    # from being emitted half-visible — the marker text only says "redaction safety".
    kept = text[: max(0, limit - withheld)]
    marker = (
        "...[context-guard-kit: raw line truncated before newline "
        f"after {limit} chars; "
        f"withheld {withheld} boundary chars for redaction safety]\n"
    )
    return kept + marker
|
|
1277
|
+
|
|
1278
|
+
def _read_stdout(self, stdout: BinaryIO) -> None:
|
|
1279
|
+
decoder = codecs.getincrementaldecoder("utf-8")("replace")
|
|
1280
|
+
pending = ""
|
|
1281
|
+
discarding_oversized_line = False
|
|
1282
|
+
|
|
1283
|
+
def feed(text: str) -> None:
|
|
1284
|
+
nonlocal pending, discarding_oversized_line
|
|
1285
|
+
if not text:
|
|
1286
|
+
return
|
|
1287
|
+
pending += text
|
|
1288
|
+
while pending:
|
|
1289
|
+
if discarding_oversized_line:
|
|
1290
|
+
newline_index = pending.find("\n")
|
|
1291
|
+
if newline_index == -1:
|
|
1292
|
+
pending = ""
|
|
1293
|
+
return
|
|
1294
|
+
pending = pending[newline_index + 1 :]
|
|
1295
|
+
discarding_oversized_line = False
|
|
1296
|
+
continue
|
|
1297
|
+
|
|
1298
|
+
newline_index = pending.find("\n")
|
|
1299
|
+
if newline_index != -1:
|
|
1300
|
+
if newline_index > self.max_unterminated_line_chars:
|
|
1301
|
+
self._queue.put(self._truncated_raw_line(pending))
|
|
1302
|
+
else:
|
|
1303
|
+
self._queue.put(pending[: newline_index + 1])
|
|
1304
|
+
pending = pending[newline_index + 1 :]
|
|
1305
|
+
continue
|
|
1306
|
+
|
|
1307
|
+
if len(pending) > self.max_unterminated_line_chars:
|
|
1308
|
+
self._queue.put(self._truncated_raw_line(pending))
|
|
1309
|
+
pending = ""
|
|
1310
|
+
discarding_oversized_line = True
|
|
1311
|
+
return
|
|
1312
|
+
|
|
1102
1313
|
try:
|
|
1103
|
-
|
|
1104
|
-
|
|
1314
|
+
while True:
|
|
1315
|
+
chunk = stdout.read(COMMAND_READ_CHUNK_BYTES)
|
|
1316
|
+
if not chunk:
|
|
1317
|
+
break
|
|
1318
|
+
feed(decoder.decode(chunk, final=False))
|
|
1319
|
+
feed(decoder.decode(b"", final=True))
|
|
1320
|
+
if pending and not discarding_oversized_line:
|
|
1321
|
+
self._queue.put(pending)
|
|
1105
1322
|
finally:
|
|
1106
1323
|
self._stream_closed = True
|
|
1107
1324
|
self._queue.put(_STREAM_END)
|
|
@@ -1242,6 +1459,18 @@ def main() -> int:
|
|
|
1242
1459
|
if args.artifact_receipt and args.digest == "off":
|
|
1243
1460
|
print("trim_command_output.py: --artifact-receipt requires --digest markdown or --digest json", file=sys.stderr)
|
|
1244
1461
|
return 2
|
|
1462
|
+
if args.artifact_receipt:
|
|
1463
|
+
try:
|
|
1464
|
+
load_artifact_store_module()
|
|
1465
|
+
except UnsafeAdjacentModuleError as exc:
|
|
1466
|
+
print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
|
|
1467
|
+
return 2
|
|
1468
|
+
except Exception:
|
|
1469
|
+
# Missing/broken artifact helpers are reported in the digest payload as
|
|
1470
|
+
# artifact_receipt_unavailable for backward compatibility. Integrity
|
|
1471
|
+
# failures above are different: they indicate an adjacent helper exists
|
|
1472
|
+
# but cannot be safely trusted, so they fail closed.
|
|
1473
|
+
pass
|
|
1245
1474
|
|
|
1246
1475
|
command = args.command
|
|
1247
1476
|
if command and command[0] == "--":
|
|
@@ -1250,6 +1479,12 @@ def main() -> int:
|
|
|
1250
1479
|
print("trim_command_output.py: missing command", file=sys.stderr)
|
|
1251
1480
|
return 2
|
|
1252
1481
|
|
|
1482
|
+
try:
|
|
1483
|
+
line_sanitizer = load_line_sanitizer(args.show_paths)
|
|
1484
|
+
except UnsafeAdjacentModuleError as exc:
|
|
1485
|
+
print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
|
|
1486
|
+
return 2
|
|
1487
|
+
|
|
1253
1488
|
popen_kwargs: dict[str, object] = {}
|
|
1254
1489
|
if os.name != "nt":
|
|
1255
1490
|
popen_kwargs["start_new_session"] = True
|
|
@@ -1258,9 +1493,8 @@ def main() -> int:
|
|
|
1258
1493
|
command,
|
|
1259
1494
|
stdout=subprocess.PIPE,
|
|
1260
1495
|
stderr=subprocess.STDOUT,
|
|
1261
|
-
text=
|
|
1262
|
-
bufsize=
|
|
1263
|
-
errors="replace",
|
|
1496
|
+
text=False,
|
|
1497
|
+
bufsize=0,
|
|
1264
1498
|
**popen_kwargs,
|
|
1265
1499
|
)
|
|
1266
1500
|
except OSError as exc:
|
|
@@ -1276,7 +1510,6 @@ def main() -> int:
|
|
|
1276
1510
|
visible_chars = 0
|
|
1277
1511
|
any_line_capped = False
|
|
1278
1512
|
runner_summary = RunnerFailureSummary(args.runner_summary_items, show_paths=args.show_paths)
|
|
1279
|
-
line_sanitizer = load_line_sanitizer(args.show_paths)
|
|
1280
1513
|
duplicate_tracker = DuplicateLineTracker()
|
|
1281
1514
|
redacted_lines = 0
|
|
1282
1515
|
artifact_lines: list[str] = []
|
|
@@ -1290,6 +1523,7 @@ def main() -> int:
|
|
|
1290
1523
|
proc,
|
|
1291
1524
|
proc.stdout,
|
|
1292
1525
|
timeout_seconds=args.timeout_seconds,
|
|
1526
|
+
max_line_chars=COMMAND_MAX_UNTERMINATED_LINE_CHARS,
|
|
1293
1527
|
process_group_id=process_group_id_for(proc),
|
|
1294
1528
|
)
|
|
1295
1529
|
for line in command_stream:
|
|
@@ -1384,6 +1618,9 @@ def main() -> int:
|
|
|
1384
1618
|
line_sanitizer=line_sanitizer,
|
|
1385
1619
|
redacted_lines=redacted_lines,
|
|
1386
1620
|
)
|
|
1621
|
+
except UnsafeAdjacentModuleError as exc:
|
|
1622
|
+
print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
|
|
1623
|
+
return 2
|
|
1387
1624
|
except Exception as exc:
|
|
1388
1625
|
payload["artifact_receipt"] = {
|
|
1389
1626
|
"stored": False,
|
|
@@ -1391,6 +1628,16 @@ def main() -> int:
|
|
|
1391
1628
|
"reason": f"{exc.__class__.__name__}: {exc}",
|
|
1392
1629
|
"exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
|
|
1393
1630
|
}
|
|
1631
|
+
artifact_receipt = payload.get("artifact_receipt")
|
|
1632
|
+
if isinstance(artifact_receipt, dict) and artifact_receipt.get("stored"):
|
|
1633
|
+
next_queries = payload.setdefault("next_queries", [])
|
|
1634
|
+
if isinstance(next_queries, list):
|
|
1635
|
+
guidance = (
|
|
1636
|
+
"Use artifact_receipt.output_sandbox.rehydration commands for exact sanitized slices "
|
|
1637
|
+
"before rerunning the broad command or requesting full raw output."
|
|
1638
|
+
)
|
|
1639
|
+
if guidance not in next_queries:
|
|
1640
|
+
next_queries.insert(0, guidance)
|
|
1394
1641
|
if args.digest == "json":
|
|
1395
1642
|
sys.stdout.write(render_digest_json(payload, args.max_chars))
|
|
1396
1643
|
else:
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
# ContextGuard brief mode (advisory)
|
|
2
2
|
|
|
3
|
-
Brief mode is a set of **agent-neutral, advisory** rule snippets that ask a coding
|
|
4
|
-
agent to cut filler from its responses while preserving the technical evidence a
|
|
5
|
-
needs. It is guidance text, not an enforcement mechanism.
|
|
3
|
+
Brief mode is a set of **agent-neutral, advisory** rule snippets that ask a coding or
|
|
4
|
+
tool-using agent to cut filler from its responses while preserving the technical evidence a
|
|
5
|
+
reviewer needs. It is guidance text, not an enforcement mechanism.
|
|
6
6
|
|
|
7
7
|
- **Advisory / best-effort.** Compatible agents may follow these rules fully, partially, or
|
|
8
8
|
ignore them. Brief mode does not intercept, rewrite, or block model output.
|
|
9
9
|
- **No guaranteed savings.** Brief mode does **not** promise any token or cost reduction.
|
|
10
|
-
Verbosity behavior varies by agent and model. Measure real before
|
|
10
|
+
Verbosity behavior varies by agent and model. Measure real before-and-after results for your
|
|
11
11
|
own tasks with `context-guard-bench` before making any savings claim.
|
|
12
12
|
- **Evidence first.** Every level keeps the same mandatory evidence floor (see below). Brief
|
|
13
13
|
mode trims wording, never correctness-critical content.
|
|
@@ -56,7 +56,7 @@ context-guard setup --agent codex --scope project --brief-mode standard --yes
|
|
|
56
56
|
context-guard setup --agent codex --scope project --brief-mode off --yes
|
|
57
57
|
```
|
|
58
58
|
|
|
59
|
-
Per the project safety rules it stays dry-run first, writes only local files, backs up
|
|
59
|
+
Per the project safety rules, it stays dry-run first, writes only local files, backs up
|
|
60
60
|
existing rule files before changing anything, and applies only with explicit approval.
|
|
61
61
|
|
|
62
62
|
Each block is wrapped in stable markers:
|