@ictechgy/context-guard 0.4.11 → 0.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +4 -0
- package/README.ko.md +19 -12
- package/README.md +11 -11
- package/package.json +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/bin/context-guard +42 -46
- package/plugins/context-guard/bin/context-guard-audit +3 -3
- package/plugins/context-guard/bin/context-guard-bench +136 -16
- package/plugins/context-guard/bin/context-guard-cache-score +29 -2
- package/plugins/context-guard/bin/context-guard-compress +89 -27
- package/plugins/context-guard/bin/context-guard-filter +88 -18
- package/plugins/context-guard/bin/context-guard-pack +28 -2
- package/plugins/context-guard/bin/context-guard-read-symbol +27 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +169 -6
- package/plugins/context-guard/bin/context-guard-setup +21 -5
- package/plugins/context-guard/bin/context-guard-tool-prune +48 -10
- package/plugins/context-guard/bin/context-guard-trim-output +109 -52
- package/plugins/context-guard/lib/context_guard_command_manifest_loader.py +123 -0
- package/plugins/context-guard/lib/context_guard_commands.py +4 -1
|
@@ -63,6 +63,7 @@ MAX_JSON_PATH_SEGMENT_CHARS = 64
|
|
|
63
63
|
MAX_JSON_WALK_NODES = 10_000
|
|
64
64
|
MAX_JSON_WALK_DEPTH = 64
|
|
65
65
|
MAX_JSON_SHAPE_WARNINGS = 200
|
|
66
|
+
MAX_JSON_CANONICAL_COMPARE_BYTES = 200_000
|
|
66
67
|
SAFE_JSON_PATH_SEGMENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_-]{0,63}$")
|
|
67
68
|
DYNAMIC_JSON_KEY_RE = re.compile(r"(?i)(request|trace|nonce|random|timestamp|created[_-]?at|updated[_-]?at|date)")
|
|
68
69
|
SENSITIVE_JSON_KEY_RE = re.compile(
|
|
@@ -93,6 +94,22 @@ def json_bytes(data: Any, *, indent: int | None = None) -> str:
|
|
|
93
94
|
return json.dumps(data, ensure_ascii=False, sort_keys=True, separators=(",", ":") if indent is None else None, indent=indent)
|
|
94
95
|
|
|
95
96
|
|
|
97
|
+
def bounded_canonical_json(data: Any, *, max_bytes: int) -> str | None:
    """Render ``data`` as canonical JSON text, giving up once it outgrows ``max_bytes``.

    The canonical form uses sorted keys, two-space indentation, unescaped
    non-ASCII characters, and exactly one trailing newline.  The text is
    produced piece by piece with ``JSONEncoder.iterencode`` so an oversized
    document is abandoned as soon as the running size (as measured by
    ``byte_len_text``) crosses the cap, instead of being serialized in full.

    Args:
        data: Already-decoded JSON value to serialize.
        max_bytes: Upper bound on the size of the canonical text, trailing
            newline included.

    Returns:
        The canonical text, or ``None`` when it would exceed ``max_bytes``.
    """
    pieces: list[str] = []
    total = 0
    canonical_encoder = json.JSONEncoder(ensure_ascii=False, sort_keys=True, indent=2)
    for piece in canonical_encoder.iterencode(data):
        total += byte_len_text(piece)
        if total > max_bytes:
            return None
        pieces.append(piece)
    # The trailing newline is part of the canonical form and counts toward the cap.
    total += 1
    if total > max_bytes:
        return None
    pieces.append("\n")
    return "".join(pieces)
|
|
111
|
+
|
|
112
|
+
|
|
96
113
|
def json_path_child(path: str, key: object) -> str:
|
|
97
114
|
"""Return a JSON warning path segment without echoing sensitive/dynamic keys."""
|
|
98
115
|
text = str(key)
|
|
@@ -335,8 +352,18 @@ def json_shape_warnings(text: str) -> tuple[str, list[dict[str, Any]]]:
|
|
|
335
352
|
if not isinstance(data, (dict, list)):
|
|
336
353
|
return "json-scalar", []
|
|
337
354
|
warnings = _walk_json(data)
|
|
338
|
-
|
|
339
|
-
|
|
355
|
+
input_bytes = byte_len_text(text)
|
|
356
|
+
canonical = bounded_canonical_json(data, max_bytes=MAX_JSON_CANONICAL_COMPARE_BYTES)
|
|
357
|
+
if canonical is None:
|
|
358
|
+
warnings.append({
|
|
359
|
+
"code": "json_canonical_check_skipped",
|
|
360
|
+
"path": "$",
|
|
361
|
+
"severity": "info",
|
|
362
|
+
"message": "JSON input is parseable but canonical formatting would exceed the comparison byte cap.",
|
|
363
|
+
"input_bytes": input_bytes,
|
|
364
|
+
"max_bytes": MAX_JSON_CANONICAL_COMPARE_BYTES,
|
|
365
|
+
})
|
|
366
|
+
elif canonical != text:
|
|
340
367
|
warnings.append({
|
|
341
368
|
"code": "json_not_canonical",
|
|
342
369
|
"path": "$",
|
|
@@ -20,10 +20,12 @@ import os
|
|
|
20
20
|
from pathlib import Path
|
|
21
21
|
import re
|
|
22
22
|
import sys
|
|
23
|
-
from typing import Callable
|
|
23
|
+
from typing import Callable, Iterable
|
|
24
24
|
|
|
25
25
|
DEFAULT_MAX_BYTES = 10_000_000
|
|
26
26
|
MAX_MAX_BYTES = 100_000_000
|
|
27
|
+
MAX_SEARCH_DEDUPE_KEYS = 50_000
|
|
28
|
+
JSON_PARSE_FAILED = object()
|
|
27
29
|
# 토큰 추정은 보수적 proxy 일 뿐이다(관측값 아님). 평균 ~4 chars/token 휴리스틱을 쓰되
|
|
28
30
|
# 메타데이터에 measurement="estimated" 로 명시해 관측 토큰 수와 혼동되지 않게 한다.
|
|
29
31
|
TOKEN_PROXY_CHARS_PER_TOKEN = 4
|
|
@@ -214,20 +216,57 @@ def token_proxy(text: str) -> int:
|
|
|
214
216
|
return max(1, round(len(text) / TOKEN_PROXY_CHARS_PER_TOKEN))
|
|
215
217
|
|
|
216
218
|
|
|
219
|
+
LINE_BOUNDARY_CHARS = {"\n", "\r", "\v", "\f", "\x1c", "\x1d", "\x1e", "\x85", "\u2028", "\u2029"}

# Single pattern recognising every line terminator above.  "\r\n" is listed
# first so a CRLF pair is consumed as one line break rather than two.
_LINE_BREAK_RE = re.compile(
    "\r\n|[" + "".join(re.escape(char) for char in sorted(LINE_BOUNDARY_CHARS)) + "]"
)


def iter_text_lines(text: str) -> Iterable[str]:
    """Yield lines with str.splitlines() boundaries without building a line list.

    The scan is delegated to a compiled regular expression instead of a
    per-character Python loop, which matters for multi-megabyte inputs; lines
    are still produced lazily, one at a time.

    Args:
        text: Text to split.

    Yields:
        Each line without its terminator, in order.  As with
        ``str.splitlines()``, a terminator at the very end of ``text`` does not
        produce a trailing empty line.
    """
    start = 0
    for line_break in _LINE_BREAK_RE.finditer(text):
        yield text[start:line_break.start()]
        start = line_break.end()
    # Emit the unterminated remainder, if any.
    if start < len(text):
        yield text[start:]
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def sample_text_lines(text: str, limit: int) -> list[str]:
    """Return at most ``limit`` leading lines of ``text``.

    Lines are pulled lazily from ``iter_text_lines`` so only the sampled
    prefix of ``text`` is scanned.

    Args:
        text: Text to sample from.
        limit: Maximum number of lines to return.  Zero or a negative value
            yields an empty list (an append-then-check loop would still have
            returned one line in that case).

    Returns:
        The first ``limit`` lines, without line terminators.
    """
    from itertools import islice

    return list(islice(iter_text_lines(text), max(0, limit)))
|
|
251
|
+
|
|
252
|
+
|
|
217
253
|
def classify_content(text: str) -> str:
    """Best-effort content classification into one of CONTENT_TYPES.

    Blank input is reported as ``"prose"``.  Text accepted by
    ``_looks_like_json`` is reported as ``"json"``; everything else is handed
    to ``classify_non_json_content``.

    Order matters: valid JSON and diff have the strongest unambiguous signals;
    search/log/code are sampled over the first lines; prose is the conservative
    default so unknown text is never over-compressed.
    """
    body = text.strip()
    if not body:
        return "prose"
    return "json" if _looks_like_json(body) else classify_non_json_content(body)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def classify_non_json_content(stripped: str) -> str:
|
|
269
|
+
sample = sample_text_lines(stripped, 200)
|
|
231
270
|
if _looks_like_diff(sample):
|
|
232
271
|
return "diff"
|
|
233
272
|
if _looks_like_search(sample):
|
|
@@ -355,14 +394,17 @@ def build_readable_compression_metadata(
|
|
|
355
394
|
}
|
|
356
395
|
|
|
357
396
|
|
|
358
|
-
def
|
|
359
|
-
if stripped[0] not in "{[":
|
|
360
|
-
return
|
|
397
|
+
def parse_json_candidate(stripped: str) -> object:
    """Try to decode ``stripped`` as a JSON object or array.

    Args:
        stripped: Candidate text, expected to have surrounding whitespace
            already removed.

    Returns:
        The decoded value, or the ``JSON_PARSE_FAILED`` sentinel when the text
        is empty, does not begin with ``{`` or ``[``, or cannot be decoded.
        Callers compare against the sentinel by identity, so a legitimately
        empty ``[]`` / ``{}`` result is still distinguishable from failure.
    """
    # Cheap pre-check: only objects and arrays are treated as JSON candidates.
    if stripped[:1] not in ("{", "["):
        return JSON_PARSE_FAILED
    try:
        decoded = json.loads(stripped)
    except (ValueError, RecursionError):
        return JSON_PARSE_FAILED
    return decoded
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _looks_like_json(stripped: str) -> bool:
    """Report whether ``parse_json_candidate`` can decode ``stripped``."""
    outcome = parse_json_candidate(stripped)
    return outcome is not JSON_PARSE_FAILED
|
|
366
408
|
|
|
367
409
|
|
|
368
410
|
def _ratio(matches: int, total: int, threshold: float) -> bool:
|
|
@@ -390,15 +432,7 @@ def _looks_like_code(sample: list[str]) -> bool:
|
|
|
390
432
|
return _ratio(matches, len(sample), 0.25)
|
|
391
433
|
|
|
392
434
|
|
|
393
|
-
def
|
|
394
|
-
"""Re-serialize JSON without insignificant whitespace (data-preserving)."""
|
|
395
|
-
try:
|
|
396
|
-
parsed = json.loads(text)
|
|
397
|
-
except (ValueError, RecursionError):
|
|
398
|
-
# 파싱 불가 시 무손실을 깨지 않도록 prose 전략으로 안전하게 폴백한다.
|
|
399
|
-
compressed, detail = compress_prose(text)
|
|
400
|
-
detail["fallback_from"] = "json"
|
|
401
|
-
return compressed, detail
|
|
435
|
+
def compress_parsed_json(text: str, parsed: object) -> tuple[str, dict[str, object]]:
|
|
402
436
|
compact = json.dumps(parsed, ensure_ascii=False, separators=(",", ":"))
|
|
403
437
|
if not text.endswith("\n"):
|
|
404
438
|
trailing = ""
|
|
@@ -407,6 +441,17 @@ def compress_json(text: str) -> tuple[str, dict[str, object]]:
|
|
|
407
441
|
return compact + trailing, {"strategy": "json-compact", "lossy": False, "json_parse_ok": True}
|
|
408
442
|
|
|
409
443
|
|
|
444
|
+
def compress_json(text: str) -> tuple[str, dict[str, object]]:
    """Re-serialize JSON without insignificant whitespace (data-preserving).

    Returns the ``(compressed_text, detail)`` pair produced by
    ``compress_parsed_json`` when ``text`` decodes, or by ``compress_prose``
    (with ``detail["fallback_from"] = "json"``) when it does not.
    """
    parsed = parse_json_candidate(text.strip())
    if parsed is not JSON_PARSE_FAILED:
        return compress_parsed_json(text, parsed)
    # When parsing fails, fall back safely to the prose strategy so that
    # losslessness is not broken.
    compressed, detail = compress_prose(text)
    detail["fallback_from"] = "json"
    return compressed, detail
|
|
453
|
+
|
|
454
|
+
|
|
410
455
|
def compress_diff(text: str) -> tuple[str, dict[str, object]]:
|
|
411
456
|
"""Keep file headers, hunk headers, and +/- changes; collapse context runs."""
|
|
412
457
|
out: list[str] = []
|
|
@@ -464,18 +509,28 @@ def compress_log(text: str) -> tuple[str, dict[str, object]]:
|
|
|
464
509
|
|
|
465
510
|
|
|
466
511
|
def compress_search(text: str) -> tuple[str, dict[str, object]]:
    """Drop exact-duplicate match lines while preserving first-seen order with bounded keys.

    Lines are compared after stripping trailing whitespace.  At most
    ``MAX_SEARCH_DEDUPE_KEYS`` distinct keys are remembered; once that many
    are stored, further unseen lines are kept in the output but no longer
    recorded, so later repeats of them are not detected.

    Returns:
        The rejoined text and a detail dict reporting the number of dropped
        duplicates, the key limit, and whether the limit was reached.
    """
    kept: list[str] = []
    seen_keys: set[str] = set()
    duplicates = 0
    key_cap_hit = False
    for line in iter_text_lines(text):
        key = line.rstrip()
        if key in seen_keys:
            duplicates += 1
            continue
        kept.append(line)
        if len(seen_keys) >= MAX_SEARCH_DEDUPE_KEYS:
            # Key store is full: keep the line but stop tracking new keys.
            key_cap_hit = True
        else:
            seen_keys.add(key)
    detail: dict[str, object] = {
        "strategy": "search-dedupe",
        "lossy": duplicates > 0,
        "duplicate_lines_dropped": duplicates,
        "dedupe_key_limit": MAX_SEARCH_DEDUPE_KEYS,
        "dedupe_key_limit_reached": key_cap_hit,
    }
    return _join_lines(kept, text), detail
|
|
479
534
|
|
|
480
535
|
|
|
481
536
|
def compress_code(text: str) -> tuple[str, dict[str, object]]:
|
|
@@ -689,14 +744,21 @@ def compress_text(
|
|
|
689
744
|
the compressed body, or the metadata that follows.
|
|
690
745
|
"""
|
|
691
746
|
sanitized, redacted_lines = sanitize_text(text, show_paths=show_paths)
|
|
747
|
+
parsed_json: object = JSON_PARSE_FAILED
|
|
692
748
|
if forced_type is not None:
|
|
693
749
|
content_type, type_source = forced_type, "override"
|
|
694
750
|
else:
|
|
695
|
-
|
|
751
|
+
stripped = sanitized.strip()
|
|
752
|
+
parsed_json = parse_json_candidate(stripped)
|
|
753
|
+
content_type = "json" if parsed_json is not JSON_PARSE_FAILED else classify_non_json_content(stripped)
|
|
754
|
+
type_source = "detected"
|
|
696
755
|
if compression_mode == "readable" and content_type == "prose":
|
|
697
756
|
compressed, strategy_detail = compress_prose_readable(sanitized)
|
|
698
757
|
else:
|
|
699
|
-
|
|
758
|
+
if content_type == "json" and parsed_json is not JSON_PARSE_FAILED:
|
|
759
|
+
compressed, strategy_detail = compress_parsed_json(sanitized, parsed_json)
|
|
760
|
+
else:
|
|
761
|
+
compressed, strategy_detail = STRATEGIES[content_type](sanitized)
|
|
700
762
|
if compression_mode == "readable":
|
|
701
763
|
strategy_detail["readable_mode"] = True
|
|
702
764
|
strategy_detail["readable_strategy"] = "sentence-window-preview"
|
|
@@ -9,6 +9,8 @@ from __future__ import annotations
|
|
|
9
9
|
|
|
10
10
|
import argparse
|
|
11
11
|
import codecs
|
|
12
|
+
import collections
|
|
13
|
+
import itertools
|
|
12
14
|
from dataclasses import dataclass
|
|
13
15
|
import json
|
|
14
16
|
import os
|
|
@@ -455,26 +457,94 @@ def cap_line(line: str, max_chars: int) -> str:
|
|
|
455
457
|
return line[: max(0, max_chars - len(marker) - len(suffix))] + marker + suffix
|
|
456
458
|
|
|
457
459
|
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
460
|
+
LINE_BOUNDARY_CHARS = {"\n", "\r", "\v", "\f", "\x1c", "\x1d", "\x1e", "\x85", "\u2028", "\u2029"}

# Single pattern recognising every line terminator above.  "\r\n" is listed
# first so a CRLF pair is consumed as one line break rather than two.
_LINE_BREAK_RE = re.compile(
    "\r\n|[" + "".join(re.escape(char) for char in sorted(LINE_BOUNDARY_CHARS)) + "]"
)


@dataclass
class LineSelection:
    """Outcome of one filter pass over a stream of source lines."""

    # Lines chosen for output, in emission order.
    lines: list[str]
    # Number of source lines read from the input during the pass.
    input_lines: int
    # False when the pass was cut short by a line limit, in which case
    # input_lines may undercount the real input.
    input_complete: bool


def iter_text_lines_keepends(text: str) -> Iterable[str]:
    """Yield lines with Python splitlines(keepends=True) boundaries without a list.

    The scan is delegated to a compiled regular expression instead of a
    per-character Python loop, which matters for large captured outputs; lines
    are still produced lazily, one at a time.

    Args:
        text: Text to split.

    Yields:
        Each line including its terminator.  A final unterminated fragment is
        yielded as-is; a terminator at the very end of ``text`` does not
        produce a trailing empty line.
    """
    start = 0
    for line_break in _LINE_BREAK_RE.finditer(text):
        end = line_break.end()
        yield text[start:end]
        start = end
    # Emit the unterminated remainder, if any.
    if start < len(text):
        yield text[start:]
|
|
490
|
+
|
|
491
|
+
|
|
492
|
+
def line_matches_filter(line: str, flt: CompiledFilter) -> bool:
    """Decide whether ``line`` passes the include/exclude patterns of ``flt``.

    A line passes when it matches at least one include pattern (or no include
    patterns are configured) and matches none of the exclude patterns.
    """
    include_patterns = flt.include_regex
    if include_patterns and all(pattern.search(line) is None for pattern in include_patterns):
        return False
    exclude_patterns = flt.exclude_regex
    if exclude_patterns and not all(pattern.search(line) is None for pattern in exclude_patterns):
        return False
    return True
|
|
498
|
+
|
|
499
|
+
|
|
500
|
+
def select_lines_with_stats(lines: Iterable[str], flt: CompiledFilter, max_line_chars: int) -> LineSelection:
    """Select the lines of ``lines`` that ``flt`` keeps and report scan statistics.

    Every source line is first shortened with ``cap_line`` and then tested
    with ``line_matches_filter``.  When the filter sets ``head_lines`` and/or
    ``tail_lines``, the first/last matching lines are kept; otherwise matching
    lines are kept up to ``max_lines`` (never more than ``MAX_EMIT_LINES``).

    Args:
        lines: Source lines; consumed lazily and at most once.
        flt: Compiled filter providing the patterns and line limits.
        max_line_chars: Per-line character cap passed to ``cap_line``.

    Returns:
        A ``LineSelection`` with the chosen lines, the number of source lines
        read, and whether the whole input was read.  ``input_complete`` is
        False only when scanning stopped at a limit while unread input
        remained; hitting the limit exactly on the last input line still
        counts as complete.
    """
    exhausted = object()  # unique marker for the one-line look-ahead probes
    source = iter(lines)
    source_count = 0
    matched_count = 0
    input_complete = True
    if flt.head_lines is not None or flt.tail_lines is not None:
        head_n = flt.head_lines if flt.head_lines is not None else 0
        tail_n = flt.tail_lines if flt.tail_lines is not None else 0
        head: list[str] = []
        # Bounded deque keeps only the most recent tail_n matches.
        tail: collections.deque[str] = collections.deque(maxlen=tail_n)
        for source_line in source:
            source_count += 1
            line = cap_line(source_line, max_line_chars)
            if not line_matches_filter(line, flt):
                continue
            matched_count += 1
            if head_n and len(head) < head_n:
                head.append(line)
            if tail_n:
                tail.append(line)
            elif head_n and len(head) >= head_n:
                # Head-only selection is full, so the rest of the input is
                # irrelevant.  Probe one line ahead so truncation is reported
                # only when input really was left unread.
                input_complete = next(source, exhausted) is exhausted
                break
        tail_list = list(tail)
        if head and tail_list:
            # With few matches the head and tail windows overlap; drop the
            # overlapping prefix of the tail so no line is emitted twice.
            tail_list = tail_list[max(0, len(head) + len(tail_list) - matched_count):]
        selected = head + tail_list
    else:
        limit = min(flt.max_lines if flt.max_lines is not None else MAX_EMIT_LINES, MAX_EMIT_LINES)
        selected = []
        for source_line in source:
            source_count += 1
            line = cap_line(source_line, max_line_chars)
            if not line_matches_filter(line, flt):
                continue
            matched_count += 1
            selected.append(line)
            if len(selected) >= limit:
                # Same look-ahead as above: only flag truncation when more
                # input actually follows.
                input_complete = next(source, exhausted) is exhausted
                break
    if flt.max_lines is not None and len(selected) > flt.max_lines:
        selected = selected[:flt.max_lines]
    if len(selected) > MAX_EMIT_LINES:
        selected = selected[:MAX_EMIT_LINES]
    return LineSelection(selected, source_count, input_complete)
|
|
544
|
+
|
|
545
|
+
|
|
546
|
+
def select_lines(lines: Iterable[str], flt: CompiledFilter, max_line_chars: int) -> list[str]:
    """Return only the lines chosen by ``select_lines_with_stats``, dropping the statistics."""
    selection = select_lines_with_stats(lines, flt, max_line_chars)
    return selection.lines
|
|
478
548
|
|
|
479
549
|
|
|
480
550
|
def validation_payload(valid: bool, errors: list[str], count: int = 0) -> dict[str, Any]:
|
|
@@ -720,7 +790,6 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
720
790
|
filters, errors = load_filters(Path(args.config).expanduser())
|
|
721
791
|
result = run_command(command, timeout_seconds, max_capture)
|
|
722
792
|
rc = result.returncode
|
|
723
|
-
output = result.stdout_text + result.stderr_text
|
|
724
793
|
protected_nonzero = rc != 0 and is_protected_command(command)
|
|
725
794
|
report: dict[str, Any] = {"tool": TOOL_NAME, "schema_version": SCHEMA_VERSION, "mode": "run", "command_exit_code": rc, "decision": "passthrough", "reason": "unclassified", "protected_nonzero": protected_nonzero}
|
|
726
795
|
if result.timed_out:
|
|
@@ -746,18 +815,19 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
746
815
|
report["filter_id"] = matched.id
|
|
747
816
|
else:
|
|
748
817
|
try:
|
|
749
|
-
|
|
750
|
-
|
|
818
|
+
source_lines = itertools.chain(iter_text_lines_keepends(result.stdout_text), iter_text_lines_keepends(result.stderr_text))
|
|
819
|
+
selection = select_lines_with_stats(source_lines, matched, max_line_chars)
|
|
820
|
+
filtered = selection.lines
|
|
751
821
|
except re.error as exc:
|
|
752
822
|
report["reason"] = f"filter-error:{compact(str(exc), 80)}"
|
|
753
823
|
report["filter_id"] = matched.id
|
|
754
824
|
else:
|
|
755
|
-
if
|
|
825
|
+
if (result.stdout_text or result.stderr_text) and not filtered:
|
|
756
826
|
report["reason"] = "empty-output-fallback"
|
|
757
827
|
report["filter_id"] = matched.id
|
|
758
828
|
else:
|
|
759
829
|
sys.stdout.write("".join(filtered))
|
|
760
|
-
report.update({"decision": "filtered", "reason": "matched", "filter_id": matched.id, "input_lines":
|
|
830
|
+
report.update({"decision": "filtered", "reason": "matched", "filter_id": matched.id, "input_lines": selection.input_lines, "input_lines_complete": selection.input_complete, "output_lines": len(filtered)})
|
|
761
831
|
emit_run_report(args, report)
|
|
762
832
|
return rc
|
|
763
833
|
if not result.passthrough_emitted:
|
|
@@ -957,6 +957,29 @@ def metadata_size(data: dict[str, Any]) -> int:
|
|
|
957
957
|
return len(json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True).encode("utf-8", errors="replace")) + 1
|
|
958
958
|
|
|
959
959
|
|
|
960
|
+
def receipt_working_copy(data: dict[str, Any]) -> tuple[dict[str, Any], bool]:
    """Copy receipt metadata without deep-copying or serializing an oversized pack body.

    A string ``"pack"`` value is immutable, so it is shared rather than
    deep-copied.  Stdout remains authoritative for the pack body, so when its
    UTF-8 size alone exceeds ``MAX_RECEIPT_BYTES`` it is left out before the
    first receipt-size probe; later capping work then only has to deal with
    metadata previews.  Every other value is deep-copied.

    Returns:
        ``(receipt, pack_omitted)``.  When the pack body was left out, the
        receipt also carries ``"pack_omitted_from_receipt": True``.
    """
    receipt: dict[str, Any] = {}
    pack_omitted = False
    for key, value in data.items():
        if key != "pack" or not isinstance(value, str):
            receipt[key] = copy.deepcopy(value)
        elif len(value.encode("utf-8", errors="replace")) > MAX_RECEIPT_BYTES:
            # Cannot fit under the cap even on its own: omit it.
            pack_omitted = True
        else:
            receipt[key] = value
    if pack_omitted:
        receipt["pack_omitted_from_receipt"] = True
    return receipt, pack_omitted
|
|
981
|
+
|
|
982
|
+
|
|
960
983
|
def artifact_failure(error: str, *, bytes_count: int = 0, capped: bool = False) -> dict[str, Any]:
|
|
961
984
|
return {
|
|
962
985
|
"stored": False,
|
|
@@ -1113,8 +1136,11 @@ def finalize_receipt_size(receipt: dict[str, Any]) -> int:
|
|
|
1113
1136
|
|
|
1114
1137
|
|
|
1115
1138
|
def shrink_receipt_for_write(data: dict[str, Any]) -> tuple[dict[str, Any], bool]:
|
|
1116
|
-
receipt =
|
|
1117
|
-
capped =
|
|
1139
|
+
receipt, pack_omitted = receipt_working_copy(data)
|
|
1140
|
+
capped = pack_omitted
|
|
1141
|
+
if pack_omitted:
|
|
1142
|
+
receipt.setdefault("artifact", {})["capped"] = True
|
|
1143
|
+
receipt.setdefault("artifact", {})["cap_bytes"] = MAX_RECEIPT_BYTES
|
|
1118
1144
|
if metadata_size(receipt) <= MAX_RECEIPT_BYTES:
|
|
1119
1145
|
return receipt, capped
|
|
1120
1146
|
capped = True
|
|
@@ -11,6 +11,7 @@ import argparse
|
|
|
11
11
|
import ast
|
|
12
12
|
import errno
|
|
13
13
|
import hashlib
|
|
14
|
+
import importlib.machinery
|
|
14
15
|
import importlib.util
|
|
15
16
|
import json
|
|
16
17
|
import os
|
|
@@ -39,8 +40,27 @@ def _load_hook_secret_patterns():
|
|
|
39
40
|
raise ImportError("hook_secret_patterns.py not found in " + ", ".join(searched))
|
|
40
41
|
|
|
41
42
|
|
|
43
|
+
def _load_sanitize_output():
    """Load the sanitize-output helper that sits next to this script and return it as a module.

    Two file names are tried in ``SCRIPT_DIR``, in order: ``sanitize_output.py``
    and the extension-less ``context-guard-sanitize-output``.  The first one
    that exists is executed under the module name
    ``_claude_token_sanitize_output``.

    Raises:
        ImportError: If neither candidate could be loaded; the message lists
            the paths that were searched.
    """
    candidates = (SCRIPT_DIR / "sanitize_output.py", SCRIPT_DIR / "context-guard-sanitize-output")
    searched = []
    for helper_path in candidates:
        searched.append(str(helper_path))
        if helper_path.is_file():
            # An explicit source loader is used so the file is treated as
            # Python source whatever its name looks like.
            loader = importlib.machinery.SourceFileLoader("_claude_token_sanitize_output", str(helper_path))
            spec = importlib.util.spec_from_loader(loader.name, loader)
            if spec is not None:
                module = importlib.util.module_from_spec(spec)
                loader.exec_module(module)
                return module
    raise ImportError("sanitize_output helper not found in " + ", ".join(searched))
|
|
57
|
+
|
|
58
|
+
|
|
42
59
|
_hook_secret_patterns = _load_hook_secret_patterns()
|
|
60
|
+
_sanitize_output = _load_sanitize_output()
|
|
43
61
|
hook_label_has_sensitive_evidence = _hook_secret_patterns.hook_label_has_sensitive_evidence
|
|
62
|
+
redact_sensitive_hook_text = _hook_secret_patterns.redact_sensitive_hook_text
|
|
63
|
+
LineSanitizer = _sanitize_output.LineSanitizer
|
|
44
64
|
|
|
45
65
|
DEFAULT_CONTEXT_LINES = 3
|
|
46
66
|
DEFAULT_MAX_CHARS = 16_000
|
|
@@ -391,6 +411,11 @@ def strip_line_for_brace_count(line: str, in_block_comment: bool = False) -> tup
|
|
|
391
411
|
return "".join(output), in_block_comment
|
|
392
412
|
|
|
393
413
|
|
|
414
|
+
def redact_symbol_content(content: str) -> str:
    """Run every line of ``content`` through one ``LineSanitizer`` and rejoin the results.

    Lines are split with their terminators kept, so the sanitized pieces can
    be concatenated directly.  A single sanitizer instance (created with
    ``show_paths=True``) handles all lines in order; only the first element of
    each ``sanitize`` result is used.
    """
    sanitizer = LineSanitizer(show_paths=True)
    pieces: list[str] = []
    for line in content.splitlines(keepends=True):
        sanitized = sanitizer.sanitize(line)
        pieces.append(sanitized[0])
    return "".join(pieces)
|
|
417
|
+
|
|
418
|
+
|
|
394
419
|
def find_symbol_slice(path: Path, symbol: str, context: int, max_chars: int, show_paths: bool) -> SymbolSlice | None:
|
|
395
420
|
text, scan_truncated = read_text_bounded(path)
|
|
396
421
|
lines = text.splitlines(keepends=True)
|
|
@@ -409,6 +434,8 @@ def find_symbol_slice(path: Path, symbol: str, context: int, max_chars: int, sho
|
|
|
409
434
|
start_with_context = max(0, start - max(0, context))
|
|
410
435
|
end_with_context = min(len(lines), end + max(0, context))
|
|
411
436
|
content = "".join(lines[start_with_context:end_with_context])
|
|
437
|
+
content = redact_symbol_content(content)
|
|
438
|
+
content = redact_sensitive_hook_text(content, "[REDACTED]")
|
|
412
439
|
capped = False
|
|
413
440
|
if max_chars > 0 and len(content) > max_chars:
|
|
414
441
|
marker = f"\n[context-guard-kit] symbol slice capped: {len(content)} chars total\n"
|