@ictechgy/context-guard 0.4.9 → 0.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/CHANGELOG.md +16 -0
  2. package/README.ko.md +41 -24
  3. package/README.md +66 -26
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  8. package/docs/distribution.md +10 -7
  9. package/docs/experimental-benchmark-fixtures.md +8 -1
  10. package/package.json +3 -6
  11. package/packaging/homebrew/context-guard.rb.template +1 -1
  12. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  13. package/plugins/context-guard/README.ko.md +9 -6
  14. package/plugins/context-guard/README.md +21 -13
  15. package/plugins/context-guard/bin/context-guard +113 -26
  16. package/plugins/context-guard/bin/context-guard-artifact +542 -46
  17. package/plugins/context-guard/bin/context-guard-cache-score +380 -0
  18. package/plugins/context-guard/bin/context-guard-compress +146 -1
  19. package/plugins/context-guard/bin/context-guard-cost +783 -4
  20. package/plugins/context-guard/bin/context-guard-experiments +99 -18
  21. package/plugins/context-guard/bin/context-guard-failed-nudge +3 -0
  22. package/plugins/context-guard/bin/context-guard-filter +163 -7
  23. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  24. package/plugins/context-guard/bin/context-guard-pack +602 -43
  25. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  26. package/plugins/context-guard/bin/context-guard-setup +165 -31
  27. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  28. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  29. package/plugins/context-guard/bin/context-guard-tool-prune +241 -1
  30. package/plugins/context-guard/lib/context_guard_commands.py +206 -0
  31. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  32. package/context-guard-kit/README.md +0 -91
  33. package/context-guard-kit/benchmark_runner.py +0 -2401
  34. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  35. package/context-guard-kit/context_compress.py +0 -695
  36. package/context-guard-kit/context_escrow.py +0 -935
  37. package/context-guard-kit/context_filter.py +0 -637
  38. package/context-guard-kit/context_guard_cli.py +0 -325
  39. package/context-guard-kit/context_guard_diet.py +0 -1711
  40. package/context-guard-kit/context_pack.py +0 -2713
  41. package/context-guard-kit/cost_guard.py +0 -2349
  42. package/context-guard-kit/experimental_registry.py +0 -4348
  43. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  44. package/context-guard-kit/guard_large_read.py +0 -690
  45. package/context-guard-kit/hook_secret_patterns.py +0 -43
  46. package/context-guard-kit/read_symbol.py +0 -483
  47. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  48. package/context-guard-kit/sanitize_output.py +0 -725
  49. package/context-guard-kit/settings.example.json +0 -67
  50. package/context-guard-kit/setup_wizard.py +0 -2515
  51. package/context-guard-kit/statusline.sh +0 -362
  52. package/context-guard-kit/statusline_merged.sh +0 -157
  53. package/context-guard-kit/tool_schema_pruner.py +0 -837
  54. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -1,725 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Sanitize grep/diff/log output before it enters Claude context.
3
-
4
- The helper can wrap a command while preserving its exit code, or sanitize stdin.
5
- It redacts common credential patterns, anonymizes absolute paths by default, and
6
- keeps only bounded head/anchor/tail context when output is too large.
7
- """
8
- from __future__ import annotations
9
-
10
- import argparse
11
- import collections
12
- import hashlib
13
- import os
14
- from pathlib import PurePosixPath
15
- import queue
16
- import re
17
- import signal
18
- import subprocess
19
- import sys
20
- import threading
21
- import time
22
- from typing import Iterable, Iterator, TextIO
23
-
24
# Terminal control sequences and stray control characters that are removed from
# a line by strip_ansi().
TERMINAL_CONTROL_RE = re.compile(
    r"(?:"
    r"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)|"  # OSC title/clipboard controls
    r"\x1b[@-_][0-?]*[ -/]*[@-~]|"  # CSI and other ESC sequences
    r"[\x00-\x08\x0b\x0c\x0d\x0e-\x1f\x7f-\x9f]"  # other control chars; tab (\x09) and newline (\x0a) are kept
    r")"
)
# Match plausible absolute file paths without treating operators (`//`) or
# tiny string literals (`"/"`) as paths. Requiring at least one directory plus
# one leaf keeps the sanitizer from corrupting code while still anonymizing
# common grep/test output like /Users/me/project/app.py:12.
PATH_SEGMENT = r"[A-Za-z0-9._~+\-]+"
# POSIX-style absolute path; `prefix` captures the character (or line start)
# that must precede the path so the replacement can put it back.
ABSOLUTE_PATH_RE = re.compile(
    rf"(?P<prefix>^|[\s('\"=])(?P<path>/(?:{PATH_SEGMENT}/)+{PATH_SEGMENT})"
)
# Drive-letter path with backslash separators; same `prefix`/`path` groups.
WINDOWS_PATH_RE = re.compile(
    rf"(?P<prefix>^|[\s('\"=])(?P<path>[A-Za-z]:\\(?:{PATH_SEGMENT}\\)+{PATH_SEGMENT})"
)
# PEM-style armor lines that open and close a private key block.
PRIVATE_KEY_BEGIN_RE = re.compile(
    r"-----BEGIN (?:[A-Z0-9 ]*PRIVATE KEY|OPENSSH PRIVATE KEY|PGP PRIVATE KEY BLOCK)-----"
)
PRIVATE_KEY_END_RE = re.compile(
    r"-----END (?:[A-Z0-9 ]*PRIVATE KEY|OPENSSH PRIVATE KEY|PGP PRIVATE KEY BLOCK)-----"
)
# Whole-line match for an Authorization / Proxy-Authorization header. `prefix`
# spans an optional grep-style `file:line[:col]:` locator, an optional diff
# +/- marker and the header name with its colon; the rest of the line is the
# part that gets replaced.
AUTH_HEADER_RE = re.compile(
    r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Proxy-)?Authorization\s*:\s*).+$"
)
# Regex fragment (not compiled on its own) describing key names that are
# treated as secret-bearing; it is interpolated into the patterns below.
SECRET_KEY = (
    r"[A-Za-z0-9_.-]*(?:api[_-]?key|apikey|token|secret|password|passwd|pwd|"
    r"private[_-]?key|access[_-]?key|client[_-]?secret)[A-Za-z0-9_.-]*"
    r"|AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|"
    r"GOOGLE_APPLICATION_CREDENTIALS|AZURE_CLIENT_SECRET"
)
# `<secret key> = "value"` / `<secret key>: 'value'` on a single line. Groups:
# lead (preceding delimiter), prefix (locator, diff marker, `export`, key and
# separator), quote, value, and tail (non-delimiter characters glued to the
# closing quote).
INLINE_QUOTED_SECRET_ASSIGNMENT_RE = re.compile(
    rf"(?i)(?P<lead>^|[\s;{{\[,])"
    rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
    rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
    rf"(?P<quote>[\"'])(?P<value>(?:\\.|(?!(?P=quote)).)*)(?P=quote)(?P<tail>[^\s,;}}\]]*)"
)
# Same shape as above but for an unquoted value, which runs up to the next
# whitespace, comma, semicolon, `}` or `]`.
INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE = re.compile(
    rf"(?i)(?P<lead>^|[\s;{{\[,])"
    rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
    rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
    rf"(?P<value>[^\s,;}}\]]+)"
)
# Anything of the form `scheme://...` up to the next whitespace.
URL_LIKE_RE = re.compile(r"\b[A-Za-z][A-Za-z0-9+.-]*://[^\s]+")
# A secret-named parameter inside a URL; group 1 keeps the delimiter, the
# parameter name and the `=` so only the value is replaced.
URL_SECRET_PARAM_RE = re.compile(rf"(?i)([?&#;](?:{SECRET_KEY})=)[^\s?&#;]+")
# Unquoted values (compared lower-cased) that are never redacted.
SAFE_UNQUOTED_VALUES = {
    "[redacted]",
    "false",
    "none",
    "null",
    "os.getenv",
    "process.env",
    "true",
    "undefined",
}
# Dotted identifier chain such as `config.api.token`; requires at least one dot.
IDENTIFIER_CHAIN_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)+$")
# (pattern, replacement) pairs applied in order to every line that is not
# swallowed by a multiline/private-key redaction. The first group of entries
# covers auth schemes, URL parameters and CLI flags; the remaining entries match
# fixed token prefixes (NOTE(review): the prefixes resemble GitHub, GitLab,
# Slack, AWS, Stripe, npm, Google, SendGrid and JWT formats - vendor attribution
# is inferred from the prefixes, confirm before relying on it).
INLINE_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
    (re.compile(r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
    (re.compile(r"(?i)\bBasic\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
    (re.compile(rf"(?i)([?&#](?:{SECRET_KEY})=)[^\s&#;]+"), r"\1[REDACTED]"),
    (re.compile(r"(?i)(--(?:api[_-]?key|token|secret|password|client[_-]?secret)\s+)\S+"), r"\1[REDACTED]"),
    (re.compile(r"(?i)(--(?:api[_-]?key|token|secret|password|client[_-]?secret)=)\S+"), r"\1[REDACTED]"),
    (re.compile(r"(?i)((?:-p|-u|--user)\s+)\S+:\S+"), r"\1[REDACTED]"),
    (re.compile(r"gh[pousr]_[A-Za-z0-9_]{20,}"), "[REDACTED]"),
    (re.compile(r"github_pat_[A-Za-z0-9_]{20,}"), "[REDACTED]"),
    (re.compile(r"glpat-[A-Za-z0-9_-]{12,}"), "[REDACTED]"),
    (re.compile(r"xox[abprs]-[A-Za-z0-9-]{10,}"), "[REDACTED]"),
    (re.compile(r"(?:AKIA|ASIA)[0-9A-Z]{16}"), "[REDACTED]"),
    (re.compile(r"(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{16,}"), "[REDACTED]"),
    (re.compile(r"sk-(?:ant|proj)-[A-Za-z0-9_-]{12,}"), "[REDACTED]"),
    (re.compile(r"sk-[A-Za-z0-9][A-Za-z0-9_-]{20,}"), "[REDACTED]"),
    (re.compile(r"npm_[A-Za-z0-9]{20,}"), "[REDACTED]"),
    (re.compile(r"AIza[0-9A-Za-z_\-]{20,}"), "[REDACTED]"),
    (re.compile(r"SG\.[A-Za-z0-9_-]{16,}\.[A-Za-z0-9_-]{16,}"), "[REDACTED]"),
    (re.compile(r"eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+"), "[REDACTED]"),
    (re.compile(r"([a-z][a-z0-9+.-]*://)[^/\s:@]+:[^/\s@]+@", re.IGNORECASE), r"\1[REDACTED]@"),
)
# Lines worth keeping in a trimmed summary: unified-diff headers/hunks and
# grep-style `file:line[:col]:` hits.
ANCHOR_RE = re.compile(
    r"^(?:diff --git |index [0-9a-f]|--- |\+\+\+ |@@ |Binary files |(?:[^:\n]+):\d+(?::\d+)?:)",
    re.IGNORECASE,
)
# Lines that merely mention a secret-ish word are also kept as anchors.
SECRET_WORD_RE = re.compile(r"(?i)\b(api[_-]?key|token|secret|password|private[_-]?key|client[_-]?secret)\b")
# Upper bounds that normalize_budgets() clamps the CLI budgets to.
MAX_LINES_LIMIT = 5_000
MAX_CHARS_LIMIT = 1_000_000
MAX_LINE_CHARS_LIMIT = 100_000
MAX_SECTION_LINES_LIMIT = 2_000
# Timeout default/ceiling in seconds, and the exit code returned on timeout.
DEFAULT_TIMEOUT_SECONDS = 600
MAX_TIMEOUT_SECONDS = 86_400
TIMEOUT_EXIT_CODE = 124
115
-
116
-
117
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
    """Coerce *value* to an int and clamp it between *minimum* and *maximum*.

    When ``int(value)`` fails with TypeError, ValueError or OverflowError the
    *default* is returned as-is (it is not clamped).
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    # Apply the floor first, then the ceiling, so the ceiling wins if the
    # bounds are ever inverted.
    floored = parsed if parsed >= minimum else minimum
    return floored if floored <= maximum else maximum
123
-
124
-
125
def normalize_budgets(args: argparse.Namespace) -> None:
    """Clamp every numeric budget on *args* in place.

    Each attribute is passed through ``bounded_int`` with its fallback value
    and its allowed range; the namespace is mutated and nothing is returned.
    """
    # (attribute, fallback, minimum, maximum) - processed in this order.
    rules = (
        ("max_lines", 240, 1, MAX_LINES_LIMIT),
        ("max_chars", 24000, 1, MAX_CHARS_LIMIT),
        ("max_line_chars", 3000, 1, MAX_LINE_CHARS_LIMIT),
        ("head_lines", 50, 0, MAX_SECTION_LINES_LIMIT),
        ("tail_lines", 90, 0, MAX_SECTION_LINES_LIMIT),
        ("anchor_lines", 80, 0, MAX_SECTION_LINES_LIMIT),
        ("timeout_seconds", DEFAULT_TIMEOUT_SECONDS, 1, MAX_TIMEOUT_SECONDS),
    )
    for attribute, fallback, low, high in rules:
        current = getattr(args, attribute)
        setattr(args, attribute, bounded_int(current, fallback, low, high))
138
-
139
-
140
def strip_ansi(text: str) -> str:
    """Return *text* with every TERMINAL_CONTROL_RE match removed."""
    cleaned = TERMINAL_CONTROL_RE.sub("", text)
    return cleaned
142
-
143
-
144
def stable_hash(value: str, length: int = 12) -> str:
    """Return the first *length* hex digits of the SHA-256 of *value*.

    The text is encoded as UTF-8 with ``errors="replace"`` before hashing.
    """
    encoded = value.encode("utf-8", errors="replace")
    hex_digest = hashlib.sha256(encoded).hexdigest()
    return hex_digest[:length]
146
-
147
-
148
def anonymize_absolute_paths(text: str) -> str:
    """Replace absolute paths in *text* with ``<basename>#path:<hash>``.

    POSIX-style paths are rewritten first, then drive-letter paths. The hash is
    ``stable_hash`` of the path exactly as it appeared in the text, and the
    character that preceded the path is preserved.
    """

    def _mask(found: re.Match[str]) -> str:
        original = found.group("path")
        # Backslashes are normalised only to extract the leaf name.
        leaf = PurePosixPath(original.replace("\\", "/")).name
        if not leaf:
            leaf = "path"
        return "{}{}#path:{}".format(found.group("prefix"), leaf, stable_hash(original))

    for pattern in (ABSOLUTE_PATH_RE, WINDOWS_PATH_RE):
        text = pattern.sub(_mask, text)
    return text
158
-
159
-
160
- def cap_line(line: str, max_line_chars: int) -> tuple[str, bool]:
161
- if max_line_chars <= 0 or len(line) <= max_line_chars:
162
- return line, False
163
- newline = "\n" if line.endswith("\n") else ""
164
- body = line[:-1] if newline else line
165
- marker = f"...[line trimmed: {len(body)} chars]"
166
- keep = max(0, max_line_chars - len(marker) - len(newline))
167
- return body[:keep] + marker + newline, True
168
-
169
-
170
def should_redact_unquoted_secret_value(line: str, match: re.Match[str]) -> bool:
    """Decide whether an unquoted value after a secret-like key is a literal.

    *match* must expose a ``value`` group and *line* is the string it was
    matched against. Returns False for empty values, well-known placeholders,
    dotted identifier chains and anything that looks like a call or other
    bracketed expression; True otherwise.
    """
    candidate = match.group("value").strip()
    if not candidate or candidate.lower() in SAFE_UNQUOTED_VALUES:
        return False
    if IDENTIFIER_CHAIN_RE.match(candidate) is not None:
        return False

    value_end = match.end("value")
    opener_follows = value_end < len(line) and line[value_end] in "([{"
    if opener_follows:
        # Likely a function call or expression (`api_key = os.getenv(...)`);
        # preserve it so Claude can still reason about code flow.
        return False

    contains_bracket = any(bracket in candidate for bracket in "()[]{}")
    return not contains_bracket
186
-
187
-
188
def redact_url_like_secret_params(line: str) -> tuple[str, bool]:
    """Mask secret-named parameters inside every URL-like token of *line*.

    Returns the rewritten line and a flag that is True when at least one
    parameter value was replaced with ``[REDACTED]``.
    """
    replaced_total = 0

    def _scrub(found: re.Match[str]) -> str:
        nonlocal replaced_total
        cleaned, replaced = URL_SECRET_PARAM_RE.subn(r"\1[REDACTED]", found.group(0))
        replaced_total += replaced
        return cleaned

    rewritten = URL_LIKE_RE.sub(_scrub, line)
    return rewritten, replaced_total > 0
199
-
200
-
201
def redact_secret_assignments(line: str) -> tuple[str, bool]:
    """Redact secret values assigned on a single line.

    Three passes run in order: secret URL parameters, quoted assignments, then
    unquoted assignments (the latter only when
    ``should_redact_unquoted_secret_value`` agrees). Returns the rewritten line
    and whether any pass changed something.
    """
    after_urls, url_hit = redact_url_like_secret_params(line)
    state = {"hit": url_hit}

    def _mask_quoted(found: re.Match[str]) -> str:
        state["hit"] = True
        quote = found.group("quote")
        # Text glued to the closing quote (the `tail` group) is not re-emitted.
        return "".join((found.group("lead"), found.group("prefix"), quote, "[REDACTED]", quote))

    def _mask_unquoted(found: re.Match[str]) -> str:
        # found.string is the line being substituted, i.e. the output of the
        # quoted pass, so offsets from the match are valid for it.
        if not should_redact_unquoted_secret_value(found.string, found):
            return found.group(0)
        state["hit"] = True
        return found.group("lead") + found.group("prefix") + "[REDACTED]"

    after_quoted = INLINE_QUOTED_SECRET_ASSIGNMENT_RE.sub(_mask_quoted, after_urls)
    after_unquoted = INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE.sub(_mask_unquoted, after_quoted)
    return after_unquoted, state["hit"]
219
-
220
-
221
# Start of a quoted secret assignment: same lead/locator/diff-marker/`export`/
# key/separator shape as the inline assignment patterns, but it stops right
# after the opening quote (group `quote`). Callers check whether that quote is
# closed on the same line to decide if the value continues on following lines.
MULTILINE_SECRET_ASSIGNMENT_RE = re.compile(
    rf"(?i)(?:^|[\s;{{\[,])(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
    rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*(?P<quote>[\"'])"
)
225
-
226
-
227
def find_unescaped_quote_end(text: str, quote: str, start: int = 0) -> int | None:
    """Return the index just past the first unescaped *quote* at or after *start*.

    A backslash escapes the character that follows it. ``None`` is returned
    when no unescaped delimiter is found.
    """
    skip_next = False
    position = start
    for char in text[start:]:
        position += 1  # now points one past ``char``
        if skip_next:
            skip_next = False
        elif char == "\\":
            skip_next = True
        elif char == quote:
            return position
    return None
240
-
241
-
242
def has_unescaped_quote(text: str, quote: str, start: int = 0) -> bool:
    """Tell whether *text* holds an unescaped *quote* at or after *start*."""
    end_index = find_unescaped_quote_end(text, quote, start)
    return end_index is not None
245
-
246
-
247
def detect_multiline_secret_assignment(line: str) -> str | None:
    """Find a secret assignment whose quoted value is not closed on *line*.

    Returns the opening quote character of the first such assignment, or
    ``None`` when every secret assignment on the line is closed (or there is
    none).
    """
    unterminated = (
        opener.group("quote")
        for opener in MULTILINE_SECRET_ASSIGNMENT_RE.finditer(line)
        if not has_unescaped_quote(line, opener.group("quote"), opener.end("quote"))
    )
    return next(unterminated, None)
254
-
255
-
256
def private_key_state_after_line(line: str) -> bool | None:
    """Report how *line* changes the "inside a private key block" state.

    Returns True when a block opens and is not closed on the same line, False
    when a block closes (including open-and-close on one line), and ``None``
    when the line has no BEGIN/END marker.
    """
    opens = PRIVATE_KEY_BEGIN_RE.search(line) is not None
    closes = PRIVATE_KEY_END_RE.search(line) is not None
    if opens:
        return not closes
    return False if closes else None
263
-
264
-
265
def secret_or_private_key_redaction_label(line: str) -> str:
    """Pick the placeholder text for a fully redacted *line*.

    Lines carrying a private-key BEGIN or END marker get the key-block label;
    everything else gets the multiline-secret label.
    """
    for marker in (PRIVATE_KEY_BEGIN_RE, PRIVATE_KEY_END_RE):
        if marker.search(line):
            return "[REDACTED PRIVATE KEY BLOCK]\n"
    return "[REDACTED MULTILINE SECRET]\n"
269
-
270
-
271
class LineSanitizer:
    """Stateful line-by-line redactor.

    State carried between lines:
      * ``in_private_key_block`` - inside a BEGIN/END private key block.
      * ``multiline_secret_quote`` - quote character of a secret assignment
        whose value is still open, or ``None``.
      * ``redactions`` - number of lines on which something was redacted.

    With ``show_paths=False`` (the default) absolute paths in every returned
    line are anonymized.
    """

    _KEY_BLOCK_LABEL = "[REDACTED PRIVATE KEY BLOCK]\n"
    _MULTILINE_LABEL = "[REDACTED MULTILINE SECRET]\n"

    def __init__(self, *, show_paths: bool = False) -> None:
        self.show_paths = show_paths
        self.in_private_key_block = False
        self.multiline_secret_quote: str | None = None
        self.redactions = 0

    def sanitize(self, raw_line: str) -> tuple[str, bool]:
        """Return ``(sanitized_line, redacted)`` for one input line.

        Lines that belong to an open multiline secret or a private key block
        are replaced wholesale by a label (keeping a leading ``+``/``-``);
        other lines go through the header, assignment and inline-pattern
        redactions in that order.
        """
        line = strip_ansi(raw_line)
        first_visible = line.lstrip()[:1]
        diff_prefix = first_visible if first_visible in ("+", "-") else ""

        # 1. Continuation of a quoted secret that started on an earlier line.
        if self.multiline_secret_quote is not None:
            return self._continue_multiline_secret(line, diff_prefix)

        # 2. Body (or END line) of a private key block.
        if self.in_private_key_block:
            opened_quote = detect_multiline_secret_assignment(line)
            if opened_quote is not None:
                self.multiline_secret_quote = opened_quote
            if PRIVATE_KEY_END_RE.search(line):
                self.in_private_key_block = False
            return self._finish(diff_prefix + self._KEY_BLOCK_LABEL, True)

        # 3. A secret assignment whose quoted value is left open on this line.
        opened_quote = detect_multiline_secret_assignment(line)
        if opened_quote is not None:
            self.multiline_secret_quote = opened_quote
            key_state = private_key_state_after_line(line)
            if key_state is not None:
                self.in_private_key_block = key_state
            return self._finish(diff_prefix + secret_or_private_key_redaction_label(line), True)

        # 4. Start of a private key block (possibly closed on the same line).
        if PRIVATE_KEY_BEGIN_RE.search(line):
            if not PRIVATE_KEY_END_RE.search(line):
                self.in_private_key_block = True
            return self._finish(diff_prefix + self._KEY_BLOCK_LABEL, True)

        # 5. Ordinary line: in-place redactions only.
        line, header_hits = AUTH_HEADER_RE.subn(r"\g<prefix>[REDACTED]", line)
        redacted = header_hits > 0

        line, assignment_hit = redact_secret_assignments(line)
        redacted = redacted or assignment_hit

        for pattern, replacement in INLINE_PATTERNS:
            line, hits = pattern.subn(replacement, line)
            redacted = redacted or hits > 0

        return self._finish(line, redacted)

    def _continue_multiline_secret(self, line: str, diff_prefix: str) -> tuple[str, bool]:
        """Redact one line that falls inside an open multiline secret value."""
        # The label is chosen from the state *before* this line updates it.
        touches_key_block = (
            self.in_private_key_block
            or PRIVATE_KEY_BEGIN_RE.search(line)
            or PRIVATE_KEY_END_RE.search(line)
        )
        label = self._KEY_BLOCK_LABEL if touches_key_block else self._MULTILINE_LABEL

        key_state = private_key_state_after_line(line)
        if key_state is not None:
            self.in_private_key_block = key_state

        closing_index = find_unescaped_quote_end(line, self.multiline_secret_quote)
        if closing_index is not None:
            # The value ends here; the remainder may open another one.
            self.multiline_secret_quote = detect_multiline_secret_assignment(line[closing_index:])
        return self._finish(diff_prefix + label, True)

    def _finish(self, line: str, redacted: bool) -> tuple[str, bool]:
        """Count the redaction and anonymize paths unless they are shown."""
        if redacted:
            self.redactions += 1
        result = line if self.show_paths else anonymize_absolute_paths(line)
        return result, redacted
344
-
345
-
346
class BoundedOutput:
    """Collect sanitized lines and render them within line/char budgets.

    While the budgets hold, every (capped) line is kept in ``full``. Once the
    line or character budget is exceeded the collector stops growing ``full``
    and ``render`` falls back to a summary built from the first lines
    (``head``), a rolling window of the last lines (``tail``) and de-duplicated
    anchor lines (``anchors``).
    """

    def __init__(
        self,
        *,
        max_lines: int,
        max_chars: int,
        max_line_chars: int,
        head_lines: int,
        tail_lines: int,
        anchor_lines: int,
    ) -> None:
        # Budgets.
        self.max_lines = max_lines
        self.max_chars = max_chars
        self.max_line_chars = max_line_chars
        self.head_limit = max(0, head_lines)
        self.tail = collections.deque(maxlen=max(0, tail_lines))
        self.anchor_limit = max(0, anchor_lines)
        # Collected lines.
        self.head: list[str] = []
        self.anchors: list[str] = []
        self.anchor_seen: set[str] = set()
        self.full: list[str] = []
        # Counters.
        self.line_count = 0
        self.raw_chars = 0
        self.visible_chars = 0
        self.line_caps = 0
        self.trimmed = False

    def add(self, raw_line: str, sanitized_line: str, *, redacted: bool) -> None:
        """Record one line; *raw_line* is only used for the raw char count."""
        self.line_count += 1
        self.raw_chars += len(raw_line)
        shown, was_capped = cap_line(sanitized_line, self.max_line_chars)
        if was_capped:
            self.line_caps += 1
        self.visible_chars += len(shown)

        if len(self.head) < self.head_limit:
            self.head.append(shown)
        self.tail.append(shown)
        self._remember_anchor(shown, redacted)

        if self.trimmed:
            return
        self.full.append(shown)
        over_lines = self.max_lines > 0 and self.line_count > self.max_lines
        over_chars = self.max_chars > 0 and self.visible_chars > self.max_chars
        if over_lines or over_chars:
            self.trimmed = True

    def _remember_anchor(self, shown: str, redacted: bool) -> None:
        """Store *shown* as an anchor if it qualifies, is new and fits."""
        if not self._is_anchor(shown, redacted):
            return
        key = shown.rstrip("\n")
        if key in self.anchor_seen or len(self.anchors) >= self.anchor_limit:
            return
        self.anchor_seen.add(key)
        self.anchors.append(shown)

    def _is_anchor(self, line: str, redacted: bool) -> bool:
        """A line is an anchor when redacted or matching either anchor regex."""
        if redacted:
            return True
        if ANCHOR_RE.search(line):
            return True
        return bool(SECRET_WORD_RE.search(line))

    def render(self, redactions: int) -> str:
        """Return the full text, or a head/anchors/tail summary when trimmed.

        *redactions* is only echoed in the summary header. The summary itself
        is cut to ``max_chars`` with a trailing marker if it is still too long.
        """
        if not self.trimmed:
            return "".join(self.full)

        budget = self.max_lines if self.max_lines > 0 else 240
        spare = max(0, budget - 8)  # leave room for the header/section titles
        third = spare // 3
        head_n = min(len(self.head), max(1, third) if spare else 0)
        anchor_n = min(len(self.anchors), max(0, third))
        tail_n = min(len(self.tail), max(0, spare - head_n - anchor_n))

        pieces: list[str] = [
            "[context-guard-kit] sanitized output trimmed: "
            + f"lines={self.line_count} raw_chars={self.raw_chars} "
            + f"sanitized_chars={self.visible_chars} redacted_lines={redactions} "
            + f"line_caps={self.line_caps}\n"
        ]
        sections = (
            (head_n, f"--- head ({head_n} lines) ---\n", self.head[:head_n]),
            (
                anchor_n,
                f"--- grep/diff/security anchors ({anchor_n} lines) ---\n",
                self.anchors[:anchor_n],
            ),
            # Guard the slice: [-0:] would select the whole tail.
            (tail_n, f"--- tail ({tail_n} lines) ---\n", list(self.tail)[-tail_n:] if tail_n else []),
        )
        for count, title, body in sections:
            if count:
                pieces.append(title)
                pieces.extend(body)

        text = "".join(pieces)
        if self.max_chars > 0 and len(text) > self.max_chars:
            marker = f"\n[context-guard-kit] rendered sanitized summary capped: {len(text)} chars\n"
            keep = max(0, self.max_chars - len(marker))
            text = text[:keep].rstrip() + marker
        return text
433
-
434
-
435
def sanitize_stream(stream: Iterable[str], args: argparse.Namespace) -> tuple[str, int, int]:
    """Sanitize every line of *stream* and render it within the budgets on *args*.

    Returns ``(rendered_text, redacted_line_count, total_line_count)``.
    """
    sanitizer = LineSanitizer(show_paths=args.show_paths)
    budget_fields = (
        "max_lines",
        "max_chars",
        "max_line_chars",
        "head_lines",
        "tail_lines",
        "anchor_lines",
    )
    bounded = BoundedOutput(**{field: getattr(args, field) for field in budget_fields})

    for raw in stream:
        clean, was_redacted = sanitizer.sanitize(raw)
        bounded.add(raw, clean, redacted=was_redacted)

    rendered = bounded.render(sanitizer.redactions)
    return rendered, sanitizer.redactions, bounded.line_count
449
-
450
-
451
# Unique sentinel the stdout reader thread enqueues after the last line;
# compared by identity so it can never collide with real output.
_STREAM_END = object()
452
-
453
-
454
def process_group_exists(pgid: int) -> bool:
    """Probe process group *pgid* with signal 0.

    A PermissionError counts as "exists"; ProcessLookupError and any other
    OSError count as "gone".
    """
    try:
        os.killpg(pgid, 0)
    except PermissionError:
        return True
    except OSError:
        # Covers ProcessLookupError as well as every other OS-level failure.
        return False
    else:
        return True
464
-
465
-
466
def terminate_process_tree(
    proc: subprocess.Popen[str],
    *,
    process_group_id: int | None = None,
    include_exited_group: bool = False,
) -> None:
    """Best-effort termination of *proc* and, on POSIX, its process group.

    POSIX: send SIGTERM to the group (``process_group_id`` or ``proc.pid``),
    poll for up to two seconds until the group is gone, then send SIGKILL.
    When *proc* itself has already exited nothing is done unless
    ``include_exited_group`` is true, in which case the group is still
    signalled.

    Windows: ``terminate()`` the process, wait up to two seconds, then
    ``kill()`` it.

    The function never raises for OS-level signalling failures: a group that
    has vanished or cannot be signalled is treated as "nothing left to do".
    """
    if os.name != "nt":
        pgid = process_group_id if process_group_id is not None else proc.pid
        if proc.poll() is not None and not include_exited_group:
            return
        try:
            os.killpg(pgid, signal.SIGTERM)
        except OSError:
            # ProcessLookupError: the group is already gone. Any other OSError
            # (notably PermissionError) means the group cannot be signalled;
            # this is cleanup code on the timeout path, so it must not crash
            # the wrapper. NOTE(review): some platforms are reported to return
            # EPERM for groups that only contain zombies - confirm.
            return
        deadline = time.monotonic() + 2
        while time.monotonic() < deadline:
            if proc.poll() is None:
                # Reap the leader as soon as it exits so it does not linger
                # as a zombie that keeps the group "alive".
                try:
                    proc.wait(timeout=0.05)
                except subprocess.TimeoutExpired:
                    pass
            if not process_group_exists(pgid):
                return
            time.sleep(0.05)
        try:
            os.killpg(pgid, signal.SIGKILL)
        except OSError:
            # Same best-effort policy as for SIGTERM above.
            return
        return

    if proc.poll() is not None:
        return
    try:
        proc.terminate()
    except ProcessLookupError:
        return
    except OSError:
        try:
            proc.kill()
        except OSError:
            return
    try:
        proc.wait(timeout=2)
        return
    except subprocess.TimeoutExpired:
        pass
    try:
        proc.kill()
    except OSError:
        # Includes ProcessLookupError; nothing more can be done.
        return
518
-
519
-
520
class TimedCommandStream:
    """Iterate a child's stdout lines while enforcing a wall-clock deadline.

    A daemon thread reads *stdout* into a bounded queue; iteration drains the
    queue in short polls. If the deadline passes while the stream is still
    open, the process tree is terminated, a single timeout notice line is
    yielded and iteration stops. ``timed_out`` and ``timeout_reported`` expose
    what happened to the caller.
    """

    def __init__(
        self,
        proc: subprocess.Popen[str],
        stdout: TextIO,
        *,
        timeout_seconds: int,
        process_group_id: int | None = None,
    ) -> None:
        self.proc = proc
        self.timeout_seconds = timeout_seconds
        self.process_group_id = process_group_id
        self.deadline = time.monotonic() + timeout_seconds
        self.timed_out = False
        self.timeout_reported = False
        self._stream_closed = False
        self._queue: queue.Queue[str | object] = queue.Queue(maxsize=1024)
        self._thread = threading.Thread(target=self._read_stdout, args=(stdout,), daemon=True)
        self._thread.start()

    def _read_stdout(self, stdout: TextIO) -> None:
        """Reader-thread body: forward lines, then flag and signal the end."""
        try:
            for line in stdout:
                self._queue.put(line)
        finally:
            self._stream_closed = True
            self._queue.put(_STREAM_END)

    def timeout_message(self) -> str:
        """Return the notice line emitted when the command times out."""
        notice = f"[context-guard-kit] command timed out after {self.timeout_seconds}s; "
        return notice + "terminated wrapped process\n"

    def _mark_timed_out(self) -> None:
        """Flag the timeout once and terminate the process tree."""
        if self.timed_out:
            return
        self.timed_out = True
        terminate_process_tree(
            self.proc,
            process_group_id=self.process_group_id,
            include_exited_group=True,
        )

    def _timeout_line(self) -> str:
        """Trigger the timeout handling and return the notice to yield."""
        self._mark_timed_out()
        self.timeout_reported = True
        return self.timeout_message()

    def __iter__(self) -> Iterator[str]:
        while True:
            remaining = self.deadline - time.monotonic()
            if self.proc.poll() is not None or self.timed_out:
                wait_time = 0.05
            else:
                wait_time = min(0.05, max(0.0, remaining))

            try:
                item = self._queue.get(timeout=wait_time)
            except queue.Empty:
                # Keep polling while there is time left, or while the closed
                # stream's end marker is still on its way.
                if remaining > 0 or self._stream_closed:
                    continue
                if not self.timeout_reported:
                    yield self._timeout_line()
                break

            if item is _STREAM_END:
                break
            if not isinstance(item, str):
                continue
            yield item

            # A chatty command never leaves the queue empty, so the deadline
            # is also checked after each delivered line.
            if not self._stream_closed and time.monotonic() >= self.deadline:
                if not self.timeout_reported:
                    yield self._timeout_line()
                break

    def returncode(self) -> int:
        """Return the child's exit code, or TIMEOUT_EXIT_CODE on timeout.

        Waits for the process only until the original deadline; if it is still
        running then, it is terminated and the timeout code is returned.
        """
        if self.timed_out:
            return TIMEOUT_EXIT_CODE
        time_left = max(0.0, self.deadline - time.monotonic())
        try:
            exit_code = self.proc.wait(timeout=time_left)
        except subprocess.TimeoutExpired:
            self._mark_timed_out()
            return TIMEOUT_EXIT_CODE
        return exit_code
599
-
600
-
601
def process_group_id_for(proc: subprocess.Popen[str]) -> int | None:
    """Return the process group id of *proc*, or ``None`` on Windows.

    If the process can no longer be looked up, its own pid is returned.
    """
    if os.name == "nt":
        return None
    try:
        group_id = os.getpgid(proc.pid)
    except ProcessLookupError:
        # start_new_session=True makes the child the group leader; if it exits
        # before getpgid(), the group id is still the leader pid while inherited
        # stdout descendants remain alive.
        group_id = proc.pid
    return group_id
611
-
612
-
613
def run_command(
    command: list[str],
    timeout_seconds: int,
) -> tuple[Iterable[str], subprocess.Popen[str] | None, int | None]:
    """Start *command* with stderr merged into a line-buffered text stdout.

    Returns ``(stream, proc, early_rc)``:
      * success - a TimedCommandStream, the Popen object and ``None``;
      * failure to start - ``([], None, 127)`` after a message on stderr;
      * missing stdout pipe - ``([], proc, 1)`` after a message on stderr.

    Outside Windows the child is started in a new session.
    """
    session_kwargs: dict[str, object] = {"start_new_session": True} if os.name != "nt" else {}
    try:
        proc = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
            errors="replace",
            **session_kwargs,
        )
    except OSError as exc:
        print(f"context-guard-sanitize-output: command failed to start: {exc}", file=sys.stderr)
        return [], None, 127

    pipe = proc.stdout
    if pipe is None:
        print("context-guard-sanitize-output: subprocess produced no stdout pipe", file=sys.stderr)
        return [], proc, 1

    timed_stream = TimedCommandStream(
        proc,
        pipe,
        timeout_seconds=timeout_seconds,
        process_group_id=process_group_id_for(proc),
    )
    return timed_stream, proc, None
646
-
647
-
648
def stdin_has_data(stdin: TextIO) -> bool:
    """Treat any non-terminal *stdin* (pipe, file, ...) as carrying input."""
    attached_to_terminal = stdin.isatty()
    return not attached_to_terminal
650
-
651
-
652
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser.

    Options: six integer budgets, ``--timeout-seconds``, ``--show-paths`` and a
    trailing ``command`` that captures everything left on the command line.
    """
    parser = argparse.ArgumentParser(
        description="Redact secrets and budget grep/diff/log output before sending it to Claude."
    )
    integer_budgets = (
        ("--max-lines", 240),
        ("--max-chars", 24000),
        ("--max-line-chars", 3000),
        ("--head-lines", 50),
        ("--tail-lines", 90),
        ("--anchor-lines", 80),
    )
    for flag, default in integer_budgets:
        parser.add_argument(flag, type=int, default=default)

    timeout_help = (
        "maximum runtime for wrapped commands before terminating the process group "
        f"(default: {DEFAULT_TIMEOUT_SECONDS}, max: {MAX_TIMEOUT_SECONDS})"
    )
    parser.add_argument(
        "--timeout-seconds",
        type=int,
        default=DEFAULT_TIMEOUT_SECONDS,
        help=timeout_help,
    )
    parser.add_argument(
        "--show-paths",
        action="store_true",
        help="show raw absolute paths instead of basename#path:<hash>; local debugging only because private paths may be exposed",
    )
    parser.add_argument("command", nargs=argparse.REMAINDER)
    return parser
678
-
679
-
680
def main() -> int:
    """CLI entry point: sanitize a wrapped command's output or stdin.

    Returns the wrapped command's exit code (TIMEOUT_EXIT_CODE on timeout, the
    early code from ``run_command`` when it reports one), 0 after sanitizing
    stdin, or 2 when there is neither a command nor piped input.
    """
    args = build_parser().parse_args()
    normalize_budgets(args)

    command = args.command
    if command and command[0] == "--":
        # Drop the conventional separator between our options and the command.
        command = command[1:]

    proc: subprocess.Popen[str] | None = None
    command_stream: TimedCommandStream | None = None
    early_rc: int | None = None

    if command:
        stream, proc, early_rc = run_command(command, args.timeout_seconds)
        if isinstance(stream, TimedCommandStream):
            command_stream = stream
        if proc is None and early_rc is not None:
            return early_rc
    elif stdin_has_data(sys.stdin):
        stream = sys.stdin
    else:
        print("context-guard-sanitize-output: missing command or stdin", file=sys.stderr)
        return 2

    output = sanitize_stream(stream, args)[0]

    rc: int | None = None
    if proc is not None:
        if command_stream is None:
            rc = proc.wait()
        else:
            rc = command_stream.returncode()
            # The timeout may only be detected while waiting for the exit
            # code; make sure the notice still reaches the output once.
            if command_stream.timed_out and not command_stream.timeout_reported:
                notice_sanitizer = LineSanitizer(show_paths=args.show_paths)
                timeout_line = notice_sanitizer.sanitize(command_stream.timeout_message())[0]
                command_stream.timeout_reported = True
                output = output + timeout_line

    if output:
        sys.stdout.write(output)
        if not output.endswith("\n"):
            sys.stdout.write("\n")

    if proc is None:
        return 0
    return rc if early_rc is None else early_rc


if __name__ == "__main__":
    raise SystemExit(main())