@ictechgy/context-guard 0.4.9 → 0.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/CHANGELOG.md +28 -0
  2. package/README.ko.md +59 -31
  3. package/README.md +85 -36
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  8. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  9. package/docs/benchmark-workflow-examples.md +3 -0
  10. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
  11. package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
  12. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
  13. package/docs/distribution.md +10 -7
  14. package/docs/experimental-benchmark-fixtures.md +30 -6
  15. package/package.json +4 -6
  16. package/packaging/homebrew/context-guard.rb.template +1 -1
  17. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  18. package/plugins/context-guard/README.ko.md +20 -14
  19. package/plugins/context-guard/README.md +26 -17
  20. package/plugins/context-guard/bin/context-guard +147 -25
  21. package/plugins/context-guard/bin/context-guard-artifact +884 -79
  22. package/plugins/context-guard/bin/context-guard-audit +33 -2
  23. package/plugins/context-guard/bin/context-guard-bench +1542 -31
  24. package/plugins/context-guard/bin/context-guard-cache-score +665 -0
  25. package/plugins/context-guard/bin/context-guard-compress +146 -1
  26. package/plugins/context-guard/bin/context-guard-cost +790 -6
  27. package/plugins/context-guard/bin/context-guard-experiments +463 -26
  28. package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
  29. package/plugins/context-guard/bin/context-guard-filter +163 -7
  30. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  31. package/plugins/context-guard/bin/context-guard-pack +892 -49
  32. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  33. package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
  34. package/plugins/context-guard/bin/context-guard-setup +165 -31
  35. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  36. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  37. package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
  38. package/plugins/context-guard/bin/context-guard-trim-output +288 -41
  39. package/plugins/context-guard/brief/README.md +5 -5
  40. package/plugins/context-guard/lib/context_guard_commands.py +230 -0
  41. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  42. package/context-guard-kit/README.md +0 -91
  43. package/context-guard-kit/benchmark_runner.py +0 -2401
  44. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  45. package/context-guard-kit/context_compress.py +0 -695
  46. package/context-guard-kit/context_escrow.py +0 -935
  47. package/context-guard-kit/context_filter.py +0 -637
  48. package/context-guard-kit/context_guard_cli.py +0 -325
  49. package/context-guard-kit/context_guard_diet.py +0 -1711
  50. package/context-guard-kit/context_pack.py +0 -2713
  51. package/context-guard-kit/cost_guard.py +0 -2349
  52. package/context-guard-kit/experimental_registry.py +0 -4348
  53. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  54. package/context-guard-kit/guard_large_read.py +0 -690
  55. package/context-guard-kit/hook_secret_patterns.py +0 -43
  56. package/context-guard-kit/read_symbol.py +0 -483
  57. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  58. package/context-guard-kit/sanitize_output.py +0 -725
  59. package/context-guard-kit/settings.example.json +0 -67
  60. package/context-guard-kit/setup_wizard.py +0 -2515
  61. package/context-guard-kit/statusline.sh +0 -362
  62. package/context-guard-kit/statusline_merged.sh +0 -157
  63. package/context-guard-kit/tool_schema_pruner.py +0 -837
  64. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -1,1449 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Run a command, preserve exit code, and print a token-budgeted output summary.
3
-
4
- Designed for Claude Code Bash tool output. It avoids dumping thousands of log
5
- lines into the conversation while preserving the lines most likely to be useful.
6
- """
7
- from __future__ import annotations
8
-
9
- import argparse
10
- import collections
11
- import hashlib
12
- import importlib.machinery
13
- import importlib.util
14
- import json
15
- import os
16
- from pathlib import Path, PurePosixPath
17
- import queue
18
- import re
19
- import shlex
20
- import signal
21
- import subprocess
22
- import sys
23
- import threading
24
- import time
25
- from typing import Iterable, Iterator
26
-
27
# Default character limit for one failure-summary item (see compact_item).
MAX_SUMMARY_ITEM_CHARS = 500
# Upper clamps applied to the user-supplied budgets by normalize_budgets().
MAX_LINES_LIMIT = 5_000
MAX_CHARS_LIMIT = 1_000_000
MAX_LINE_CHARS_LIMIT = 100_000
MAX_SECTION_LINES_LIMIT = 2_000
MAX_RUNNER_SUMMARY_ITEMS_LIMIT = 100
# Timeout default and ceiling, in seconds, used by normalize_budgets().
DEFAULT_TIMEOUT_SECONDS = 600
MAX_TIMEOUT_SECONDS = 86_400
# NOTE(review): presumably the exit code reported when the wrapped command
# times out; its use is outside the visible part of the file — confirm.
TIMEOUT_EXIT_CODE = 124
# Default and ceiling for the artifact size cap used by
# store_sanitized_artifact_receipt().
DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES = 10_000_000
MAX_ARTIFACT_RECEIPT_MAX_BYTES = 100_000_000
38
-
39
-
40
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
    """Coerce *value* to an int clamped into ``[minimum, maximum]``.

    When ``int(value)`` raises ``TypeError``, ``ValueError`` or
    ``OverflowError`` the *default* is returned as-is (it is not clamped).
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    # Lift to the floor first, then cut to the ceiling, so the ceiling wins
    # if the two bounds are ever inconsistent.
    floored = parsed if parsed >= minimum else minimum
    return floored if floored <= maximum else maximum
46
-
47
-
48
def normalize_budgets(args: argparse.Namespace) -> None:
    """Clamp every numeric budget on *args* in place via ``bounded_int``.

    Each row below is ``(attribute, fallback, minimum, maximum)``; the
    attribute is read from *args*, coerced/clamped, and written back.
    """
    budget_table = (
        ("max_lines", 220, 1, MAX_LINES_LIMIT),
        ("max_chars", 20000, 1, MAX_CHARS_LIMIT),
        ("max_line_chars", 4000, 1, MAX_LINE_CHARS_LIMIT),
        ("head_lines", 40, 0, MAX_SECTION_LINES_LIMIT),
        ("tail_lines", 80, 0, MAX_SECTION_LINES_LIMIT),
        ("error_lines", 120, 0, MAX_SECTION_LINES_LIMIT),
        ("runner_summary_items", 12, 0, MAX_RUNNER_SUMMARY_ITEMS_LIMIT),
        ("timeout_seconds", DEFAULT_TIMEOUT_SECONDS, 1, MAX_TIMEOUT_SECONDS),
    )
    for attribute, fallback, lowest, highest in budget_table:
        current = getattr(args, attribute)
        setattr(args, attribute, bounded_int(current, fallback, lowest, highest))
62
-
63
# Matches terminal control output: OSC sequences (terminated by BEL or ESC \),
# other ESC-introduced sequences, and raw control characters. Tab (\x09) and
# line feed (\x0a) are not in the character class, so they survive strip_ansi();
# carriage return (\x0d) is removed.
TERMINAL_CONTROL_RE = re.compile(
    r"(?:"
    r"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)|"  # OSC title/clipboard controls
    r"\x1b[@-_][0-?]*[ -/]*[@-~]|"  # CSI and other ESC sequences
    r"[\x00-\x08\x0b\x0c\x0d\x0e-\x1f\x7f-\x9f]"
    r")"
)
# A "/"-rooted path that starts the string or follows whitespace, "(", a quote
# or "=". Path segments stop at whitespace, ":", "(", ")" and ",".
ABSOLUTE_PATH_RE = re.compile(r"(?P<prefix>^|[\s('\"=])(?P<path>/(?:[^\s:(),]+/)*[^\s:(),]+)")
# Regex fragment for a key name containing a secret-like word; interpolated
# into the last two FALLBACK_INLINE_PATTERNS entries.
SECRET_KEY = (
    r"[A-Za-z0-9_.-]*(?:api[_-]?key|apikey|token|secret|password|passwd|pwd|"
    r"private[_-]?key|access[_-]?key|client[_-]?secret)[A-Za-z0-9_.-]*"
)
# (pattern, replacement) pairs applied in order by FallbackLineSanitizer.sanitize.
# NOTE(review): the token prefixes look like well-known credential formats
# (GitHub, GitLab, Slack, AWS, ...); the vendor mapping is inferred — confirm.
FALLBACK_INLINE_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
    (re.compile(r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
    (re.compile(r"(?i)\bBasic\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
    (re.compile(r"(?i)\bgh[pousr]_[A-Za-z0-9_]{20,}\b"), "[REDACTED]"),
    (re.compile(r"(?i)\bgithub_pat_[A-Za-z0-9_]{20,}\b"), "[REDACTED]"),
    (re.compile(r"(?i)\bglpat-[A-Za-z0-9_-]{12,}\b"), "[REDACTED]"),
    (re.compile(r"(?i)\bxox[abprs]-[A-Za-z0-9-]{10,}\b"), "[REDACTED]"),
    (re.compile(r"\b(?:AKIA|ASIA)[0-9A-Z]{16}\b"), "[REDACTED]"),
    (re.compile(r"\b(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{16,}\b"), "[REDACTED]"),
    (re.compile(r"\bsk-(?:ant|proj)-[A-Za-z0-9_-]{12,}\b"), "[REDACTED]"),
    (re.compile(r"\bsk-[A-Za-z0-9][A-Za-z0-9_-]{20,}\b"), "[REDACTED]"),
    (re.compile(r"\bnpm_[A-Za-z0-9]{20,}\b"), "[REDACTED]"),
    (re.compile(r"(?i)\bAIza[0-9A-Za-z_\-]{20,}\b"), "[REDACTED]"),
    (re.compile(r"\bSG\.[A-Za-z0-9_-]{16,}\.[A-Za-z0-9_-]{16,}\b"), "[REDACTED]"),
    # Three dot-separated base64url-style segments starting with "eyJ".
    (re.compile(r"\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b"), "[REDACTED]"),
    # "scheme://user:password@" -> keep the scheme, drop the userinfo.
    (re.compile(r"([a-z][a-z0-9+.-]*://)[^/\s:@]+:[^/\s@]+@", re.IGNORECASE), r"\1[REDACTED]@"),
    # Secret-looking key in a query/fragment parameter: keep "key=", drop the value.
    (re.compile(rf"(?i)([?&#;](?:{SECRET_KEY})=)[^\s&#;]+"), r"\1[REDACTED]"),
    # Secret-looking "key: value" / "key=value": keep the key, drop the value.
    (re.compile(rf"(?i)(\b(?:{SECRET_KEY})\s*[:=]\s*)[^\s]+"), r"\1[REDACTED]"),
)
# Whole-line match for an (Proxy-)Authorization header, optionally preceded by
# a "file:line[:col]:" locator and a "+"/"-" marker. Group "prefix" is
# everything up to the header value.
FALLBACK_AUTH_HEADER_RE = re.compile(
    r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Proxy-)?Authorization\s*:\s*).+$"
)
# Case-insensitive markers that flag a line as error-like.
ERROR_RE = re.compile(
    r"(FAIL|FAILED|ERROR|Error:|Exception|Traceback|AssertionError|panic:|fatal:|"
    r"segmentation fault|not ok|\bE\s+assert|\[ERROR\]|✗|✖)",
    re.IGNORECASE,
)
# Line shapes consumed by RunnerFailureSummary.feed, grouped by the runner
# label that method files them under ("pytest", "jest/vitest", "go test",
# "cargo test").
PYTEST_RESULT_RE = re.compile(r"^(?P<kind>FAILED|ERROR)\s+(?P<node>\S+)(?:\s+-\s+(?P<reason>.*))?$")
PYTEST_LOCATION_RE = re.compile(r"^(?P<file>[^:\s][^:\n]*\.py):(?P<line>\d+):(?P<message>.*)$")
JEST_FILE_RE = re.compile(
    r"^\s*FAIL\s+(?P<file>\S+(?:\.(?:test|spec)\.[cm]?[jt]sx?|__tests__/\S+\.[cm]?[jt]sx?))"
    r"(?:\s+>\s+(?P<name>.+))?\s*$"
)
JEST_TEST_RE = re.compile(r"^\s*[●✕×]\s+(?P<name>.+?)\s*$")
JEST_AT_RE = re.compile(
    r"^\s*at\s+(?:.+?\s+\()?(?P<file>[^()\s]+?\.[cm]?[jt]sx?):(?P<line>\d+):(?P<col>\d+)\)?\s*$"
)
VITEST_LOCATION_RE = re.compile(r"^\s*❯\s+(?P<file>[^()\s]+?\.[cm]?[jt]sx?):(?P<line>\d+):(?P<col>\d+)\s*$")
GO_FAIL_RE = re.compile(r"^--- FAIL: (?P<name>\S+)(?:\s+\([^)]+\))?")
GO_LOCATION_RE = re.compile(r"^\s*(?P<file>[^:\s]+_test\.go):(?P<line>\d+):\s*(?P<message>.*)$")
RUST_THREAD_RE = re.compile(
    r"^thread '(?P<name>[^']+)' panicked at (?:.*,\s+)?(?P<file>[^,\n]+?\.rs):(?P<line>\d+):(?P<col>\d+):?"
)
118
-
119
-
120
def strip_ansi(text: str) -> str:
    """Return *text* with every ``TERMINAL_CONTROL_RE`` match deleted."""
    cleaned = TERMINAL_CONTROL_RE.sub("", text)
    return cleaned
122
-
123
-
124
def anonymize_absolute_paths(text: str) -> str:
    """Replace each absolute path in *text* with ``<basename>#path:<digest>``.

    The digest is the first 12 hex characters of the SHA-256 of the full
    path, so the same path always maps to the same token while the
    directory components are not shown.
    """

    def _mask(found: re.Match[str]) -> str:
        full_path = found.group("path")
        # PurePosixPath("/").name is empty; fall back to a fixed label.
        basename = PurePosixPath(full_path).name or "path"
        fingerprint = hashlib.sha256(full_path.encode("utf-8", "replace")).hexdigest()
        return f"{found.group('prefix')}{basename}#path:{fingerprint[:12]}"

    return ABSOLUTE_PATH_RE.sub(_mask, text)
133
-
134
-
135
class FallbackLineSanitizer:
    """Regex-only line sanitizer used when the stronger sanitizer cannot load.

    Exposes the ``sanitize(line) -> (text, redacted)`` call and the
    ``redactions`` counter that the rest of this module uses.
    """

    def __init__(self, *, show_paths: bool = False, diagnostic: str | None = None) -> None:
        # When False, absolute paths are replaced by basename + digest tokens.
        self.show_paths = show_paths
        # Reason the fallback is active; printed once to stderr on first use.
        self.diagnostic = diagnostic
        self.diagnostic_emitted = False
        # Number of lines whose text was changed by a redaction rule.
        self.redactions = 0

    def sanitize(self, raw_line: str) -> tuple[str, bool]:
        """Strip control sequences, mask paths and redact secrets in one line.

        Returns ``(sanitized_line, redacted)`` where *redacted* is True only
        when a secret rule changed the text; control-sequence stripping and
        path anonymization alone do not count as a redaction.
        """
        if self.diagnostic and not self.diagnostic_emitted:
            print(f"context-guard-kit: sanitizer fallback active: {self.diagnostic}", file=sys.stderr)
            self.diagnostic_emitted = True
        line = strip_ansi(raw_line)
        if not self.show_paths:
            line = anonymize_absolute_paths(line)
        original = line
        auth_match = FALLBACK_AUTH_HEADER_RE.match(line)
        if auth_match:
            # The header regex ends in ``.+$``, which stops before a trailing
            # newline. Re-attach the terminator only if the input had one so
            # an unterminated line is not silently given a newline.
            terminator = "\n" if line.endswith("\n") else ""
            line = auth_match.group("prefix") + "[REDACTED]" + terminator
        else:
            for pattern, repl in FALLBACK_INLINE_PATTERNS:
                line = pattern.sub(repl, line)
        redacted = line != original
        if redacted:
            self.redactions += 1
        return line, redacted
160
-
161
-
162
def load_line_sanitizer(show_paths: bool) -> object:
    """Return the sibling sanitizer's ``LineSanitizer`` or a regex fallback.

    Looks next to this script for ``sanitize_output.py`` and then
    ``context-guard-sanitize-output``, executes the first one that loads and
    returns its ``LineSanitizer(show_paths=...)``. If none is present or
    loadable, returns a ``FallbackLineSanitizer`` carrying the reason.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    failures: list[str] = []
    for filename in ("sanitize_output.py", "context-guard-sanitize-output"):
        path = os.path.join(here, filename)
        if os.path.exists(path):
            try:
                loader = importlib.machinery.SourceFileLoader(
                    f"_claude_token_sanitize_{os.getpid()}", path
                )
                spec = importlib.util.spec_from_loader(loader.name, loader)
                if spec is not None:
                    module = importlib.util.module_from_spec(spec)
                    loader.exec_module(module)
                    return module.LineSanitizer(show_paths=show_paths)
            except Exception as exc:
                # Best effort: record why this candidate failed, try the next.
                failures.append(
                    f"{os.path.basename(path)} failed to load: {exc.__class__.__name__}: {exc}"
                )
    reason = "; ".join(failures) or "strong sanitizer not found next to trim wrapper"
    return FallbackLineSanitizer(show_paths=show_paths, diagnostic=reason)
183
-
184
-
185
def load_artifact_store_module() -> object:
    """Load the artifact-store script that sits next to this file.

    Helper scripts ship as sibling executables rather than an importable
    package, so candidates are tried by file name: the source-tree name
    (`context_escrow.py`) and the packaged names (`context-guard-artifact`,
    `claude-token-artifact`). The first one that executes is returned.

    Raises:
        RuntimeError: no candidate exists, or every existing one failed to
            load; the message lists the individual failures.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    failures: list[str] = []
    for filename in ("context_escrow.py", "context-guard-artifact", "claude-token-artifact"):
        path = os.path.join(here, filename)
        if os.path.exists(path):
            try:
                loader = importlib.machinery.SourceFileLoader(
                    f"_context_guard_artifact_{os.getpid()}", path
                )
                spec = importlib.util.spec_from_loader(loader.name, loader)
                if spec is not None:
                    module = importlib.util.module_from_spec(spec)
                    loader.exec_module(module)
                    return module
            except Exception as exc:
                # Best effort: record why this candidate failed, try the next.
                failures.append(
                    f"{os.path.basename(path)} failed to load: {exc.__class__.__name__}: {exc}"
                )
    raise RuntimeError("; ".join(failures) or "artifact store not found next to trim wrapper")
211
-
212
-
213
def store_sanitized_artifact_receipt(
    *,
    sanitized_text: str,
    command: list[str],
    args: argparse.Namespace,
    line_sanitizer: object,
    redacted_lines: int,
) -> dict[str, object]:
    """Store exact sanitized output using the existing artifact receipt format.

    Args:
        sanitized_text: Full, already-sanitized command output to persist.
        command: The wrapped command; only a sanitized preview is stored.
        args: Parsed options. Reads ``artifact_max_bytes`` and
            ``artifact_dir`` (both optional, with defaults) and
            ``max_line_chars``.
        line_sanitizer: Object with ``sanitize(line)`` used for the preview.
        redacted_lines: Redaction count forwarded to the artifact digest.

    Returns:
        The artifact receipt with an added ``exact_reexpand`` entry, or a
        ``{"stored": False, ...}`` dict when the text exceeds the size cap
        (nothing is written in that case).

    Raises:
        RuntimeError: the sibling artifact-store module could not be loaded.
    """
    artifact = load_artifact_store_module()
    max_bytes = bounded_int(
        getattr(args, "artifact_max_bytes", DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES),
        DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES,
        1,
        MAX_ARTIFACT_RECEIPT_MAX_BYTES,
    )
    # Encode once: the same bytes feed both the size check and the hash, and
    # the text may be as large as the artifact cap.
    encoded = sanitized_text.encode("utf-8", errors="replace")
    content_bytes = len(encoded)
    if content_bytes > max_bytes:
        return {
            "stored": False,
            "error": "sanitized_output_exceeds_artifact_max_bytes",
            "bytes": content_bytes,
            "max_bytes": max_bytes,
            "exact_reexpand": {"available": False, "reason": "artifact size cap exceeded"},
        }

    # Single lookup with the same default the re-expand CLI hint compares
    # against, so a namespace without artifact_dir behaves consistently.
    artifact_dir = getattr(args, "artifact_dir", ".context-guard/artifacts")
    directory = artifact.normalize_allowed_first_absolute_symlink(Path(artifact_dir).expanduser())
    content_sha = hashlib.sha256(encoded).hexdigest()
    preview = command_preview(command, line_sanitizer, args.max_line_chars)
    # The id is derived from content hash + command preview, so storing the
    # same output of the same command yields the same artifact id.
    id_basis = json.dumps(
        {
            "content_sha256": content_sha,
            "command_preview": preview,
            "input_truncated": False,
            "producer": "context-guard-trim-output",
        },
        sort_keys=True,
    )
    artifact_id = hashlib.sha256(id_basis.encode("utf-8")).hexdigest()[:20]
    content_path, meta_path = artifact.artifact_paths(directory, artifact_id)
    # Count a final unterminated line as a line too.
    total_lines = sanitized_text.count("\n") + (1 if sanitized_text and not sanitized_text.endswith("\n") else 0)
    content_type = artifact.classify_content_type(sanitized_text)
    strategy = artifact.recommended_strategy(content_type)
    metadata: dict[str, object] = {
        "artifact_id": artifact_id,
        "created_at": int(time.time()),
        "command_preview": preview,
        "content_type": content_type,
        "input": {
            "bytes_read": content_bytes,
            "truncated": False,
            "max_bytes": max_bytes,
            "source": "context-guard-trim-output:sanitized-output",
        },
        "stored_output": {
            "bytes": content_bytes,
            "lines": total_lines,
            "sha256": content_sha,
            "content_file": content_path.name,
            "metadata_file": meta_path.name,
            "scope": "sanitized_full_output",
        },
        "digest": artifact.build_digest(sanitized_text, artifact_id=artifact_id, redacted_lines=redacted_lines),
        "retrieval": {
            "strategy": strategy,
            "deterministic": True,
            "hints": artifact.build_retrieval_hints(
                artifact_id,
                sanitized_text,
                content_type=content_type,
                strategy=strategy,
                total_lines=total_lines,
            ),
        },
    }
    artifact.shrink_digest_for_metadata_cap(metadata)
    artifact.write_private_text(content_path, sanitized_text)
    artifact.write_private_text(meta_path, artifact.metadata_json_text(metadata))
    receipt = artifact.receipt_for(metadata)

    query_line_cap = int(getattr(artifact, "MAX_QUERY_LINES", 5_000))
    query_char_cap = 1_000_000
    content_chars = len(sanitized_text)
    exact_reexpand: dict[str, object] = {
        "available": False,
        "scope": "sanitized_full_output",
        "sha256": content_sha,
        "bytes": content_bytes,
        "lines": total_lines,
    }
    if total_lines <= query_line_cap and content_chars <= query_char_cap:
        # Small enough to fetch back in one query: emit a ready-to-run command.
        raw_artifact_dir = str(artifact_dir)
        dir_flags = ""
        if raw_artifact_dir != ".context-guard/artifacts":
            dir_flags = f" --dir {shlex.quote(raw_artifact_dir)}"
        line_flags = ""
        if total_lines > 0:
            line_flags = f" --lines 1:{total_lines} --max-lines {max(1, total_lines)}"
        exact_reexpand["available"] = True
        exact_reexpand["cli"] = (
            f"context-guard-artifact{dir_flags} get {artifact_id}{line_flags} "
            f"--max-chars {max(1, content_chars)}"
        )
    else:
        exact_reexpand["reason"] = "artifact query cap exceeded; use retrieval hints for exact slices"
    receipt["exact_reexpand"] = exact_reexpand
    return receipt
324
-
325
-
326
def capture_sanitized_artifact_line(
    *,
    capture_enabled: bool,
    sanitized_line: str,
    artifact_lines: list[str],
    capture_bytes: int,
    capture_overflow: bool,
    max_bytes: int,
) -> tuple[int, bool]:
    """Buffer one sanitized line for the artifact, enforcing a byte budget.

    Returns the updated ``(capture_bytes, capture_overflow)`` pair. When the
    line would push the running total past *max_bytes*, the buffer is emptied
    and overflow is reported. If capture is disabled or has already
    overflowed, the inputs are returned unchanged and nothing is buffered.
    """
    if capture_overflow or not capture_enabled:
        return capture_bytes, capture_overflow
    projected = capture_bytes + len(sanitized_line.encode("utf-8", errors="replace"))
    if projected > max_bytes:
        # Drop what was buffered so far: a partial artifact is not kept.
        artifact_lines.clear()
        return capture_bytes, True
    artifact_lines.append(sanitized_line)
    return projected, False
343
-
344
-
345
def unique_keep_order(lines: Iterable[str]) -> list[str]:
    """Return *lines* without duplicates, keeping first occurrences in order.

    Two lines are duplicates when they are equal after stripping trailing
    whitespace; the kept line itself is returned unmodified.
    """
    observed: set[str] = set()
    kept: list[str] = []
    for entry in lines:
        fingerprint = entry.rstrip()
        if fingerprint in observed:
            continue
        observed.add(fingerprint)
        kept.append(entry)
    return kept
354
-
355
-
356
def cap_line(line: str, max_line_chars: int) -> tuple[str, bool]:
    """Shorten *line* to at most *max_line_chars* characters.

    Returns ``(text, capped)``. A non-positive cap disables trimming. A
    trailing newline is preserved and counts toward the cap. When the line
    is trimmed, a ``...[line trimmed: N chars]`` marker (N = original length
    without the newline) is appended if it fits; if the cap is too small to
    hold the marker, the line is hard-truncated instead so the result never
    exceeds the cap.
    """
    if max_line_chars <= 0 or len(line) <= max_line_chars:
        return line, False
    newline = "\n" if line.endswith("\n") else ""
    body = line[:-1] if newline else line
    marker = f"...[line trimmed: {len(body)} chars]"
    # Characters available for body text + marker once the newline is kept.
    budget = max_line_chars - len(newline)
    if len(marker) > budget:
        # The marker alone would overshoot the cap; honour the cap instead.
        return body[: max(0, budget)] + newline, True
    return body[: budget - len(marker)] + marker + newline, True
364
-
365
-
366
def cap_text(text: str, max_chars: int) -> tuple[str, bool]:
    """Shorten *text* to at most *max_chars* characters.

    Returns ``(text, capped)``. A non-positive cap disables trimming. When
    trimming, the kept prefix has trailing whitespace removed and a
    ``[context-guard-kit] text capped`` marker with the original length is
    appended. If the cap is too small to hold the marker, the text is
    hard-truncated instead so the result never exceeds the cap.
    """
    if max_chars <= 0 or len(text) <= max_chars:
        return text, False
    marker = f"\n[context-guard-kit] text capped: {len(text)} chars total\n"
    if len(marker) > max_chars:
        # The marker alone would overshoot the cap; honour the cap instead.
        return text[:max_chars], True
    return text[: max_chars - len(marker)].rstrip() + marker, True
372
-
373
-
374
def compact_item(
    text: str,
    limit: int = MAX_SUMMARY_ITEM_CHARS,
    *,
    show_paths: bool = False,
    sanitizer: object | None = None,
) -> str:
    """Sanitize *text* and squeeze it onto one bounded line.

    The text is run through *sanitizer* (one is loaded via
    ``load_line_sanitizer(show_paths)`` when none is given), control
    sequences are removed, and every whitespace run collapses to a single
    space. Results longer than *limit* are cut and suffixed with an
    ``...[item trimmed: N chars]`` marker.
    """
    active = load_line_sanitizer(show_paths) if sanitizer is None else sanitizer
    cleaned, _ = active.sanitize(text)  # type: ignore[attr-defined]
    collapsed = re.sub(r"\s+", " ", strip_ansi(cleaned).strip())
    if len(collapsed) > limit:
        suffix = f"...[item trimmed: {len(collapsed)} chars]"
        room = max(0, limit - len(suffix))
        return collapsed[:room] + suffix
    return collapsed
391
-
392
-
393
class RunnerFailureSummary:
    """Bounded, runner-aware extraction of the most actionable failure lines.

    The extractor is intentionally online and stores only a small de-duplicated
    set of findings. That keeps the wrapper useful for huge logs without
    retaining the whole command output in memory.
    """

    def __init__(self, max_items_per_runner: int, *, show_paths: bool = False) -> None:
        # Negative limits are treated as 0, which disables collection entirely.
        self.max_items_per_runner = max(0, max_items_per_runner)
        self.show_paths = show_paths
        # One sanitizer instance is reused for every item.
        self.sanitizer = load_line_sanitizer(show_paths)
        # runner label -> findings in first-seen order.
        self.items: dict[str, list[str]] = collections.defaultdict(list)
        # runner label -> findings already recorded (for de-duplication).
        self.seen: dict[str, set[str]] = collections.defaultdict(set)
        # Set once a jest/vitest "FAIL <file>" line was seen; never reset here.
        self.jest_active = False
        # Set once a go "--- FAIL:" line was seen; never reset here.
        self.go_failed_seen = False

    def add(self, runner: str, item: str) -> None:
        """Record *item* under *runner* unless it is empty, a duplicate, or over quota."""
        if self.max_items_per_runner <= 0:
            return
        compact = compact_item(item, show_paths=self.show_paths, sanitizer=self.sanitizer)
        if not compact or compact in self.seen[runner]:
            return
        if len(self.items[runner]) >= self.max_items_per_runner:
            return
        self.items[runner].append(compact)
        self.seen[runner].add(compact)

    def feed(self, line: str) -> None:
        """Inspect one output line and record any runner-specific finding.

        The matchers are independent: a single line may be tested against,
        and recorded by, more than one of them.
        """
        if self.max_items_per_runner <= 0:
            return

        stripped = strip_ansi(line.rstrip("\n"))

        match = PYTEST_RESULT_RE.match(stripped)
        # Only accept FAILED/ERROR lines whose node looks like a test id.
        if match and (".py" in match.group("node") or "::" in match.group("node")):
            reason = compact_item(match.group("reason") or "", show_paths=self.show_paths, sanitizer=self.sanitizer)
            if reason:
                self.add("pytest", f"{match.group('kind')} {match.group('node')} - {reason}")
            else:
                self.add("pytest", f"{match.group('kind')} {match.group('node')}")

        match = PYTEST_LOCATION_RE.match(stripped)
        # "file.py:LINE:message" only counts when the line also looks like an error.
        if match and ERROR_RE.search(stripped):
            self.add("pytest", f"{match.group('file')}:{match.group('line')}: {match.group('message').strip()}")

        match = JEST_FILE_RE.match(stripped)
        if match:
            self.jest_active = True
            self.add("jest/vitest", f"FAIL {match.group('file')}")
            if match.group("name"):
                self.add("jest/vitest", f"test {match.group('name')}")

        # NOTE(review): indentation was lost in the diff rendering this block
        # was recovered from. The nesting of the JEST_AT_RE matcher under
        # jest_active is reconstructed — confirm against the original file.
        if self.jest_active:
            match = JEST_TEST_RE.match(stripped)
            if match:
                self.add("jest/vitest", f"test {match.group('name')}")

            match = JEST_AT_RE.match(stripped)
            if match:
                self.add("jest/vitest", f"{match.group('file')}:{match.group('line')}:{match.group('col')}")

        match = VITEST_LOCATION_RE.match(stripped)
        if match:
            self.add("jest/vitest", f"{match.group('file')}:{match.group('line')}:{match.group('col')}")

        match = GO_FAIL_RE.match(stripped)
        if match:
            self.go_failed_seen = True
            self.add("go test", f"FAIL {match.group('name')}")

        match = GO_LOCATION_RE.match(stripped)
        # Go locations are only recorded after a "--- FAIL:" line was seen.
        if self.go_failed_seen and match:
            message = match.group("message").strip()
            suffix = f": {message}" if message else ""
            self.add("go test", f"{match.group('file')}:{match.group('line')}{suffix}")

        match = RUST_THREAD_RE.match(stripped)
        if match:
            self.add(
                "cargo test",
                f"{match.group('name')} at {match.group('file')}:{match.group('line')}:{match.group('col')}",
            )

    def as_lines(self, max_line_chars: int, max_lines: int) -> list[str]:
        """Render the findings as text lines, grouped by runner in sorted order.

        Returns an empty list when nothing was collected or *max_lines* is not
        positive. The header entry spans two physical lines (it starts with a
        blank line) and is counted as two against *max_lines*.
        """
        if not self.items:
            return []
        if max_lines <= 0:
            return []
        out = ["\n--- runner failure summary ---\n"]
        used_lines = len(out[0].splitlines())
        for runner in sorted(self.items):
            runner_line = f"runner={runner}\n"
            if used_lines + 1 > max_lines:
                break
            out.append(runner_line)
            used_lines += 1
            for item in self.items[runner]:
                if used_lines + 1 > max_lines:
                    break
                line, _ = cap_line(f"- {item}\n", max_line_chars)
                out.append(line)
                used_lines += 1
        return out

    def as_dict(self) -> dict[str, list[str]]:
        """Return a copy of the findings keyed by runner, sorted by runner name."""
        return {runner: list(items) for runner, items in sorted(self.items.items()) if items}
500
-
501
-
502
def digest_line_items(lines: Iterable[str], *, limit: int, max_line_chars: int) -> list[str]:
    """Collect distinct, non-empty, length-capped lines for the digest.

    Lines are stripped of control sequences and surrounding whitespace;
    duplicates (compared before capping) and empty lines are skipped.
    Collection stops as soon as *limit* entries have been gathered.
    """
    picked: list[str] = []
    already: set[str] = set()
    for raw in lines:
        cleaned = strip_ansi(raw).strip()
        if cleaned and cleaned not in already:
            shortened, _ = cap_line(cleaned, max_line_chars)
            picked.append(shortened.strip())
            already.add(cleaned)
            if len(picked) >= limit:
                break
    return picked
515
-
516
-
517
class DuplicateLineTracker:
    """Count repeated sanitized lines while bounding how many are remembered."""

    def __init__(self, *, max_groups: int = 12, max_unique: int = 2048) -> None:
        # Cap on reported groups; negative values mean "report none".
        self.max_groups = max_groups if max_groups > 0 else 0
        # Cap on distinct lines remembered; always at least one.
        self.max_unique = max_unique if max_unique > 1 else 1
        self.counts: dict[str, int] = {}
        self.first_line: dict[str, int] = {}
        # Lines seen after the distinct-line table was already full.
        self.overflow_unique_lines = 0

    def feed(self, line_number: int, line: str) -> None:
        """Register one line; blank lines are ignored."""
        text = strip_ansi(line).strip()
        if not text:
            return
        if text in self.counts:
            self.counts[text] += 1
            return
        if len(self.counts) >= self.max_unique:
            # Table is full: only note that something was left untracked.
            self.overflow_unique_lines += 1
            return
        self.first_line[text] = line_number
        self.counts[text] = 1

    def as_list(self) -> list[dict[str, object]]:
        """Return the most repeated lines, most frequent first.

        Ties are broken by first line number, then by text. When at least one
        group is reported and untracked lines exist, a final synthetic entry
        carries the untracked-line count.
        """

        def _rank(pair: tuple[str, int]) -> tuple[int, int, str]:
            text, count = pair
            return (-count, self.first_line[text], text)

        repeated = [(text, count) for text, count in self.counts.items() if count > 1]
        repeated.sort(key=_rank)
        groups: list[dict[str, object]] = [
            {"count": count, "first_line": self.first_line[text], "text": text}
            for text, count in repeated[: self.max_groups]
        ]
        if groups and self.overflow_unique_lines:
            groups.append(
                {
                    "count": self.overflow_unique_lines,
                    "first_line": None,
                    "text": "[context-guard-kit] additional unique lines omitted from duplicate tracking",
                }
            )
        return groups
565
-
566
-
567
def command_preview(command: list[str], sanitizer: object, max_line_chars: int) -> str:
    """Return a sanitized, length-capped one-line rendering of *command*.

    The argv list is shell-quoted with ``shlex.join`` (plain space-joining is
    the fallback if that raises), passed through ``sanitizer.sanitize`` and
    then capped to *max_line_chars*.
    """
    try:
        joined = shlex.join(command)
    except Exception:
        joined = " ".join(command)
    cleaned, _ = sanitizer.sanitize(joined + "\n")  # type: ignore[attr-defined]
    shortened, _ = cap_line(cleaned.strip(), max_line_chars)
    return shortened.strip()
575
-
576
-
577
def digest_next_queries(
    *,
    rc: int,
    timed_out: bool,
    raw_output_truncated: bool,
    runner_items: dict[str, list[str]],
    top_error_lines: list[str],
) -> list[str]:
    """Suggest follow-up steps appropriate to how the command ended.

    Timeouts take precedence over the exit code; a zero exit code yields
    success guidance; otherwise the hints depend on which failure facts
    (runner summary, error lines, truncation) are available.
    """
    if timed_out:
        return [
            "Inspect timeout cause first; rerun with a narrower command or higher --timeout-seconds only if needed.",
            "If the process spawned children, check whether the wrapped command handles termination cleanly.",
        ]
    if rc == 0 and raw_output_truncated:
        return [
            "Treat this as success unless a specific assertion needs raw logs.",
            "Query exact raw output only for the component named in the next task.",
        ]
    if rc == 0:
        return ["No raw output follow-up needed; command completed successfully."]

    # Failure: each available fact contributes one hint, in a fixed order.
    hints = (
        (runner_items, "Run the failing test/node from runner_failure_summary directly with minimal verbosity."),
        (top_error_lines, "Inspect top_error_lines before rerunning the full command."),
        (raw_output_truncated, "Rerun without trim only if these failure facts are insufficient."),
    )
    suggestions = [message for present, message in hints if present]
    if suggestions:
        return suggestions
    return ["Rerun with a narrower command or grep for the first error before requesting raw output."]
607
-
608
-
609
def build_failure_signature(
    *,
    status: str,
    rc: int,
    timed_out: bool,
    runner_items: dict[str, list[str]],
    top_error_lines: list[str],
) -> dict[str, object]:
    """Derive a short hash identifying this failure from its key facts.

    The basis is, in order of preference: up to eight runner findings
    (runners in sorted order), else up to eight top error lines, else the
    bare status/exit-code/timeout facts. The hash is the first 16 hex
    characters of a SHA-256 over status, exit code, timeout flag and basis.
    """
    if runner_items:
        source = "runner_failure_summary"
        basis = [
            f"{runner}: {item}"
            for runner in sorted(runner_items)
            for item in runner_items[runner]
        ][:8]
    elif top_error_lines:
        source = "top_error_lines"
        basis = top_error_lines[:8]
    else:
        source = "status"
        basis = []
    if not basis:
        # Nothing specific was captured; fall back to the coarse outcome.
        basis = [f"status={status}", f"exit_code={rc}", f"timed_out={str(timed_out).lower()}"]

    fingerprint_input = json.dumps(
        {"status": status, "exit_code": rc, "timed_out": timed_out, "basis": basis},
        ensure_ascii=False,
        sort_keys=True,
    )
    fingerprint = hashlib.sha256(fingerprint_input.encode("utf-8", errors="replace")).hexdigest()
    return {
        "hash": fingerprint[:16],
        "source": source,
        "basis": basis,
        "exit_code": rc,
        "timed_out": timed_out,
    }
647
-
648
-
649
def build_digest_payload(
    *,
    args: argparse.Namespace,
    command: list[str],
    rc: int,
    timed_out: bool,
    total: int,
    raw_chars: int,
    visible_chars: int,
    any_line_capped: bool,
    redacted_lines: int,
    head: list[str],
    tail: Iterable[str],
    error_lines: list[str],
    runner_summary: RunnerFailureSummary,
    line_sanitizer: object,
    duplicate_line_groups: list[dict[str, object]] | None = None,
) -> dict[str, object]:
    """Assemble the structured digest describing one wrapped command run.

    The payload always carries status, size statistics, budgets, a sanitized
    command preview, failure facts and head/tail samples. Duplicate-line
    groups are included only when provided and non-empty, a failure
    signature only when the status is not ``"success"``, and follow-up hints
    are always appended last.
    """
    raw_output_truncated = total > args.max_lines or visible_chars > args.max_chars or any_line_capped
    if timed_out:
        status = "timeout"
    elif rc == 0:
        status = "success"
    else:
        status = "failure"
    # Runner findings are only reported for non-zero exit codes.
    runner_items = {} if rc == 0 else runner_summary.as_dict()
    top_error_lines = digest_line_items(error_lines, limit=12, max_line_chars=args.max_line_chars)
    sample_limit = 8 if status == "success" else 10
    tail_list = list(tail)

    raw_output_stats = {
        "lines": total,
        "chars": raw_chars,
        "visible_chars": visible_chars,
        "truncated": raw_output_truncated,
        "line_capped": any_line_capped,
        "redacted_lines": redacted_lines,
    }
    budget = {
        "max_lines": args.max_lines,
        "max_chars": args.max_chars,
        "max_line_chars": args.max_line_chars,
    }
    preview = command_preview(command, line_sanitizer, args.max_line_chars)
    head_sample = digest_line_items(head, limit=sample_limit, max_line_chars=args.max_line_chars)
    tail_sample = digest_line_items(
        tail_list[-sample_limit:],
        limit=sample_limit,
        max_line_chars=args.max_line_chars,
    )

    # Key insertion order is part of the emitted document; keep it stable.
    payload: dict[str, object] = {
        "tool": "context-guard-kit.trim_command_output",
        "digest_version": 1,
        "status": status,
        "exit_code": rc,
        "timed_out": timed_out,
        "raw_output": raw_output_stats,
        "budget": budget,
        "command_preview": preview,
        "runner_failure_summary": runner_items,
        "top_error_lines": top_error_lines,
        "representative_head": head_sample,
        "representative_tail": tail_sample,
    }
    if duplicate_line_groups:
        payload["duplicate_line_groups"] = duplicate_line_groups
    if status != "success":
        payload["failure_signature"] = build_failure_signature(
            status=status,
            rc=rc,
            timed_out=timed_out,
            runner_items=runner_items,
            top_error_lines=top_error_lines,
        )
    payload["next_queries"] = digest_next_queries(
        rc=rc,
        timed_out=timed_out,
        raw_output_truncated=raw_output_truncated,
        runner_items=runner_items,
        top_error_lines=top_error_lines,
    )
    return payload
720
-
721
-
722
def markdown_artifact_receipt_lines(artifact_receipt: dict[str, object]) -> list[str]:
    """Format an artifact receipt as one or two markdown bullet lines.

    The first bullet always reports the lower-cased ``stored`` value and an
    id, which is ``artifact_id`` when truthy and otherwise ``error``. A second
    bullet carrying the re-expand command is added only when
    ``exact_reexpand`` is a dict with a truthy ``cli`` entry.

    Args:
        artifact_receipt: Receipt mapping read with ``.get``; missing keys
            render as ``none`` / ``None``.

    Returns:
        The bullet lines, each terminated by a newline.
    """
    stored_flag = str(artifact_receipt.get("stored")).lower()
    identifier = artifact_receipt.get("artifact_id") or artifact_receipt.get("error")
    rendered = [f"- artifact_receipt: stored={stored_flag} id={identifier}\n"]

    reexpand = artifact_receipt.get("exact_reexpand")
    if isinstance(reexpand, dict):
        cli = reexpand.get("cli")
        if cli:
            rendered.append(f"- exact_reexpand: `{cli}`\n")
    return rendered
732
-
733
-
734
def compact_markdown_artifact_receipt(payload: dict[str, object], max_chars: int) -> str:
    """Return the richest markdown receipt text that fits in ``max_chars``.

    The full receipt lines are preferred. When they are too long, three
    progressively shorter one-line summaries are tried in order.

    Args:
        payload: Digest payload; only its ``artifact_receipt`` entry is read.
        max_chars: Character budget available for the receipt text.

    Returns:
        The chosen receipt text, or ``""`` when the payload has no dict
        receipt, the budget is not positive, or nothing fits.
    """
    receipt = payload.get("artifact_receipt")
    if max_chars <= 0 or not isinstance(receipt, dict):
        return ""

    complete = "".join(markdown_artifact_receipt_lines(receipt))
    if len(complete) <= max_chars:
        return complete

    identifier = receipt.get("artifact_id") or receipt.get("error")
    stored_flag = str(receipt.get("stored")).lower()
    reexpand = receipt.get("exact_reexpand")
    availability = ""
    if isinstance(reexpand, dict) and "available" in reexpand:
        availability = f" exact_available={str(reexpand.get('available')).lower()}"

    # Ordered from most to least informative; the first one that fits wins.
    fallbacks = (
        f"- artifact_receipt: stored={stored_flag} id={identifier}{availability}; raise --max-chars for full exact_reexpand\n",
        f"- artifact_receipt: stored={stored_flag} id={identifier}{availability}\n",
        f"- artifact_receipt: id={identifier}\n",
    )
    return next((option for option in fallbacks if len(option) <= max_chars), "")
759
-
760
-
761
def render_digest_markdown(payload: dict[str, object], max_chars: int) -> str:
    """Render a digest payload as a markdown report limited by ``max_chars``.

    The report starts with a banner and summary bullets, followed by optional
    sections for runner failures, duplicate line groups, error lines,
    head/tail samples and next queries. When ``cap_text`` reports that the
    full report was capped, a cap notice is appended and, if a compact
    artifact receipt fits, the receipt is kept ahead of the notice while the
    remaining text is shortened instead.

    Args:
        payload: Digest payload mapping; every field is read with ``.get``.
        max_chars: Character budget passed to ``cap_text``.

    Returns:
        The rendered markdown text.
    """
    everything: list[str] = []
    without_receipt: list[str] = []

    def emit(text: str, *, receipt: bool = False) -> None:
        # Receipt lines are tracked separately so the capped layout can
        # re-insert a compact receipt without duplicating the full one.
        everything.append(text)
        if not receipt:
            without_receipt.append(text)

    emit("[context-guard-kit] semantic digest\n")
    emit(f"- status: {payload.get('status')}\n")
    emit(f"- exit_code: {payload.get('exit_code')}\n")
    emit(f"- timed_out: {str(payload.get('timed_out')).lower()}\n")

    raw_stats = payload.get("raw_output", {})
    if isinstance(raw_stats, dict):
        truncated_flag = str(raw_stats.get("truncated")).lower()
        emit(
            f"- raw_output: {raw_stats.get('lines')} lines/{raw_stats.get('chars')} chars"
            f" (visible={raw_stats.get('visible_chars')}, truncated={truncated_flag})\n"
        )
        if raw_stats.get("line_capped"):
            emit("- line_capped: true\n")
        redacted_count = raw_stats.get("redacted_lines")
        if redacted_count:
            emit(f"- redacted_lines: {redacted_count}\n")

    limits = payload.get("budget", {})
    if isinstance(limits, dict):
        emit(
            f"- budget: {limits.get('max_lines')} lines/{limits.get('max_chars')} chars/"
            f"line={limits.get('max_line_chars')} chars\n"
        )

    preview = payload.get("command_preview")
    if preview:
        emit(f"- command: `{preview}`\n")

    receipt = payload.get("artifact_receipt")
    if isinstance(receipt, dict):
        for receipt_line in markdown_artifact_receipt_lines(receipt):
            emit(receipt_line, receipt=True)

    signature = payload.get("failure_signature")
    if isinstance(signature, dict):
        emit(f"- failure_signature: {signature.get('hash')} ({signature.get('source')})\n")

    runners = payload.get("runner_failure_summary")
    if isinstance(runners, dict) and runners:
        emit("\n## runner_failure_summary\n")
        for runner, facts in sorted(runners.items()):
            emit(f"- runner={runner}\n")
            if isinstance(facts, list):
                for fact in facts:
                    emit(f" - {fact}\n")

    duplicates = payload.get("duplicate_line_groups")
    if isinstance(duplicates, list) and duplicates:
        emit("\n## duplicate_line_groups\n")
        for entry in duplicates:
            if isinstance(entry, dict):
                emit(
                    f"- count={entry.get('count')} "
                    f"first_line={entry.get('first_line')} "
                    f"text={entry.get('text')}\n"
                )

    # Each of these sections uses its payload key as the heading.
    for section in ("top_error_lines", "representative_head", "representative_tail", "next_queries"):
        entries = payload.get(section)
        if isinstance(entries, list) and entries:
            emit(f"\n## {section}\n")
            for entry in entries:
                emit(f"- {entry}\n")

    full_text = "".join(everything)
    rendered, was_capped = cap_text(full_text, max_chars)
    if not was_capped:
        return rendered

    cap_notice = "[context-guard-kit] digest capped by --max-chars.\n"
    if max_chars <= len(cap_notice):
        # Not even the notice fits; return as much of it as allowed.
        return cap_notice[:max_chars]
    room = max_chars - len(cap_notice)

    receipt_block = compact_markdown_artifact_receipt(payload, room)
    if not receipt_block:
        rendered, _ = cap_text(full_text, room)
        return rendered + cap_notice

    body_budget = room - len(receipt_block)
    body = ""
    if body_budget > 0:
        body_text = "".join(without_receipt)
        truncation_note = f"\n[context-guard-kit] text capped: {len(body_text)} chars total\n"
        if len(body_text) <= body_budget:
            body = body_text
        elif body_budget > len(truncation_note):
            kept = body_budget - len(truncation_note)
            body = body_text[:kept].rstrip() + truncation_note
        if body and not body.endswith("\n"):
            body += "\n"
    return body + receipt_block + cap_notice
864
-
865
-
866
def render_digest_json(payload: dict[str, object], max_chars: int) -> str:
    """Serialize a digest payload as indented JSON, shrinking it to fit.

    Three stages are tried in order:

    1. The payload unchanged.
    2. A copy flagged ``digest_capped`` whose list fields are cut down to the
       longest prefix that fits.
    3. A series of progressively smaller summary documents.

    If nothing fits, the smallest summary (``{"digest_capped": true}``) is
    returned even when it is longer than ``max_chars``.

    Args:
        payload: JSON-serializable digest payload.
        max_chars: Character budget for the serialized text.

    Returns:
        Key-sorted, 2-space-indented JSON text ending with a newline.
    """
    hint = "Raise --max-chars or inspect a narrower command for details."

    def encode(document: dict[str, object]) -> str:
        return json.dumps(document, ensure_ascii=False, sort_keys=True, indent=2) + "\n"

    def fits(document: dict[str, object]) -> bool:
        return len(encode(document)) <= max_chars

    def trim_list(document: dict[str, object], items: list[object]) -> None:
        # Binary-search the longest prefix of ``items`` (mutated in place and
        # owned by ``document``) that lets ``document`` fit the budget.
        if fits(document):
            return
        snapshot = list(items)
        low, high, keep = 0, len(snapshot), 0
        while low <= high:
            probe = (low + high) // 2
            items[:] = snapshot[:probe]
            if fits(document):
                keep = probe
                low = probe + 1
            else:
                high = probe - 1
        items[:] = snapshot[:keep]

    def summarize_receipt(*, with_reexpand: bool) -> dict[str, object] | None:
        receipt = payload.get("artifact_receipt")
        if not isinstance(receipt, dict):
            return None
        summary: dict[str, object] = {
            field: receipt[field]
            for field in ("stored", "artifact_id", "error", "bytes", "max_bytes")
            if field in receipt
        }
        stored_output = receipt.get("stored_output")
        if isinstance(stored_output, dict):
            summary["stored_output"] = {
                field: stored_output[field]
                for field in ("scope", "bytes", "lines", "sha256")
                if field in stored_output
            }
        reexpand = receipt.get("exact_reexpand")
        if with_reexpand and isinstance(reexpand, dict):
            summary["exact_reexpand"] = {
                field: reexpand[field]
                for field in ("available", "scope", "sha256", "bytes", "lines", "cli", "reason")
                if field in reexpand
            }
        return summary

    def with_receipt(document: dict[str, object], receipt: dict[str, object] | None) -> dict[str, object]:
        if receipt is None:
            return document
        return {**document, "artifact_receipt": receipt}

    # Stage 1: the payload as-is.
    rendered = encode(payload)
    if len(rendered) <= max_chars:
        return rendered

    # Stage 2: deep copy via a JSON round trip, then shorten list fields.
    trimmed = json.loads(json.dumps(payload))
    trimmed["digest_capped"] = True
    for field in (
        "duplicate_line_groups",
        "representative_tail",
        "representative_head",
        "top_error_lines",
        "next_queries",
    ):
        entries = trimmed.get(field)
        if isinstance(entries, list):
            trim_list(trimmed, entries)
    trimmed_signature = trimmed.get("failure_signature")
    if isinstance(trimmed_signature, dict):
        basis = trimmed_signature.get("basis")
        if isinstance(basis, list):
            trim_list(trimmed, basis)
    trimmed_runners = trimmed.get("runner_failure_summary")
    if isinstance(trimmed_runners, dict):
        for runner in sorted(trimmed_runners):
            facts = trimmed_runners.get(runner)
            if isinstance(facts, list):
                trim_list(trimmed, facts)
    rendered = encode(trimmed)
    if len(rendered) <= max_chars:
        return rendered

    # Stage 3: summary documents, richest first.
    signature_summary: object | None = None
    signature = payload.get("failure_signature")
    if isinstance(signature, dict):
        signature_summary = {
            field: signature.get(field)
            for field in ("hash", "source", "exit_code", "timed_out")
        }
    rich_receipt = summarize_receipt(with_reexpand=True)
    lean_receipt = summarize_receipt(with_reexpand=False)

    core: dict[str, object] = {
        "digest_capped": True,
        "status": payload.get("status"),
        "exit_code": payload.get("exit_code"),
        "timed_out": payload.get("timed_out"),
    }
    with_signature = {**core, "failure_signature": signature_summary}
    with_raw = {
        **with_signature,
        "raw_output": payload.get("raw_output"),
        "next_queries": [hint],
    }
    richest = {
        **with_raw,
        "tool": payload.get("tool"),
        "digest_version": payload.get("digest_version"),
        "budget": payload.get("budget"),
    }
    fallbacks = [
        with_receipt(richest, rich_receipt),
        with_receipt(with_raw, rich_receipt),
        with_receipt(with_signature, rich_receipt),
        with_receipt(core, lean_receipt),
        {"digest_capped": True},
    ]
    for document in fallbacks:
        rendered = encode(document)
        if len(rendered) <= max_chars:
            return rendered
    # Nothing fits: fall back to the smallest document regardless of budget.
    return encode(fallbacks[-1])
1010
-
1011
-
1012
# Unique sentinel object placed on the reader queue to mark the end of the
# wrapped command's output; it is compared by identity (`is`).
_STREAM_END = object()
1013
-
1014
-
1015
def process_group_exists(pgid: int) -> bool:
    """Report whether process group ``pgid`` can still be signalled.

    Sends signal ``0`` via ``os.killpg``, which performs the existence and
    permission checks without delivering a signal.

    Args:
        pgid: Process group id to probe.

    Returns:
        ``True`` when the probe succeeds or is rejected with
        ``PermissionError``; ``False`` for ``ProcessLookupError`` or any
        other ``OSError``.
    """
    try:
        os.killpg(pgid, 0)
    except PermissionError:
        # The group is there; we are just not allowed to signal it.
        return True
    except OSError:
        # Covers ProcessLookupError (a subclass of OSError) and anything else.
        return False
    else:
        return True
1025
-
1026
-
1027
def terminate_process_tree(
    proc: subprocess.Popen[str],
    *,
    process_group_id: int | None = None,
    include_exited_group: bool = False,
) -> None:
    """Stop a wrapped process, escalating from a polite to a forced kill.

    On non-Windows platforms the whole process group receives ``SIGTERM``;
    the group is then polled for up to two seconds and sent ``SIGKILL`` if it
    is still present. On Windows only ``proc`` itself is terminated, waited
    on for two seconds, then killed.

    Args:
        proc: The process handle to stop.
        process_group_id: Group to signal on non-Windows platforms; defaults
            to ``proc.pid`` when ``None``.
        include_exited_group: When true, the group is signalled even if
            ``proc`` itself has already exited (non-Windows only).
    """
    if os.name == "nt":
        if proc.poll() is not None:
            return
        try:
            proc.terminate()
        except ProcessLookupError:
            return
        except OSError:
            # terminate() failed; try a hard kill before waiting.
            try:
                proc.kill()
            except OSError:
                return
        try:
            proc.wait(timeout=2)
        except subprocess.TimeoutExpired:
            pass
        else:
            return
        try:
            proc.kill()
        except OSError:
            # Includes ProcessLookupError; nothing more can be done.
            pass
        return

    group = proc.pid if process_group_id is None else process_group_id
    if proc.poll() is not None and not include_exited_group:
        return
    try:
        os.killpg(group, signal.SIGTERM)
    except ProcessLookupError:
        return

    give_up_at = time.monotonic() + 2
    while time.monotonic() < give_up_at:
        if proc.poll() is None:
            try:
                proc.wait(timeout=0.05)
            except subprocess.TimeoutExpired:
                pass
        if not process_group_exists(group):
            return
        time.sleep(0.05)

    # Grace period elapsed and the group is still present: force it down.
    try:
        os.killpg(group, signal.SIGKILL)
    except ProcessLookupError:
        pass
1079
-
1080
-
1081
class TimedCommandStream:
    """Iterate a command's output lines while enforcing a wall-clock deadline.

    A daemon thread copies lines from ``stdout`` into a bounded queue.
    Iterating the stream yields those lines. Once the deadline passes while
    the output is still open, the process tree is terminated and a single
    timeout message line is yielded before iteration stops.
    """

    def __init__(
        self,
        proc: subprocess.Popen[str],
        stdout: Iterable[str],
        *,
        timeout_seconds: int,
        process_group_id: int | None = None,
    ) -> None:
        """Start the reader thread and fix the deadline.

        Args:
            proc: Handle of the wrapped process.
            stdout: Iterable of output lines produced by the process.
            timeout_seconds: Allowed runtime, measured from construction.
            process_group_id: Group id forwarded to ``terminate_process_tree``.
        """
        self.proc = proc
        self.timeout_seconds = timeout_seconds
        self.process_group_id = process_group_id
        self.deadline = time.monotonic() + timeout_seconds
        self.timed_out = False
        self.timeout_reported = False
        self._stream_closed = False
        self._queue: queue.Queue[str | object] = queue.Queue(maxsize=1024)
        self._thread = threading.Thread(
            target=self._read_stdout,
            args=(stdout,),
            daemon=True,
        )
        self._thread.start()

    def _read_stdout(self, stdout: Iterable[str]) -> None:
        """Reader-thread body: forward every line, then the end sentinel."""
        try:
            for chunk in stdout:
                self._queue.put(chunk)
        finally:
            # Flag closure before enqueueing the sentinel so the consumer can
            # tell "output finished" apart from "output stalled".
            self._stream_closed = True
            self._queue.put(_STREAM_END)

    def timeout_message(self) -> str:
        """Return the line reported when the command exceeds its timeout."""
        return (
            f"[context-guard-kit] command timed out after {self.timeout_seconds}s; "
            "terminated wrapped process\n"
        )

    def _mark_timed_out(self) -> None:
        """Record the timeout and terminate the process tree, once."""
        if self.timed_out:
            return
        self.timed_out = True
        terminate_process_tree(
            self.proc,
            process_group_id=self.process_group_id,
            include_exited_group=True,
        )

    def _timeout_line(self) -> str:
        """Trigger the timeout handling and return the message to yield."""
        self._mark_timed_out()
        self.timeout_reported = True
        return self.timeout_message()

    def __iter__(self) -> Iterator[str]:
        """Yield output lines until the stream ends or the deadline passes."""
        poll_interval = 0.05
        while True:
            remaining = self.deadline - time.monotonic()
            if self.proc.poll() is not None or self.timed_out:
                wait_time = poll_interval
            else:
                wait_time = min(poll_interval, max(0.0, remaining))

            try:
                item = self._queue.get(timeout=wait_time)
            except queue.Empty:
                if remaining > 0 or self._stream_closed:
                    continue
                # Deadline passed with the output still open and nothing queued.
                if not self.timeout_reported:
                    yield self._timeout_line()
                return

            if item is _STREAM_END:
                return
            if isinstance(item, str):
                yield item
                if not self._stream_closed and time.monotonic() >= self.deadline:
                    if not self.timeout_reported:
                        yield self._timeout_line()
                    return

    def returncode(self) -> int:
        """Return the process exit code, or the timeout code on overrun.

        Waits for the process only for the time left until the deadline; if
        it is still running then, the timeout handling is triggered.
        """
        if self.timed_out:
            return TIMEOUT_EXIT_CODE
        time_left = max(0.0, self.deadline - time.monotonic())
        try:
            return self.proc.wait(timeout=time_left)
        except subprocess.TimeoutExpired:
            self._mark_timed_out()
            return TIMEOUT_EXIT_CODE
1160
-
1161
-
1162
def process_group_id_for(proc: subprocess.Popen[str]) -> int | None:
    """Return the process group id of ``proc``, or ``None`` on Windows.

    Args:
        proc: Handle of a started process.

    Returns:
        ``None`` when ``os.name`` is ``"nt"``; otherwise the result of
        ``os.getpgid(proc.pid)``, falling back to ``proc.pid`` when the
        lookup raises ``ProcessLookupError``.
    """
    if os.name == "nt":
        return None
    leader_pid = proc.pid
    try:
        group_id = os.getpgid(leader_pid)
    except ProcessLookupError:
        # start_new_session=True makes the child the group leader; if it exits
        # before getpgid(), the group id is still the leader pid while inherited
        # stdout descendants remain alive.
        group_id = leader_pid
    return group_id
1172
-
1173
-
1174
def main() -> int:
    """Run a wrapped command and print its output trimmed or digested.

    The command is started with stderr merged into stdout (and in a new
    session on non-Windows platforms). Its output is read through
    ``TimedCommandStream``, and every line is sanitized, capped and
    accumulated. Depending on ``--digest`` the result is written to stdout as
    a JSON/markdown digest, as the untouched visible lines when they fit the
    budgets, or as a head / error / tail summary.

    Returns:
        The value of ``command_stream.returncode()`` for a started command;
        ``2`` for usage errors (missing command, ``--artifact-receipt``
        without a digest format); ``127`` when the command cannot be started;
        ``1`` when the subprocess exposes no stdout pipe.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--max-lines", type=int, default=220)
    parser.add_argument("--max-chars", type=int, default=20000)
    parser.add_argument("--max-line-chars", type=int, default=4000)
    parser.add_argument("--head-lines", type=int, default=40)
    parser.add_argument("--tail-lines", type=int, default=80)
    parser.add_argument("--error-lines", type=int, default=120)
    parser.add_argument(
        "--runner-summary-items",
        type=int,
        default=12,
        help="maximum runner-specific failure facts to keep per detected runner (0 disables)",
    )
    parser.add_argument(
        "--show-paths",
        action="store_true",
        help="show raw absolute paths in output instead of basename#path:<hash>; local debugging only because private paths may be exposed",
    )
    parser.add_argument(
        "--timeout-seconds",
        type=int,
        default=DEFAULT_TIMEOUT_SECONDS,
        help=(
            "maximum runtime for wrapped commands before terminating the process group "
            f"(default: {DEFAULT_TIMEOUT_SECONDS}, max: {MAX_TIMEOUT_SECONDS})"
        ),
    )
    parser.add_argument(
        "--digest",
        choices=("off", "markdown", "json"),
        default="off",
        help=(
            "emit an opt-in semantic digest instead of raw/trimmed logs "
            "(default: off; formats: markdown, json)"
        ),
    )
    parser.add_argument(
        "--artifact-receipt",
        action="store_true",
        help=(
            "with --digest, store the exact sanitized full output as a local "
            "context-guard-artifact receipt and include re-expand metadata"
        ),
    )
    parser.add_argument(
        "--artifact-dir",
        default=".context-guard/artifacts",
        help="artifact receipt directory used by --artifact-receipt (default: .context-guard/artifacts)",
    )
    parser.add_argument(
        "--artifact-max-bytes",
        type=int,
        default=DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES,
        help=(
            "maximum sanitized output bytes eligible for --artifact-receipt "
            f"(default: {DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES}, max: {MAX_ARTIFACT_RECEIPT_MAX_BYTES})"
        ),
    )
    parser.add_argument("command", nargs=argparse.REMAINDER)
    args = parser.parse_args()
    normalize_budgets(args)
    args.artifact_max_bytes = bounded_int(
        args.artifact_max_bytes,
        DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES,
        1,
        MAX_ARTIFACT_RECEIPT_MAX_BYTES,
    )
    if args.artifact_receipt and args.digest == "off":
        print("trim_command_output.py: --artifact-receipt requires --digest markdown or --digest json", file=sys.stderr)
        return 2

    command = args.command
    # argparse.REMAINDER keeps a leading "--" separator; drop it.
    if command and command[0] == "--":
        command = command[1:]
    if not command:
        print("trim_command_output.py: missing command", file=sys.stderr)
        return 2

    popen_kwargs: dict[str, object] = {}
    if os.name != "nt":
        popen_kwargs["start_new_session"] = True
    try:
        proc = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
            errors="replace",
            **popen_kwargs,
        )
    except OSError as exc:
        print(f"context-guard-kit: command failed to start: {exc}", file=sys.stderr)
        return 127

    all_lines: list[str] = []
    head: list[str] = []
    tail: collections.deque[str] = collections.deque(maxlen=args.tail_lines)
    error_lines: list[str] = []
    total = 0
    raw_chars = 0
    visible_chars = 0
    any_line_capped = False
    runner_summary = RunnerFailureSummary(args.runner_summary_items, show_paths=args.show_paths)
    line_sanitizer = load_line_sanitizer(args.show_paths)
    duplicate_tracker = DuplicateLineTracker()
    redacted_lines = 0
    artifact_lines: list[str] = []
    artifact_capture_bytes = 0
    artifact_capture_overflow = False

    def consume(line: str) -> None:
        """Fold one raw output line into all counters and sample buffers.

        Shared by the streaming loop and the late timeout line so both are
        accounted for identically.
        """
        nonlocal total, raw_chars, visible_chars, any_line_capped
        nonlocal redacted_lines, artifact_capture_bytes, artifact_capture_overflow

        total += 1
        raw_chars += len(line)
        visible_source, redacted = line_sanitizer.sanitize(line)  # type: ignore[attr-defined]
        if redacted:
            redacted_lines += 1
        artifact_capture_bytes, artifact_capture_overflow = capture_sanitized_artifact_line(
            capture_enabled=args.artifact_receipt,
            sanitized_line=visible_source,
            artifact_lines=artifact_lines,
            capture_bytes=artifact_capture_bytes,
            capture_overflow=artifact_capture_overflow,
            max_bytes=args.artifact_max_bytes,
        )
        visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
        any_line_capped = any_line_capped or line_capped
        visible_chars += len(visible_line)
        duplicate_tracker.feed(total, visible_line)
        if total <= args.head_lines:
            head.append(visible_line)
        tail.append(visible_line)
        if ERROR_RE.search(visible_line) and len(error_lines) < args.error_lines:
            error_lines.append(visible_line)
        # The runner summary receives the raw line, not the sanitized one.
        runner_summary.feed(line)
        if total <= args.max_lines:
            all_lines.append(visible_line)

    if proc.stdout is None:
        print("trim_command_output.py: subprocess produced no stdout pipe", file=sys.stderr)
        return 1
    command_stream = TimedCommandStream(
        proc,
        proc.stdout,
        timeout_seconds=args.timeout_seconds,
        process_group_id=process_group_id_for(proc),
    )
    for line in command_stream:
        consume(line)

    rc = command_stream.returncode()
    # The timeout may only be detected while waiting for the exit code; in
    # that case the timeout message has not been yielded by the stream yet.
    if command_stream.timed_out and not command_stream.timeout_reported:
        timeout_line = command_stream.timeout_message()
        command_stream.timeout_reported = True
        consume(timeout_line)

    if args.digest != "off":
        payload = build_digest_payload(
            args=args,
            command=command,
            rc=rc,
            timed_out=command_stream.timed_out,
            total=total,
            raw_chars=raw_chars,
            visible_chars=visible_chars,
            any_line_capped=any_line_capped,
            redacted_lines=redacted_lines,
            head=head,
            tail=list(tail),
            error_lines=error_lines,
            runner_summary=runner_summary,
            line_sanitizer=line_sanitizer,
            duplicate_line_groups=duplicate_tracker.as_list(),
        )
        if args.artifact_receipt:
            if artifact_capture_overflow:
                payload["artifact_receipt"] = {
                    "stored": False,
                    "error": "sanitized_output_exceeds_artifact_max_bytes",
                    "max_bytes": args.artifact_max_bytes,
                    "exact_reexpand": {"available": False, "reason": "artifact size cap exceeded"},
                }
            else:
                try:
                    payload["artifact_receipt"] = store_sanitized_artifact_receipt(
                        sanitized_text="".join(artifact_lines),
                        command=command,
                        args=args,
                        line_sanitizer=line_sanitizer,
                        redacted_lines=redacted_lines,
                    )
                except Exception as exc:
                    # Best effort: a storage failure is reported inside the
                    # digest instead of aborting the run.
                    payload["artifact_receipt"] = {
                        "stored": False,
                        "error": "artifact_receipt_unavailable",
                        "reason": f"{exc.__class__.__name__}: {exc}",
                        "exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
                    }
        if args.digest == "json":
            sys.stdout.write(render_digest_json(payload, args.max_chars))
        else:
            sys.stdout.write(render_digest_markdown(payload, args.max_chars))
        return rc

    if total <= args.max_lines and visible_chars <= args.max_chars and not any_line_capped:
        sys.stdout.writelines(all_lines)
    else:
        head_budget = min(args.head_lines, max(1, args.max_lines // 4))
        tail_budget = min(args.tail_lines, max(1, args.max_lines // 3))
        head_out = head[:head_budget]
        # Build the lookup set once instead of once per tail line.
        head_seen = set(head_out)
        tail_out = [line for line in list(tail)[-tail_budget:] if line not in head_seen]
        remaining = max(0, args.max_lines - len(head_out) - len(tail_out))
        error_out = unique_keep_order(error_lines)[:remaining]

        parts: list[str] = []
        parts.append(
            f"[context-guard-kit] output trimmed: {total} lines/{raw_chars} chars "
            f"-> budget about {args.max_lines} log lines/{args.max_chars} chars\n"
        )
        parts.append(f"[context-guard-kit] command exit_code={rc}\n")
        if any_line_capped:
            parts.append(f"[context-guard-kit] one or more lines were capped at {args.max_line_chars} chars\n")
        if redacted_lines:
            parts.append(f"[context-guard-kit] redacted_lines={redacted_lines}\n")
        summary_budget = max(0, min(args.max_lines, max(4, args.max_lines // 3))) if args.max_lines > 0 else 0
        runner_lines = runner_summary.as_lines(args.max_line_chars, summary_budget) if rc != 0 else []
        summary_line_count = len("".join(runner_lines).splitlines())
        remaining_log_budget = max(0, args.max_lines - summary_line_count)

        parts.extend(runner_lines)
        parts.append("\n--- head ---\n")
        if remaining_log_budget > 0:
            head_out = head_out[:remaining_log_budget]
            parts.extend(head_out)
            remaining_log_budget -= len(head_out)
        if error_out:
            parts.append("\n--- matched error/failure lines ---\n")
            error_out = error_out[:remaining_log_budget]
            parts.extend(error_out)
            remaining_log_budget -= len(error_out)
        parts.append("\n--- tail ---\n")
        # Guarded because a zero budget would make [-0:] select every line.
        if remaining_log_budget > 0:
            parts.extend(tail_out[-remaining_log_budget:])
        parts.append("\n[context-guard-kit] rerun the command without trim only if more context is essential.\n")
        output, capped = cap_text("".join(parts), args.max_chars)
        if capped:
            output += "[context-guard-kit] final summary was capped by --max-chars.\n"
        sys.stdout.write(output)

    return rc
1446
-
1447
-
1448
# Script entry point: run main() only when executed directly and use its
# integer return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())