@ictechgy/context-guard 0.4.9 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.ko.md +59 -31
- package/README.md +85 -36
- package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
- package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
- package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
- package/docs/benchmark-workflow-examples.md +3 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
- package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
- package/docs/distribution.md +10 -7
- package/docs/experimental-benchmark-fixtures.md +30 -6
- package/package.json +4 -6
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +20 -14
- package/plugins/context-guard/README.md +26 -17
- package/plugins/context-guard/bin/context-guard +147 -25
- package/plugins/context-guard/bin/context-guard-artifact +884 -79
- package/plugins/context-guard/bin/context-guard-audit +33 -2
- package/plugins/context-guard/bin/context-guard-bench +1542 -31
- package/plugins/context-guard/bin/context-guard-cache-score +665 -0
- package/plugins/context-guard/bin/context-guard-compress +146 -1
- package/plugins/context-guard/bin/context-guard-cost +790 -6
- package/plugins/context-guard/bin/context-guard-experiments +463 -26
- package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
- package/plugins/context-guard/bin/context-guard-filter +163 -7
- package/plugins/context-guard/bin/context-guard-guard-read +3 -0
- package/plugins/context-guard/bin/context-guard-pack +892 -49
- package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
- package/plugins/context-guard/bin/context-guard-setup +165 -31
- package/plugins/context-guard/bin/context-guard-statusline +490 -283
- package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
- package/plugins/context-guard/bin/context-guard-trim-output +288 -41
- package/plugins/context-guard/brief/README.md +5 -5
- package/plugins/context-guard/lib/context_guard_commands.py +230 -0
- package/plugins/context-guard/skills/setup/SKILL.md +1 -0
- package/context-guard-kit/README.md +0 -91
- package/context-guard-kit/benchmark_runner.py +0 -2401
- package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
- package/context-guard-kit/context_compress.py +0 -695
- package/context-guard-kit/context_escrow.py +0 -935
- package/context-guard-kit/context_filter.py +0 -637
- package/context-guard-kit/context_guard_cli.py +0 -325
- package/context-guard-kit/context_guard_diet.py +0 -1711
- package/context-guard-kit/context_pack.py +0 -2713
- package/context-guard-kit/cost_guard.py +0 -2349
- package/context-guard-kit/experimental_registry.py +0 -4348
- package/context-guard-kit/failed_attempt_nudge.py +0 -567
- package/context-guard-kit/guard_large_read.py +0 -690
- package/context-guard-kit/hook_secret_patterns.py +0 -43
- package/context-guard-kit/read_symbol.py +0 -483
- package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
- package/context-guard-kit/sanitize_output.py +0 -725
- package/context-guard-kit/settings.example.json +0 -67
- package/context-guard-kit/setup_wizard.py +0 -2515
- package/context-guard-kit/statusline.sh +0 -362
- package/context-guard-kit/statusline_merged.sh +0 -157
- package/context-guard-kit/tool_schema_pruner.py +0 -837
- package/context-guard-kit/trim_command_output.py +0 -1449
|
@@ -1,1449 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Run a command, preserve exit code, and print a token-budgeted output summary.
|
|
3
|
-
|
|
4
|
-
Designed for Claude Code Bash tool output. It avoids dumping thousands of log
|
|
5
|
-
lines into the conversation while preserving the lines most likely to be useful.
|
|
6
|
-
"""
|
|
7
|
-
from __future__ import annotations
|
|
8
|
-
|
|
9
|
-
import argparse
|
|
10
|
-
import collections
|
|
11
|
-
import hashlib
|
|
12
|
-
import importlib.machinery
|
|
13
|
-
import importlib.util
|
|
14
|
-
import json
|
|
15
|
-
import os
|
|
16
|
-
from pathlib import Path, PurePosixPath
|
|
17
|
-
import queue
|
|
18
|
-
import re
|
|
19
|
-
import shlex
|
|
20
|
-
import signal
|
|
21
|
-
import subprocess
|
|
22
|
-
import sys
|
|
23
|
-
import threading
|
|
24
|
-
import time
|
|
25
|
-
from typing import Iterable, Iterator
|
|
26
|
-
|
|
27
|
-
# Default character cap for one failure-summary item (default `limit` of compact_item).
MAX_SUMMARY_ITEM_CHARS = 500
# Upper bounds that normalize_budgets() clamps the user-supplied budgets to.
MAX_LINES_LIMIT = 5_000
MAX_CHARS_LIMIT = 1_000_000
MAX_LINE_CHARS_LIMIT = 100_000
MAX_SECTION_LINES_LIMIT = 2_000  # shared by head/tail/error section budgets
MAX_RUNNER_SUMMARY_ITEMS_LIMIT = 100
# Timeout budget in seconds: fallback value and clamp ceiling (86_400 s = 24 h).
DEFAULT_TIMEOUT_SECONDS = 600
MAX_TIMEOUT_SECONDS = 86_400
# NOTE(review): presumably the exit code reported when the wrapped command
# times out (124 is what coreutils `timeout` uses) -- confirm at the use site.
TIMEOUT_EXIT_CODE = 124
# Byte cap for a stored sanitized-output artifact: fallback value and clamp ceiling.
DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES = 10_000_000
MAX_ARTIFACT_RECEIPT_MAX_BYTES = 100_000_000
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
    """Coerce *value* to an int and clamp it into ``[minimum, maximum]``.

    When the value cannot be converted (wrong type, unparsable text, or an
    infinite float) *default* is returned as-is, without clamping.
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    # Raise to the floor first, then lower to the ceiling, so the ceiling
    # wins if the two bounds are ever inverted.
    if parsed < minimum:
        parsed = minimum
    if parsed > maximum:
        parsed = maximum
    return parsed
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
def normalize_budgets(args: argparse.Namespace) -> None:
    """Clamp every output-budget option on *args* in place.

    Each option is passed through ``bounded_int`` with its own fallback and
    allowed range, so unparsable values fall back and out-of-range values
    are pulled to the nearest bound.
    """
    # (attribute name, fallback, lowest allowed, highest allowed)
    budget_rules = (
        ("max_lines", 220, 1, MAX_LINES_LIMIT),
        ("max_chars", 20000, 1, MAX_CHARS_LIMIT),
        ("max_line_chars", 4000, 1, MAX_LINE_CHARS_LIMIT),
        ("head_lines", 40, 0, MAX_SECTION_LINES_LIMIT),
        ("tail_lines", 80, 0, MAX_SECTION_LINES_LIMIT),
        ("error_lines", 120, 0, MAX_SECTION_LINES_LIMIT),
        ("runner_summary_items", 12, 0, MAX_RUNNER_SUMMARY_ITEMS_LIMIT),
        ("timeout_seconds", DEFAULT_TIMEOUT_SECONDS, 1, MAX_TIMEOUT_SECONDS),
    )
    for option, fallback, lowest, highest in budget_rules:
        current = getattr(args, option)
        setattr(args, option, bounded_int(current, fallback, lowest, highest))
|
|
62
|
-
|
|
63
|
-
# Terminal control sequences and stray control characters; strip_ansi()
# deletes every match.
TERMINAL_CONTROL_RE = re.compile(
    r"(?:"
    r"\x1b\][^\x07\x1b]*(?:\x07|\x1b\\)|"  # OSC title/clipboard controls
    r"\x1b[@-_][0-?]*[ -/]*[@-~]|"  # CSI and other ESC sequences
    r"[\x00-\x08\x0b\x0c\x0d\x0e-\x1f\x7f-\x9f]"  # C0/C1 control chars except tab and newline
    r")"
)
# A "/"-rooted path that starts the text or follows whitespace, "(", a quote, or "=".
ABSOLUTE_PATH_RE = re.compile(r"(?P<prefix>^|[\s('\"=])(?P<path>/(?:[^\s:(),]+/)*[^\s:(),]+)")
# Regex fragment for a key name that contains a secret-looking word; it is
# interpolated into the last two FALLBACK_INLINE_PATTERNS entries.
SECRET_KEY = (
    r"[A-Za-z0-9_.-]*(?:api[_-]?key|apikey|token|secret|password|passwd|pwd|"
    r"private[_-]?key|access[_-]?key|client[_-]?secret)[A-Za-z0-9_.-]*"
)
# (pattern, replacement) pairs that FallbackLineSanitizer applies in order.
# Per-entry notes name the token family each prefix presumably targets.
FALLBACK_INLINE_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
    (re.compile(r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
    (re.compile(r"(?i)\bBasic\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
    (re.compile(r"(?i)\bgh[pousr]_[A-Za-z0-9_]{20,}\b"), "[REDACTED]"),  # GitHub-style tokens
    (re.compile(r"(?i)\bgithub_pat_[A-Za-z0-9_]{20,}\b"), "[REDACTED]"),
    (re.compile(r"(?i)\bglpat-[A-Za-z0-9_-]{12,}\b"), "[REDACTED]"),  # GitLab-style tokens
    (re.compile(r"(?i)\bxox[abprs]-[A-Za-z0-9-]{10,}\b"), "[REDACTED]"),  # Slack-style tokens
    (re.compile(r"\b(?:AKIA|ASIA)[0-9A-Z]{16}\b"), "[REDACTED]"),  # AWS-style access key ids
    (re.compile(r"\b(?:sk|pk|rk)_(?:live|test)_[A-Za-z0-9]{16,}\b"), "[REDACTED]"),
    (re.compile(r"\bsk-(?:ant|proj)-[A-Za-z0-9_-]{12,}\b"), "[REDACTED]"),
    (re.compile(r"\bsk-[A-Za-z0-9][A-Za-z0-9_-]{20,}\b"), "[REDACTED]"),
    (re.compile(r"\bnpm_[A-Za-z0-9]{20,}\b"), "[REDACTED]"),
    (re.compile(r"(?i)\bAIza[0-9A-Za-z_\-]{20,}\b"), "[REDACTED]"),
    (re.compile(r"\bSG\.[A-Za-z0-9_-]{16,}\.[A-Za-z0-9_-]{16,}\b"), "[REDACTED]"),
    (re.compile(r"\beyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\b"), "[REDACTED]"),  # three dot-separated base64url segments (JWT shape)
    # user:password@ credentials embedded in a URL; the scheme is kept.
    (re.compile(r"([a-z][a-z0-9+.-]*://)[^/\s:@]+:[^/\s@]+@", re.IGNORECASE), r"\1[REDACTED]@"),
    # secret-named query/fragment parameters; the key and "=" are kept.
    (re.compile(rf"(?i)([?&#;](?:{SECRET_KEY})=)[^\s&#;]+"), r"\1[REDACTED]"),
    # secret-named "key: value" / "key=value" assignments; the key is kept.
    (re.compile(rf"(?i)(\b(?:{SECRET_KEY})\s*[:=]\s*)[^\s]+"), r"\1[REDACTED]"),
)
# An (optionally Proxy-) Authorization header line, optionally preceded by a
# "file:line[:col]:" location and a diff "+"/"-" marker; "prefix" captures
# everything up to the header value.
FALLBACK_AUTH_HEADER_RE = re.compile(
    r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Proxy-)?Authorization\s*:\s*).+$"
)
# Case-insensitive markers of a failure or error line.
ERROR_RE = re.compile(
    r"(FAIL|FAILED|ERROR|Error:|Exception|Traceback|AssertionError|panic:|fatal:|"
    r"segmentation fault|not ok|\bE\s+assert|\[ERROR\]|✗|✖)",
    re.IGNORECASE,
)
# Runner-specific line shapes consumed by RunnerFailureSummary.feed().
PYTEST_RESULT_RE = re.compile(r"^(?P<kind>FAILED|ERROR)\s+(?P<node>\S+)(?:\s+-\s+(?P<reason>.*))?$")
PYTEST_LOCATION_RE = re.compile(r"^(?P<file>[^:\s][^:\n]*\.py):(?P<line>\d+):(?P<message>.*)$")
JEST_FILE_RE = re.compile(
    r"^\s*FAIL\s+(?P<file>\S+(?:\.(?:test|spec)\.[cm]?[jt]sx?|__tests__/\S+\.[cm]?[jt]sx?))"
    r"(?:\s+>\s+(?P<name>.+))?\s*$"
)
JEST_TEST_RE = re.compile(r"^\s*[●✕×]\s+(?P<name>.+?)\s*$")
JEST_AT_RE = re.compile(
    r"^\s*at\s+(?:.+?\s+\()?(?P<file>[^()\s]+?\.[cm]?[jt]sx?):(?P<line>\d+):(?P<col>\d+)\)?\s*$"
)
VITEST_LOCATION_RE = re.compile(r"^\s*❯\s+(?P<file>[^()\s]+?\.[cm]?[jt]sx?):(?P<line>\d+):(?P<col>\d+)\s*$")
GO_FAIL_RE = re.compile(r"^--- FAIL: (?P<name>\S+)(?:\s+\([^)]+\))?")
GO_LOCATION_RE = re.compile(r"^\s*(?P<file>[^:\s]+_test\.go):(?P<line>\d+):\s*(?P<message>.*)$")
RUST_THREAD_RE = re.compile(
    r"^thread '(?P<name>[^']+)' panicked at (?:.*,\s+)?(?P<file>[^,\n]+?\.rs):(?P<line>\d+):(?P<col>\d+):?"
)
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def strip_ansi(text: str) -> str:
    """Return *text* with every ``TERMINAL_CONTROL_RE`` match deleted."""
    cleaned = TERMINAL_CONTROL_RE.sub("", text)
    return cleaned
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
def anonymize_absolute_paths(text: str) -> str:
    """Replace each absolute path in *text* with ``<basename>#path:<digest>``.

    The digest is the first 12 hex characters of the SHA-256 of the full
    path, so the same path always maps to the same token while its parent
    directories stay hidden.
    """

    def _mask(found: re.Match[str]) -> str:
        full_path = found["path"]
        leaf = PurePosixPath(full_path).name
        if not leaf:
            leaf = "path"
        fingerprint = hashlib.sha256(full_path.encode("utf-8", "replace")).hexdigest()
        return found["prefix"] + leaf + "#path:" + fingerprint[:12]

    return ABSOLUTE_PATH_RE.sub(_mask, text)
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
class FallbackLineSanitizer:
    """Minimal line sanitizer used when the stronger sanitizer cannot be loaded.

    It strips terminal control sequences, optionally anonymizes absolute
    paths, and redacts credentials using the fallback patterns of this
    module. ``redactions`` counts the lines that were changed by redaction.
    """

    def __init__(self, *, show_paths: bool = False, diagnostic: str | None = None) -> None:
        self.show_paths = show_paths
        # Reason the fallback is active; reported once on stderr at first use.
        self.diagnostic = diagnostic
        self.diagnostic_emitted = False
        self.redactions = 0

    def sanitize(self, raw_line: str) -> tuple[str, bool]:
        """Return ``(sanitized_line, redacted)`` for one output line.

        ``redacted`` is True only when a credential pattern changed the
        line; control-sequence stripping and path anonymization alone do
        not count.
        """
        if self.diagnostic and not self.diagnostic_emitted:
            print(f"context-guard-kit: sanitizer fallback active: {self.diagnostic}", file=sys.stderr)
            self.diagnostic_emitted = True
        line = strip_ansi(raw_line)
        if not self.show_paths:
            line = anonymize_absolute_paths(line)
        original = line
        auth_match = FALLBACK_AUTH_HEADER_RE.match(line)
        if auth_match:
            # Keep the line ending exactly as it came in: the header regex
            # matches with or without a trailing newline, so appending "\n"
            # unconditionally would invent one for unterminated lines.
            line_ending = "\n" if line.endswith("\n") else ""
            line = auth_match.group("prefix") + "[REDACTED]" + line_ending
        else:
            for pattern, repl in FALLBACK_INLINE_PATTERNS:
                line = pattern.sub(repl, line)
        redacted = line != original
        if redacted:
            self.redactions += 1
        return line, redacted
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
def load_line_sanitizer(show_paths: bool) -> object:
    """Return the strong sanitizer shipped beside this script, else a fallback.

    Each candidate file name is tried in order; the first one that loads and
    exposes ``LineSanitizer`` wins. Load failures are collected and handed to
    ``FallbackLineSanitizer`` as its diagnostic.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    failures: list[str] = []
    for filename in ("sanitize_output.py", "context-guard-sanitize-output"):
        path = os.path.join(here, filename)
        if not os.path.exists(path):
            continue
        try:
            source_loader = importlib.machinery.SourceFileLoader(
                f"_claude_token_sanitize_{os.getpid()}", path
            )
            module_spec = importlib.util.spec_from_loader(source_loader.name, source_loader)
            if module_spec is not None:
                loaded = importlib.util.module_from_spec(module_spec)
                source_loader.exec_module(loaded)
                return loaded.LineSanitizer(show_paths=show_paths)
        except Exception as error:
            # Best effort: remember why this candidate failed and try the next.
            failures.append(
                f"{os.path.basename(path)} failed to load: {error.__class__.__name__}: {error}"
            )
    if failures:
        reason = "; ".join(failures)
    else:
        reason = "strong sanitizer not found next to trim wrapper"
    return FallbackLineSanitizer(show_paths=show_paths, diagnostic=reason)
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
def load_artifact_store_module() -> object:
    """Load the artifact store module that sits beside this script.

    Helper scripts ship as sibling executable files rather than as an
    importable package, so both the source-tree name (`context_escrow.py`)
    and the packaged names are tried, in order.

    Raises:
        RuntimeError: when no candidate exists or every candidate fails to
            load; the message lists the individual load errors.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    failures: list[str] = []
    for filename in ("context_escrow.py", "context-guard-artifact", "claude-token-artifact"):
        path = os.path.join(here, filename)
        if not os.path.exists(path):
            continue
        try:
            source_loader = importlib.machinery.SourceFileLoader(
                f"_context_guard_artifact_{os.getpid()}", path
            )
            module_spec = importlib.util.spec_from_loader(source_loader.name, source_loader)
            if module_spec is not None:
                loaded = importlib.util.module_from_spec(module_spec)
                source_loader.exec_module(loaded)
                return loaded
        except Exception as error:
            # Best effort: remember why this candidate failed and try the next.
            failures.append(
                f"{os.path.basename(path)} failed to load: {error.__class__.__name__}: {error}"
            )
    if failures:
        reason = "; ".join(failures)
    else:
        reason = "artifact store not found next to trim wrapper"
    raise RuntimeError(reason)
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
def store_sanitized_artifact_receipt(
    *,
    sanitized_text: str,
    command: list[str],
    args: argparse.Namespace,
    line_sanitizer: object,
    redacted_lines: int,
) -> dict[str, object]:
    """Store exact sanitized output using the existing artifact receipt format.

    Args:
        sanitized_text: The full, already sanitized command output.
        command: The wrapped command; only its sanitized preview is stored.
        args: Parsed options. Reads ``artifact_max_bytes`` and
            ``artifact_dir`` (both optional) and ``max_line_chars``.
        line_sanitizer: Sanitizer used to build the command preview.
        redacted_lines: Redaction count passed to the artifact digest builder.

    Returns:
        The artifact receipt with an added ``exact_reexpand`` entry, or a
        ``{"stored": False, ...}`` mapping when the text exceeds the byte cap.

    Raises:
        RuntimeError: when the artifact store module cannot be loaded.
    """
    artifact = load_artifact_store_module()
    max_bytes = bounded_int(
        getattr(args, "artifact_max_bytes", DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES),
        DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES,
        1,
        MAX_ARTIFACT_RECEIPT_MAX_BYTES,
    )
    # Encode once and reuse the bytes for both the size check and the hash;
    # the text can be as large as the artifact byte cap.
    encoded = sanitized_text.encode("utf-8", errors="replace")
    content_bytes = len(encoded)
    if content_bytes > max_bytes:
        return {
            "stored": False,
            "error": "sanitized_output_exceeds_artifact_max_bytes",
            "bytes": content_bytes,
            "max_bytes": max_bytes,
            "exact_reexpand": {"available": False, "reason": "artifact size cap exceeded"},
        }
    content_sha = hashlib.sha256(encoded).hexdigest()
    del encoded  # release the byte copy before the remaining work

    # Resolve the directory option once so that the storage location and the
    # re-expand CLI hint below can never disagree.
    default_artifact_dir = ".context-guard/artifacts"
    raw_artifact_dir = getattr(args, "artifact_dir", default_artifact_dir)
    directory = artifact.normalize_allowed_first_absolute_symlink(Path(raw_artifact_dir).expanduser())
    preview = command_preview(command, line_sanitizer, args.max_line_chars)
    # The id is derived from content and command preview only (no timestamp).
    id_basis = json.dumps(
        {
            "content_sha256": content_sha,
            "command_preview": preview,
            "input_truncated": False,
            "producer": "context-guard-trim-output",
        },
        sort_keys=True,
    )
    artifact_id = hashlib.sha256(id_basis.encode("utf-8")).hexdigest()[:20]
    content_path, meta_path = artifact.artifact_paths(directory, artifact_id)
    # Count a final line that lacks a trailing newline as a line too.
    total_lines = sanitized_text.count("\n") + (1 if sanitized_text and not sanitized_text.endswith("\n") else 0)
    content_type = artifact.classify_content_type(sanitized_text)
    strategy = artifact.recommended_strategy(content_type)
    metadata: dict[str, object] = {
        "artifact_id": artifact_id,
        "created_at": int(time.time()),
        "command_preview": preview,
        "content_type": content_type,
        "input": {
            "bytes_read": content_bytes,
            "truncated": False,
            "max_bytes": max_bytes,
            "source": "context-guard-trim-output:sanitized-output",
        },
        "stored_output": {
            "bytes": content_bytes,
            "lines": total_lines,
            "sha256": content_sha,
            "content_file": content_path.name,
            "metadata_file": meta_path.name,
            "scope": "sanitized_full_output",
        },
        "digest": artifact.build_digest(sanitized_text, artifact_id=artifact_id, redacted_lines=redacted_lines),
        "retrieval": {
            "strategy": strategy,
            "deterministic": True,
            "hints": artifact.build_retrieval_hints(
                artifact_id,
                sanitized_text,
                content_type=content_type,
                strategy=strategy,
                total_lines=total_lines,
            ),
        },
    }
    artifact.shrink_digest_for_metadata_cap(metadata)
    artifact.write_private_text(content_path, sanitized_text)
    artifact.write_private_text(meta_path, artifact.metadata_json_text(metadata))
    receipt = artifact.receipt_for(metadata)

    query_line_cap = int(getattr(artifact, "MAX_QUERY_LINES", 5_000))
    query_char_cap = 1_000_000
    content_chars = len(sanitized_text)
    # Default: the output is too large for one exact query.
    exact_reexpand: dict[str, object] = {
        "available": False,
        "scope": "sanitized_full_output",
        "sha256": content_sha,
        "bytes": content_bytes,
        "lines": total_lines,
        "reason": "artifact query cap exceeded; use retrieval hints for exact slices",
    }
    if total_lines <= query_line_cap and content_chars <= query_char_cap:
        dir_flags = ""
        if str(raw_artifact_dir) != default_artifact_dir:
            dir_flags = f" --dir {shlex.quote(str(raw_artifact_dir))}"
        line_flags = ""
        if total_lines > 0:
            line_flags = f" --lines 1:{total_lines} --max-lines {max(1, total_lines)}"
        exact_reexpand = {
            "available": True,
            "scope": "sanitized_full_output",
            "sha256": content_sha,
            "bytes": content_bytes,
            "lines": total_lines,
            "cli": (
                f"context-guard-artifact{dir_flags} get {artifact_id}{line_flags} "
                f"--max-chars {max(1, content_chars)}"
            ),
        }
    receipt["exact_reexpand"] = exact_reexpand
    return receipt
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
def capture_sanitized_artifact_line(
    *,
    capture_enabled: bool,
    sanitized_line: str,
    artifact_lines: list[str],
    capture_bytes: int,
    capture_overflow: bool,
    max_bytes: int,
) -> tuple[int, bool]:
    """Buffer one sanitized line for later artifact storage, within a byte cap.

    Returns the updated ``(capture_bytes, capture_overflow)`` pair. Once the
    cap would be exceeded the buffer is emptied and overflow is reported;
    after that, and when capture is disabled, calls are no-ops.
    """
    if capture_overflow or not capture_enabled:
        return capture_bytes, capture_overflow
    projected = capture_bytes + len(sanitized_line.encode("utf-8", errors="replace"))
    if projected > max_bytes:
        # A partial capture is useless for exact re-expansion, so drop it all.
        artifact_lines.clear()
        return capture_bytes, True
    artifact_lines.append(sanitized_line)
    return projected, False
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
def unique_keep_order(lines: Iterable[str]) -> list[str]:
    """Drop later duplicates while keeping first-seen order.

    Lines are compared with trailing whitespace ignored, but the first
    occurrence is returned exactly as given.
    """
    seen_keys: set[str] = set()
    kept: list[str] = []
    for entry in lines:
        normalized = entry.rstrip()
        if normalized in seen_keys:
            continue
        seen_keys.add(normalized)
        kept.append(entry)
    return kept
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
def cap_line(line: str, max_line_chars: int) -> tuple[str, bool]:
    """Shorten *line* to about ``max_line_chars`` characters.

    Returns ``(line, was_trimmed)``. A trimmed line ends with a marker
    stating the original body length, and a trailing newline is kept. A
    non-positive cap disables trimming.
    """
    if len(line) <= max_line_chars or max_line_chars <= 0:
        return line, False
    terminator = "\n" if line.endswith("\n") else ""
    content = line[: len(line) - len(terminator)]
    note = f"...[line trimmed: {len(content)} chars]"
    # The note and terminator are always emitted, even if they alone exceed the cap.
    budget = max_line_chars - len(note) - len(terminator)
    if budget < 0:
        budget = 0
    return content[:budget] + note + terminator, True
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
def cap_text(text: str, max_chars: int) -> tuple[str, bool]:
    """Shorten *text* to about ``max_chars`` characters.

    Returns ``(text, was_capped)``. Capped text has trailing whitespace
    removed from the kept prefix and ends with a marker stating the original
    length. A non-positive cap disables capping.
    """
    if len(text) <= max_chars or max_chars <= 0:
        return text, False
    note = f"\n[context-guard-kit] text capped: {len(text)} chars total\n"
    budget = max_chars - len(note)
    if budget < 0:
        budget = 0
    prefix = text[:budget].rstrip()
    return prefix + note, True
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
def compact_item(
    text: str,
    limit: int = MAX_SUMMARY_ITEM_CHARS,
    *,
    show_paths: bool = False,
    sanitizer: object | None = None,
) -> str:
    """Sanitize one failure-summary item and squeeze it onto a single short line.

    The text is sanitized (loading a sanitizer on demand when none is
    supplied), stripped of control sequences, and has every whitespace run
    collapsed to one space. Items longer than *limit* are cut and end with a
    marker stating the untrimmed length.
    """
    active = load_line_sanitizer(show_paths) if sanitizer is None else sanitizer
    cleaned, _was_redacted = active.sanitize(text)  # type: ignore[attr-defined]
    collapsed = re.sub(r"\s+", " ", strip_ansi(cleaned).strip())
    if len(collapsed) > limit:
        note = f"...[item trimmed: {len(collapsed)} chars]"
        budget = max(0, limit - len(note))
        return collapsed[:budget] + note
    return collapsed
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
class RunnerFailureSummary:
    """Bounded, runner-aware extraction of the most actionable failure lines.

    The extractor is intentionally online and stores only a small de-duplicated
    set of findings. That keeps the wrapper useful for huge logs without
    retaining the whole command output in memory.
    """

    def __init__(self, max_items_per_runner: int, *, show_paths: bool = False) -> None:
        """Create an empty summary keeping at most *max_items_per_runner* items per runner."""
        self.max_items_per_runner = max(0, max_items_per_runner)
        self.show_paths = show_paths
        # Loaded once and reused for every item.
        self.sanitizer = load_line_sanitizer(show_paths)
        # runner label -> findings in first-seen order
        self.items: dict[str, list[str]] = collections.defaultdict(list)
        # runner label -> findings already recorded (for de-duplication)
        self.seen: dict[str, set[str]] = collections.defaultdict(set)
        # Set once a jest/vitest "FAIL <file>" line has been seen.
        self.jest_active = False
        # Set once a Go "--- FAIL:" line has been seen.
        self.go_failed_seen = False

    def add(self, runner: str, item: str) -> None:
        """Record *item* under *runner* unless it is empty, a duplicate, or over the cap."""
        if self.max_items_per_runner <= 0:
            return
        compact = compact_item(item, show_paths=self.show_paths, sanitizer=self.sanitizer)
        if not compact or compact in self.seen[runner]:
            return
        if len(self.items[runner]) >= self.max_items_per_runner:
            return
        self.items[runner].append(compact)
        self.seen[runner].add(compact)

    def feed(self, line: str) -> None:
        """Inspect one output line and record any runner-specific finding.

        The checks are independent: one line is tested against every runner
        pattern in turn.
        """
        if self.max_items_per_runner <= 0:
            return

        stripped = strip_ansi(line.rstrip("\n"))

        # pytest result lines: "FAILED <node> - <reason>" / "ERROR <node>".
        match = PYTEST_RESULT_RE.match(stripped)
        if match and (".py" in match.group("node") or "::" in match.group("node")):
            reason = compact_item(match.group("reason") or "", show_paths=self.show_paths, sanitizer=self.sanitizer)
            if reason:
                self.add("pytest", f"{match.group('kind')} {match.group('node')} - {reason}")
            else:
                self.add("pytest", f"{match.group('kind')} {match.group('node')}")

        # "<file>.py:<line>: <message>" lines that also look like an error.
        match = PYTEST_LOCATION_RE.match(stripped)
        if match and ERROR_RE.search(stripped):
            self.add("pytest", f"{match.group('file')}:{match.group('line')}: {match.group('message').strip()}")

        # jest/vitest "FAIL <test file> [> <name>]" header.
        match = JEST_FILE_RE.match(stripped)
        if match:
            self.jest_active = True
            self.add("jest/vitest", f"FAIL {match.group('file')}")
            if match.group("name"):
                self.add("jest/vitest", f"test {match.group('name')}")

        # NOTE(review): indentation was lost in the rendering this block was
        # recovered from. Both checks below are assumed to be gated on
        # jest_active and the vitest location check assumed to be ungated --
        # confirm against the original file.
        if self.jest_active:
            match = JEST_TEST_RE.match(stripped)
            if match:
                self.add("jest/vitest", f"test {match.group('name')}")

            match = JEST_AT_RE.match(stripped)
            if match:
                self.add("jest/vitest", f"{match.group('file')}:{match.group('line')}:{match.group('col')}")

        match = VITEST_LOCATION_RE.match(stripped)
        if match:
            self.add("jest/vitest", f"{match.group('file')}:{match.group('line')}:{match.group('col')}")

        # Go "--- FAIL: <name>" lines.
        match = GO_FAIL_RE.match(stripped)
        if match:
            self.go_failed_seen = True
            self.add("go test", f"FAIL {match.group('name')}")

        # Go "_test.go:<line>:" locations count only after a failure was seen.
        match = GO_LOCATION_RE.match(stripped)
        if self.go_failed_seen and match:
            message = match.group("message").strip()
            suffix = f": {message}" if message else ""
            self.add("go test", f"{match.group('file')}:{match.group('line')}{suffix}")

        # Rust "thread '<name>' panicked at <file>.rs:<line>:<col>" lines.
        match = RUST_THREAD_RE.match(stripped)
        if match:
            self.add(
                "cargo test",
                f"{match.group('name')} at {match.group('file')}:{match.group('line')}:{match.group('col')}",
            )

    def as_lines(self, max_line_chars: int, max_lines: int) -> list[str]:
        """Render the findings as output lines, using at most *max_lines* lines.

        Returns an empty list when nothing was recorded or *max_lines* is
        not positive.
        """
        if not self.items:
            return []
        if max_lines <= 0:
            return []
        out = ["\n--- runner failure summary ---\n"]
        # The header starts with a newline, so it counts as two lines.
        used_lines = len(out[0].splitlines())
        for runner in sorted(self.items):
            runner_line = f"runner={runner}\n"
            if used_lines + 1 > max_lines:
                break
            out.append(runner_line)
            used_lines += 1
            for item in self.items[runner]:
                if used_lines + 1 > max_lines:
                    break
                line, _ = cap_line(f"- {item}\n", max_line_chars)
                out.append(line)
                used_lines += 1
        return out

    def as_dict(self) -> dict[str, list[str]]:
        """Return copies of the non-empty findings lists, ordered by runner label."""
        return {runner: list(items) for runner, items in sorted(self.items.items()) if items}
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
def digest_line_items(lines: Iterable[str], *, limit: int, max_line_chars: int) -> list[str]:
    """Return up to *limit* distinct, non-empty lines, each capped in length.

    Lines are stripped of control sequences and surrounding whitespace
    before comparison. De-duplication uses the text before capping, so two
    long lines that differ only past the cap are both kept.

    Args:
        lines: Candidate lines, in priority order.
        limit: Maximum number of items to return; zero or less yields ``[]``.
        max_line_chars: Per-item cap forwarded to ``cap_line``.
    """
    # Guard first: checking the limit only after an append would let one
    # item through even when the caller asked for none.
    if limit <= 0:
        return []
    out: list[str] = []
    seen: set[str] = set()
    for line in lines:
        item = strip_ansi(line).strip()
        if not item or item in seen:
            continue
        capped, _ = cap_line(item, max_line_chars)
        out.append(capped.strip())
        seen.add(item)
        if len(out) >= limit:
            break
    return out
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
class DuplicateLineTracker:
    """Count repeated sanitized lines while bounding the number of distinct lines kept."""

    def __init__(self, *, max_groups: int = 12, max_unique: int = 2048) -> None:
        self.max_groups = max(0, max_groups)
        self.max_unique = max(1, max_unique)
        # line text -> number of occurrences
        self.counts: dict[str, int] = {}
        # line text -> line number of its first occurrence
        self.first_line: dict[str, int] = {}
        # distinct-line sightings ignored because the table was already full
        self.overflow_unique_lines = 0

    def feed(self, line_number: int, line: str) -> None:
        """Record one occurrence of *line*; blank lines are ignored."""
        text = strip_ansi(line).strip()
        if not text:
            return
        if text in self.counts:
            self.counts[text] += 1
            return
        if len(self.counts) >= self.max_unique:
            self.overflow_unique_lines += 1
            return
        self.counts[text] = 1
        self.first_line[text] = line_number

    def as_list(self) -> list[dict[str, object]]:
        """Return the most repeated lines, most frequent first.

        Ties are broken by first appearance, then by text. When at least one
        group is reported and some distinct lines were not tracked, a final
        entry carries that overflow count.
        """
        ranked = sorted(
            (text for text, count in self.counts.items() if count > 1),
            key=lambda text: (-self.counts[text], self.first_line[text], text),
        )
        groups: list[dict[str, object]] = [
            {
                "count": self.counts[text],
                "first_line": self.first_line[text],
                "text": text,
            }
            for text in ranked[: self.max_groups]
        ]
        if groups and self.overflow_unique_lines:
            overflow_entry: dict[str, object] = {
                "count": self.overflow_unique_lines,
                "first_line": None,
                "text": "[context-guard-kit] additional unique lines omitted from duplicate tracking",
            }
            groups.append(overflow_entry)
        return groups
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
def command_preview(command: list[str], sanitizer: object, max_line_chars: int) -> str:
    """Return a sanitized, length-capped, single-line rendering of *command*.

    The command is shell-quoted when possible and joined with plain spaces
    otherwise.
    """
    try:
        rendered = shlex.join(command)
    except Exception:
        rendered = " ".join(command)
    cleaned, _was_redacted = sanitizer.sanitize(rendered + "\n")  # type: ignore[attr-defined]
    shortened, _was_capped = cap_line(cleaned.strip(), max_line_chars)
    return shortened.strip()
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
def digest_next_queries(
    *,
    rc: int,
    timed_out: bool,
    raw_output_truncated: bool,
    runner_items: dict[str, list[str]],
    top_error_lines: list[str],
) -> list[str]:
    """Suggest follow-up actions for a finished command.

    A timeout takes precedence over everything else, then success
    (``rc == 0``); for failures the suggestions depend on which failure
    facts are available.
    """
    if timed_out:
        return [
            "Inspect timeout cause first; rerun with a narrower command or higher --timeout-seconds only if needed.",
            "If the process spawned children, check whether the wrapped command handles termination cleanly.",
        ]
    if rc == 0:
        if not raw_output_truncated:
            return ["No raw output follow-up needed; command completed successfully."]
        return [
            "Treat this as success unless a specific assertion needs raw logs.",
            "Query exact raw output only for the component named in the next task.",
        ]
    # Failure path: each available piece of evidence contributes one suggestion.
    candidates = (
        (runner_items, "Run the failing test/node from runner_failure_summary directly with minimal verbosity."),
        (top_error_lines, "Inspect top_error_lines before rerunning the full command."),
        (raw_output_truncated, "Rerun without trim only if these failure facts are insufficient."),
    )
    suggestions = [message for evidence, message in candidates if evidence]
    if suggestions:
        return suggestions
    return ["Rerun with a narrower command or grep for the first error before requesting raw output."]
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
def build_failure_signature(
    *,
    status: str,
    rc: int,
    timed_out: bool,
    runner_items: dict[str, list[str]],
    top_error_lines: list[str],
) -> dict[str, object]:
    """Build a short, stable fingerprint describing a failed run.

    The fingerprint basis is chosen in priority order: runner failure facts
    (runners visited in sorted order), then the top error lines, then the
    bare status fields. At most eight basis entries are kept.

    Returns:
        A dict with ``hash`` (first 16 hex chars of a SHA-256 digest),
        ``source``, ``basis``, ``exit_code`` and ``timed_out``.
    """
    max_basis = 8
    origin = "status"
    evidence: list[str] = []

    if runner_items:
        origin = "runner_failure_summary"
        evidence = [
            f"{runner}: {item}"
            for runner in sorted(runner_items)
            for item in runner_items[runner]
        ][:max_basis]
    elif top_error_lines:
        origin = "top_error_lines"
        evidence = top_error_lines[:max_basis]

    if not evidence:
        # Nothing specific to hash: fall back to the coarse status fields.
        # The source label chosen above is intentionally left untouched.
        evidence = [
            f"status={status}",
            f"exit_code={rc}",
            f"timed_out={str(timed_out).lower()}",
        ]

    fingerprint_input = json.dumps(
        {"status": status, "exit_code": rc, "timed_out": timed_out, "basis": evidence},
        ensure_ascii=False,
        sort_keys=True,
    )
    short_hash = hashlib.sha256(fingerprint_input.encode("utf-8", errors="replace")).hexdigest()[:16]

    return {
        "hash": short_hash,
        "source": origin,
        "basis": evidence,
        "exit_code": rc,
        "timed_out": timed_out,
    }
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
def build_digest_payload(
    *,
    args: argparse.Namespace,
    command: list[str],
    rc: int,
    timed_out: bool,
    total: int,
    raw_chars: int,
    visible_chars: int,
    any_line_capped: bool,
    redacted_lines: int,
    head: list[str],
    tail: Iterable[str],
    error_lines: list[str],
    runner_summary: RunnerFailureSummary,
    line_sanitizer: object,
    duplicate_line_groups: list[dict[str, object]] | None = None,
) -> dict[str, object]:
    """Assemble the semantic digest dictionary for one command run.

    Reads ``max_lines``, ``max_chars`` and ``max_line_chars`` from *args*.
    Runner failure facts are included only when *rc* is non-zero, and a
    ``failure_signature`` entry is added whenever the status is not
    ``"success"``. ``duplicate_line_groups`` is included only when truthy.

    Returns:
        The digest payload, ready for the markdown or JSON renderer.
    """
    truncated = total > args.max_lines or visible_chars > args.max_chars or any_line_capped

    if timed_out:
        status = "timeout"
    elif rc == 0:
        status = "success"
    else:
        status = "failure"

    runner_facts = {} if rc == 0 else runner_summary.as_dict()
    error_sample = digest_line_items(error_lines, limit=12, max_line_chars=args.max_line_chars)
    # Successful runs get a slightly smaller head/tail sample.
    sample_size = 8 if status == "success" else 10
    tail_lines = list(tail)

    raw_output_section = {
        "lines": total,
        "chars": raw_chars,
        "visible_chars": visible_chars,
        "truncated": truncated,
        "line_capped": any_line_capped,
        "redacted_lines": redacted_lines,
    }
    budget_section = {
        "max_lines": args.max_lines,
        "max_chars": args.max_chars,
        "max_line_chars": args.max_line_chars,
    }

    digest: dict[str, object] = {
        "tool": "context-guard-kit.trim_command_output",
        "digest_version": 1,
        "status": status,
        "exit_code": rc,
        "timed_out": timed_out,
        "raw_output": raw_output_section,
        "budget": budget_section,
        "command_preview": command_preview(command, line_sanitizer, args.max_line_chars),
        "runner_failure_summary": runner_facts,
        "top_error_lines": error_sample,
        "representative_head": digest_line_items(
            head,
            limit=sample_size,
            max_line_chars=args.max_line_chars,
        ),
        "representative_tail": digest_line_items(
            tail_lines[-sample_size:],
            limit=sample_size,
            max_line_chars=args.max_line_chars,
        ),
    }

    if duplicate_line_groups:
        digest["duplicate_line_groups"] = duplicate_line_groups

    if status != "success":
        digest["failure_signature"] = build_failure_signature(
            status=status,
            rc=rc,
            timed_out=timed_out,
            runner_items=runner_facts,
            top_error_lines=error_sample,
        )

    digest["next_queries"] = digest_next_queries(
        rc=rc,
        timed_out=timed_out,
        raw_output_truncated=truncated,
        runner_items=runner_facts,
        top_error_lines=error_sample,
    )
    return digest
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
def markdown_artifact_receipt_lines(artifact_receipt: dict[str, object]) -> list[str]:
    """Render an artifact receipt as markdown bullet lines.

    The first line always reports the ``stored`` flag and an identifier
    (``artifact_id``, falling back to ``error``). A second line with the
    re-expand CLI is added only when ``exact_reexpand`` is a dict carrying a
    truthy ``cli`` value.

    Returns:
        One or two newline-terminated markdown lines.
    """
    stored_flag = str(artifact_receipt.get("stored")).lower()
    identifier = artifact_receipt.get("artifact_id") or artifact_receipt.get("error")
    rendered = [f"- artifact_receipt: stored={stored_flag} id={identifier}\n"]

    reexpand = artifact_receipt.get("exact_reexpand")
    cli = reexpand.get("cli") if isinstance(reexpand, dict) else None
    if cli:
        rendered.append(f"- exact_reexpand: `{cli}`\n")
    return rendered
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
def compact_markdown_artifact_receipt(payload: dict[str, object], max_chars: int) -> str:
    """Render the payload's artifact receipt within *max_chars* characters.

    The full markdown rendering is returned when it fits. Otherwise
    progressively shorter one-line summaries are tried, and an empty string
    is returned when nothing fits, when *max_chars* is not positive, or when
    the payload has no dict-valued ``artifact_receipt``.
    """
    receipt = payload.get("artifact_receipt")
    if max_chars <= 0 or not isinstance(receipt, dict):
        return ""

    complete = "".join(markdown_artifact_receipt_lines(receipt))
    if len(complete) <= max_chars:
        return complete

    identifier = receipt.get("artifact_id") or receipt.get("error")
    stored_flag = str(receipt.get("stored")).lower()
    reexpand = receipt.get("exact_reexpand")
    if isinstance(reexpand, dict) and "available" in reexpand:
        availability = f" exact_available={str(reexpand.get('available')).lower()}"
    else:
        availability = ""

    summary = f"- artifact_receipt: stored={stored_flag} id={identifier}{availability}"
    # Ordered from most to least informative; the first one that fits wins.
    fallbacks = (
        f"{summary}; raise --max-chars for full exact_reexpand\n",
        f"{summary}\n",
        f"- artifact_receipt: id={identifier}\n",
    )
    return next((text for text in fallbacks if len(text) <= max_chars), "")
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
def render_digest_markdown(payload: dict[str, object], max_chars: int) -> str:
    """Render a digest payload as markdown, capped to *max_chars* characters.

    When the rendering exceeds the cap, a cap marker line is appended and,
    if an artifact receipt is present and a compact form of it fits, that
    receipt is kept ahead of the marker while the remaining text is trimmed
    to whatever space is left.
    """
    # Every emitted line is recorded with a flag telling whether it belongs to
    # the artifact receipt, so the non-receipt text can be rebuilt on its own.
    entries: list[tuple[str, bool]] = []

    def emit(text: str, *, receipt: bool = False) -> None:
        entries.append((text, receipt))

    emit("[context-guard-kit] semantic digest\n")
    emit(f"- status: {payload.get('status')}\n")
    emit(f"- exit_code: {payload.get('exit_code')}\n")
    emit(f"- timed_out: {str(payload.get('timed_out')).lower()}\n")

    raw_output = payload.get("raw_output", {})
    if isinstance(raw_output, dict):
        emit(
            f"- raw_output: {raw_output.get('lines')} lines/{raw_output.get('chars')} chars"
            f" (visible={raw_output.get('visible_chars')}, truncated={str(raw_output.get('truncated')).lower()})\n"
        )
        if raw_output.get("line_capped"):
            emit("- line_capped: true\n")
        redacted = raw_output.get("redacted_lines")
        if redacted:
            emit(f"- redacted_lines: {redacted}\n")

    budget = payload.get("budget", {})
    if isinstance(budget, dict):
        emit(
            f"- budget: {budget.get('max_lines')} lines/{budget.get('max_chars')} chars/"
            f"line={budget.get('max_line_chars')} chars\n"
        )

    preview = payload.get("command_preview")
    if preview:
        emit(f"- command: `{preview}`\n")

    receipt_data = payload.get("artifact_receipt")
    if isinstance(receipt_data, dict):
        for receipt_line in markdown_artifact_receipt_lines(receipt_data):
            emit(receipt_line, receipt=True)

    signature = payload.get("failure_signature")
    if isinstance(signature, dict):
        emit(f"- failure_signature: {signature.get('hash')} ({signature.get('source')})\n")

    runner_facts = payload.get("runner_failure_summary")
    if isinstance(runner_facts, dict) and runner_facts:
        emit("\n## runner_failure_summary\n")
        for runner, facts in sorted(runner_facts.items()):
            emit(f"- runner={runner}\n")
            if not isinstance(facts, list):
                continue
            for fact in facts:
                emit(f" - {fact}\n")

    duplicates = payload.get("duplicate_line_groups")
    if isinstance(duplicates, list) and duplicates:
        emit("\n## duplicate_line_groups\n")
        for group in duplicates:
            if isinstance(group, dict):
                emit(
                    f"- count={group.get('count')} "
                    f"first_line={group.get('first_line')} "
                    f"text={group.get('text')}\n"
                )

    for section in ("top_error_lines", "representative_head", "representative_tail", "next_queries"):
        section_values = payload.get(section)
        if not (isinstance(section_values, list) and section_values):
            continue
        emit(f"\n## {section}\n")
        for value in section_values:
            emit(f"- {value}\n")

    full_text = "".join(text for text, _ in entries)
    output, was_capped = cap_text(full_text, max_chars)
    if not was_capped:
        return output

    marker = "[context-guard-kit] digest capped by --max-chars.\n"
    if max_chars <= len(marker):
        return marker[:max_chars]

    room = max_chars - len(marker)
    reserved_receipt = compact_markdown_artifact_receipt(payload, room)
    if not reserved_receipt:
        trimmed, _ = cap_text(full_text, room)
        return trimmed + marker

    # Keep the receipt intact and spend what is left on the non-receipt text.
    head_budget = room - len(reserved_receipt)
    head = ""
    if head_budget > 0:
        body = "".join(text for text, is_receipt in entries if not is_receipt)
        cap_note = f"\n[context-guard-kit] text capped: {len(body)} chars total\n"
        if len(body) <= head_budget:
            head = body
        elif head_budget > len(cap_note):
            head = body[: head_budget - len(cap_note)].rstrip() + cap_note
    if head and not head.endswith("\n"):
        head += "\n"
    return head + reserved_receipt + marker
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
def render_digest_json(payload: dict[str, object], max_chars: int) -> str:
|
|
867
|
-
def dumps(data: dict[str, object]) -> str:
|
|
868
|
-
return json.dumps(data, ensure_ascii=False, sort_keys=True, indent=2) + "\n"
|
|
869
|
-
|
|
870
|
-
def shrink_list_to_fit(data: dict[str, object], values: list[object]) -> None:
|
|
871
|
-
if len(dumps(data)) <= max_chars:
|
|
872
|
-
return
|
|
873
|
-
lo, hi = 0, len(values)
|
|
874
|
-
best = 0
|
|
875
|
-
original = list(values)
|
|
876
|
-
while lo <= hi:
|
|
877
|
-
mid = (lo + hi) // 2
|
|
878
|
-
values[:] = original[:mid]
|
|
879
|
-
if len(dumps(data)) <= max_chars:
|
|
880
|
-
best = mid
|
|
881
|
-
lo = mid + 1
|
|
882
|
-
else:
|
|
883
|
-
hi = mid - 1
|
|
884
|
-
values[:] = original[:best]
|
|
885
|
-
|
|
886
|
-
def first_fitting(candidates: list[dict[str, object]]) -> str:
|
|
887
|
-
for candidate in candidates:
|
|
888
|
-
output = dumps(candidate)
|
|
889
|
-
if len(output) <= max_chars:
|
|
890
|
-
return output
|
|
891
|
-
return dumps(candidates[-1])
|
|
892
|
-
|
|
893
|
-
def compact_artifact_receipt(*, include_exact_reexpand: bool) -> dict[str, object] | None:
|
|
894
|
-
artifact_receipt = payload.get("artifact_receipt")
|
|
895
|
-
if not isinstance(artifact_receipt, dict):
|
|
896
|
-
return None
|
|
897
|
-
compact: dict[str, object] = {}
|
|
898
|
-
for key in ("stored", "artifact_id", "error", "bytes", "max_bytes"):
|
|
899
|
-
if key in artifact_receipt:
|
|
900
|
-
compact[key] = artifact_receipt[key]
|
|
901
|
-
stored_output = artifact_receipt.get("stored_output")
|
|
902
|
-
if isinstance(stored_output, dict):
|
|
903
|
-
compact["stored_output"] = {
|
|
904
|
-
key: stored_output[key]
|
|
905
|
-
for key in ("scope", "bytes", "lines", "sha256")
|
|
906
|
-
if key in stored_output
|
|
907
|
-
}
|
|
908
|
-
exact = artifact_receipt.get("exact_reexpand")
|
|
909
|
-
if include_exact_reexpand and isinstance(exact, dict):
|
|
910
|
-
compact["exact_reexpand"] = {
|
|
911
|
-
key: exact[key]
|
|
912
|
-
for key in ("available", "scope", "sha256", "bytes", "lines", "cli", "reason")
|
|
913
|
-
if key in exact
|
|
914
|
-
}
|
|
915
|
-
return compact
|
|
916
|
-
|
|
917
|
-
def attach_artifact_receipt(candidate: dict[str, object], artifact_receipt: dict[str, object] | None) -> dict[str, object]:
|
|
918
|
-
if artifact_receipt is not None:
|
|
919
|
-
candidate["artifact_receipt"] = artifact_receipt
|
|
920
|
-
return candidate
|
|
921
|
-
|
|
922
|
-
output = dumps(payload)
|
|
923
|
-
if len(output) <= max_chars:
|
|
924
|
-
return output
|
|
925
|
-
|
|
926
|
-
capped = json.loads(json.dumps(payload))
|
|
927
|
-
capped["digest_capped"] = True
|
|
928
|
-
for key in ("duplicate_line_groups", "representative_tail", "representative_head", "top_error_lines", "next_queries"):
|
|
929
|
-
values = capped.get(key)
|
|
930
|
-
if isinstance(values, list):
|
|
931
|
-
shrink_list_to_fit(capped, values)
|
|
932
|
-
failure_signature = capped.get("failure_signature")
|
|
933
|
-
if isinstance(failure_signature, dict):
|
|
934
|
-
basis = failure_signature.get("basis")
|
|
935
|
-
if isinstance(basis, list):
|
|
936
|
-
shrink_list_to_fit(capped, basis)
|
|
937
|
-
runner_summary = capped.get("runner_failure_summary")
|
|
938
|
-
if isinstance(runner_summary, dict):
|
|
939
|
-
for runner in sorted(runner_summary):
|
|
940
|
-
values = runner_summary.get(runner)
|
|
941
|
-
if isinstance(values, list):
|
|
942
|
-
shrink_list_to_fit(capped, values)
|
|
943
|
-
output = dumps(capped)
|
|
944
|
-
if len(output) <= max_chars:
|
|
945
|
-
return output
|
|
946
|
-
|
|
947
|
-
compact_signature: object | None = None
|
|
948
|
-
failure_signature = payload.get("failure_signature")
|
|
949
|
-
if isinstance(failure_signature, dict):
|
|
950
|
-
compact_signature = {
|
|
951
|
-
"hash": failure_signature.get("hash"),
|
|
952
|
-
"source": failure_signature.get("source"),
|
|
953
|
-
"exit_code": failure_signature.get("exit_code"),
|
|
954
|
-
"timed_out": failure_signature.get("timed_out"),
|
|
955
|
-
}
|
|
956
|
-
compact_receipt = compact_artifact_receipt(include_exact_reexpand=True)
|
|
957
|
-
minimal_receipt = compact_artifact_receipt(include_exact_reexpand=False)
|
|
958
|
-
|
|
959
|
-
return first_fitting(
|
|
960
|
-
[
|
|
961
|
-
attach_artifact_receipt(
|
|
962
|
-
{
|
|
963
|
-
"tool": payload.get("tool"),
|
|
964
|
-
"digest_version": payload.get("digest_version"),
|
|
965
|
-
"digest_capped": True,
|
|
966
|
-
"status": payload.get("status"),
|
|
967
|
-
"exit_code": payload.get("exit_code"),
|
|
968
|
-
"timed_out": payload.get("timed_out"),
|
|
969
|
-
"failure_signature": compact_signature,
|
|
970
|
-
"raw_output": payload.get("raw_output"),
|
|
971
|
-
"budget": payload.get("budget"),
|
|
972
|
-
"next_queries": ["Raise --max-chars or inspect a narrower command for details."],
|
|
973
|
-
},
|
|
974
|
-
compact_receipt,
|
|
975
|
-
),
|
|
976
|
-
attach_artifact_receipt(
|
|
977
|
-
{
|
|
978
|
-
"digest_capped": True,
|
|
979
|
-
"status": payload.get("status"),
|
|
980
|
-
"exit_code": payload.get("exit_code"),
|
|
981
|
-
"timed_out": payload.get("timed_out"),
|
|
982
|
-
"failure_signature": compact_signature,
|
|
983
|
-
"raw_output": payload.get("raw_output"),
|
|
984
|
-
"next_queries": ["Raise --max-chars or inspect a narrower command for details."],
|
|
985
|
-
},
|
|
986
|
-
compact_receipt,
|
|
987
|
-
),
|
|
988
|
-
attach_artifact_receipt(
|
|
989
|
-
{
|
|
990
|
-
"digest_capped": True,
|
|
991
|
-
"status": payload.get("status"),
|
|
992
|
-
"exit_code": payload.get("exit_code"),
|
|
993
|
-
"timed_out": payload.get("timed_out"),
|
|
994
|
-
"failure_signature": compact_signature,
|
|
995
|
-
},
|
|
996
|
-
compact_receipt,
|
|
997
|
-
),
|
|
998
|
-
attach_artifact_receipt(
|
|
999
|
-
{
|
|
1000
|
-
"digest_capped": True,
|
|
1001
|
-
"status": payload.get("status"),
|
|
1002
|
-
"exit_code": payload.get("exit_code"),
|
|
1003
|
-
"timed_out": payload.get("timed_out"),
|
|
1004
|
-
},
|
|
1005
|
-
minimal_receipt,
|
|
1006
|
-
),
|
|
1007
|
-
{"digest_capped": True},
|
|
1008
|
-
]
|
|
1009
|
-
)
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
# Unique sentinel that the stdout reader thread enqueues once the stream is
# exhausted; consumers detect it with an identity (`is`) comparison.
_STREAM_END = object()
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
def process_group_exists(pgid: int) -> bool:
    """Report whether process group *pgid* can still be signalled.

    Sends signal 0 to the group, which delivers nothing. A permission error
    is treated as "exists"; any other ``OSError`` (including a failed
    lookup) is treated as "does not exist".
    """
    try:
        os.killpg(pgid, 0)
    except PermissionError:
        # Not allowed to signal the group, but the call did find it.
        return True
    except OSError:
        # Covers ProcessLookupError as well as any other OS-level failure.
        return False
    else:
        return True
|
|
1025
|
-
|
|
1026
|
-
|
|
1027
|
-
def terminate_process_tree(
    proc: subprocess.Popen[str],
    *,
    process_group_id: int | None = None,
    include_exited_group: bool = False,
) -> None:
    """Stop *proc* and, where process groups are available, its whole group.

    On non-Windows platforms the process group is sent SIGTERM, polled for up
    to two seconds, and then sent SIGKILL if it still exists. On Windows only
    *proc* itself is terminated, then killed if it has not exited within two
    seconds.

    Args:
        proc: The wrapped process.
        process_group_id: Group to signal; defaults to ``proc.pid``.
        include_exited_group: When true, signal the group even if *proc*
            itself has already exited.
    """
    if os.name == "nt":
        if proc.poll() is not None:
            return
        try:
            proc.terminate()
        except ProcessLookupError:
            return
        except OSError:
            # terminate() failed for another reason; try a hard kill instead.
            try:
                proc.kill()
            except OSError:
                return
        try:
            proc.wait(timeout=2)
        except subprocess.TimeoutExpired:
            pass
        else:
            return
        try:
            proc.kill()
        except OSError:
            # Includes ProcessLookupError: nothing left to kill.
            return
        return

    group = proc.pid if process_group_id is None else process_group_id
    if proc.poll() is not None and not include_exited_group:
        return
    try:
        os.killpg(group, signal.SIGTERM)
    except ProcessLookupError:
        return

    grace_ends = time.monotonic() + 2
    while time.monotonic() < grace_ends:
        if proc.poll() is None:
            try:
                proc.wait(timeout=0.05)
            except subprocess.TimeoutExpired:
                pass
        if not process_group_exists(group):
            return
        time.sleep(0.05)

    # Grace period elapsed and the group is still there: escalate.
    try:
        os.killpg(group, signal.SIGKILL)
    except ProcessLookupError:
        pass
    return
|
|
1079
|
-
|
|
1080
|
-
|
|
1081
|
-
class TimedCommandStream:
    """Iterate over a subprocess's output lines while enforcing a deadline.

    A daemon thread copies lines from *stdout* into a bounded queue. The
    iterator drains that queue, and once the deadline passes while the stream
    is still open it terminates the process tree and yields one final
    timeout message line.
    """

    def __init__(
        self,
        proc: subprocess.Popen[str],
        stdout: Iterable[str],
        *,
        timeout_seconds: int,
        process_group_id: int | None = None,
    ) -> None:
        self.proc = proc
        self.timeout_seconds = timeout_seconds
        self.process_group_id = process_group_id
        self.deadline = time.monotonic() + timeout_seconds
        self.timed_out = False
        self.timeout_reported = False
        self._stream_closed = False
        self._queue: queue.Queue[str | object] = queue.Queue(maxsize=1024)
        self._thread = threading.Thread(
            target=self._read_stdout,
            args=(stdout,),
            daemon=True,
        )
        self._thread.start()

    def _read_stdout(self, stdout: Iterable[str]) -> None:
        """Reader-thread body: enqueue every line, then the end sentinel."""
        try:
            for text in stdout:
                self._queue.put(text)
        finally:
            # Flag the close before enqueueing the sentinel so the consumer
            # can see the stream is closed even before it dequeues the marker.
            self._stream_closed = True
            self._queue.put(_STREAM_END)

    def timeout_message(self) -> str:
        """Return the line reported when the command exceeds its timeout."""
        return (
            f"[context-guard-kit] command timed out after {self.timeout_seconds}s; "
            "terminated wrapped process\n"
        )

    def _mark_timed_out(self) -> None:
        """Record the timeout and terminate the process tree, once."""
        if self.timed_out:
            return
        self.timed_out = True
        terminate_process_tree(
            self.proc,
            process_group_id=self.process_group_id,
            include_exited_group=True,
        )

    def _timeout_line(self) -> str:
        """Trigger the timeout handling and return the message to yield."""
        self._mark_timed_out()
        self.timeout_reported = True
        return self.timeout_message()

    def __iter__(self) -> Iterator[str]:
        while True:
            remaining = self.deadline - time.monotonic()
            if self.proc.poll() is not None or self.timed_out:
                wait_time = 0.05
            else:
                wait_time = min(0.05, max(0.0, remaining))

            try:
                item = self._queue.get(timeout=wait_time)
            except queue.Empty:
                if remaining > 0 or self._stream_closed:
                    # Still within budget, or the sentinel is on its way.
                    continue
                if not self.timeout_reported:
                    yield self._timeout_line()
                break

            if item is _STREAM_END:
                break
            if not isinstance(item, str):
                continue
            yield item

            # Enforce the deadline even when output keeps flowing.
            if self._stream_closed or time.monotonic() < self.deadline:
                continue
            if not self.timeout_reported:
                yield self._timeout_line()
            break

    def returncode(self) -> int:
        """Return the exit code, or ``TIMEOUT_EXIT_CODE`` after a timeout.

        Waits for the process only until the deadline; running out of time
        here also counts as a timeout and terminates the process tree.
        """
        if self.timed_out:
            return TIMEOUT_EXIT_CODE
        time_left = max(0.0, self.deadline - time.monotonic())
        try:
            return self.proc.wait(timeout=time_left)
        except subprocess.TimeoutExpired:
            self._mark_timed_out()
            return TIMEOUT_EXIT_CODE
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
def process_group_id_for(proc: subprocess.Popen[str]) -> int | None:
|
|
1163
|
-
if os.name == "nt":
|
|
1164
|
-
return None
|
|
1165
|
-
try:
|
|
1166
|
-
return os.getpgid(proc.pid)
|
|
1167
|
-
except ProcessLookupError:
|
|
1168
|
-
# start_new_session=True makes the child the group leader; if it exits
|
|
1169
|
-
# before getpgid(), the group id is still the leader pid while inherited
|
|
1170
|
-
# stdout descendants remain alive.
|
|
1171
|
-
return proc.pid
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
def main() -> int:
|
|
1175
|
-
parser = argparse.ArgumentParser()
|
|
1176
|
-
parser.add_argument("--max-lines", type=int, default=220)
|
|
1177
|
-
parser.add_argument("--max-chars", type=int, default=20000)
|
|
1178
|
-
parser.add_argument("--max-line-chars", type=int, default=4000)
|
|
1179
|
-
parser.add_argument("--head-lines", type=int, default=40)
|
|
1180
|
-
parser.add_argument("--tail-lines", type=int, default=80)
|
|
1181
|
-
parser.add_argument("--error-lines", type=int, default=120)
|
|
1182
|
-
parser.add_argument(
|
|
1183
|
-
"--runner-summary-items",
|
|
1184
|
-
type=int,
|
|
1185
|
-
default=12,
|
|
1186
|
-
help="maximum runner-specific failure facts to keep per detected runner (0 disables)",
|
|
1187
|
-
)
|
|
1188
|
-
parser.add_argument(
|
|
1189
|
-
"--show-paths",
|
|
1190
|
-
action="store_true",
|
|
1191
|
-
help="show raw absolute paths in output instead of basename#path:<hash>; local debugging only because private paths may be exposed",
|
|
1192
|
-
)
|
|
1193
|
-
parser.add_argument(
|
|
1194
|
-
"--timeout-seconds",
|
|
1195
|
-
type=int,
|
|
1196
|
-
default=DEFAULT_TIMEOUT_SECONDS,
|
|
1197
|
-
help=(
|
|
1198
|
-
"maximum runtime for wrapped commands before terminating the process group "
|
|
1199
|
-
f"(default: {DEFAULT_TIMEOUT_SECONDS}, max: {MAX_TIMEOUT_SECONDS})"
|
|
1200
|
-
),
|
|
1201
|
-
)
|
|
1202
|
-
parser.add_argument(
|
|
1203
|
-
"--digest",
|
|
1204
|
-
choices=("off", "markdown", "json"),
|
|
1205
|
-
default="off",
|
|
1206
|
-
help=(
|
|
1207
|
-
"emit an opt-in semantic digest instead of raw/trimmed logs "
|
|
1208
|
-
"(default: off; formats: markdown, json)"
|
|
1209
|
-
),
|
|
1210
|
-
)
|
|
1211
|
-
parser.add_argument(
|
|
1212
|
-
"--artifact-receipt",
|
|
1213
|
-
action="store_true",
|
|
1214
|
-
help=(
|
|
1215
|
-
"with --digest, store the exact sanitized full output as a local "
|
|
1216
|
-
"context-guard-artifact receipt and include re-expand metadata"
|
|
1217
|
-
),
|
|
1218
|
-
)
|
|
1219
|
-
parser.add_argument(
|
|
1220
|
-
"--artifact-dir",
|
|
1221
|
-
default=".context-guard/artifacts",
|
|
1222
|
-
help="artifact receipt directory used by --artifact-receipt (default: .context-guard/artifacts)",
|
|
1223
|
-
)
|
|
1224
|
-
parser.add_argument(
|
|
1225
|
-
"--artifact-max-bytes",
|
|
1226
|
-
type=int,
|
|
1227
|
-
default=DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES,
|
|
1228
|
-
help=(
|
|
1229
|
-
"maximum sanitized output bytes eligible for --artifact-receipt "
|
|
1230
|
-
f"(default: {DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES}, max: {MAX_ARTIFACT_RECEIPT_MAX_BYTES})"
|
|
1231
|
-
),
|
|
1232
|
-
)
|
|
1233
|
-
parser.add_argument("command", nargs=argparse.REMAINDER)
|
|
1234
|
-
args = parser.parse_args()
|
|
1235
|
-
normalize_budgets(args)
|
|
1236
|
-
args.artifact_max_bytes = bounded_int(
|
|
1237
|
-
args.artifact_max_bytes,
|
|
1238
|
-
DEFAULT_ARTIFACT_RECEIPT_MAX_BYTES,
|
|
1239
|
-
1,
|
|
1240
|
-
MAX_ARTIFACT_RECEIPT_MAX_BYTES,
|
|
1241
|
-
)
|
|
1242
|
-
if args.artifact_receipt and args.digest == "off":
|
|
1243
|
-
print("trim_command_output.py: --artifact-receipt requires --digest markdown or --digest json", file=sys.stderr)
|
|
1244
|
-
return 2
|
|
1245
|
-
|
|
1246
|
-
command = args.command
|
|
1247
|
-
if command and command[0] == "--":
|
|
1248
|
-
command = command[1:]
|
|
1249
|
-
if not command:
|
|
1250
|
-
print("trim_command_output.py: missing command", file=sys.stderr)
|
|
1251
|
-
return 2
|
|
1252
|
-
|
|
1253
|
-
popen_kwargs: dict[str, object] = {}
|
|
1254
|
-
if os.name != "nt":
|
|
1255
|
-
popen_kwargs["start_new_session"] = True
|
|
1256
|
-
try:
|
|
1257
|
-
proc = subprocess.Popen(
|
|
1258
|
-
command,
|
|
1259
|
-
stdout=subprocess.PIPE,
|
|
1260
|
-
stderr=subprocess.STDOUT,
|
|
1261
|
-
text=True,
|
|
1262
|
-
bufsize=1,
|
|
1263
|
-
errors="replace",
|
|
1264
|
-
**popen_kwargs,
|
|
1265
|
-
)
|
|
1266
|
-
except OSError as exc:
|
|
1267
|
-
print(f"context-guard-kit: command failed to start: {exc}", file=sys.stderr)
|
|
1268
|
-
return 127
|
|
1269
|
-
|
|
1270
|
-
all_lines: list[str] = []
|
|
1271
|
-
head: list[str] = []
|
|
1272
|
-
tail: collections.deque[str] = collections.deque(maxlen=args.tail_lines)
|
|
1273
|
-
error_lines: list[str] = []
|
|
1274
|
-
total = 0
|
|
1275
|
-
raw_chars = 0
|
|
1276
|
-
visible_chars = 0
|
|
1277
|
-
any_line_capped = False
|
|
1278
|
-
runner_summary = RunnerFailureSummary(args.runner_summary_items, show_paths=args.show_paths)
|
|
1279
|
-
line_sanitizer = load_line_sanitizer(args.show_paths)
|
|
1280
|
-
duplicate_tracker = DuplicateLineTracker()
|
|
1281
|
-
redacted_lines = 0
|
|
1282
|
-
artifact_lines: list[str] = []
|
|
1283
|
-
artifact_capture_bytes = 0
|
|
1284
|
-
artifact_capture_overflow = False
|
|
1285
|
-
|
|
1286
|
-
if proc.stdout is None:
|
|
1287
|
-
print("trim_command_output.py: subprocess produced no stdout pipe", file=sys.stderr)
|
|
1288
|
-
return 1
|
|
1289
|
-
command_stream = TimedCommandStream(
|
|
1290
|
-
proc,
|
|
1291
|
-
proc.stdout,
|
|
1292
|
-
timeout_seconds=args.timeout_seconds,
|
|
1293
|
-
process_group_id=process_group_id_for(proc),
|
|
1294
|
-
)
|
|
1295
|
-
for line in command_stream:
|
|
1296
|
-
total += 1
|
|
1297
|
-
raw_chars += len(line)
|
|
1298
|
-
visible_source, redacted = line_sanitizer.sanitize(line) # type: ignore[attr-defined]
|
|
1299
|
-
if redacted:
|
|
1300
|
-
redacted_lines += 1
|
|
1301
|
-
artifact_capture_bytes, artifact_capture_overflow = capture_sanitized_artifact_line(
|
|
1302
|
-
capture_enabled=args.artifact_receipt,
|
|
1303
|
-
sanitized_line=visible_source,
|
|
1304
|
-
artifact_lines=artifact_lines,
|
|
1305
|
-
capture_bytes=artifact_capture_bytes,
|
|
1306
|
-
capture_overflow=artifact_capture_overflow,
|
|
1307
|
-
max_bytes=args.artifact_max_bytes,
|
|
1308
|
-
)
|
|
1309
|
-
visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
|
|
1310
|
-
any_line_capped = any_line_capped or line_capped
|
|
1311
|
-
visible_chars += len(visible_line)
|
|
1312
|
-
duplicate_tracker.feed(total, visible_line)
|
|
1313
|
-
if total <= args.head_lines:
|
|
1314
|
-
head.append(visible_line)
|
|
1315
|
-
tail.append(visible_line)
|
|
1316
|
-
if ERROR_RE.search(visible_line) and len(error_lines) < args.error_lines:
|
|
1317
|
-
error_lines.append(visible_line)
|
|
1318
|
-
runner_summary.feed(line)
|
|
1319
|
-
if total <= args.max_lines:
|
|
1320
|
-
all_lines.append(visible_line)
|
|
1321
|
-
|
|
1322
|
-
rc = command_stream.returncode()
|
|
1323
|
-
if command_stream.timed_out and not command_stream.timeout_reported:
|
|
1324
|
-
line = command_stream.timeout_message()
|
|
1325
|
-
command_stream.timeout_reported = True
|
|
1326
|
-
total += 1
|
|
1327
|
-
raw_chars += len(line)
|
|
1328
|
-
visible_source, redacted = line_sanitizer.sanitize(line) # type: ignore[attr-defined]
|
|
1329
|
-
if redacted:
|
|
1330
|
-
redacted_lines += 1
|
|
1331
|
-
artifact_capture_bytes, artifact_capture_overflow = capture_sanitized_artifact_line(
|
|
1332
|
-
capture_enabled=args.artifact_receipt,
|
|
1333
|
-
sanitized_line=visible_source,
|
|
1334
|
-
artifact_lines=artifact_lines,
|
|
1335
|
-
capture_bytes=artifact_capture_bytes,
|
|
1336
|
-
capture_overflow=artifact_capture_overflow,
|
|
1337
|
-
max_bytes=args.artifact_max_bytes,
|
|
1338
|
-
)
|
|
1339
|
-
visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
|
|
1340
|
-
any_line_capped = any_line_capped or line_capped
|
|
1341
|
-
visible_chars += len(visible_line)
|
|
1342
|
-
duplicate_tracker.feed(total, visible_line)
|
|
1343
|
-
if total <= args.head_lines:
|
|
1344
|
-
head.append(visible_line)
|
|
1345
|
-
tail.append(visible_line)
|
|
1346
|
-
if ERROR_RE.search(visible_line) and len(error_lines) < args.error_lines:
|
|
1347
|
-
error_lines.append(visible_line)
|
|
1348
|
-
runner_summary.feed(line)
|
|
1349
|
-
if total <= args.max_lines:
|
|
1350
|
-
all_lines.append(visible_line)
|
|
1351
|
-
|
|
1352
|
-
if args.digest != "off":
|
|
1353
|
-
payload = build_digest_payload(
|
|
1354
|
-
args=args,
|
|
1355
|
-
command=command,
|
|
1356
|
-
rc=rc,
|
|
1357
|
-
timed_out=command_stream.timed_out,
|
|
1358
|
-
total=total,
|
|
1359
|
-
raw_chars=raw_chars,
|
|
1360
|
-
visible_chars=visible_chars,
|
|
1361
|
-
any_line_capped=any_line_capped,
|
|
1362
|
-
redacted_lines=redacted_lines,
|
|
1363
|
-
head=head,
|
|
1364
|
-
tail=list(tail),
|
|
1365
|
-
error_lines=error_lines,
|
|
1366
|
-
runner_summary=runner_summary,
|
|
1367
|
-
line_sanitizer=line_sanitizer,
|
|
1368
|
-
duplicate_line_groups=duplicate_tracker.as_list(),
|
|
1369
|
-
)
|
|
1370
|
-
if args.artifact_receipt:
|
|
1371
|
-
if artifact_capture_overflow:
|
|
1372
|
-
payload["artifact_receipt"] = {
|
|
1373
|
-
"stored": False,
|
|
1374
|
-
"error": "sanitized_output_exceeds_artifact_max_bytes",
|
|
1375
|
-
"max_bytes": args.artifact_max_bytes,
|
|
1376
|
-
"exact_reexpand": {"available": False, "reason": "artifact size cap exceeded"},
|
|
1377
|
-
}
|
|
1378
|
-
else:
|
|
1379
|
-
try:
|
|
1380
|
-
payload["artifact_receipt"] = store_sanitized_artifact_receipt(
|
|
1381
|
-
sanitized_text="".join(artifact_lines),
|
|
1382
|
-
command=command,
|
|
1383
|
-
args=args,
|
|
1384
|
-
line_sanitizer=line_sanitizer,
|
|
1385
|
-
redacted_lines=redacted_lines,
|
|
1386
|
-
)
|
|
1387
|
-
except Exception as exc:
|
|
1388
|
-
payload["artifact_receipt"] = {
|
|
1389
|
-
"stored": False,
|
|
1390
|
-
"error": "artifact_receipt_unavailable",
|
|
1391
|
-
"reason": f"{exc.__class__.__name__}: {exc}",
|
|
1392
|
-
"exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
|
|
1393
|
-
}
|
|
1394
|
-
if args.digest == "json":
|
|
1395
|
-
sys.stdout.write(render_digest_json(payload, args.max_chars))
|
|
1396
|
-
else:
|
|
1397
|
-
sys.stdout.write(render_digest_markdown(payload, args.max_chars))
|
|
1398
|
-
return rc
|
|
1399
|
-
|
|
1400
|
-
if total <= args.max_lines and visible_chars <= args.max_chars and not any_line_capped:
|
|
1401
|
-
sys.stdout.writelines(all_lines)
|
|
1402
|
-
else:
|
|
1403
|
-
head_budget = min(args.head_lines, max(1, args.max_lines // 4))
|
|
1404
|
-
tail_budget = min(args.tail_lines, max(1, args.max_lines // 3))
|
|
1405
|
-
head_out = head[:head_budget]
|
|
1406
|
-
tail_out = [line for line in list(tail)[-tail_budget:] if line not in set(head_out)]
|
|
1407
|
-
remaining = max(0, args.max_lines - len(head_out) - len(tail_out))
|
|
1408
|
-
error_out = unique_keep_order(error_lines)[:remaining]
|
|
1409
|
-
|
|
1410
|
-
parts: list[str] = []
|
|
1411
|
-
parts.append(
|
|
1412
|
-
f"[context-guard-kit] output trimmed: {total} lines/{raw_chars} chars "
|
|
1413
|
-
f"-> budget about {args.max_lines} log lines/{args.max_chars} chars\n"
|
|
1414
|
-
)
|
|
1415
|
-
parts.append(f"[context-guard-kit] command exit_code={rc}\n")
|
|
1416
|
-
if any_line_capped:
|
|
1417
|
-
parts.append(f"[context-guard-kit] one or more lines were capped at {args.max_line_chars} chars\n")
|
|
1418
|
-
if redacted_lines:
|
|
1419
|
-
parts.append(f"[context-guard-kit] redacted_lines={redacted_lines}\n")
|
|
1420
|
-
summary_budget = max(0, min(args.max_lines, max(4, args.max_lines // 3))) if args.max_lines > 0 else 0
|
|
1421
|
-
runner_lines = runner_summary.as_lines(args.max_line_chars, summary_budget) if rc != 0 else []
|
|
1422
|
-
summary_line_count = len("".join(runner_lines).splitlines())
|
|
1423
|
-
remaining_log_budget = max(0, args.max_lines - summary_line_count)
|
|
1424
|
-
|
|
1425
|
-
parts.extend(runner_lines)
|
|
1426
|
-
parts.append("\n--- head ---\n")
|
|
1427
|
-
if remaining_log_budget > 0:
|
|
1428
|
-
head_out = head_out[:remaining_log_budget]
|
|
1429
|
-
parts.extend(head_out)
|
|
1430
|
-
remaining_log_budget -= len(head_out)
|
|
1431
|
-
if error_out:
|
|
1432
|
-
parts.append("\n--- matched error/failure lines ---\n")
|
|
1433
|
-
error_out = error_out[:remaining_log_budget]
|
|
1434
|
-
parts.extend(error_out)
|
|
1435
|
-
remaining_log_budget -= len(error_out)
|
|
1436
|
-
parts.append("\n--- tail ---\n")
|
|
1437
|
-
if remaining_log_budget > 0:
|
|
1438
|
-
parts.extend(tail_out[-remaining_log_budget:])
|
|
1439
|
-
parts.append("\n[context-guard-kit] rerun the command without trim only if more context is essential.\n")
|
|
1440
|
-
output, capped = cap_text("".join(parts), args.max_chars)
|
|
1441
|
-
if capped:
|
|
1442
|
-
output += "[context-guard-kit] final summary was capped by --max-chars.\n"
|
|
1443
|
-
sys.stdout.write(output)
|
|
1444
|
-
|
|
1445
|
-
return rc
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
# Script entry point: run main() only when executed directly (not on import)
# and hand its return value to the interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|