@ictechgy/context-guard 0.4.9 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +16 -0
- package/README.ko.md +41 -24
- package/README.md +66 -26
- package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
- package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
- package/docs/distribution.md +10 -7
- package/docs/experimental-benchmark-fixtures.md +8 -1
- package/package.json +3 -6
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +9 -6
- package/plugins/context-guard/README.md +21 -13
- package/plugins/context-guard/bin/context-guard +113 -26
- package/plugins/context-guard/bin/context-guard-artifact +542 -46
- package/plugins/context-guard/bin/context-guard-cache-score +380 -0
- package/plugins/context-guard/bin/context-guard-compress +146 -1
- package/plugins/context-guard/bin/context-guard-cost +783 -4
- package/plugins/context-guard/bin/context-guard-experiments +99 -18
- package/plugins/context-guard/bin/context-guard-failed-nudge +3 -0
- package/plugins/context-guard/bin/context-guard-filter +163 -7
- package/plugins/context-guard/bin/context-guard-guard-read +3 -0
- package/plugins/context-guard/bin/context-guard-pack +602 -43
- package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
- package/plugins/context-guard/bin/context-guard-setup +165 -31
- package/plugins/context-guard/bin/context-guard-statusline +490 -283
- package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +241 -1
- package/plugins/context-guard/lib/context_guard_commands.py +206 -0
- package/plugins/context-guard/skills/setup/SKILL.md +1 -0
- package/context-guard-kit/README.md +0 -91
- package/context-guard-kit/benchmark_runner.py +0 -2401
- package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
- package/context-guard-kit/context_compress.py +0 -695
- package/context-guard-kit/context_escrow.py +0 -935
- package/context-guard-kit/context_filter.py +0 -637
- package/context-guard-kit/context_guard_cli.py +0 -325
- package/context-guard-kit/context_guard_diet.py +0 -1711
- package/context-guard-kit/context_pack.py +0 -2713
- package/context-guard-kit/cost_guard.py +0 -2349
- package/context-guard-kit/experimental_registry.py +0 -4348
- package/context-guard-kit/failed_attempt_nudge.py +0 -567
- package/context-guard-kit/guard_large_read.py +0 -690
- package/context-guard-kit/hook_secret_patterns.py +0 -43
- package/context-guard-kit/read_symbol.py +0 -483
- package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
- package/context-guard-kit/sanitize_output.py +0 -725
- package/context-guard-kit/settings.example.json +0 -67
- package/context-guard-kit/setup_wizard.py +0 -2515
- package/context-guard-kit/statusline.sh +0 -362
- package/context-guard-kit/statusline_merged.sh +0 -157
- package/context-guard-kit/tool_schema_pruner.py +0 -837
- package/context-guard-kit/trim_command_output.py +0 -1449
|
@@ -1,637 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Validate and apply bounded declarative command-output filters.
|
|
3
|
-
|
|
4
|
-
This helper is intentionally opt-in. User filter configs live outside package
|
|
5
|
-
code and invalid/no-match/failure cases pass command output through rather than
|
|
6
|
-
risk hiding evidence.
|
|
7
|
-
"""
|
|
8
|
-
from __future__ import annotations
|
|
9
|
-
|
|
10
|
-
import argparse
|
|
11
|
-
import codecs
|
|
12
|
-
from dataclasses import dataclass
|
|
13
|
-
import json
|
|
14
|
-
import os
|
|
15
|
-
from pathlib import Path
|
|
16
|
-
import re
|
|
17
|
-
import shlex
|
|
18
|
-
import signal
|
|
19
|
-
import subprocess
|
|
20
|
-
import sys
|
|
21
|
-
import threading
|
|
22
|
-
import time
|
|
23
|
-
from typing import Any, Iterable
|
|
24
|
-
|
|
25
|
-
# Value the config's "schema_version" key must equal; also echoed in reports.
SCHEMA_VERSION = "contextguard.filter-dsl.v1"
# Program name used for argparse and as the prefix of diagnostic messages.
TOOL_NAME = "context-guard-filter"
# Largest config file (in bytes) that read_json_limited will accept.
MAX_CONFIG_BYTES = 1_000_000
# Maximum number of entries inspected in the config's "filters" list.
MAX_FILTERS = 100
# Default item cap for validate_str_list and the combined include+exclude cap.
MAX_REGEXES_PER_FILTER = 20
# Maximum length of a single pattern string.
MAX_REGEX_CHARS = 500
# Limits on match.argv_prefix: number of parts and length of each part.
MAX_ARG_PARTS = 64
MAX_ARG_CHARS = 200
# Default and upper clamp for --max-capture-bytes.
DEFAULT_MAX_CAPTURE_BYTES = 5_000_000
MAX_CAPTURE_BYTES_LIMIT = 50_000_000
# Default and upper clamp for --max-line-chars.
DEFAULT_MAX_LINE_CHARS = 100_000
MAX_LINE_CHARS_LIMIT = 1_000_000
# Upper bound for head_lines/tail_lines/max_lines and for emitted line count.
MAX_EMIT_LINES = 5_000
# Default and upper clamp for --timeout-seconds.
DEFAULT_TIMEOUT_SECONDS = 600
MAX_TIMEOUT_SECONDS = 86_400
# Return code reported by run_command when the command times out.
TIMEOUT_EXIT_CODE = 124
# How long run_command waits for the pipe reader threads after the wait ends.
TIMEOUT_PIPE_DRAIN_GRACE_SECONDS = 5.0
# Per-step grace period used while terminating the process and its readers.
PIPE_THREAD_CLOSE_GRACE_SECONDS = 1.0
# Keys allowed on a filter object and on its "match" object.
FILTER_KEYS = {"id", "match", "passthrough_on_exit", "include_regex", "exclude_regex", "head_lines", "tail_lines", "max_lines"}
MATCH_KEYS = {"argv_prefix", "argv_regex"}
# Executable basenames that is_protected_command always treats as protected.
PROTECTED_BASENAMES = {
    "git",
    "gh",
    "pytest",
    "ruff",
    "mypy",
    "eslint",
    "vitest",
    "jest",
}
# npm/pnpm/yarn subcommands treated as protected.
PROTECTED_NPM_TASKS = {"test", "lint"}
# Modules treated as protected when run as `python -m <module>`.
PROTECTED_PYTHON_MODULES = {"pytest", "ruff", "mypy"}
# Tokens checked by has_test_lint_signal against argv-derived tokens.
PROTECTED_DIRECT_NAMES = {"pytest", "ruff", "mypy", "eslint", "vitest", "jest", "tox"}
PROTECTED_INTENT_TOKENS = {"test", "tests", "lint", "clippy"}
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
@dataclass(frozen=True)
class CompiledFilter:
    """One filter entry from the config with its regexes already compiled.

    Instances are built by ``validate_config`` and are immutable.
    """

    # Filter identifier; validate_config substitutes "invalid-<idx>" when the
    # configured id is unusable.
    id: str
    # Leading argv items that must match exactly, or None when not configured.
    argv_prefix: tuple[str, ...] | None
    # Pattern searched against the shell-joined command line, or None.
    argv_regex: re.Pattern[str] | None
    # When True, cmd_run passes output through unfiltered on a non-zero exit.
    passthrough_on_exit: bool
    # A line is kept if any include pattern matches (when any are configured).
    include_regex: tuple[re.Pattern[str], ...]
    # A line is dropped if any exclude pattern matches.
    exclude_regex: tuple[re.Pattern[str], ...]
    # Number of leading / trailing lines to keep after regex filtering.
    head_lines: int | None
    tail_lines: int | None
    # Final cap on the number of emitted lines.
    max_lines: int | None
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
    """Coerce *value* to an int clamped into ``[minimum, maximum]``.

    When ``int(value)`` fails with ``TypeError``, ``ValueError`` or
    ``OverflowError``, *default* is returned unchanged (it is not clamped).
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    # Apply the lower bound first, then the upper bound, so the upper bound
    # wins if the two are ever inconsistent.
    if parsed < minimum:
        parsed = minimum
    if parsed > maximum:
        parsed = maximum
    return parsed
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
def compact(text: str, limit: int = 160) -> str:
    """Collapse whitespace runs in *text* and trim it for one-line display.

    Text longer than *limit* keeps its first ``limit - 20`` characters
    (never fewer than zero) followed by a marker recording the collapsed
    length.
    """
    collapsed = " ".join(str(text).split())
    total = len(collapsed)
    if total > limit:
        keep = max(0, limit - 20)
        return f"{collapsed[:keep]}…[trimmed:{total}]"
    return collapsed
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
def read_json_limited(path: Path, max_bytes: int | None = None) -> tuple[Any | None, list[str]]:
    """Read and parse a JSON config file without ever raising.

    Args:
        path: Location of the config file.
        max_bytes: Size limit in bytes; defaults to ``MAX_CONFIG_BYTES``.

    Returns:
        ``(value, [])`` on success, or ``(None, [message])`` when the file is
        unreadable, too large, not valid UTF-8, or not valid JSON.
    """
    limit = MAX_CONFIG_BYTES if max_bytes is None else max_bytes
    try:
        size = path.stat().st_size
        if size > limit:
            return None, [f"config file too large: {size}>{limit} bytes"]
        # st_size is not trustworthy for every file type, so the read itself
        # is bounded too; one extra byte is enough to detect an overrun.
        with path.open("rb") as handle:
            data = handle.read(limit + 1)
    except OSError as exc:
        return None, [f"could not read config: {exc.strerror or exc.__class__.__name__}"]
    if len(data) > limit:
        return None, [f"config file too large: >{limit} bytes"]
    try:
        raw = data.decode("utf-8")
    except UnicodeDecodeError as exc:
        # Not an OSError: previously this escaped and crashed the caller
        # before the wrapped command could run.
        return None, [f"config is not valid UTF-8 at byte {exc.start}"]
    try:
        return json.loads(raw), []
    except json.JSONDecodeError as exc:
        return None, [f"invalid JSON at line {exc.lineno}: {exc.msg}"]
    except RecursionError:
        # Deeply nested documents exhaust the parser's recursion budget.
        return None, ["invalid JSON: nesting too deep"]
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
def validate_str_list(value: Any, *, field: str, errors: list[str], max_items: int = MAX_REGEXES_PER_FILTER) -> list[str]:
    """Return the usable string items of *value*, recording problems in *errors*.

    ``None`` yields an empty list with no error. A non-list yields an empty
    list plus one error. Only the first *max_items* entries are inspected;
    blank, non-string or over-long entries are reported and skipped.
    """
    if value is None:
        return []
    if not isinstance(value, list):
        errors.append(f"{field} must be a list")
        return []
    if len(value) > max_items:
        errors.append(f"{field} has too many items: {len(value)}>{max_items}")
    accepted: list[str] = []
    for position, entry in enumerate(value[:max_items]):
        if isinstance(entry, str) and entry.strip():
            if len(entry) <= MAX_REGEX_CHARS:
                accepted.append(entry)
            else:
                errors.append(f"{field}[{position}] exceeds {MAX_REGEX_CHARS} chars")
        else:
            errors.append(f"{field}[{position}] must be a non-empty string")
    return accepted
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def compile_regexes(patterns: Iterable[str], *, field: str, errors: list[str]) -> tuple[re.Pattern[str], ...]:
    """Compile each pattern, skipping (and reporting) the ones that fail.

    Patterns rejected by ``re.compile`` append a message to *errors* that
    names their index within *patterns*; the rest are returned in order.
    """
    ready: list[re.Pattern[str]] = []
    for position, source in enumerate(patterns):
        try:
            matcher = re.compile(source)
        except re.error as problem:
            errors.append(f"{field}[{position}] invalid regex: {compact(str(problem), 120)}")
        else:
            ready.append(matcher)
    return tuple(ready)
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
def bounded_optional_int(raw: Any, *, field: str, errors: list[str], minimum: int = 0) -> int | None:
    """Validate an optional integer setting.

    Returns *raw* when it is a non-bool int within
    ``minimum..MAX_EMIT_LINES``. Returns ``None`` when *raw* is ``None``
    (no error) or invalid (one message appended to *errors*).
    """
    if raw is None:
        return None
    # bool is a subclass of int, so it has to be rejected explicitly.
    if isinstance(raw, bool) or not isinstance(raw, int):
        errors.append(f"{field} must be an integer")
        return None
    if minimum <= raw <= MAX_EMIT_LINES:
        return raw
    errors.append(f"{field} out of bounds: {minimum}..{MAX_EMIT_LINES}")
    return None
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
def validate_config(raw: Any) -> tuple[list[CompiledFilter], list[str]]:
    """Validate a parsed config document and compile its filters.

    Returns ``(filters, errors)``. Validation keeps going after most
    problems so that as many messages as possible are collected; callers
    treat any non-empty *errors* as an invalid config.
    """
    if not isinstance(raw, dict):
        return [], ["config root must be a JSON object"]
    problems: list[str] = []
    extra_root = sorted(set(raw) - {"schema_version", "filters"})
    if extra_root:
        problems.append(f"unknown root keys: {', '.join(extra_root)}")
    if raw.get("schema_version") != SCHEMA_VERSION:
        problems.append(f"schema_version must be {SCHEMA_VERSION}")
    entries = raw.get("filters")
    if not (isinstance(entries, list) and entries):
        problems.append("filters must be a non-empty list")
        return [], problems
    if len(entries) > MAX_FILTERS:
        problems.append(f"filters has too many items: {len(entries)}>{MAX_FILTERS}")

    used_ids: set[str] = set()
    built: list[CompiledFilter] = []
    for position, entry in enumerate(entries[:MAX_FILTERS]):
        where = f"filters[{position}]"
        if not isinstance(entry, dict):
            problems.append(f"{where} must be an object")
            continue
        extra_keys = sorted(set(entry) - FILTER_KEYS)
        if extra_keys:
            problems.append(f"{where} unknown keys: {', '.join(extra_keys)}")

        # --- id: must be well-formed and unique ---
        filter_id = entry.get("id")
        if isinstance(filter_id, str) and re.fullmatch(r"[A-Za-z0-9._-]{1,80}", filter_id):
            if filter_id in used_ids:
                problems.append(f"{where}.id duplicates {filter_id}")
        else:
            problems.append(f"{where}.id must match [A-Za-z0-9._-] and be <=80 chars")
            filter_id = f"invalid-{position}"
        used_ids.add(str(filter_id))

        # --- match: at least one of argv_prefix / argv_regex ---
        matcher = entry.get("match")
        prefix_parts: tuple[str, ...] | None = None
        command_pattern: re.Pattern[str] | None = None
        if isinstance(matcher, dict):
            extra_match = sorted(set(matcher) - MATCH_KEYS)
            if extra_match:
                problems.append(f"{where}.match unknown keys: {', '.join(extra_match)}")
            if "argv_prefix" in matcher:
                parts = validate_str_list(matcher.get("argv_prefix"), field=f"{where}.match.argv_prefix", errors=problems, max_items=MAX_ARG_PARTS)
                problems.extend(
                    f"{where}.match.argv_prefix[{part_no}] exceeds {MAX_ARG_CHARS} chars"
                    for part_no, part in enumerate(parts)
                    if len(part) > MAX_ARG_CHARS
                )
                if parts:
                    prefix_parts = tuple(parts)
            if "argv_regex" in matcher:
                source = matcher.get("argv_regex")
                if not isinstance(source, str) or not source.strip():
                    problems.append(f"{where}.match.argv_regex must be a non-empty string")
                elif len(source) > MAX_REGEX_CHARS:
                    problems.append(f"{where}.match.argv_regex exceeds {MAX_REGEX_CHARS} chars")
                else:
                    compiled_one = compile_regexes([source], field=f"{where}.match.argv_regex", errors=problems)
                    if compiled_one:
                        command_pattern = compiled_one[0]
            if not prefix_parts and command_pattern is None:
                problems.append(f"{where}.match requires argv_prefix or argv_regex")
        else:
            problems.append(f"{where}.match must be an object")

        # --- remaining scalar and list options ---
        on_exit = entry.get("passthrough_on_exit", True)
        if not isinstance(on_exit, bool):
            problems.append(f"{where}.passthrough_on_exit must be boolean")
            on_exit = True
        include_sources = validate_str_list(entry.get("include_regex"), field=f"{where}.include_regex", errors=problems)
        exclude_sources = validate_str_list(entry.get("exclude_regex"), field=f"{where}.exclude_regex", errors=problems)
        regex_total = len(include_sources) + len(exclude_sources)
        if regex_total > MAX_REGEXES_PER_FILTER:
            problems.append(f"{where} has too many regexes: {regex_total}>{MAX_REGEXES_PER_FILTER}")
        head_count = bounded_optional_int(entry.get("head_lines"), field=f"{where}.head_lines", errors=problems)
        tail_count = bounded_optional_int(entry.get("tail_lines"), field=f"{where}.tail_lines", errors=problems)
        line_cap = bounded_optional_int(entry.get("max_lines"), field=f"{where}.max_lines", errors=problems, minimum=1)
        # Compile include before exclude so regex errors keep their order.
        include_compiled = compile_regexes(include_sources, field=f"{where}.include_regex", errors=problems)
        exclude_compiled = compile_regexes(exclude_sources, field=f"{where}.exclude_regex", errors=problems)
        built.append(
            CompiledFilter(
                id=str(filter_id),
                argv_prefix=prefix_parts,
                argv_regex=command_pattern,
                passthrough_on_exit=on_exit,
                include_regex=include_compiled,
                exclude_regex=exclude_compiled,
                head_lines=head_count,
                tail_lines=tail_count,
                max_lines=line_cap,
            )
        )
    return built, problems
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
def load_filters(path: Path) -> tuple[list[CompiledFilter], list[str]]:
    """Read the config at *path* and return ``(filters, errors)``.

    Read or parse failures short-circuit with no filters; otherwise the
    parsed document is handed to ``validate_config``.
    """
    document, problems = read_json_limited(path)
    return ([], problems) if problems else validate_config(document)
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
def command_text(argv: list[str]) -> str:
    """Render *argv* as a single command-line string.

    Uses shell quoting via ``shlex.join`` and falls back to a plain
    space-join if that raises.
    """
    try:
        rendered = shlex.join(argv)
    except Exception:
        rendered = " ".join(argv)
    return rendered
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
def filter_matches(flt: CompiledFilter, argv: list[str]) -> bool:
    """Tell whether *flt* applies to the command *argv*.

    A filter matches when its ``argv_prefix`` equals the leading items of
    *argv*, or when its ``argv_regex`` is found in the rendered command
    text.
    """
    prefix = flt.argv_prefix
    if prefix is not None and tuple(argv[: len(prefix)]) == prefix:
        return True
    pattern = flt.argv_regex
    if pattern is None:
        return False
    return pattern.search(command_text(argv)) is not None
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
def basename(arg: str) -> str:
    """Return the lower-cased final path component of *arg*."""
    leaf = Path(arg).name
    return leaf.lower()
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
def argv_signal_tokens(argv: list[str]) -> set[str]:
    """Collect lower-cased name tokens from every argument in *argv*.

    For each argument, both its final path component and that component's
    alphanumeric fragments are added. Arguments whose final component is
    empty contribute nothing.
    """
    found: set[str] = set()
    for argument in argv:
        leaf = Path(argument).name.lower()
        if not leaf:
            continue
        found.add(leaf)
        for fragment in re.split(r"[^a-z0-9]+", leaf):
            if fragment:
                found.add(fragment)
    return found
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
def has_test_lint_signal(argv: list[str]) -> bool:
    """Report whether any token derived from *argv* names a test/lint tool or intent."""
    tokens = argv_signal_tokens(argv)
    names_tool = not tokens.isdisjoint(PROTECTED_DIRECT_NAMES)
    names_intent = not tokens.isdisjoint(PROTECTED_INTENT_TOKENS)
    return names_tool or names_intent
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
def is_protected_command(argv: list[str]) -> bool:
    """Decide whether *argv* invokes a command treated as protected.

    The decision is keyed on the lower-cased basename of ``argv[0]``:
    a fixed set of executables, ``python -m`` with a protected module,
    package-manager test/lint tasks, generic task runners whose arguments
    carry a test/lint token, ``go test`` and ``cargo test``/``clippy``.
    """
    if not argv:
        return False
    runner = basename(argv[0])
    argc = len(argv)
    if runner in PROTECTED_BASENAMES:
        return True
    if runner in {"python", "python3"}:
        return argc >= 3 and argv[1] == "-m" and basename(argv[2]) in PROTECTED_PYTHON_MODULES
    if runner in {"npm", "pnpm", "yarn"}:
        if argc < 2:
            return False
        subcommand = argv[1]
        if subcommand in PROTECTED_NPM_TASKS:
            return True
        # `run <script>` and `exec`/`x`/`dlx <tool>` are judged on what follows.
        if argc >= 3 and subcommand in {"run", "exec", "x", "dlx"}:
            return has_test_lint_signal(argv[2:])
        return False
    if runner in {"npx", "bun", "make", "gradle", "gradlew", "mvn", "poetry", "uv", "pipenv", "hatch", "tox"}:
        return has_test_lint_signal(argv)
    if runner == "go":
        return argc >= 2 and argv[1] == "test"
    if runner == "cargo":
        return argc >= 2 and argv[1] in {"test", "clippy"}
    return False
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
def cap_line(line: str, max_chars: int) -> str:
    """Shorten *line* so it never exceeds *max_chars* characters.

    Lines within the limit are returned unchanged. Longer lines are cut
    and end with a marker recording the original length; a trailing
    newline is preserved. When *max_chars* is too small to hold the
    marker, the line is hard-truncated without one, so the cap is still
    honoured.

    Args:
        line: One line of text, optionally ending in ``"\\n"``.
        max_chars: Maximum length of the returned string.

    Returns:
        The original or shortened line.
    """
    if len(line) <= max_chars:
        return line
    suffix = "\n" if line.endswith("\n") else ""
    marker = f"...[line capped:{len(line)} chars]"
    budget = max_chars - len(suffix)
    if budget < len(marker):
        # No room for the marker: previously the marker was emitted anyway
        # and the result overshot max_chars.
        return (line[: max(0, budget)] + suffix)[: max(0, max_chars)]
    return line[: budget - len(marker)] + marker + suffix
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
def select_lines(lines: list[str], flt: CompiledFilter, max_line_chars: int) -> list[str]:
    """Apply *flt*'s line rules to *lines* and return the surviving lines.

    Order of operations: cap each line's length, keep lines matching any
    include pattern, drop lines matching any exclude pattern, take the
    head/tail windows (without repeating lines where they overlap), then
    apply ``max_lines`` and the global ``MAX_EMIT_LINES`` cap.
    """
    kept = [cap_line(raw, max_line_chars) for raw in lines]
    includes = flt.include_regex
    if includes:
        kept = [row for row in kept if any(rx.search(row) for rx in includes)]
    excludes = flt.exclude_regex
    if excludes:
        kept = [row for row in kept if not any(rx.search(row) for rx in excludes)]
    if flt.head_lines is not None or flt.tail_lines is not None:
        head_count = flt.head_lines or 0
        tail_count = flt.tail_lines or 0
        leading = kept[:head_count] if head_count else []
        trailing = kept[-tail_count:] if tail_count else []
        if leading and trailing:
            # Drop from the tail whatever the head window already covers.
            overlap = len(leading) + len(trailing) - len(kept)
            trailing = trailing[max(0, overlap):]
        kept = leading + trailing
    if flt.max_lines is not None and len(kept) > flt.max_lines:
        kept = kept[: flt.max_lines]
    return kept[:MAX_EMIT_LINES] if len(kept) > MAX_EMIT_LINES else kept
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
def validation_payload(valid: bool, errors: list[str], count: int = 0) -> dict[str, Any]:
    """Build the JSON-serialisable result document for ``validate`` mode."""
    payload: dict[str, Any] = {}
    payload["tool"] = TOOL_NAME
    payload["schema_version"] = SCHEMA_VERSION
    payload["mode"] = "validate"
    payload["valid"] = valid
    payload["filter_count"] = count
    payload["errors"] = errors
    return payload
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
def print_validation(valid: bool, errors: list[str], count: int, as_json: bool) -> None:
    """Print a validation outcome.

    JSON mode writes one JSON document to stdout. Otherwise a valid config
    prints a summary to stdout, and an invalid one prints a heading plus
    one bullet per error to stderr.
    """
    if as_json:
        document = validation_payload(valid, errors, count)
        print(json.dumps(document, ensure_ascii=False, sort_keys=True))
        return
    if valid:
        print(f"{TOOL_NAME}: valid filter config ({count} filter(s))")
        return
    print(f"{TOOL_NAME}: invalid filter config", file=sys.stderr)
    for message in errors:
        print(f"- {message}", file=sys.stderr)
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
@dataclass
class CommandResult:
    """Outcome of ``run_command``; constructed positionally, so field order matters."""

    # Exit status: the process return code, TIMEOUT_EXIT_CODE on timeout, or
    # 127 when the command could not be started.
    returncode: int
    # Decoded captured output; both are "" once the capture limit was hit.
    stdout_text: str
    stderr_text: str
    # Total number of output bytes observed across both streams.
    output_bytes: int
    # True when output exceeded the capture limit.
    capture_limited: bool
    # True when the command exceeded its timeout.
    timed_out: bool
    # True when the pipe reader threads did not finish within the drain grace period.
    drain_timed_out: bool
    # True when output was already written to this process's own streams.
    passthrough_emitted: bool
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
def write_binary_chunk(stream: Any, chunk: bytes) -> None:
    """Write raw bytes to a text stream, bypassing its encoder when possible.

    The stream is flushed first so earlier text keeps its position. Bytes
    go to ``stream.buffer`` when that attribute exists; otherwise they are
    decoded as UTF-8 (with replacement) and written as text. Empty chunks
    are ignored.
    """
    if not chunk:
        return
    stream.flush()
    raw_sink = getattr(stream, "buffer", None)
    if raw_sink is None:
        stream.write(chunk.decode("utf-8", "replace"))
    else:
        raw_sink.write(chunk)
    stream.flush()
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
class BoundedCapture:
    """Buffer command output up to a byte limit, then switch to passthrough.

    Until the combined stdout+stderr size exceeds ``max_capture_bytes`` the
    bytes are stored. On the first overflow, everything stored so far plus
    the overflow is written to this process's stdout/stderr, and every
    later chunk is forwarded directly.
    """

    def __init__(self, max_capture_bytes: int) -> None:
        """Create an empty capture with the given combined byte limit."""
        self.max_capture_bytes = max_capture_bytes
        self.stdout = bytearray()
        self.stderr = bytearray()
        self.output_bytes = 0
        self.capture_limited = False
        self.passthrough_emitted = False
        # Guards the buffers, counters and flags above.
        self._lock = threading.Lock()
        # Serialises passthrough writes in ticket order.
        self._emit_condition = threading.Condition()
        self._next_emit_order = 0
        self._active_emit_order = 0
        self._stdout_decoder = codecs.getincrementaldecoder("utf-8")("replace")
        self._stderr_decoder = codecs.getincrementaldecoder("utf-8")("replace")

    def consume(self, stream_name: str, chunk: bytes) -> None:
        """Record *chunk* from ``"stdout"`` or another stream (treated as stderr).

        Stores the chunk while it fits. Otherwise the pending writes are
        collected under the lock and emitted afterwards, in the order in
        which tickets were handed out.
        """
        if not chunk:
            return
        passthrough: list[tuple[Any, bytes]] = []
        emit_order: int | None = None
        with self._lock:
            self.output_bytes += len(chunk)
            if self.capture_limited:
                # Already in passthrough mode: forward the chunk as-is.
                passthrough.append((sys.stdout if stream_name == "stdout" else sys.stderr, chunk))
            else:
                stored_total = len(self.stdout) + len(self.stderr)
                remaining = self.max_capture_bytes - stored_total
                target = self.stdout if stream_name == "stdout" else self.stderr
                if len(chunk) <= remaining:
                    target.extend(chunk)
                    return
                if remaining > 0:
                    target.extend(chunk[:remaining])
                    overflow = chunk[remaining:]
                else:
                    overflow = chunk
                self.capture_limited = True
                self.passthrough_emitted = True
                # First overflow: flush buffered stdout, buffered stderr,
                # then the part of this chunk that did not fit.
                passthrough.extend(
                    [
                        (sys.stdout, bytes(self.stdout)),
                        (sys.stderr, bytes(self.stderr)),
                        (sys.stdout if stream_name == "stdout" else sys.stderr, overflow),
                    ]
                )
            if passthrough:
                # Take a ticket while still holding the lock so emit order
                # follows the order in which chunks were accounted for.
                emit_order = self._next_emit_order
                self._next_emit_order += 1
        if emit_order is None:
            return
        with self._emit_condition:
            while emit_order != self._active_emit_order:
                self._emit_condition.wait()
        try:
            for stream, payload in passthrough:
                write_binary_chunk(stream, payload)
        finally:
            # Always advance the ticket, even if a write failed, so other
            # waiting callers are not blocked forever.
            with self._emit_condition:
                self._active_emit_order += 1
                self._emit_condition.notify_all()

    def text(self) -> tuple[str, str]:
        """Return the buffered ``(stdout, stderr)`` decoded as UTF-8 with replacement."""
        with self._lock:
            stdout_bytes = bytes(self.stdout)
            stderr_bytes = bytes(self.stderr)
        stdout = self._stdout_decoder.decode(stdout_bytes, final=True)
        stderr = self._stderr_decoder.decode(stderr_bytes, final=True)
        return stdout, stderr
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
def run_command(argv: list[str], timeout_seconds: int, max_capture_bytes: int) -> CommandResult:
    """Run *argv*, capturing stdout/stderr through a ``BoundedCapture``.

    Two daemon threads drain the child's pipes. On timeout the child is
    terminated (then killed if needed) and ``TIMEOUT_EXIT_CODE`` is
    reported. An empty *argv* or an ``OSError`` raised inside the main
    ``try`` yields return code 127 with a diagnostic in ``stderr_text``.
    """
    if not argv:
        stderr = f"{TOOL_NAME}: command failed to start: no command provided\n"
        output_bytes = len(stderr.encode("utf-8", "replace"))
        return CommandResult(127, "", stderr, output_bytes, False, False, False, False)
    capture = BoundedCapture(max_capture_bytes)

    def read_pipe(pipe: Any, stream_name: str) -> None:
        # Thread target: feed 64 KiB chunks to the capture until EOF, then close.
        try:
            while True:
                chunk = pipe.read(64 * 1024)
                if not chunk:
                    break
                capture.consume(stream_name, chunk)
        finally:
            try:
                pipe.close()
            except OSError:
                pass

    def terminate_processes(proc: subprocess.Popen[bytes], *, force: bool) -> None:
        # On POSIX, signal the group whose id is the child's pid; if that
        # fails for a reason other than "no such process", fall through and
        # signal the child directly.
        if os.name == "posix":
            try:
                os.killpg(proc.pid, signal.SIGKILL if force else signal.SIGTERM)
                return
            except ProcessLookupError:
                return
            except OSError:
                pass
        try:
            if proc.poll() is not None:
                return
            if force:
                proc.kill()
            else:
                proc.terminate()
        except (OSError, ValueError):
            pass

    def close_pipes(proc: subprocess.Popen[bytes]) -> None:
        # Best-effort close of both pipes.
        for pipe in (proc.stdout, proc.stderr):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                pass

    def join_threads_until(threads: tuple[threading.Thread, threading.Thread], deadline: float) -> bool:
        # Join both threads against one shared deadline; True if both finished.
        for thread in threads:
            remaining = max(0.0, deadline - time.monotonic())
            thread.join(timeout=remaining)
        return all(not thread.is_alive() for thread in threads)

    def terminate_and_close(proc: subprocess.Popen[bytes], threads: tuple[threading.Thread, threading.Thread]) -> None:
        # Escalation: polite terminate, short wait, then force kill and
        # close the pipes if the reader threads still have not finished.
        terminate_processes(proc, force=False)
        try:
            proc.wait(timeout=PIPE_THREAD_CLOSE_GRACE_SECONDS)
        except subprocess.TimeoutExpired:
            pass
        if join_threads_until(threads, time.monotonic() + PIPE_THREAD_CLOSE_GRACE_SECONDS):
            return
        terminate_processes(proc, force=True)
        try:
            proc.wait(timeout=PIPE_THREAD_CLOSE_GRACE_SECONDS)
        except subprocess.TimeoutExpired:
            pass
        close_pipes(proc)
        for thread in threads:
            thread.join(timeout=PIPE_THREAD_CLOSE_GRACE_SECONDS)

    try:
        # NOTE(review): started_at is assigned but not read in this function.
        started_at = time.monotonic()
        # On POSIX the child starts a new session, which is what makes the
        # killpg call in terminate_processes target it.
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, start_new_session=(os.name == "posix"))
        assert proc.stdout is not None
        assert proc.stderr is not None
        stdout_thread = threading.Thread(target=read_pipe, args=(proc.stdout, "stdout"), daemon=True)
        stderr_thread = threading.Thread(target=read_pipe, args=(proc.stderr, "stderr"), daemon=True)
        reader_threads = (stdout_thread, stderr_thread)
        stdout_thread.start()
        stderr_thread.start()
        timed_out = False
        drain_timed_out = False
        try:
            returncode = proc.wait(timeout=timeout_seconds)
        except subprocess.TimeoutExpired:
            timed_out = True
            returncode = TIMEOUT_EXIT_CODE
            terminate_and_close(proc, reader_threads)
        # Even after the wait ends, the readers may still be running; give
        # them a bounded grace period before tearing everything down.
        drain_deadline = time.monotonic() + TIMEOUT_PIPE_DRAIN_GRACE_SECONDS
        if not join_threads_until(reader_threads, drain_deadline):
            drain_timed_out = True
            terminate_and_close(proc, reader_threads)
        if timed_out:
            capture.consume("stderr", f"\n[{TOOL_NAME}] command timed out after {timeout_seconds}s\n".encode("utf-8"))
        elif drain_timed_out:
            capture.consume("stderr", f"\n[{TOOL_NAME}] command pipe drain timed out after direct process exit\n".encode("utf-8"))
        # Once the capture limit was hit the output has already been
        # forwarded, so no text is returned.
        stdout_text, stderr_text = ("", "") if capture.capture_limited else capture.text()
        return CommandResult(returncode, stdout_text, stderr_text, capture.output_bytes, capture.capture_limited, timed_out, drain_timed_out, capture.passthrough_emitted)
    except OSError as exc:
        stderr = f"{TOOL_NAME}: command failed to start: {exc.strerror or exc.__class__.__name__}\n"
        encoded = stderr.encode("utf-8", "replace")
        output_bytes = len(encoded)
        return CommandResult(127, "", stderr, output_bytes, False, False, False, False)
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
def emit_run_report(args: argparse.Namespace, payload: dict[str, Any]) -> None:
    """Write the run decision to stderr, unless it must stay silent.

    Nothing is written when the payload is flagged ``protected_nonzero``.
    With ``--json-report`` the whole payload is dumped as JSON. Otherwise
    only passthrough decisions with a reason other than ``no-match`` or
    ``nonzero-passthrough`` produce a one-line note.
    """
    if payload.get("protected_nonzero"):
        return
    if args.json_report:
        print(json.dumps(payload, ensure_ascii=False, sort_keys=True), file=sys.stderr)
        return
    if payload.get("decision") != "passthrough":
        return
    routine_reasons = {"no-match", "nonzero-passthrough"}
    if payload.get("reason") not in routine_reasons:
        print(f"{TOOL_NAME}: passthrough: {payload.get('reason')}", file=sys.stderr)
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
def cmd_validate(args: argparse.Namespace) -> int:
    """Handle the ``validate`` subcommand; return 0 when valid, 2 otherwise."""
    config_path = Path(args.config).expanduser()
    filters, errors = load_filters(config_path)
    is_valid = not errors
    print_validation(is_valid, errors, len(filters), args.json)
    return 0 if is_valid else 2
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
def cmd_run(args: argparse.Namespace) -> int:
    """Handle the ``run`` subcommand: run the command and filter or pass through.

    The command always runs. Its output is filtered only when the config
    is valid, the capture limit was not hit, no timeout occurred, a filter
    matches, and the exit status allows it. In every other case the
    captured stdout/stderr are replayed unchanged, unless they were
    already forwarded while capturing.

    Returns:
        The command's exit code, or 2 when no command was given.
    """
    command = list(args.command)
    if command and command[0] == "--":
        command = command[1:]
    if not command:
        print(f"{TOOL_NAME}: missing command", file=sys.stderr)
        return 2
    max_capture = bounded_int(args.max_capture_bytes, DEFAULT_MAX_CAPTURE_BYTES, 1, MAX_CAPTURE_BYTES_LIMIT)
    max_line_chars = bounded_int(args.max_line_chars, DEFAULT_MAX_LINE_CHARS, 1, MAX_LINE_CHARS_LIMIT)
    timeout_seconds = bounded_int(args.timeout_seconds, DEFAULT_TIMEOUT_SECONDS, 1, MAX_TIMEOUT_SECONDS)
    filters, errors = load_filters(Path(args.config).expanduser())
    result = run_command(command, timeout_seconds, max_capture)
    rc = result.returncode
    output = result.stdout_text + result.stderr_text
    protected_nonzero = rc != 0 and is_protected_command(command)
    report: dict[str, Any] = {"tool": TOOL_NAME, "schema_version": SCHEMA_VERSION, "mode": "run", "command_exit_code": rc, "decision": "passthrough", "reason": "unclassified", "protected_nonzero": protected_nonzero}
    if result.timed_out:
        report["reason"] = "timeout"
    elif result.drain_timed_out:
        report["reason"] = "pipe-drain-timeout"
    elif errors:
        report["reason"] = "invalid-config"
        report["errors"] = errors[:10]
    elif result.capture_limited:
        report["reason"] = "capture-limit"
        report["output_bytes"] = result.output_bytes
        report["max_capture_bytes"] = max_capture
    else:
        matched = next((flt for flt in filters if filter_matches(flt, command)), None)
        if matched is None:
            report["reason"] = "no-match"
        elif protected_nonzero:
            report["reason"] = "protected-nonzero"
            report["filter_id"] = matched.id
        elif rc != 0 and matched.passthrough_on_exit:
            report["reason"] = "nonzero-passthrough"
            report["filter_id"] = matched.id
        else:
            try:
                # Split each stream separately. Splitting the concatenation
                # fused an unterminated last stdout line with the first
                # stderr line, so line-anchored filter patterns missed it.
                stdout_lines = result.stdout_text.splitlines(keepends=True)
                stderr_lines = result.stderr_text.splitlines(keepends=True)
                if stdout_lines and stderr_lines:
                    last = stdout_lines[-1]
                    # Equal to its terminator-stripped form means the line
                    # has no terminator; add one to keep the streams apart.
                    if last.splitlines()[0] == last:
                        stdout_lines[-1] = last + "\n"
                lines = stdout_lines + stderr_lines
                filtered = select_lines(lines, matched, max_line_chars)
            except re.error as exc:
                report["reason"] = f"filter-error:{compact(str(exc), 80)}"
                report["filter_id"] = matched.id
            else:
                if output and not filtered:
                    # Never replace real output with nothing; pass it through.
                    report["reason"] = "empty-output-fallback"
                    report["filter_id"] = matched.id
                else:
                    sys.stdout.write("".join(filtered))
                    report.update({"decision": "filtered", "reason": "matched", "filter_id": matched.id, "input_lines": len(lines), "output_lines": len(filtered)})
                    emit_run_report(args, report)
                    return rc
    # Passthrough: replay the captured streams unless they were already
    # forwarded when the capture limit was hit.
    if not result.passthrough_emitted:
        sys.stdout.write(result.stdout_text)
        sys.stderr.write(result.stderr_text)
    emit_run_report(args, report)
    return rc
|
|
612
|
-
|
|
613
|
-
def build_parser() -> argparse.ArgumentParser:
    """Construct the CLI parser with its ``validate`` and ``run`` subcommands."""
    root = argparse.ArgumentParser(prog=TOOL_NAME, description="Validate and apply bounded declarative command-output filters. Filtered mode applies line rules to combined stdout+stderr and writes the filtered result to stdout; passthrough mode preserves stdout/stderr streams.")
    subcommands = root.add_subparsers(dest="command_name", required=True)

    validate_cmd = subcommands.add_parser("validate", help="validate a filter DSL JSON file")
    validate_cmd.add_argument("--config", required=True, help="path to user-owned filter JSON")
    validate_cmd.add_argument("--json", action="store_true", help="emit validation result as JSON")
    validate_cmd.set_defaults(func=cmd_validate)

    run_cmd = subcommands.add_parser("run", help="run a command and apply the first matching safe filter")
    run_cmd.add_argument("--config", required=True, help="path to user-owned filter JSON")
    run_cmd.add_argument("--json-report", action="store_true", help="emit filter decision JSON to stderr; protected nonzero passthrough suppresses reports to preserve raw stderr")
    numeric_options = (
        ("--max-capture-bytes", DEFAULT_MAX_CAPTURE_BYTES),
        ("--max-line-chars", DEFAULT_MAX_LINE_CHARS),
        ("--timeout-seconds", DEFAULT_TIMEOUT_SECONDS),
    )
    for flag, fallback in numeric_options:
        run_cmd.add_argument(flag, type=int, default=fallback)
    # Everything after the options is the command to execute.
    run_cmd.add_argument("command", nargs=argparse.REMAINDER)
    run_cmd.set_defaults(func=cmd_run)
    return root
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
def main() -> int:
    """Parse the command line, dispatch to the subcommand, return its exit code."""
    namespace = build_parser().parse_args()
    handler = namespace.func
    return int(handler(namespace))
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
# Script entry point: exit the interpreter with the status returned by main().
if __name__ == "__main__":
    raise SystemExit(main())
|