@ictechgy/context-guard 0.4.9 → 0.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/CHANGELOG.md +28 -0
  2. package/README.ko.md +59 -31
  3. package/README.md +85 -36
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  8. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  9. package/docs/benchmark-workflow-examples.md +3 -0
  10. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
  11. package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
  12. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
  13. package/docs/distribution.md +10 -7
  14. package/docs/experimental-benchmark-fixtures.md +30 -6
  15. package/package.json +4 -6
  16. package/packaging/homebrew/context-guard.rb.template +1 -1
  17. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  18. package/plugins/context-guard/README.ko.md +20 -14
  19. package/plugins/context-guard/README.md +26 -17
  20. package/plugins/context-guard/bin/context-guard +147 -25
  21. package/plugins/context-guard/bin/context-guard-artifact +884 -79
  22. package/plugins/context-guard/bin/context-guard-audit +33 -2
  23. package/plugins/context-guard/bin/context-guard-bench +1542 -31
  24. package/plugins/context-guard/bin/context-guard-cache-score +665 -0
  25. package/plugins/context-guard/bin/context-guard-compress +146 -1
  26. package/plugins/context-guard/bin/context-guard-cost +790 -6
  27. package/plugins/context-guard/bin/context-guard-experiments +463 -26
  28. package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
  29. package/plugins/context-guard/bin/context-guard-filter +163 -7
  30. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  31. package/plugins/context-guard/bin/context-guard-pack +892 -49
  32. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  33. package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
  34. package/plugins/context-guard/bin/context-guard-setup +165 -31
  35. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  36. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  37. package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
  38. package/plugins/context-guard/bin/context-guard-trim-output +288 -41
  39. package/plugins/context-guard/brief/README.md +5 -5
  40. package/plugins/context-guard/lib/context_guard_commands.py +230 -0
  41. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  42. package/context-guard-kit/README.md +0 -91
  43. package/context-guard-kit/benchmark_runner.py +0 -2401
  44. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  45. package/context-guard-kit/context_compress.py +0 -695
  46. package/context-guard-kit/context_escrow.py +0 -935
  47. package/context-guard-kit/context_filter.py +0 -637
  48. package/context-guard-kit/context_guard_cli.py +0 -325
  49. package/context-guard-kit/context_guard_diet.py +0 -1711
  50. package/context-guard-kit/context_pack.py +0 -2713
  51. package/context-guard-kit/cost_guard.py +0 -2349
  52. package/context-guard-kit/experimental_registry.py +0 -4348
  53. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  54. package/context-guard-kit/guard_large_read.py +0 -690
  55. package/context-guard-kit/hook_secret_patterns.py +0 -43
  56. package/context-guard-kit/read_symbol.py +0 -483
  57. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  58. package/context-guard-kit/sanitize_output.py +0 -725
  59. package/context-guard-kit/settings.example.json +0 -67
  60. package/context-guard-kit/setup_wizard.py +0 -2515
  61. package/context-guard-kit/statusline.sh +0 -362
  62. package/context-guard-kit/statusline_merged.sh +0 -157
  63. package/context-guard-kit/tool_schema_pruner.py +0 -837
  64. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -1,637 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Validate and apply bounded declarative command-output filters.
3
-
4
- This helper is intentionally opt-in. User filter configs live outside package
5
- code and invalid/no-match/failure cases pass command output through rather than
6
- risk hiding evidence.
7
- """
8
- from __future__ import annotations
9
-
10
- import argparse
11
- import codecs
12
- from dataclasses import dataclass
13
- import json
14
- import os
15
- from pathlib import Path
16
- import re
17
- import shlex
18
- import signal
19
- import subprocess
20
- import sys
21
- import threading
22
- import time
23
- from typing import Any, Iterable
24
-
25
# Value a config's "schema_version" key must equal to pass validation.
SCHEMA_VERSION = "contextguard.filter-dsl.v1"
# Program name used as the argparse prog and as the prefix of diagnostics.
TOOL_NAME = "context-guard-filter"
# Config files whose stat size exceeds this are rejected without being read.
MAX_CONFIG_BYTES = 1_000_000
# Upper bound on entries in the config's "filters" list.
MAX_FILTERS = 100
# Upper bound on include_regex + exclude_regex entries for a single filter
# (also the default item cap for validate_str_list).
MAX_REGEXES_PER_FILTER = 20
# Maximum length of any single pattern/string-list item.
MAX_REGEX_CHARS = 500
# Maximum number of parts in match.argv_prefix.
MAX_ARG_PARTS = 64
# Maximum length of a single match.argv_prefix part.
MAX_ARG_CHARS = 200
# Default and ceiling for --max-capture-bytes (combined stdout+stderr buffer).
DEFAULT_MAX_CAPTURE_BYTES = 5_000_000
MAX_CAPTURE_BYTES_LIMIT = 50_000_000
# Default and ceiling for --max-line-chars (per-line cap before filtering).
DEFAULT_MAX_LINE_CHARS = 100_000
MAX_LINE_CHARS_LIMIT = 1_000_000
# Ceiling for head_lines/tail_lines/max_lines and hard cap on emitted lines.
MAX_EMIT_LINES = 5_000
# Default and ceiling for --timeout-seconds.
DEFAULT_TIMEOUT_SECONDS = 600
MAX_TIMEOUT_SECONDS = 86_400
# Exit code reported by run_command when the command times out.
TIMEOUT_EXIT_CODE = 124
# How long run_command waits for the pipe reader threads after the wait phase.
TIMEOUT_PIPE_DRAIN_GRACE_SECONDS = 5.0
# Per-step grace period used while terminating the process and joining readers.
PIPE_THREAD_CLOSE_GRACE_SECONDS = 1.0
# Keys accepted inside one filter object / inside its "match" object.
FILTER_KEYS = {"id", "match", "passthrough_on_exit", "include_regex", "exclude_regex", "head_lines", "tail_lines", "max_lines"}
MATCH_KEYS = {"argv_prefix", "argv_regex"}
# Executable basenames that is_protected_command always treats as protected.
PROTECTED_BASENAMES = {
    "git",
    "gh",
    "pytest",
    "ruff",
    "mypy",
    "eslint",
    "vitest",
    "jest",
}
# npm/pnpm/yarn subcommands treated as protected when given directly.
PROTECTED_NPM_TASKS = {"test", "lint"}
# Modules treated as protected when run via "python -m <module>".
PROTECTED_PYTHON_MODULES = {"pytest", "ruff", "mypy"}
# Token sets checked by has_test_lint_signal against argv-derived tokens.
PROTECTED_DIRECT_NAMES = {"pytest", "ruff", "mypy", "eslint", "vitest", "jest", "tox"}
PROTECTED_INTENT_TOKENS = {"test", "tests", "lint", "clippy"}
59
-
60
-
61
@dataclass(frozen=True)
class CompiledFilter:
    """One filter rule from the config, with its regexes already compiled."""

    # Identifier from the config's "id" field ("invalid-<idx>" when it was rejected).
    id: str
    # Exact leading argv parts that select this filter, or None when not configured.
    argv_prefix: tuple[str, ...] | None
    # Pattern searched against the rendered command line, or None when not configured.
    argv_regex: re.Pattern[str] | None
    # When True, a nonzero command exit bypasses filtering (output is passed through).
    passthrough_on_exit: bool
    # A line is kept only if at least one of these matches (when non-empty).
    include_regex: tuple[re.Pattern[str], ...]
    # A line is dropped if any of these matches.
    exclude_regex: tuple[re.Pattern[str], ...]
    # Number of leading / trailing lines to keep; None means not configured.
    head_lines: int | None
    tail_lines: int | None
    # Cap on the number of lines emitted after all other rules; None means no cap
    # beyond MAX_EMIT_LINES.
    max_lines: int | None
72
-
73
-
74
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
    """Convert *value* to an int and clamp it into ``minimum..maximum``.

    When *value* cannot be converted, *default* is returned as-is (it is not
    clamped).
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    # Raise to the floor first, then lower to the ceiling.
    floored = parsed if parsed >= minimum else minimum
    return floored if floored <= maximum else maximum
80
-
81
-
82
def compact(text: str, limit: int = 160) -> str:
    """Collapse whitespace runs to single spaces and shorten long text.

    Text longer than *limit* is cut to ``limit - 20`` characters (never fewer
    than zero) and suffixed with a marker carrying the collapsed length.
    """
    collapsed = " ".join(str(text).split())
    total = len(collapsed)
    if total > limit:
        keep = max(0, limit - 20)
        return f"{collapsed[:keep]}…[trimmed:{total}]"
    return collapsed
87
-
88
-
89
def read_json_limited(path: Path) -> tuple[Any | None, list[str]]:
    """Read and parse a JSON config file without ever raising.

    Returns ``(parsed, [])`` on success, or ``(None, [message])`` when the
    file is too large, unreadable, not valid UTF-8, or not valid JSON.

    Every failure is reported as an error message rather than an exception so
    that callers can fall back to passthrough instead of crashing.
    """
    try:
        size = path.stat().st_size
        if size > MAX_CONFIG_BYTES:
            return None, [f"config file too large: {size}>{MAX_CONFIG_BYTES} bytes"]
        # Read at most one byte past the limit: st_size is not trustworthy for
        # pipes/FIFOs (it reports 0) and the file may grow after the stat call.
        with path.open("rb") as handle:
            data = handle.read(MAX_CONFIG_BYTES + 1)
    except OSError as exc:
        return None, [f"could not read config: {exc.strerror or exc.__class__.__name__}"]
    if len(data) > MAX_CONFIG_BYTES:
        return None, [f"config file too large: >{MAX_CONFIG_BYTES} bytes"]
    try:
        raw = data.decode("utf-8")
    except UnicodeDecodeError as exc:
        # UnicodeDecodeError is a ValueError, not an OSError, so it needs its
        # own handler.
        return None, [f"config is not valid UTF-8 (byte offset {exc.start})"]
    try:
        return json.loads(raw), []
    except json.JSONDecodeError as exc:
        return None, [f"invalid JSON at line {exc.lineno}: {exc.msg}"]
    except RecursionError:
        # Deeply nested arrays/objects exhaust the decoder's recursion budget.
        return None, ["invalid JSON: nesting too deep"]
101
-
102
-
103
def validate_str_list(value: Any, *, field: str, errors: list[str], max_items: int = MAX_REGEXES_PER_FILTER) -> list[str]:
    """Return the acceptable strings from *value*, recording problems in *errors*.

    ``None`` yields an empty list with no error. A non-list yields an empty
    list plus an error. Only the first *max_items* entries are inspected; each
    must be a non-blank string no longer than ``MAX_REGEX_CHARS``.
    """
    if value is None:
        return []
    if not isinstance(value, list):
        errors.append(f"{field} must be a list")
        return []
    if len(value) > max_items:
        errors.append(f"{field} has too many items: {len(value)}>{max_items}")
    accepted: list[str] = []
    for position, entry in enumerate(value[:max_items]):
        if not (isinstance(entry, str) and entry.strip()):
            errors.append(f"{field}[{position}] must be a non-empty string")
        elif len(entry) > MAX_REGEX_CHARS:
            errors.append(f"{field}[{position}] exceeds {MAX_REGEX_CHARS} chars")
        else:
            accepted.append(entry)
    return accepted
121
-
122
-
123
def compile_regexes(patterns: Iterable[str], *, field: str, errors: list[str]) -> tuple[re.Pattern[str], ...]:
    """Compile each pattern, reporting failures in *errors* instead of raising.

    Returns the successfully compiled patterns in input order. A pattern that
    cannot be compiled is skipped and described in *errors* as
    ``"<field>[<index>] invalid regex: ..."``.
    """
    compiled: list[re.Pattern[str]] = []
    for idx, pattern in enumerate(patterns):
        try:
            compiled.append(re.compile(pattern))
        except (re.error, OverflowError, RecursionError) as exc:
            # re.compile raises OverflowError for oversized repeat counts
            # (e.g. "a{4294967296}") and may raise RecursionError for deeply
            # nested groups; user-supplied patterns must not crash validation.
            errors.append(f"{field}[{idx}] invalid regex: {compact(str(exc), 120)}")
    return tuple(compiled)
131
-
132
-
133
def bounded_optional_int(raw: Any, *, field: str, errors: list[str], minimum: int = 0) -> int | None:
    """Validate an optional integer setting.

    Returns *raw* when it is a non-bool int within ``minimum..MAX_EMIT_LINES``.
    Returns ``None`` when *raw* is ``None`` (no error) or invalid (an error is
    appended to *errors*).
    """
    if raw is None:
        return None
    # bool is a subclass of int, so it has to be excluded explicitly.
    is_plain_int = isinstance(raw, int) and not isinstance(raw, bool)
    if not is_plain_int:
        errors.append(f"{field} must be an integer")
        return None
    if minimum <= raw <= MAX_EMIT_LINES:
        return raw
    errors.append(f"{field} out of bounds: {minimum}..{MAX_EMIT_LINES}")
    return None
143
-
144
-
145
def validate_config(raw: Any) -> tuple[list[CompiledFilter], list[str]]:
    """Validate a parsed filter config and compile its filters.

    Returns ``(filters, errors)``. Validation keeps going after most problems
    so that as many errors as possible are reported in one pass; filters are
    still appended to the result when they have errors, so a caller must treat
    a non-empty *errors* list as "config invalid" regardless of *filters*.
    """
    errors: list[str] = []
    if not isinstance(raw, dict):
        return [], ["config root must be a JSON object"]
    unknown_root = sorted(set(raw) - {"schema_version", "filters"})
    if unknown_root:
        errors.append(f"unknown root keys: {', '.join(unknown_root)}")
    if raw.get("schema_version") != SCHEMA_VERSION:
        errors.append(f"schema_version must be {SCHEMA_VERSION}")
    filters_raw = raw.get("filters")
    if not isinstance(filters_raw, list) or not filters_raw:
        errors.append("filters must be a non-empty list")
        return [], errors
    if len(filters_raw) > MAX_FILTERS:
        errors.append(f"filters has too many items: {len(filters_raw)}>{MAX_FILTERS}")
    seen_ids: set[str] = set()
    compiled: list[CompiledFilter] = []
    # Only the first MAX_FILTERS entries are inspected; the overflow was reported above.
    for idx, item in enumerate(filters_raw[:MAX_FILTERS]):
        prefix = f"filters[{idx}]"
        if not isinstance(item, dict):
            errors.append(f"{prefix} must be an object")
            continue
        unknown = sorted(set(item) - FILTER_KEYS)
        if unknown:
            errors.append(f"{prefix} unknown keys: {', '.join(unknown)}")
        fid = item.get("id")
        if not isinstance(fid, str) or not re.fullmatch(r"[A-Za-z0-9._-]{1,80}", fid):
            errors.append(f"{prefix}.id must match [A-Za-z0-9._-] and be <=80 chars")
            # Placeholder id so the filter can still be constructed below.
            fid = f"invalid-{idx}"
        elif fid in seen_ids:
            errors.append(f"{prefix}.id duplicates {fid}")
        seen_ids.add(str(fid))
        match = item.get("match")
        argv_prefix: tuple[str, ...] | None = None
        argv_regex: re.Pattern[str] | None = None
        if not isinstance(match, dict):
            errors.append(f"{prefix}.match must be an object")
        else:
            unknown_match = sorted(set(match) - MATCH_KEYS)
            if unknown_match:
                errors.append(f"{prefix}.match unknown keys: {', '.join(unknown_match)}")
            if "argv_prefix" in match:
                parts = validate_str_list(match.get("argv_prefix"), field=f"{prefix}.match.argv_prefix", errors=errors, max_items=MAX_ARG_PARTS)
                for part_idx, part in enumerate(parts):
                    if len(part) > MAX_ARG_CHARS:
                        errors.append(f"{prefix}.match.argv_prefix[{part_idx}] exceeds {MAX_ARG_CHARS} chars")
                if parts:
                    argv_prefix = tuple(parts)
            if "argv_regex" in match:
                pattern = match.get("argv_regex")
                if not isinstance(pattern, str) or not pattern.strip():
                    errors.append(f"{prefix}.match.argv_regex must be a non-empty string")
                elif len(pattern) > MAX_REGEX_CHARS:
                    errors.append(f"{prefix}.match.argv_regex exceeds {MAX_REGEX_CHARS} chars")
                else:
                    compiled_argv_regex = compile_regexes([pattern], field=f"{prefix}.match.argv_regex", errors=errors)
                    argv_regex = compiled_argv_regex[0] if compiled_argv_regex else None
            # NOTE(review): nesting of this check was reconstructed from an
            # indentation-stripped listing — confirm it belongs inside this branch.
            if not argv_prefix and argv_regex is None:
                errors.append(f"{prefix}.match requires argv_prefix or argv_regex")
        passthrough = item.get("passthrough_on_exit", True)
        if not isinstance(passthrough, bool):
            errors.append(f"{prefix}.passthrough_on_exit must be boolean")
            passthrough = True
        include = validate_str_list(item.get("include_regex"), field=f"{prefix}.include_regex", errors=errors)
        exclude = validate_str_list(item.get("exclude_regex"), field=f"{prefix}.exclude_regex", errors=errors)
        # The per-filter regex budget applies to include and exclude combined.
        if len(include) + len(exclude) > MAX_REGEXES_PER_FILTER:
            errors.append(f"{prefix} has too many regexes: {len(include) + len(exclude)}>{MAX_REGEXES_PER_FILTER}")
        head = bounded_optional_int(item.get("head_lines"), field=f"{prefix}.head_lines", errors=errors)
        tail = bounded_optional_int(item.get("tail_lines"), field=f"{prefix}.tail_lines", errors=errors)
        max_lines = bounded_optional_int(item.get("max_lines"), field=f"{prefix}.max_lines", errors=errors, minimum=1)
        compiled.append(CompiledFilter(
            id=str(fid),
            argv_prefix=argv_prefix,
            argv_regex=argv_regex,
            passthrough_on_exit=passthrough,
            include_regex=compile_regexes(include, field=f"{prefix}.include_regex", errors=errors),
            exclude_regex=compile_regexes(exclude, field=f"{prefix}.exclude_regex", errors=errors),
            head_lines=head,
            tail_lines=tail,
            max_lines=max_lines,
        ))
    return compiled, errors
227
-
228
-
229
def load_filters(path: Path) -> tuple[list[CompiledFilter], list[str]]:
    """Read the config at *path* and validate it.

    Read failures short-circuit with an empty filter list; otherwise the
    result of ``validate_config`` is returned unchanged.
    """
    parsed, problems = read_json_limited(path)
    return ([], problems) if problems else validate_config(parsed)
234
-
235
-
236
def command_text(argv: list[str]) -> str:
    """Render *argv* as one shell-quoted string, falling back to a plain join."""
    try:
        rendered = shlex.join(argv)
    except Exception:
        rendered = " ".join(argv)
    return rendered
241
-
242
-
243
def filter_matches(flt: CompiledFilter, argv: list[str]) -> bool:
    """Report whether *flt* applies to the command *argv*.

    A filter applies when its ``argv_prefix`` equals the leading elements of
    *argv*, or when its ``argv_regex`` is found in the rendered command line.
    """
    prefix = flt.argv_prefix
    if prefix is not None and tuple(argv[: len(prefix)]) == prefix:
        return True
    pattern = flt.argv_regex
    return pattern is not None and bool(pattern.search(command_text(argv)))
249
-
250
-
251
def basename(arg: str) -> str:
    """Return the lowercased final path component of *arg*."""
    final_component = Path(arg).name
    return final_component.lower()
253
-
254
-
255
def argv_signal_tokens(argv: list[str]) -> set[str]:
    """Collect lowercase tokens from *argv* for test/lint detection.

    For each argument, the lowercased final path component is added, followed
    by its pieces split on anything other than ``a-z`` or ``0-9``. Arguments
    whose final component is empty contribute nothing.
    """
    found: set[str] = set()
    for raw_arg in argv:
        name = Path(raw_arg).name.lower()
        if not name:
            continue
        found.add(name)
        found.update(filter(None, re.split(r"[^a-z0-9]+", name)))
    return found
263
-
264
-
265
def has_test_lint_signal(argv: list[str]) -> bool:
    """Report whether any token derived from *argv* names a test or lint tool/intent."""
    observed = argv_signal_tokens(argv)
    if observed & PROTECTED_DIRECT_NAMES:
        return True
    return bool(observed & PROTECTED_INTENT_TOKENS)
268
-
269
-
270
def is_protected_command(argv: list[str]) -> bool:
    """Report whether *argv* is a command whose output is treated as protected.

    Covers the executables in ``PROTECTED_BASENAMES``, ``python -m`` with a
    protected module, npm/pnpm/yarn test and lint tasks (directly or through
    ``run``/``exec``/``x``/``dlx``), several task runners when a test or lint
    token appears in their arguments, ``go test``, and ``cargo test``/``clippy``.
    """
    if not argv:
        return False
    program = basename(argv[0])
    argc = len(argv)
    if program in PROTECTED_BASENAMES:
        return True
    if program in {"python", "python3"}:
        if argc >= 3 and argv[1] == "-m" and basename(argv[2]) in PROTECTED_PYTHON_MODULES:
            return True
    if program in {"npm", "pnpm", "yarn"} and argc >= 2:
        subcommand = argv[1]
        if subcommand in PROTECTED_NPM_TASKS:
            return True
        # "run" and the exec-style subcommands inspect the same remaining arguments.
        if argc >= 3 and subcommand in {"run", "exec", "x", "dlx"} and has_test_lint_signal(argv[2:]):
            return True
    runners = {"npx", "bun", "make", "gradle", "gradlew", "mvn", "poetry", "uv", "pipenv", "hatch", "tox"}
    if program in runners and has_test_lint_signal(argv):
        return True
    if argc >= 2:
        if program == "go" and argv[1] == "test":
            return True
        if program == "cargo" and argv[1] in {"test", "clippy"}:
            return True
    return False
292
-
293
-
294
def cap_line(line: str, max_chars: int) -> str:
    """Shorten *line* to about *max_chars*, appending a marker with its original length.

    A trailing ``"\\n"`` is preserved after the marker. Lines already within
    the limit are returned unchanged. When the marker alone does not fit, the
    result is just the marker (plus newline) and may exceed *max_chars*.
    """
    length = len(line)
    if length <= max_chars:
        return line
    newline = "\n" if line.endswith("\n") else ""
    note = f"...[line capped:{length} chars]"
    budget = max_chars - len(note) - len(newline)
    kept = line[:budget] if budget > 0 else ""
    return "".join((kept, note, newline))
300
-
301
-
302
def select_lines(lines: list[str], flt: CompiledFilter, max_line_chars: int) -> list[str]:
    """Apply *flt*'s line rules to *lines* and return the surviving lines.

    Order of operations: cap each line's length, keep lines matching any
    include pattern, drop lines matching any exclude pattern, take the
    head/tail windows (without repeating lines the two windows share), then
    apply ``max_lines`` and finally the global ``MAX_EMIT_LINES`` cap.
    """
    kept = [cap_line(raw, max_line_chars) for raw in lines]
    includes, excludes = flt.include_regex, flt.exclude_regex
    if includes:
        kept = [row for row in kept if any(rx.search(row) for rx in includes)]
    if excludes:
        kept = [row for row in kept if not any(rx.search(row) for rx in excludes)]
    want_head, want_tail = flt.head_lines, flt.tail_lines
    if not (want_head is None and want_tail is None):
        head_n = want_head or 0
        tail_n = want_tail or 0
        head = kept[:head_n] if head_n else []
        tail = kept[-tail_n:] if tail_n else []
        if head and tail:
            # Lines present in both windows are dropped from the tail.
            overlap = len(head) + len(tail) - len(kept)
            tail = tail[max(0, overlap):]
        kept = head + tail
    line_cap = flt.max_lines
    if line_cap is not None and len(kept) > line_cap:
        kept = kept[:line_cap]
    if len(kept) > MAX_EMIT_LINES:
        kept = kept[:MAX_EMIT_LINES]
    return kept
322
-
323
-
324
def validation_payload(valid: bool, errors: list[str], count: int = 0) -> dict[str, Any]:
    """Build the JSON-serialisable result dictionary for ``validate`` mode."""
    return dict(
        tool=TOOL_NAME,
        schema_version=SCHEMA_VERSION,
        mode="validate",
        valid=valid,
        filter_count=count,
        errors=errors,
    )
326
-
327
-
328
def print_validation(valid: bool, errors: list[str], count: int, as_json: bool) -> None:
    """Print the validation outcome.

    JSON mode writes one JSON object to stdout. Otherwise a success line goes
    to stdout, or a failure header followed by one line per error goes to
    stderr.
    """
    if as_json:
        document = validation_payload(valid, errors, count)
        print(json.dumps(document, ensure_ascii=False, sort_keys=True))
        return
    if valid:
        print(f"{TOOL_NAME}: valid filter config ({count} filter(s))")
        return
    print(f"{TOOL_NAME}: invalid filter config", file=sys.stderr)
    for problem in errors:
        print(f"- {problem}", file=sys.stderr)
337
-
338
-
339
@dataclass
class CommandResult:
    """Outcome of run_command: exit status, captured text, and capture/timeout flags."""

    # Exit code of the command (127 when it could not start, TIMEOUT_EXIT_CODE on timeout).
    returncode: int
    # Decoded captured output; both are empty strings when the capture limit was hit.
    stdout_text: str
    stderr_text: str
    # Total bytes the command produced (counted even past the capture limit).
    output_bytes: int
    # True when output exceeded the capture limit.
    capture_limited: bool
    # True when the command exceeded its timeout.
    timed_out: bool
    # True when the pipe reader threads did not finish within the drain grace period.
    drain_timed_out: bool
    # True when output was already written straight to the real stdout/stderr.
    passthrough_emitted: bool
349
-
350
-
351
def write_binary_chunk(stream: Any, chunk: bytes) -> None:
    """Write raw bytes to a text stream, flushing before and after.

    Uses the stream's underlying ``buffer`` when it has one; otherwise the
    bytes are decoded as UTF-8 with replacement and written as text. Empty
    chunks are ignored.
    """
    if chunk:
        # Flush pending text first so it is not reordered after the raw bytes.
        stream.flush()
        raw_sink = getattr(stream, "buffer", None)
        if raw_sink is None:
            stream.write(chunk.decode("utf-8", "replace"))
        else:
            raw_sink.write(chunk)
        stream.flush()
361
-
362
-
363
class BoundedCapture:
    """Buffer stdout/stderr bytes up to a combined limit, then switch to passthrough.

    Until the combined stored size would exceed ``max_capture_bytes``, chunks
    are appended to in-memory buffers. The first chunk that overflows flips
    the capture into passthrough mode: everything buffered so far plus the
    overflow is written to the real streams, and every later chunk is
    forwarded directly.
    """

    def __init__(self, max_capture_bytes: int) -> None:
        self.max_capture_bytes = max_capture_bytes
        self.stdout = bytearray()
        self.stderr = bytearray()
        # Total bytes seen, including bytes forwarded after the limit was hit.
        self.output_bytes = 0
        self.capture_limited = False
        self.passthrough_emitted = False
        # Guards the buffers, counters and flags above.
        self._lock = threading.Lock()
        # Ticket scheme: writers take a number under _lock and emit in that order.
        self._emit_condition = threading.Condition()
        self._next_emit_order = 0
        self._active_emit_order = 0
        self._stdout_decoder = codecs.getincrementaldecoder("utf-8")("replace")
        self._stderr_decoder = codecs.getincrementaldecoder("utf-8")("replace")

    def consume(self, stream_name: str, chunk: bytes) -> None:
        """Record *chunk* from ``"stdout"`` or ``"stderr"`` (any other name is treated as stderr)."""
        if not chunk:
            return
        passthrough: list[tuple[Any, bytes]] = []
        emit_order: int | None = None
        with self._lock:
            self.output_bytes += len(chunk)
            if self.capture_limited:
                # Already in passthrough mode: forward the chunk untouched.
                passthrough.append((sys.stdout if stream_name == "stdout" else sys.stderr, chunk))
            else:
                stored_total = len(self.stdout) + len(self.stderr)
                remaining = self.max_capture_bytes - stored_total
                target = self.stdout if stream_name == "stdout" else self.stderr
                if len(chunk) <= remaining:
                    target.extend(chunk)
                    return
                # The chunk does not fit: store what fits and treat the rest as overflow.
                if remaining > 0:
                    target.extend(chunk[:remaining])
                    overflow = chunk[remaining:]
                else:
                    overflow = chunk
                self.capture_limited = True
                self.passthrough_emitted = True
                # Replay everything buffered so far (all stdout, then all stderr),
                # followed by the part of this chunk that did not fit.
                passthrough.extend(
                    [
                        (sys.stdout, bytes(self.stdout)),
                        (sys.stderr, bytes(self.stderr)),
                        (sys.stdout if stream_name == "stdout" else sys.stderr, overflow),
                    ]
                )
            # NOTE(review): ticket assignment is assumed to happen while _lock is
            # held (nesting reconstructed from an indentation-stripped listing).
            if passthrough:
                emit_order = self._next_emit_order
                self._next_emit_order += 1
        if emit_order is None:
            return
        # Wait for our turn so writes reach the real streams in ticket order.
        with self._emit_condition:
            while emit_order != self._active_emit_order:
                self._emit_condition.wait()
        try:
            for stream, payload in passthrough:
                write_binary_chunk(stream, payload)
        finally:
            # Always advance the ticket, even if a write failed, so waiters are released.
            with self._emit_condition:
                self._active_emit_order += 1
                self._emit_condition.notify_all()

    def text(self) -> tuple[str, str]:
        """Return the buffered stdout and stderr decoded as UTF-8 (errors replaced)."""
        with self._lock:
            stdout_bytes = bytes(self.stdout)
            stderr_bytes = bytes(self.stderr)
        stdout = self._stdout_decoder.decode(stdout_bytes, final=True)
        stderr = self._stderr_decoder.decode(stderr_bytes, final=True)
        return stdout, stderr
431
-
432
-
433
def run_command(argv: list[str], timeout_seconds: int, max_capture_bytes: int) -> CommandResult:
    """Run *argv*, capturing stdout/stderr through a BoundedCapture.

    Two daemon threads drain the pipes. On timeout the process (its whole
    process group on POSIX) is terminated and ``TIMEOUT_EXIT_CODE`` is
    reported. If the readers are still busy after the drain grace period the
    result is flagged ``drain_timed_out``. A command that cannot be started
    yields return code 127 with a diagnostic in ``stderr_text``.
    """
    if not argv:
        stderr = f"{TOOL_NAME}: command failed to start: no command provided\n"
        output_bytes = len(stderr.encode("utf-8", "replace"))
        return CommandResult(127, "", stderr, output_bytes, False, False, False, False)
    capture = BoundedCapture(max_capture_bytes)

    def read_pipe(pipe: Any, stream_name: str) -> None:
        """Feed *pipe* into the capture in 64 KiB reads until EOF, then close it."""
        try:
            while True:
                chunk = pipe.read(64 * 1024)
                if not chunk:
                    break
                capture.consume(stream_name, chunk)
        finally:
            try:
                pipe.close()
            except OSError:
                pass

    def terminate_processes(proc: subprocess.Popen[bytes], *, force: bool) -> None:
        """Send SIGTERM (or SIGKILL when *force*) to the process group, else to the process."""
        if os.name == "posix":
            try:
                # The child was started in its own session, so its pid is the group id.
                os.killpg(proc.pid, signal.SIGKILL if force else signal.SIGTERM)
                return
            except ProcessLookupError:
                return
            except OSError:
                # Fall through to signalling the direct child only.
                pass
        try:
            if proc.poll() is not None:
                return
            if force:
                proc.kill()
            else:
                proc.terminate()
        except (OSError, ValueError):
            pass

    def close_pipes(proc: subprocess.Popen[bytes]) -> None:
        """Close the child's stdout/stderr pipes, ignoring OS errors."""
        for pipe in (proc.stdout, proc.stderr):
            if pipe is None:
                continue
            try:
                pipe.close()
            except OSError:
                pass

    def join_threads_until(threads: tuple[threading.Thread, threading.Thread], deadline: float) -> bool:
        """Join *threads* until the monotonic *deadline*; return True if all finished."""
        for thread in threads:
            remaining = max(0.0, deadline - time.monotonic())
            thread.join(timeout=remaining)
        return all(not thread.is_alive() for thread in threads)

    def terminate_and_close(proc: subprocess.Popen[bytes], threads: tuple[threading.Thread, threading.Thread]) -> None:
        """Escalate from a polite terminate to a forced kill and pipe close."""
        terminate_processes(proc, force=False)
        try:
            proc.wait(timeout=PIPE_THREAD_CLOSE_GRACE_SECONDS)
        except subprocess.TimeoutExpired:
            pass
        if join_threads_until(threads, time.monotonic() + PIPE_THREAD_CLOSE_GRACE_SECONDS):
            return
        # Readers are still blocked: force-kill, then close the pipes under them.
        terminate_processes(proc, force=True)
        try:
            proc.wait(timeout=PIPE_THREAD_CLOSE_GRACE_SECONDS)
        except subprocess.TimeoutExpired:
            pass
        close_pipes(proc)
        for thread in threads:
            thread.join(timeout=PIPE_THREAD_CLOSE_GRACE_SECONDS)

    try:
        # NOTE(review): started_at is assigned but never read in this function.
        started_at = time.monotonic()
        proc = subprocess.Popen(argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, start_new_session=(os.name == "posix"))
        assert proc.stdout is not None
        assert proc.stderr is not None
        stdout_thread = threading.Thread(target=read_pipe, args=(proc.stdout, "stdout"), daemon=True)
        stderr_thread = threading.Thread(target=read_pipe, args=(proc.stderr, "stderr"), daemon=True)
        reader_threads = (stdout_thread, stderr_thread)
        stdout_thread.start()
        stderr_thread.start()
        timed_out = False
        drain_timed_out = False
        try:
            returncode = proc.wait(timeout=timeout_seconds)
        except subprocess.TimeoutExpired:
            timed_out = True
            returncode = TIMEOUT_EXIT_CODE
            terminate_and_close(proc, reader_threads)
        # The direct child may have exited while something still holds the pipes open.
        drain_deadline = time.monotonic() + TIMEOUT_PIPE_DRAIN_GRACE_SECONDS
        if not join_threads_until(reader_threads, drain_deadline):
            drain_timed_out = True
            terminate_and_close(proc, reader_threads)
        # The notice goes through the capture so it is subject to the same limit/passthrough.
        if timed_out:
            capture.consume("stderr", f"\n[{TOOL_NAME}] command timed out after {timeout_seconds}s\n".encode("utf-8"))
        elif drain_timed_out:
            capture.consume("stderr", f"\n[{TOOL_NAME}] command pipe drain timed out after direct process exit\n".encode("utf-8"))
        # Once the limit was hit the output has already been written through, so no text is returned.
        stdout_text, stderr_text = ("", "") if capture.capture_limited else capture.text()
        return CommandResult(returncode, stdout_text, stderr_text, capture.output_bytes, capture.capture_limited, timed_out, drain_timed_out, capture.passthrough_emitted)
    except OSError as exc:
        stderr = f"{TOOL_NAME}: command failed to start: {exc.strerror or exc.__class__.__name__}\n"
        encoded = stderr.encode("utf-8", "replace")
        output_bytes = len(encoded)
        return CommandResult(127, "", stderr, output_bytes, False, False, False, False)
537
-
538
-
539
def emit_run_report(args: argparse.Namespace, payload: dict[str, Any]) -> None:
    """Write the run decision to stderr, if any report is due.

    Nothing is written when the payload is flagged ``protected_nonzero``.
    With ``args.json_report`` the whole payload is emitted as JSON; otherwise
    a one-line notice is printed only for passthrough decisions whose reason
    is neither ``no-match`` nor ``nonzero-passthrough``.
    """
    if payload.get("protected_nonzero"):
        return
    if args.json_report:
        print(json.dumps(payload, ensure_ascii=False, sort_keys=True), file=sys.stderr)
        return
    quiet_reasons = {"no-match", "nonzero-passthrough"}
    reason = payload.get("reason")
    if payload.get("decision") == "passthrough" and reason not in quiet_reasons:
        print(f"{TOOL_NAME}: passthrough: {reason}", file=sys.stderr)
546
-
547
-
548
def cmd_validate(args: argparse.Namespace) -> int:
    """Handle the ``validate`` subcommand; return 0 for a valid config, 2 otherwise."""
    config_path = Path(args.config).expanduser()
    filters, errors = load_filters(config_path)
    ok = not errors
    print_validation(ok, errors, len(filters), args.json)
    return 0 if ok else 2
552
-
553
-
554
def cmd_run(args: argparse.Namespace) -> int:
    """Handle the ``run`` subcommand: run the command and filter or pass through its output.

    Returns the command's exit code (2 when no command was given). Output is
    filtered only when the config is valid, the capture stayed within its
    limit, a filter matches, and neither the protected-command rule nor
    ``passthrough_on_exit`` applies; every other case passes the captured
    stdout/stderr through unchanged.
    """
    command = list(args.command)
    # argparse.REMAINDER keeps a leading "--" separator; drop it.
    if command and command[0] == "--":
        command = command[1:]
    if not command:
        print(f"{TOOL_NAME}: missing command", file=sys.stderr)
        return 2
    max_capture = bounded_int(args.max_capture_bytes, DEFAULT_MAX_CAPTURE_BYTES, 1, MAX_CAPTURE_BYTES_LIMIT)
    max_line_chars = bounded_int(args.max_line_chars, DEFAULT_MAX_LINE_CHARS, 1, MAX_LINE_CHARS_LIMIT)
    timeout_seconds = bounded_int(args.timeout_seconds, DEFAULT_TIMEOUT_SECONDS, 1, MAX_TIMEOUT_SECONDS)
    # The command still runs when the config is invalid; errors only force passthrough.
    filters, errors = load_filters(Path(args.config).expanduser())
    result = run_command(command, timeout_seconds, max_capture)
    rc = result.returncode
    # Filtering operates on stdout followed by stderr, not on an interleaved stream.
    output = result.stdout_text + result.stderr_text
    protected_nonzero = rc != 0 and is_protected_command(command)
    report: dict[str, Any] = {"tool": TOOL_NAME, "schema_version": SCHEMA_VERSION, "mode": "run", "command_exit_code": rc, "decision": "passthrough", "reason": "unclassified", "protected_nonzero": protected_nonzero}
    if result.timed_out:
        report["reason"] = "timeout"
    elif result.drain_timed_out:
        report["reason"] = "pipe-drain-timeout"
    elif errors:
        report["reason"] = "invalid-config"
        report["errors"] = errors[:10]
    elif result.capture_limited:
        report["reason"] = "capture-limit"
        report["output_bytes"] = result.output_bytes
        report["max_capture_bytes"] = max_capture
    else:
        # First matching filter wins.
        matched = next((flt for flt in filters if filter_matches(flt, command)), None)
        if matched is None:
            report["reason"] = "no-match"
        elif protected_nonzero:
            report["reason"] = "protected-nonzero"
            report["filter_id"] = matched.id
        elif rc != 0 and matched.passthrough_on_exit:
            report["reason"] = "nonzero-passthrough"
            report["filter_id"] = matched.id
        else:
            try:
                lines = output.splitlines(keepends=True)
                filtered = select_lines(lines, matched, max_line_chars)
            except re.error as exc:
                report["reason"] = f"filter-error:{compact(str(exc), 80)}"
                report["filter_id"] = matched.id
            else:
                if output and not filtered:
                    # A filter that removes everything falls back to the raw output.
                    report["reason"] = "empty-output-fallback"
                    report["filter_id"] = matched.id
                else:
                    # Filtered mode: the combined, filtered text goes to stdout only.
                    sys.stdout.write("".join(filtered))
                    report.update({"decision": "filtered", "reason": "matched", "filter_id": matched.id, "input_lines": len(lines), "output_lines": len(filtered)})
                    emit_run_report(args, report)
                    return rc
    # Passthrough: emit the captured text unless the capture already wrote it through.
    if not result.passthrough_emitted:
        sys.stdout.write(result.stdout_text)
        sys.stderr.write(result.stderr_text)
    emit_run_report(args, report)
    return rc
612
-
613
def build_parser() -> argparse.ArgumentParser:
    """Construct the CLI parser with its ``validate`` and ``run`` subcommands."""
    root = argparse.ArgumentParser(
        prog=TOOL_NAME,
        description="Validate and apply bounded declarative command-output filters. Filtered mode applies line rules to combined stdout+stderr and writes the filtered result to stdout; passthrough mode preserves stdout/stderr streams.",
    )
    commands = root.add_subparsers(dest="command_name", required=True)

    validate_cmd = commands.add_parser("validate", help="validate a filter DSL JSON file")
    validate_cmd.add_argument("--config", required=True, help="path to user-owned filter JSON")
    validate_cmd.add_argument("--json", action="store_true", help="emit validation result as JSON")
    validate_cmd.set_defaults(func=cmd_validate)

    run_cmd = commands.add_parser("run", help="run a command and apply the first matching safe filter")
    run_cmd.add_argument("--config", required=True, help="path to user-owned filter JSON")
    run_cmd.add_argument(
        "--json-report",
        action="store_true",
        help="emit filter decision JSON to stderr; protected nonzero passthrough suppresses reports to preserve raw stderr",
    )
    # The three numeric limits share the same option shape.
    numeric_options = (
        ("--max-capture-bytes", DEFAULT_MAX_CAPTURE_BYTES),
        ("--max-line-chars", DEFAULT_MAX_LINE_CHARS),
        ("--timeout-seconds", DEFAULT_TIMEOUT_SECONDS),
    )
    for flag, fallback in numeric_options:
        run_cmd.add_argument(flag, type=int, default=fallback)
    run_cmd.add_argument("command", nargs=argparse.REMAINDER)
    run_cmd.set_defaults(func=cmd_run)
    return root
629
-
630
-
631
def main() -> int:
    """Parse the command line and dispatch to the selected subcommand handler."""
    namespace = build_parser().parse_args()
    handler = namespace.func
    return int(handler(namespace))
634
-
635
-
636
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())