@ictechgy/context-guard 0.4.9 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.ko.md +59 -31
- package/README.md +85 -36
- package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
- package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
- package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
- package/docs/benchmark-workflow-examples.md +3 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
- package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
- package/docs/distribution.md +10 -7
- package/docs/experimental-benchmark-fixtures.md +30 -6
- package/package.json +4 -6
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +20 -14
- package/plugins/context-guard/README.md +26 -17
- package/plugins/context-guard/bin/context-guard +147 -25
- package/plugins/context-guard/bin/context-guard-artifact +884 -79
- package/plugins/context-guard/bin/context-guard-audit +33 -2
- package/plugins/context-guard/bin/context-guard-bench +1542 -31
- package/plugins/context-guard/bin/context-guard-cache-score +665 -0
- package/plugins/context-guard/bin/context-guard-compress +146 -1
- package/plugins/context-guard/bin/context-guard-cost +790 -6
- package/plugins/context-guard/bin/context-guard-experiments +463 -26
- package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
- package/plugins/context-guard/bin/context-guard-filter +163 -7
- package/plugins/context-guard/bin/context-guard-guard-read +3 -0
- package/plugins/context-guard/bin/context-guard-pack +892 -49
- package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
- package/plugins/context-guard/bin/context-guard-setup +165 -31
- package/plugins/context-guard/bin/context-guard-statusline +490 -283
- package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
- package/plugins/context-guard/bin/context-guard-trim-output +288 -41
- package/plugins/context-guard/brief/README.md +5 -5
- package/plugins/context-guard/lib/context_guard_commands.py +230 -0
- package/plugins/context-guard/skills/setup/SKILL.md +1 -0
- package/context-guard-kit/README.md +0 -91
- package/context-guard-kit/benchmark_runner.py +0 -2401
- package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
- package/context-guard-kit/context_compress.py +0 -695
- package/context-guard-kit/context_escrow.py +0 -935
- package/context-guard-kit/context_filter.py +0 -637
- package/context-guard-kit/context_guard_cli.py +0 -325
- package/context-guard-kit/context_guard_diet.py +0 -1711
- package/context-guard-kit/context_pack.py +0 -2713
- package/context-guard-kit/cost_guard.py +0 -2349
- package/context-guard-kit/experimental_registry.py +0 -4348
- package/context-guard-kit/failed_attempt_nudge.py +0 -567
- package/context-guard-kit/guard_large_read.py +0 -690
- package/context-guard-kit/hook_secret_patterns.py +0 -43
- package/context-guard-kit/read_symbol.py +0 -483
- package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
- package/context-guard-kit/sanitize_output.py +0 -725
- package/context-guard-kit/settings.example.json +0 -67
- package/context-guard-kit/setup_wizard.py +0 -2515
- package/context-guard-kit/statusline.sh +0 -362
- package/context-guard-kit/statusline_merged.sh +0 -157
- package/context-guard-kit/tool_schema_pruner.py +0 -837
- package/context-guard-kit/trim_command_output.py +0 -1449
|
@@ -1,501 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Claude Code PreToolUse hook: wrap noisy Bash commands.
|
|
3
|
-
|
|
4
|
-
Reads hook JSON from stdin and prints a JSON response understood by Claude Code.
|
|
5
|
-
Install via `.claude/settings.json` hooks. Keep this script project-local during
|
|
6
|
-
experiments so it can be versioned and reviewed.
|
|
7
|
-
"""
|
|
8
|
-
from __future__ import annotations
|
|
9
|
-
|
|
10
|
-
import json
|
|
11
|
-
import os
|
|
12
|
-
import re
|
|
13
|
-
import shlex
|
|
14
|
-
import sys
|
|
15
|
-
|
|
16
|
-
# Real shell control operators must be rejected once shlex has tokenized the
# command. Operator characters that sit inside a quoted search pattern, as in
# `rg "token|password"` or `grep "^foo$"`, are harmless and may be wrapped;
# genuine pipes, redirects, command substitutions, and sequencing may not.
SHELL_OPERATOR_TOKENS = {
    ";", ";;", ";&", ";;&",
    "&", "&&",
    "|", "||",
    "<", ">", "<<", ">>", "<>",
    "(", ")",
}
SHELL_OPERATOR_CHARS = frozenset(";&|<>()")
ENV_ASSIGNMENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_]*=.*")

# Basenames under which the trim / sanitize wrappers may be invoked.
WRAPPER_BASENAMES = frozenset((
    # trim wrapper
    "trim_command_output.py",
    "context-guard-trim-output",
    "claude-trim-output",
    # sanitize wrapper
    "sanitize_output.py",
    "context-guard-sanitize-output",
    "claude-sanitize-output",
))

# Environment switches that turn a "deny" into "leave the command unchanged".
FAIL_OPEN_ENV = "CONTEXT_GUARD_SANITIZER_FAIL_OPEN"
LEGACY_FAIL_OPEN_ENV = "CLAUDE_TOKEN_SANITIZER_FAIL_OPEN"
FAIL_OPEN_VALUES = {"1", "true", "yes", "on"}

# Matches any tool name of interest when it stands alone between whitespace or
# shell operator characters (or at either end of the string).
UNPARSEABLE_SANITIZER_RISK_RE = re.compile(
    r"(?i)(?:^|[\s;&|()])"
    r"(?:rg|grep|egrep|fgrep|journalctl|kubectl|oc|docker|podman|docker-compose|git|find)"
    r"(?:$|[\s;&|()])"
)

# Global options of kubectl/docker/podman/oc that consume the following token
# as their value. Absorbing forms such as `-n prod`, `--context=prod`, and
# `-f file.yml` lets `kubectl -n prod logs api`, `docker --context prod logs api`,
# and `docker compose -f compose.yml logs web` still reach the sanitize wrapper.
_VALUE_TAKING_FLAGS = frozenset((
    "-n", "--namespace",
    "--context",
    "--kubeconfig",
    "--cluster",
    "--user", "--token",
    "--as", "--as-group",
    "-s", "--server",
    "-c",
    "-H", "--host",
    "--config",
    "--log-level",
    "-f", "--file",
    "-p", "--project-name",
))

# `find` actions that go beyond plain path listing and can produce arbitrary
# command output. They may expose the contents of .env / credential files, so
# such invocations are routed to sanitize instead of trim.
_FIND_OUTPUT_RISK_ACTIONS = frozenset((
    "-delete",
    "-exec", "-execdir",
    "-ok", "-okdir",
    "-fprint", "-fprint0", "-fprintf", "-fls",
))
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def find_wrapper(kind: str) -> str | None:
    """Locate a wrapper script installed in the same directory as this file.

    ``kind == "sanitize"`` selects the sanitizer file names; any other value
    selects the trim file names. Within each pair the ``context-guard-*`` name
    is preferred over the ``.py`` name.

    Returns the path of the first candidate that exists, or ``None`` when
    neither candidate is present.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    if kind == "sanitize":
        names = ("context-guard-sanitize-output", "sanitize_output.py")
    else:
        names = ("context-guard-trim-output", "trim_command_output.py")
    located = (os.path.join(here, name) for name in names)
    return next((path for path in located if os.path.exists(path)), None)
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
def fail_open_source_env() -> str | None:
    """Return the name of the environment variable that enables fail-open.

    The canonical variable takes precedence whenever it is set at all: if its
    value is not one of the accepted truthy spellings the result is ``None``
    and the legacy variable is not consulted. The legacy variable is only
    examined when the canonical one is absent.
    """
    def _is_truthy(raw: str) -> bool:
        return raw.strip().lower() in FAIL_OPEN_VALUES

    canonical = os.environ.get(FAIL_OPEN_ENV)
    if canonical is None:
        legacy = os.environ.get(LEGACY_FAIL_OPEN_ENV, "")
        return LEGACY_FAIL_OPEN_ENV if _is_truthy(legacy) else None
    if _is_truthy(canonical):
        return FAIL_OPEN_ENV
    return None
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
def fail_open_enabled() -> bool:
    """Report whether a fail-open environment variable is currently active."""
    active_source = fail_open_source_env()
    return active_source is not None
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
def print_noop() -> None:
    """Write an empty JSON object to stdout (the hook's no-change response)."""
    print(json.dumps({}))
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
def deny(reason: str) -> None:
    """Refuse the pending tool call, unless fail-open is active.

    ``reason`` is always logged to stderr. When a fail-open variable is active,
    a second stderr line names that variable and the no-op response is printed,
    leaving the command untouched. Otherwise a PreToolUse response carrying
    ``permissionDecision: "deny"`` and the reason is printed to stdout.
    """
    print(f"context-guard-rewrite-bash: {reason}", file=sys.stderr)

    override = fail_open_source_env()
    if override is None:
        decision = {
            "hookEventName": "PreToolUse",
            "permissionDecision": "deny",
            "permissionDecisionReason": reason,
        }
        print(json.dumps({"hookSpecificOutput": decision}, ensure_ascii=False))
        return

    print(
        f"context-guard-rewrite-bash: {override}=1 active; leaving command unchanged intentionally",
        file=sys.stderr,
    )
    print_noop()
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def unparseable_command_needs_sanitizer(command: str) -> bool:
    """Return True for shell-compound commands likely to print secret-bearing output.

    This is a textual heuristic for commands that could not be reduced to a
    single argv. A tool "mention" is the tool name standing alone between
    whitespace / shell operator characters, compared case-insensitively.

    Every tool family is evaluated independently and the result is True as
    soon as any family is risky:

    - rg / grep / egrep / fgrep: always risky.
    - journalctl / kubectl / oc / docker / podman / docker-compose: risky when
      the text also contains `` logs``, `` log `` or ``journalctl``.
    - git: risky with `` diff``, `` show`` or `` grep``, or with `` log``
      combined with `` -p`` / `` --patch``.
    - find: risky with an exec / ok / delete / fprint / fls action.

    A family that is mentioned but not risky must not end the evaluation:
    ``docker ps && git diff`` has a harmless docker part and a risky git part.
    """
    # Cheap pre-filter: nothing to do when no tool of interest is mentioned.
    if not UNPARSEABLE_SANITIZER_RISK_RE.search(command):
        return False
    lowered = command.lower()

    def mentions(names: str) -> bool:
        """True when one of the ``|``-separated tool names appears as a word."""
        return re.search(rf"(?:^|[\s;&|()])(?:{names})(?:$|[\s;&|()])", lowered) is not None

    if mentions("rg|grep|egrep|fgrep"):
        return True

    if mentions("journalctl|kubectl|oc|docker|podman|docker-compose") and any(
        word in lowered for word in (" logs", " log ", "journalctl")
    ):
        return True

    if mentions("git") and (
        any(word in lowered for word in (" diff", " show", " grep"))
        # `git log` only prints file contents when a patch is requested.
        or (" log" in lowered and (" -p" in lowered or " --patch" in lowered))
    ):
        return True

    if mentions("find") and any(
        action in lowered
        for action in (" -exec", " -execdir", " -ok", " -okdir", " -delete", " -fprint", " -fls")
    ):
        return True

    return False
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def split_single_safe_command(command: str) -> list[str] | None:
    """Tokenize ``command`` and return its argv only if it is one plain command.

    Returns ``None`` when the command is blank, contains a newline, carriage
    return or tab, cannot be tokenized (e.g. unbalanced quotes), or contains
    any of the following after tokenization:

    - a shell control operator token (``;``, ``&&``, ``|``, redirects,
      parentheses, ...),
    - a backtick, or
    - ``$(`` / ``${`` inside a token.

    Operator characters inside quoted arguments stay part of a larger token and
    are accepted, except when the quoted text consists solely of operator
    characters, which is rejected conservatively.
    """
    if not command.strip():
        return None
    if any(char in command for char in "\n\r\t"):
        return None
    try:
        lexer = shlex.shlex(command, posix=True, punctuation_chars=True)
        lexer.whitespace_split = True
        # shlex starts a comment at `#` even in the middle of a word and drops
        # the rest of the line, whereas the shell only does so at the start of
        # a word. Left enabled, `tree x#;git diff` tokenizes to ["tree", "x"]
        # and the `;git diff` that bash still executes is never inspected.
        # Keeping `#` as an ordinary character exposes those operators to the
        # checks below.
        lexer.commenters = ""
        argv = list(lexer)
    except ValueError:
        return None
    if not argv:
        return None
    for token in argv:
        # Either a known operator, or a non-empty run made only of operator
        # characters (covers combinations missing from the explicit set).
        if token in SHELL_OPERATOR_TOKENS or (
            token and all(char in SHELL_OPERATOR_CHARS for char in token)
        ):
            return None
        if any(char in token for char in "`\n\r\t"):
            return None
        if "$(" in token or "${" in token:
            return None
    return argv
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
def command_basename(command: str) -> str:
    """Return the last path component of ``command`` (``/usr/bin/git`` -> ``git``)."""
    _directory, tail = os.path.split(command)
    return tail
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
def strip_env_prefix(argv: list[str]) -> list[str]:
    """Return the executable argv after leading `KEY=VALUE` or `env` wrappers.

    Leading ``KEY=VALUE`` assignments are dropped first. If the next word is
    literally ``env``, it is dropped together with its options and any further
    assignments: ``-u NAME`` / ``--unset NAME`` consume two words when a value
    follows, and every other word starting with ``-`` consumes one.
    """
    total = len(argv)
    pos = 0
    while pos < total and ENV_ASSIGNMENT_RE.match(argv[pos]):
        pos += 1

    if pos >= total or argv[pos] != "env":
        return argv[pos:]

    pos += 1  # step over `env` itself
    while pos < total:
        word = argv[pos]
        if word in ("-u", "--unset") and pos + 1 < total:
            pos += 2
        elif word.startswith("-") or ENV_ASSIGNMENT_RE.match(word):
            pos += 1
        else:
            break
    return argv[pos:]
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
def npm_script_args(rest: list[str]) -> list[str]:
    """Drop leading package-manager options and return the remaining words.

    Options listed as value-taking consume the following word as well; any
    other word starting with ``-`` consumes only itself. Scanning stops at the
    first word that is not an option.
    """
    takes_value = {"--prefix", "--workspace", "-w", "--filter", "--cwd", "-C"}
    total = len(rest)
    pos = 0
    while pos < total:
        word = rest[pos]
        if word in takes_value:
            pos += 2
        elif word.startswith("-"):
            pos += 1
        else:
            break
    return rest[pos:]
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
def is_noisy_command(argv: list[str]) -> bool:
    """Decide whether ``argv`` is a test/build/lint run worth trimming.

    Env prefixes are stripped and the executable is compared by basename.
    Recognized: npm/pnpm/yarn/bun ``test``, ``build``, ``lint`` (directly or
    via ``run`` / ``run-script``, where any script starting with ``test``
    counts); pytest, tox, jest, vitest; ``npx`` with jest or vitest among its
    arguments; ``python[N[.M]] -m pytest|unittest``; go, cargo, mvn(w) and
    gradle(w) with a ``test`` argument; ``make`` with test, build or lint.
    """
    argv = strip_env_prefix(argv)
    if not argv:
        return False
    tool = command_basename(argv[0])
    args = argv[1:]

    if tool in {"npm", "pnpm", "yarn", "bun"}:
        script_args = npm_script_args(args)
        if not script_args:
            return False
        verb = script_args[0]
        if verb == "test":
            return True
        if verb in {"run", "run-script"} and len(script_args) > 1:
            target = script_args[1]
            return target in {"build", "lint"} or target.startswith("test")
        return verb in {"build", "lint"}

    if tool in {"pytest", "tox", "jest", "vitest"}:
        return True
    if tool == "npx":
        return any(arg in {"jest", "vitest"} for arg in args)
    if re.fullmatch(r"python(?:\d+(?:\.\d+)?)?", tool):
        return len(args) >= 2 and args[0] == "-m" and args[1] in {"pytest", "unittest"}
    if tool in {"go", "cargo", "mvn", "mvnw", "./mvnw", "gradle", "gradlew", "./gradlew"}:
        return "test" in args
    if tool == "make":
        return any(arg in {"test", "build", "lint"} for arg in args)
    return False
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
def _skip_leading_flags(rest: list[str]) -> list[str]:
    """Skip the leading `-` / `--` flags of ``rest`` and return what follows.

    A flag listed in ``_VALUE_TAKING_FLAGS`` also consumes the next token as
    its value; a flag written as ``--name=value`` and every other token that
    starts with ``-`` is assumed to be self-contained. An unknown value-taking
    flag therefore leaves its value in front and the subcommand match is
    missed, which is treated as an acceptable, conservative degradation: the
    caller then leaves the command unprocessed.
    """
    total = len(rest)
    pos = 0
    while pos < total and rest[pos].startswith("-"):
        flag = rest[pos]
        consumes_next = (
            "=" not in flag
            and flag in _VALUE_TAKING_FLAGS
            and pos + 1 < total
        )
        pos += 2 if consumes_next else 1
    return rest[pos:]
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
def is_dir_traversal_command(argv: list[str]) -> bool:
    """Identify path-listing commands that should go through the trim wrapper.

    ``tree`` and ``fd`` always qualify, and ``rg`` qualifies when ``--files``
    is among its arguments. ``find`` qualifies only when none of its arguments
    is an action from ``_FIND_OUTPUT_RISK_ACTIONS`` (``-exec``, ``-delete``,
    ``-fprint*``, ...): those can emit arbitrary command output, including the
    contents of credential files such as ``.env``, so this function returns
    False for them and ``is_log_streaming_command`` picks them up for
    sanitizing instead.
    """
    argv = strip_env_prefix(argv)
    if not argv:
        return False
    tool = command_basename(argv[0])
    args = argv[1:]

    if tool in {"tree", "fd"}:
        return True
    if tool == "find":
        return all(arg not in _FIND_OUTPUT_RISK_ACTIONS for arg in args)
    return tool == "rg" and "--files" in args
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
def is_log_streaming_command(argv: list[str]) -> bool:
    """Identify commands that stream logs or may leak credentials.

    Such commands are routed to the sanitize wrapper. Covered:

    - ``kubectl logs`` / ``oc logs`` / ``podman logs``
    - ``docker logs`` / ``docker compose logs`` / ``docker stack logs`` and the
      same ``compose`` / ``stack`` forms for podman
    - ``docker-compose logs`` (v1)
    - ``journalctl`` (always)
    - ``find`` with an action from ``_FIND_OUTPUT_RISK_ACTIONS``

    Global options in front of the subcommand (``-n prod``,
    ``--context=stage``, ``-f compose.yml``) are absorbed by
    ``_skip_leading_flags``. Limitation: indirect forms such as
    ``kubectl exec ... -- cat /var/log/...`` need a separate rule and are not
    handled here.
    """
    argv = strip_env_prefix(argv)
    if not argv:
        return False
    tool = command_basename(argv[0])
    args = argv[1:]

    if tool == "journalctl":
        return True
    if tool == "find":
        return any(arg in _FIND_OUTPUT_RISK_ACTIONS for arg in args)
    if tool in {"kubectl", "oc", "docker-compose"}:
        return _skip_leading_flags(args)[:1] == ["logs"]
    if tool in {"docker", "podman"}:
        remaining = _skip_leading_flags(args)
        subcommand = remaining[0] if remaining else None
        if subcommand == "logs":
            return True
        if subcommand in {"compose", "stack"}:
            # `compose` / `stack` accept their own options before `logs`.
            return _skip_leading_flags(remaining[1:])[:1] == ["logs"]
    return False
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
def is_already_wrapped(argv: list[str]) -> bool:
    """Return True when ``argv`` already invokes a trim/sanitize wrapper.

    Handles a bare call (``context-guard-trim-output ...``), a call through a
    Python interpreter (``python3 .../trim_command_output.py ...``), and
    absolute paths, always comparing basenames. The decision is made on argv
    rather than by substring search of the raw command, because a container
    that merely happens to be named ``context-guard-sanitize-output`` would
    otherwise cause a false bypass.
    """
    argv = strip_env_prefix(argv)
    if not argv:
        return False
    launcher = os.path.basename(argv[0])
    via_python = re.fullmatch(r"python(?:\d+(?:\.\d+)?)?", launcher) is not None
    # With an interpreter in front, the script being run is the next word.
    target = argv[1] if via_python and len(argv) > 1 else argv[0]
    return os.path.basename(target) in WRAPPER_BASENAMES
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
def is_sanitizable_output_command(argv: list[str]) -> bool:
    """Identify content-search and diff commands that need the sanitizer.

    Covers rg / grep / egrep / fgrep unless ``--files`` is given, and git
    ``grep``, ``diff``, ``show``, plus ``log`` when ``-p`` or an option
    starting with ``--patch`` follows it. Git global options are skipped via
    ``git_subcommand_args`` before the subcommand is read.
    """
    argv = strip_env_prefix(argv)
    if not argv:
        return False
    tool = command_basename(argv[0])
    args = argv[1:]

    if tool in {"rg", "grep", "egrep", "fgrep"}:
        # `rg --files` lists paths instead of searching content; the large
        # read/diet guards are a better fit for it.
        return "--files" not in args

    if tool != "git" or not args:
        return False
    git_args = git_subcommand_args(args)
    if not git_args:
        return False
    subcommand = git_args[0]
    if subcommand in {"grep", "diff", "show"}:
        return True
    return subcommand == "log" and any(
        option == "-p" or option.startswith("--patch") for option in git_args[1:]
    )
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
def git_subcommand_args(rest: list[str]) -> list[str]:
    """Return git's arguments starting at the subcommand.

    Leading global options are skipped: a value-taking option followed by
    another word consumes both, every other word starting with ``-``
    (including ``--name=value`` forms and a trailing value-taking option)
    consumes only itself. A bare ``--`` ends option parsing and everything
    after it is returned.
    """
    takes_value = {"-C", "-c", "--git-dir", "--work-tree", "--namespace", "--exec-path", "--config-env"}
    total = len(rest)
    pos = 0
    while pos < total:
        word = rest[pos]
        if word == "--":
            return rest[pos + 1:]
        if word in takes_value and pos + 1 < total:
            pos += 2
        elif word.startswith("-"):
            pos += 1
        else:
            break
    return rest[pos:]
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
def build_wrapped_command(wrapper: str, command: str) -> str:
    """Build the shell line that runs ``command`` through the trim wrapper.

    A wrapper ending in ``.py`` is launched via ``python3``. The original
    command is passed as one argument to ``bash -lc`` after ``--max-lines 220 --``,
    and the whole argv is shell-quoted into a single string.
    """
    launcher = ["python3", wrapper] if wrapper.endswith(".py") else [wrapper]
    return shlex.join([*launcher, "--max-lines", "220", "--", "bash", "-lc", command])
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
def build_sanitized_command(wrapper: str, command: str) -> str:
    """Build the shell line that runs ``command`` through the sanitize wrapper.

    A wrapper ending in ``.py`` is launched via ``python3``. The original
    command is passed as one argument to ``bash -lc`` after ``--max-lines 220 --``,
    and the whole argv is shell-quoted into a single string.
    """
    pieces = [wrapper, "--max-lines", "220", "--", "bash", "-lc", command]
    if wrapper.endswith(".py"):
        pieces.insert(0, "python3")
    return shlex.join(pieces)
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
def main() -> int:
    """Run the PreToolUse hook: read hook JSON on stdin, answer on stdout.

    Outcomes, in order of evaluation:

    - Invalid JSON, a non-object payload, a non-object tool input, or a
      missing / empty / non-string ``command``: print the no-op response.
    - Command that is not a single plain command: deny it when it looks like a
      search/diff/log command, otherwise no-op.
    - Command already invoking a wrapper: no-op.
    - Noisy test/build command or directory traversal: rewrite through the
      trim wrapper.
    - Search/diff or log-streaming command: rewrite through the sanitize
      wrapper.
    - Anything else: no-op.

    A missing wrapper results in a deny (subject to fail-open, see ``deny``).

    Returns:
        Always 0; the decision is carried by the JSON printed to stdout.
    """
    try:
        payload = json.load(sys.stdin)
    except json.JSONDecodeError as exc:
        print(f"context-guard-rewrite-bash: invalid hook JSON: {exc}", file=sys.stderr)
        print_noop()
        return 0

    if not isinstance(payload, dict):
        print_noop()
        return 0
    tool_input = payload.get("tool_input") or payload.get("toolInput") or {}
    if not isinstance(tool_input, dict):
        print_noop()
        return 0
    command = tool_input.get("command") or ""

    # A truthy non-string command (number, list, ...) would crash on `.strip()`
    # inside the tokenizer; treat it like a missing command instead.
    if not isinstance(command, str) or not command:
        print_noop()
        return 0

    argv = split_single_safe_command(command)
    if not argv:
        if unparseable_command_needs_sanitizer(command):
            deny(
                "Search/diff/log command contains shell operators that cannot be safely rewritten. "
                "Run the command through context-guard-sanitize-output explicitly, simplify it, or set "
                f"{FAIL_OPEN_ENV}=1 to run unsanitized intentionally."
            )
            return 0
        print_noop()
        return 0

    # Detect an already-wrapped command from argv. Plain substring matching
    # could cause a false bypass when e.g. a container name happens to equal a
    # wrapper name.
    if is_already_wrapped(argv):
        print_noop()
        return 0

    if is_noisy_command(argv) or is_dir_traversal_command(argv):
        wrapper = find_wrapper("trim")
        if wrapper is None:
            deny(
                "Noisy command blocked because context-guard-trim-output is not installed next to "
                "context-guard-rewrite-bash. Install the trim wrapper or set "
                f"{FAIL_OPEN_ENV}=1 to run untrimmed intentionally."
            )
            return 0
        wrapped = build_wrapped_command(wrapper, command)
    elif is_sanitizable_output_command(argv) or is_log_streaming_command(argv):
        wrapper = find_wrapper("sanitize")
        if wrapper is None:
            reason = (
                "Search/diff command blocked because context-guard-sanitize-output is not installed next to "
                "context-guard-rewrite-bash. Install the sanitizer or set "
                f"{FAIL_OPEN_ENV}=1 to run unsanitized intentionally."
            )
            deny(reason)
            return 0
        wrapped = build_sanitized_command(wrapper, command)
    else:
        print_noop()
        return 0

    response = {
        "hookSpecificOutput": {
            "hookEventName": "PreToolUse",
            "updatedInput": {"command": wrapped},
        }
    }
    print(json.dumps(response, ensure_ascii=False))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|