@ictechgy/context-guard 0.4.8 → 0.4.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/README.ko.md +92 -37
- package/README.md +111 -37
- package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
- package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
- package/docs/distribution.md +10 -7
- package/docs/experimental-benchmark-fixtures.md +8 -1
- package/package.json +3 -6
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +9 -6
- package/plugins/context-guard/README.md +27 -12
- package/plugins/context-guard/bin/context-guard +113 -26
- package/plugins/context-guard/bin/context-guard-artifact +542 -46
- package/plugins/context-guard/bin/context-guard-cache-score +380 -0
- package/plugins/context-guard/bin/context-guard-compress +146 -1
- package/plugins/context-guard/bin/context-guard-cost +783 -4
- package/plugins/context-guard/bin/context-guard-experiments +2211 -121
- package/plugins/context-guard/bin/context-guard-failed-nudge +3 -0
- package/plugins/context-guard/bin/context-guard-filter +163 -7
- package/plugins/context-guard/bin/context-guard-guard-read +3 -0
- package/plugins/context-guard/bin/context-guard-pack +602 -43
- package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
- package/plugins/context-guard/bin/context-guard-setup +165 -31
- package/plugins/context-guard/bin/context-guard-statusline +490 -283
- package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +241 -1
- package/plugins/context-guard/lib/context_guard_commands.py +206 -0
- package/plugins/context-guard/skills/setup/SKILL.md +1 -0
- package/context-guard-kit/README.md +0 -91
- package/context-guard-kit/benchmark_runner.py +0 -2401
- package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
- package/context-guard-kit/context_compress.py +0 -695
- package/context-guard-kit/context_escrow.py +0 -935
- package/context-guard-kit/context_filter.py +0 -637
- package/context-guard-kit/context_guard_cli.py +0 -325
- package/context-guard-kit/context_guard_diet.py +0 -1711
- package/context-guard-kit/context_pack.py +0 -2713
- package/context-guard-kit/cost_guard.py +0 -2349
- package/context-guard-kit/experimental_registry.py +0 -2339
- package/context-guard-kit/failed_attempt_nudge.py +0 -567
- package/context-guard-kit/guard_large_read.py +0 -690
- package/context-guard-kit/hook_secret_patterns.py +0 -43
- package/context-guard-kit/read_symbol.py +0 -483
- package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
- package/context-guard-kit/sanitize_output.py +0 -725
- package/context-guard-kit/settings.example.json +0 -67
- package/context-guard-kit/setup_wizard.py +0 -2515
- package/context-guard-kit/statusline.sh +0 -362
- package/context-guard-kit/statusline_merged.sh +0 -157
- package/context-guard-kit/tool_schema_pruner.py +0 -837
- package/context-guard-kit/trim_command_output.py +0 -1449
|
@@ -1,690 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env python3
|
|
2
|
-
"""Claude Code PreToolUse hook: block large whole-file Read calls.
|
|
3
|
-
|
|
4
|
-
The hook nudges Claude toward symbol-scoped reads before a huge file is inserted
|
|
5
|
-
into the conversation. It is opt-in through project settings and can be disabled
|
|
6
|
-
with CONTEXT_GUARD_READ_GUARD=0. Legacy CLAUDE_TOKEN_* environment variables
|
|
7
|
-
remain supported for existing project settings.
|
|
8
|
-
"""
|
|
9
|
-
from __future__ import annotations
|
|
10
|
-
|
|
11
|
-
import errno
|
|
12
|
-
import hashlib
|
|
13
|
-
import importlib.util
|
|
14
|
-
import json
|
|
15
|
-
import os
|
|
16
|
-
import re
|
|
17
|
-
import secrets
|
|
18
|
-
import shlex
|
|
19
|
-
import stat
|
|
20
|
-
import sys
|
|
21
|
-
from pathlib import Path
|
|
22
|
-
from typing import Any
|
|
23
|
-
|
|
24
|
-
# Directory holding this hook script; the secret-pattern helper is searched for
# here and in the sibling "lib" directory.
SCRIPT_DIR = Path(__file__).resolve().parent
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
def _load_hook_secret_patterns():
    """Load ``hook_secret_patterns.py`` from a known location and return the module.

    The helper is looked up next to this script first and then in the sibling
    ``lib`` directory. The first candidate that exists and yields a usable
    import spec is executed and returned.

    Raises:
        ImportError: if no candidate could be loaded; the message lists every
            location that was checked.
    """
    candidates = [
        base / "hook_secret_patterns.py"
        for base in (SCRIPT_DIR, SCRIPT_DIR.parent / "lib")
    ]
    for candidate in candidates:
        if not candidate.is_file():
            continue
        spec = importlib.util.spec_from_file_location("_claude_token_hook_secret_patterns", candidate)
        if spec is not None and spec.loader is not None:
            loaded = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(loaded)
            return loaded
    raise ImportError("hook_secret_patterns.py not found in " + ", ".join(str(item) for item in candidates))
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
# Redaction helpers re-exported from the dynamically loaded helper module.
_hook_secret_patterns = _load_hook_secret_patterns()
CONTROL_CHAR_RE = _hook_secret_patterns.CONTROL_CHAR_RE
hook_label_has_sensitive_evidence = _hook_secret_patterns.hook_label_has_sensitive_evidence

# Default byte threshold used by max_bytes() when no environment override is set.
DEFAULT_MAX_BYTES = 48_000
# Default line-count threshold used by max_line_range().
DEFAULT_MAX_LINE_RANGE = 400
# Upper clamps applied to the environment overrides.
MAX_BYTES_LIMIT = 1_000_000
MAX_LINE_RANGE_LIMIT = 20_000
# Bounds for the outline: bytes read from the file prefix and items reported.
OUTLINE_MAX_BYTES = 200_000
OUTLINE_MAX_ITEMS = 12
# Location (relative to the working directory) of the repeated-read counter state.
READ_GUARD_STATE_DIR = Path(".context-guard")
READ_GUARD_STATE_FILE = "read-guard-cache.json"
# Number of fingerprints kept in the state file before the oldest are dropped.
READ_GUARD_STATE_MAX_ITEMS = 20
# Environment variable names; each LEGACY_* name is consulted only when the
# primary variable is unset (see env_value()).
GUARD_ENV = "CONTEXT_GUARD_READ_GUARD"
LEGACY_GUARD_ENV = "CLAUDE_TOKEN_READ_GUARD"
MAX_BYTES_ENV = "CONTEXT_GUARD_READ_GUARD_MAX_BYTES"
LEGACY_MAX_BYTES_ENV = "CLAUDE_TOKEN_READ_GUARD_MAX_BYTES"
MAX_LINE_RANGE_ENV = "CONTEXT_GUARD_READ_GUARD_MAX_LINES"
LEGACY_MAX_LINE_RANGE_ENV = "CLAUDE_TOKEN_READ_GUARD_MAX_LINES"
# Default length cap for compact_hook_text().
PATH_LABEL_MAX_CHARS = 160
# First path components that may be a symlink to the listed target; such paths
# are rewritten to the target before no-follow traversal.
# NOTE(review): these look like the macOS /tmp and /var aliases — confirm.
ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
    "tmp": Path("/private/tmp"),
    "var": Path("/private/var"),
}
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def truthy_disabled(value: str | None) -> bool:
|
|
70
|
-
return str(value or "").strip().lower() in {"0", "false", "no", "off", "disabled"}
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
def env_value(name: str, legacy_name: str | None = None) -> str | None:
|
|
74
|
-
value = os.environ.get(name)
|
|
75
|
-
if value is not None or legacy_name is None:
|
|
76
|
-
return value
|
|
77
|
-
return os.environ.get(legacy_name)
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
def bounded_env_int(name: str, legacy_name: str | None, default: int, minimum: int, maximum: int) -> int:
    """Read an integer setting from the environment, clamped to a range.

    Returns *default* when the variable (or its legacy fallback) is unset,
    empty, or not parseable as an integer; otherwise the parsed value clamped
    into ``[minimum, maximum]``.
    """
    raw = env_value(name, legacy_name)
    if raw:
        try:
            parsed = int(raw)
        except (TypeError, ValueError, OverflowError):
            return default
        return min(max(parsed, minimum), maximum)
    return default
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
    """Coerce *value* to an int clamped into ``[minimum, maximum]``.

    Returns *default* (unclamped) when *value* cannot be converted.
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    if parsed < minimum:
        parsed = minimum
    return maximum if parsed > maximum else parsed
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
def max_bytes() -> int:
    """Return the byte-size threshold above which a whole-file Read is blocked.

    Taken from the environment (primary or legacy variable) and clamped to
    ``[1, MAX_BYTES_LIMIT]``; falls back to ``DEFAULT_MAX_BYTES``.
    """
    threshold = bounded_env_int(
        MAX_BYTES_ENV,
        LEGACY_MAX_BYTES_ENV,
        default=DEFAULT_MAX_BYTES,
        minimum=1,
        maximum=MAX_BYTES_LIMIT,
    )
    return threshold
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
def max_line_range() -> int:
    """Return the largest ``limit`` that still counts as a bounded line range.

    Taken from the environment (primary or legacy variable) and clamped to
    ``[1, MAX_LINE_RANGE_LIMIT]``; falls back to ``DEFAULT_MAX_LINE_RANGE``.
    """
    ceiling = bounded_env_int(
        MAX_LINE_RANGE_ENV,
        LEGACY_MAX_LINE_RANGE_ENV,
        default=DEFAULT_MAX_LINE_RANGE,
        minimum=1,
        maximum=MAX_LINE_RANGE_LIMIT,
    )
    return ceiling
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def tool_input(payload: dict[str, Any]) -> dict[str, Any]:
    """Return the tool-input mapping from a hook payload.

    Accepts both ``tool_input`` and ``toolInput`` spellings; the first truthy
    value wins. Anything that is not a dict yields an empty dict.
    """
    for key in ("tool_input", "toolInput"):
        candidate = payload.get(key)
        if candidate:
            return candidate if isinstance(candidate, dict) else {}
    return {}
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
def read_path_from_payload(payload: dict[str, Any]) -> str:
    """Return the requested file path from a hook payload, or ``""``.

    The keys ``file_path``, ``path`` and ``filePath`` are tried in that order;
    the first value that is a non-blank string is returned unchanged.
    """
    fields = tool_input(payload)
    candidates = (fields.get(key) for key in ("file_path", "path", "filePath"))
    return next(
        (item for item in candidates if isinstance(item, str) and item.strip()),
        "",
    )
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
def tool_name(payload: dict[str, Any]) -> str:
    """Return the tool name from a hook payload, or ``""`` when absent.

    Accepts both ``tool_name`` and ``toolName``; a non-string value yields ``""``.
    """
    for key in ("tool_name", "toolName"):
        candidate = payload.get(key)
        if candidate:
            return candidate if isinstance(candidate, str) else ""
    return ""
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
def compact_hook_text(value: str, limit: int = PATH_LABEL_MAX_CHARS) -> str:
    """Flatten *value* to a single line and cap its length.

    Control characters are replaced with spaces, whitespace runs collapse to
    one space, and text longer than *limit* is cut and suffixed with
    ``...[truncated]``.
    """
    without_controls = CONTROL_CHAR_RE.sub(" ", value.strip())
    collapsed = " ".join(without_controls.split())
    if len(collapsed) <= limit:
        return collapsed
    # Leave room for the 14-character marker plus one spare character.
    return collapsed[: limit - 15].rstrip() + "...[truncated]"
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
def anonymized_path_label(path: Path) -> str:
    """Return a label that identifies *path* only by a short hash.

    The digest is the first 12 hex characters of the SHA-256 of the resolved
    path string. When resolution fails, the unresolved path string is hashed
    instead, so the function always produces a label.

    ``Path.resolve()`` raises ``RuntimeError`` for symlink loops on Python
    versions before 3.13, so that is handled alongside ``OSError``.
    """
    try:
        raw = str(path.resolve())
    except (OSError, RuntimeError):
        raw = str(path)
    digest = hashlib.sha256(raw.encode("utf-8", "replace")).hexdigest()[:12]
    return f"redacted-path#path:{digest}"
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
def bounded_line_range_requested(payload: dict[str, Any]) -> bool:
    """Return True when the Read call asks for an acceptably small line range.

    The request qualifies when ``limit`` is an integer in
    ``1..max_line_range()`` and ``offset``, if present, is a non-negative
    integer. Any value that cannot be converted to an int disqualifies it.

    ``OverflowError`` is handled as well as ``TypeError``/``ValueError``:
    ``json.load`` accepts ``Infinity``, and ``int(float("inf"))`` raises
    ``OverflowError``.
    """
    data = tool_input(payload)
    raw_limit = data.get("limit")
    if raw_limit is None:
        return False
    try:
        limit = int(raw_limit)
    except (TypeError, ValueError, OverflowError):
        return False
    if limit <= 0 or limit > max_line_range():
        return False
    raw_offset = data.get("offset")
    if raw_offset is None:
        return True
    try:
        return int(raw_offset) >= 0
    except (TypeError, ValueError, OverflowError):
        return False
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
def safe_label(path: Path, root: Path) -> str:
    """Return a display label for *path* that is safe to echo in hook output.

    Paths inside *root* are shown relative to it. Paths outside *root* are
    shown as ``<name>#path:<digest>``, where the digest is a 12-character
    SHA-256 prefix of the resolved path. A label or file name that
    ``hook_label_has_sensitive_evidence`` flags is replaced with
    ``redacted-path``.

    Resolution failures fall back to the unresolved path. ``RuntimeError`` is
    handled together with ``OSError`` because ``Path.resolve()`` raises it for
    symlink loops on Python versions before 3.13, and this function is called
    for paths already known to traverse symlinks.
    """
    try:
        resolved = path.resolve()
    except (OSError, RuntimeError):
        resolved = path
    try:
        base = root.resolve()
    except (OSError, RuntimeError):
        base = root
    digest = hashlib.sha256(str(resolved).encode("utf-8", "replace")).hexdigest()[:12]
    try:
        label = resolved.relative_to(base).as_posix()
    except ValueError:
        # Outside the project root: expose only the file name plus a digest.
        name = path.name or "path"
        if hook_label_has_sensitive_evidence(name):
            name = "redacted-path"
        else:
            name = compact_hook_text(name)
        return f"{name or 'path'}#path:{digest}"
    if hook_label_has_sensitive_evidence(label):
        # Same format as anonymized_path_label(), built from the path already
        # resolved above so a failed resolution is not retried.
        return f"redacted-path#path:{digest}"
    return compact_hook_text(label) or "path"
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
def has_symlink_component(path: Path) -> bool:
    """Report whether *path* itself or any leading component is a symlink.

    Components are checked from the anchor (or the current directory for
    relative paths) outward; the anchor itself is not tested.
    """
    if path.is_symlink():
        return True
    absolute = path.is_absolute()
    cursor = Path(path.anchor) if absolute else Path()
    segments = [part for part in path.parts if not (absolute and part == path.anchor)]
    for segment in segments:
        cursor = cursor / segment
        if cursor.is_symlink():
            return True
    return False
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
def base_open_flags() -> int:
    """Return read-only ``os.open`` flags plus close-on-exec and non-blocking.

    ``O_CLOEXEC`` and ``O_NONBLOCK`` are added only where the platform defines them.
    """
    close_on_exec = getattr(os, "O_CLOEXEC", 0)
    non_blocking = getattr(os, "O_NONBLOCK", 0)
    return os.O_RDONLY | close_on_exec | non_blocking
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
def no_follow_flag() -> int:
    """Return ``os.O_NOFOLLOW`` where available, otherwise 0."""
    if hasattr(os, "O_NOFOLLOW"):
        return os.O_NOFOLLOW
    return 0
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
def directory_flag() -> int:
    """Return ``os.O_DIRECTORY`` where available, otherwise 0."""
    if hasattr(os, "O_DIRECTORY"):
        return os.O_DIRECTORY
    return 0
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
def normalized_link_target(parent: Path, raw_target: str) -> Path:
    """Turn a ``readlink`` result into a normalised path.

    Relative targets are interpreted against *parent*; the result is passed
    through ``os.path.normpath`` (purely lexical, no filesystem access).
    """
    candidate = Path(raw_target)
    combined = candidate if candidate.is_absolute() else parent / candidate
    return Path(os.path.normpath(str(combined)))
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
def normalize_allowed_first_absolute_symlink(path: Path) -> Path:
    """Replace an allow-listed first-component symlink with its real target.

    Only absolute paths whose first component is a key of
    ``ALLOWED_FIRST_ABSOLUTE_SYMLINKS`` are considered, and only when that
    component is a symlink whose normalised target equals the allow-listed
    path. In every other case, including any ``OSError`` while inspecting the
    link, *path* is returned unchanged.
    """
    if not path.is_absolute() or len(path.parts) < 2:
        return path
    head = path.parts[1]
    canonical = ALLOWED_FIRST_ABSOLUTE_SYMLINKS.get(head)
    if canonical is None:
        return path
    anchor = Path(path.anchor)
    alias = anchor / head
    try:
        matches = stat.S_ISLNK(os.lstat(alias).st_mode) and (
            normalized_link_target(anchor, os.readlink(alias)) == canonical
        )
    except OSError:
        return path
    if not matches:
        return path
    return canonical.joinpath(*path.parts[2:])
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
def open_directory_at(parent_fd: int, component: str, full_path: Path) -> int:
    """Open the directory *component* relative to *parent_fd* without following symlinks.

    Where ``lstat_at_no_follow`` can inspect the entry first, the entry must
    be a real directory and the opened descriptor must refer to that same
    entry. Where it cannot, only the opened descriptor's type is checked.

    Returns:
        A new directory file descriptor owned by the caller.

    Raises:
        OSError: ``ELOOP`` for a symlink or an entry that changed between the
            check and the open, ``ENOTDIR`` for a non-directory, or whatever
            ``os.open`` raised otherwise. *full_path* is used only as the
            error's filename.
    """
    expected = lstat_at_no_follow(parent_fd, component)
    if expected is not None:
        if stat.S_ISLNK(expected.st_mode):
            raise OSError(errno.ELOOP, "path component must not be a symlink", str(full_path))
        if not stat.S_ISDIR(expected.st_mode):
            raise OSError(errno.ENOTDIR, "path component is not a directory", str(full_path))
    open_flags = base_open_flags() | directory_flag() | no_follow_flag()
    try:
        fd = os.open(component, open_flags, dir_fd=parent_fd)
    except OSError as exc:
        # The entry passed the pre-check but cannot be opened as a directory
        # any more: report it as a change rather than the raw error.
        raced = expected is not None and exc.errno in {errno.ELOOP, errno.ENOTDIR, errno.ENOENT, errno.EINVAL}
        if not raced:
            raise
        raise OSError(errno.ELOOP, "path component changed while opening", str(full_path)) from exc
    try:
        actual = os.fstat(fd)
        is_directory = stat.S_ISDIR(actual.st_mode)
        if expected is None:
            if not is_directory:
                raise OSError(errno.ENOTDIR, "path component is not a directory", str(full_path))
        elif not is_directory or not os.path.samestat(expected, actual):
            raise OSError(errno.ELOOP, "path component changed while opening", str(full_path))
        return fd
    except Exception:
        os.close(fd)
        raise
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
def lstat_no_symlink_components(path: Path) -> os.stat_result:
    """``lstat`` every component of *path* and return the result for the last one.

    Raises:
        OSError: ``EINVAL`` when *path* has no components below its anchor,
            ``ELOOP`` when any component is a symlink, ``ENOTDIR`` when a
            non-final component is not a directory, or the ``lstat`` error
            itself (for example a missing entry).
    """
    absolute = path.is_absolute()
    segments = list(path.parts)
    if absolute and segments:
        segments = segments[1:]
    if not segments:
        raise OSError(errno.EINVAL, "requested path is not a regular file", str(path))

    cursor = Path(path.anchor) if absolute else Path()
    final_index = len(segments) - 1
    info = None
    for position, segment in enumerate(segments):
        cursor = cursor / segment
        info = cursor.lstat()
        if stat.S_ISLNK(info.st_mode):
            raise OSError(errno.ELOOP, "requested path must not traverse symlinks", str(path))
        if position < final_index and not stat.S_ISDIR(info.st_mode):
            raise OSError(errno.ENOTDIR, "path component is not a directory", str(path))
    assert info is not None
    return info
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
def lstat_at_no_follow(dir_fd: int, component: str) -> os.stat_result | None:
|
|
297
|
-
if os.stat not in getattr(os, "supports_dir_fd", set()):
|
|
298
|
-
return None
|
|
299
|
-
if os.stat not in getattr(os, "supports_follow_symlinks", set()):
|
|
300
|
-
return None
|
|
301
|
-
return os.stat(component, dir_fd=dir_fd, follow_symlinks=False)
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
def _confirm_regular_fd(fd: int, before: os.stat_result | None, path: Path) -> int:
    """Check that *fd* is the regular file seen by the pre-open stat; close it on failure."""
    try:
        opened = os.fstat(fd)
        if before is not None:
            if not stat.S_ISREG(opened.st_mode) or not os.path.samestat(before, opened):
                raise OSError(errno.ELOOP, "requested path changed while opening", str(path))
        elif not stat.S_ISREG(opened.st_mode):
            raise OSError(errno.EINVAL, "requested path must be a regular file", str(path))
        return fd
    except Exception:
        os.close(fd)
        raise


def open_regular_no_symlink(path: Path) -> int:
    """Open *path* read-only, refusing symlinks in any component.

    After the allow-listed first-component rewrite, the path is walked one
    directory at a time with ``dir_fd``-relative no-follow opens. Platforms
    whose ``os.open`` lacks ``dir_fd`` support use a per-component ``lstat``
    check followed by a single no-follow open instead.

    Returns:
        A file descriptor for a regular file; the caller must close it.

    Raises:
        OSError: ``ELOOP`` for symlinks or entries that changed during the
            open, ``EINVAL`` when the target is not a regular file, or the
            underlying OS error.
    """
    path = normalize_allowed_first_absolute_symlink(path)
    if os.open not in getattr(os, "supports_dir_fd", set()):
        # Fallback without dir_fd: validate with lstat, then compare after opening.
        before = lstat_no_symlink_components(path)
        if not stat.S_ISREG(before.st_mode):
            raise OSError(errno.EINVAL, "requested path must be a regular file", str(path))
        return _confirm_regular_fd(os.open(path, base_open_flags() | no_follow_flag()), before, path)

    absolute = path.is_absolute()
    segments = list(path.parts)
    if absolute and segments:
        segments = segments[1:]
    if not segments:
        raise OSError(errno.EINVAL, "requested path is not a regular file", str(path))
    start = path.anchor if absolute else "."
    dir_fd = os.open(start or ".", base_open_flags() | directory_flag())
    try:
        for segment in segments[:-1]:
            child_fd = open_directory_at(dir_fd, segment, path)
            os.close(dir_fd)
            dir_fd = child_fd
        leaf = segments[-1]
        before = lstat_at_no_follow(dir_fd, leaf)
        if before is not None:
            if stat.S_ISLNK(before.st_mode):
                raise OSError(errno.ELOOP, "requested path must not be a symlink", str(path))
            if not stat.S_ISREG(before.st_mode):
                raise OSError(errno.EINVAL, "requested path must be a regular file", str(path))
        leaf_fd = os.open(leaf, base_open_flags() | no_follow_flag(), dir_fd=dir_fd)
        return _confirm_regular_fd(leaf_fd, before, path)
    finally:
        os.close(dir_fd)
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
def regular_file_size_no_symlink(path: Path) -> int:
    """Return the size in bytes of the regular file at *path*.

    The file is opened through ``open_regular_no_symlink`` and measured with
    ``fstat`` on the descriptor, which is always closed afterwards. Any
    ``OSError`` from the open propagates.
    """
    fd = open_regular_no_symlink(path)
    try:
        size = os.fstat(fd).st_size
    finally:
        os.close(fd)
    return size
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
def find_read_symbol_command() -> str:
    """Pick the command string suggested for symbol-scoped reads.

    Returns ``context-guard-read-symbol`` when that file sits next to this
    script or when neither helper is found; returns the
    ``python3 context-guard-kit/read_symbol.py`` form only when just
    ``read_symbol.py`` is present.
    """
    here = Path(__file__).resolve().parent
    plugin_command = "context-guard-read-symbol"
    if not (here / plugin_command).exists() and (here / "read_symbol.py").exists():
        return "python3 context-guard-kit/read_symbol.py"
    return plugin_command
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
def suggested_commands(label: str, read_symbol: str) -> tuple[str, str]:
    """Build the shell-quoted search and symbol-read command templates.

    Returns:
        ``(rg_command, read_symbol_command)``, both quoted with ``shlex.join``
        and containing placeholder arguments for the caller to fill in.
    """
    search = shlex.join(["rg", "-n", "<symbol-or-error>", "--", label])
    symbol_argv = [*shlex.split(read_symbol), label, "<SymbolName>"]
    return search, shlex.join(symbol_argv)
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
def read_prefix_for_outline(path: Path, max_bytes: int = OUTLINE_MAX_BYTES) -> tuple[str, bool]:
    """Read at most *max_bytes* from the start of *path* for outlining.

    Returns:
        ``(text, truncated)``: the prefix decoded as UTF-8 with replacement
        characters, and whether the file holds more than *max_bytes*. Any
        ``OSError`` while opening or reading yields ``("", False)``.
    """
    fd = -1
    try:
        fd = open_regular_no_symlink(path)
        with os.fdopen(fd, "rb") as handle:
            # The file object now owns the descriptor; do not close it twice.
            fd = -1
            data = handle.read(max_bytes + 1)
    except OSError:
        return "", False
    finally:
        if fd != -1:
            os.close(fd)
    overflowed = len(data) > max_bytes
    if overflowed:
        data = data[:max_bytes]
    return data.decode("utf-8", errors="replace"), overflowed
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
def outline_kind_for_suffix(path: Path) -> str:
    """Map the file extension of *path* to an outline language key.

    Matching is case-insensitive; unrecognised extensions map to ``"text"``.
    """
    kinds_by_suffix = {
        ".py": "python",
        ".js": "javascript",
        ".jsx": "javascript",
        ".ts": "javascript",
        ".tsx": "javascript",
        ".mjs": "javascript",
        ".cjs": "javascript",
        ".go": "go",
        ".rs": "rust",
        ".md": "markdown",
        ".mdx": "markdown",
        ".markdown": "markdown",
    }
    return kinds_by_suffix.get(path.suffix.lower(), "text")
|
|
410
|
-
|
|
411
|
-
|
|
412
|
-
# Per-language (label, regex) pairs used by outline_items(). Each regex is
# matched against a stripped line; for non-markdown kinds, group 1 is the
# reported name. Kinds without an entry (e.g. "text") produce no outline.
OUTLINE_PATTERNS: dict[str, tuple[tuple[str, str], ...]] = {
    "python": (
        ("class", r"^class\s+([A-Za-z_][A-Za-z0-9_]*)\b"),
        ("function", r"^(?:async\s+def|def)\s+([A-Za-z_][A-Za-z0-9_]*)\b"),
    ),
    "javascript": (
        ("class", r"^(?:export\s+)?class\s+([A-Za-z_$][A-Za-z0-9_$]*)\b"),
        (
            "function",
            r"^(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][A-Za-z0-9_$]*)\b",
        ),
        (
            "const",
            r"^(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*=",
        ),
    ),
    "go": (
        # The optional parenthesised group skips a method receiver.
        ("function", r"^func\s+(?:\([^)]*\)\s*)?([A-Za-z_][A-Za-z0-9_]*)\b"),
        ("type", r"^type\s+([A-Za-z_][A-Za-z0-9_]*)\b"),
    ),
    "rust": (
        ("function", r"^(?:pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\b"),
        ("type", r"^(?:pub\s+)?(?:struct|enum|trait)\s+([A-Za-z_][A-Za-z0-9_]*)\b"),
    ),
    "markdown": (
        # Only heading levels 1-3 are matched.
        ("heading", r"^(#{1,3})\s+(.+?)\s*$"),
    ),
}
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
def outline_items(path: Path, text: str, *, limit: int = OUTLINE_MAX_ITEMS) -> list[str]:
    """List the first top-level definitions found in *text*.

    The pattern set is chosen from the extension of *path*. Blank lines are
    skipped, and for every kind except markdown so are lines that start with
    whitespace. Each entry reads ``line <n>: <label> <name>``; markdown
    headings are reported with the placeholder ``<heading>`` instead of their
    text. Scanning stops once *limit* entries have been collected.
    """
    kind = outline_kind_for_suffix(path)
    matchers = [(label, re.compile(expr)) for label, expr in OUTLINE_PATTERNS.get(kind, ())]
    if not matchers:
        return []
    is_markdown = kind == "markdown"
    found: list[str] = []
    for number, raw_line in enumerate(text.splitlines(), start=1):
        candidate = raw_line.strip()
        if not candidate:
            continue
        if not is_markdown and raw_line[:1].isspace():
            continue
        for label, matcher in matchers:
            hit = matcher.match(candidate)
            if hit:
                shown = "<heading>" if is_markdown else hit.group(1)
                found.append(f"line {number}: {label} {compact_hook_text(shown, 80)}")
                break
        if len(found) >= limit:
            break
    return found
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
def line_estimate(prefix: str, size: int, truncated: bool) -> str:
    """Describe how many lines the file has, given its leading *prefix*.

    When the prefix is the whole file (or is empty), the exact line count is
    returned as a string. Otherwise the total is extrapolated from the average
    UTF-8 bytes per line in the prefix (at least 1) and the file *size*.
    """
    seen = prefix.count("\n")
    if prefix and not prefix.endswith("\n"):
        seen += 1  # count the unterminated final line
    if truncated and prefix:
        prefix_bytes = len(prefix.encode("utf-8", errors="replace"))
        per_line = max(1.0, prefix_bytes / max(1, seen))
        return f"~{int(size / per_line)} (estimated from first {seen})"
    return str(seen)
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
def progressive_read_ladder(path: Path, label: str, size: int, limit: int, read_symbol: str) -> str:
    """Compose the deny message that steers the caller toward smaller reads.

    The message names the blocked file by *label*, then lists three steps
    (search, symbol-scoped read, bounded line range), followed by a line-count
    estimate and a short top-level outline when one can be extracted from the
    file prefix. All sentences are joined with single spaces.
    """
    prefix, prefix_truncated = read_prefix_for_outline(path)
    items = outline_items(path, prefix)
    rg_cmd, symbol_cmd = suggested_commands(label, read_symbol)
    range_limit = min(max_line_range(), 120)

    if items:
        # Entries look like "line N: <label> <name>"; pull out the name.
        first_name = items[0].split(" ", 3)[-1].split(" ", 1)[-1]
        concrete_cmd = shlex.join(shlex.split(read_symbol) + [label, first_name])
        symbol_step = f"2) Read a symbol slice: `{concrete_cmd}` (or `{symbol_cmd}`)"
        outline_sentence = "Top-level outline: " + "; ".join(items)
    else:
        symbol_step = f"2) Read a symbol slice when you know the name: `{symbol_cmd}`"
        outline_sentence = "Top-level outline: unavailable from the bounded prefix; search first."

    sentences = [
        f"[context-guard-kit] Large Read blocked for {label} ({size} bytes > {limit} byte guard).",
        "Progressive read ladder:",
        f"1) Search names/errors: `{rg_cmd}`",
        symbol_step,
        "Plugin installs can use `context-guard-read-symbol` directly.",
        f"3) If no symbol fits, use Read with offset=0 limit={range_limit} and then narrow further.",
        f"File outline: estimated_lines={line_estimate(prefix, size, prefix_truncated)}",
        outline_sentence,
        "Use full-file Read only after these smaller queries fail.",
        f"Set {GUARD_ENV}=0 only for a deliberate local override.",
    ]
    return " ".join(sentences)
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
def read_guard_fingerprint(path: Path, label: str, size: int) -> str:
    """Return a 16-hex-character key for one version of a blocked file.

    The key is a SHA-256 prefix over *label*, *size* and the file's
    modification time in nanoseconds (0 when the file cannot be stat'ed).
    """
    try:
        info = path.stat()
    except OSError:
        mtime = 0
    else:
        mtime = getattr(info, "st_mtime_ns", int(info.st_mtime * 1_000_000_000))
    basis = "\0".join((label, str(size), str(mtime)))
    return hashlib.sha256(basis.encode("utf-8", errors="replace")).hexdigest()[:16]
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
def load_read_guard_state(root: Path) -> dict[str, Any]:
    """Load the repeated-read state stored under *root*.

    Returns an empty dict when the state directory or file is a symlink, when
    the file is missing or unreadable, when it is not valid UTF-8 JSON, or
    when the JSON document is not an object.
    """
    state_dir = root / READ_GUARD_STATE_DIR
    state_file = state_dir / READ_GUARD_STATE_FILE
    try:
        symlinked = state_dir.is_symlink() or state_file.is_symlink()
        if symlinked or not state_file.is_file():
            return {}
        parsed = json.loads(state_file.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError, UnicodeDecodeError):
        return {}
    if isinstance(parsed, dict):
        return parsed
    return {}
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
def save_read_guard_state(root: Path, state: dict[str, Any]) -> None:
    """Persist the repeated-read state under *root* on a best-effort basis.

    The state is written to a uniquely named temporary file created with
    ``O_EXCL`` (plus ``O_NOFOLLOW``/``O_CLOEXEC`` where available) and mode
    0600, then moved over the state file with ``os.replace``. Nothing is
    written when the state directory exists but is not a directory, or when
    the directory or file is a symlink.

    Failures are swallowed, and the temporary file is removed on any write
    failure. That includes serialisation errors such as ``UnicodeEncodeError``
    (a ``ValueError``) from lone surrogates, which would otherwise leave the
    temporary file behind and escape this function.
    """
    state_dir = root / READ_GUARD_STATE_DIR
    state_file = state_dir / READ_GUARD_STATE_FILE
    try:
        if state_dir.exists() and not state_dir.is_dir():
            return
        if state_dir.is_symlink() or state_file.is_symlink():
            return
        state_dir.mkdir(mode=0o700, exist_ok=True)
        try:
            os.chmod(state_dir, 0o700)
        except OSError:
            pass
        tmp = state_file.with_name(f".read-guard-{os.getpid()}-{secrets.token_hex(16)}.tmp")
        flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
        if hasattr(os, "O_NOFOLLOW"):
            flags |= os.O_NOFOLLOW
        if hasattr(os, "O_CLOEXEC"):
            flags |= os.O_CLOEXEC
        fd = -1
        try:
            fd = os.open(str(tmp), flags, 0o600)
            with os.fdopen(fd, "w", encoding="utf-8") as handle:
                # The file object now owns the descriptor.
                fd = -1
                json.dump(state, handle, ensure_ascii=False)
            # Replace only after the file object is closed and flushed.
            os.replace(tmp, state_file)
        except (OSError, TypeError, ValueError):
            if fd != -1:
                try:
                    os.close(fd)
                except OSError:
                    pass
            try:
                tmp.unlink()
            except OSError:
                pass
            return
        try:
            os.chmod(state_file, 0o600)
        except OSError:
            pass
    except OSError:
        return
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
def record_read_guard_attempt(root: Path, fp: str) -> int:
    """Increment and persist the block counter for fingerprint *fp*.

    The updated entry is moved to the end of the mapping, and when more than
    ``READ_GUARD_STATE_MAX_ITEMS`` fingerprints are tracked the earliest ones
    are dropped. Malformed stored data is treated as a zero count.

    Returns:
        The new count for *fp*, between 1 and 1_000_001.
    """
    state = load_read_guard_state(root)
    attempts = state.get("attempts")
    if not isinstance(attempts, dict):
        attempts = {}
    # Pop and re-insert so the dict's insertion order tracks recency.
    previous = attempts.pop(fp, None)
    prior = previous.get("count", 0) if isinstance(previous, dict) else 0
    count = bounded_int(prior, 0, 0, 1_000_000) + 1
    attempts[fp] = {"count": count}
    surplus = len(attempts) - READ_GUARD_STATE_MAX_ITEMS
    if surplus > 0:
        for stale in list(attempts)[:surplus]:
            del attempts[stale]
    state["attempts"] = attempts
    save_read_guard_state(root, state)
    return count
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
def repeated_read_hint(count: int) -> str:
    """Return the extra sentence appended when a file was blocked before.

    Empty for a first attempt (*count* below 2); otherwise a sentence with a
    leading space that reports *count*.
    """
    if count >= 2:
        opening = f" Repeated-read dedup: this same oversized file fingerprint has been blocked {count} times; "
        advice = "reuse the previous ladder and query a symbol or line range instead of retrying full-file Read."
        return opening + advice
    return ""
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
def deny_response(reason: str) -> dict[str, Any]:
    """Build the PreToolUse hook output that denies the call with *reason*."""
    decision = {
        "hookEventName": "PreToolUse",
        "permissionDecision": "deny",
        "permissionDecisionReason": reason,
    }
    return {"hookSpecificOutput": decision}
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
def _emit_allow() -> int:
    """Print the empty JSON object that lets the tool call proceed; return 0."""
    print("{}")
    return 0


def _emit_deny(reason: str) -> int:
    """Print a PreToolUse deny decision carrying *reason*; return 0."""
    print(json.dumps(deny_response(reason), ensure_ascii=False))
    return 0


def _symlink_reason(label: str) -> str:
    """Return the deny text used when the requested path traverses a symlink."""
    return (
        f"[context-guard-kit] Read blocked for {label}: requested path traverses a symlink. "
        "Use a real project file path before reading or extracting symbols."
    )


def main() -> int:
    """Run the hook: read a payload from stdin and print a decision to stdout.

    Prints ``{}`` to allow the call when the guard is disabled by environment
    variable, the tool is not ``Read``, no path was supplied, the path cannot
    name a regular file, the file is within the byte limit, or a bounded line
    range was requested. Prints a deny decision for an unparseable payload, a
    path that traverses a symlink, a file that cannot be inspected safely, or
    an oversized whole-file read.

    Beyond ``JSONDecodeError``, stdin that is not valid UTF-8 or that nests
    too deeply is also reported as an invalid payload; an unexpandable ``~user``
    prefix is kept literally; and a path the OS rejects outright (for example
    one with an embedded NUL) is passed through instead of crashing the hook.

    Returns:
        Always 0; the decision is carried in the printed JSON.
    """
    if truthy_disabled(env_value(GUARD_ENV, LEGACY_GUARD_ENV)):
        return _emit_allow()
    try:
        payload = json.load(sys.stdin)
    except (ValueError, RecursionError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"context-guard-guard-read: invalid hook JSON: {exc}", file=sys.stderr)
        return _emit_deny(
            "[context-guard-kit] Read blocked because the hook payload was invalid JSON. Retry the tool call."
        )
    if not isinstance(payload, dict):
        return _emit_deny(
            "[context-guard-kit] Read blocked because the hook payload was not a JSON object. Retry the tool call."
        )
    current_tool = tool_name(payload)
    if current_tool and current_tool != "Read":
        return _emit_allow()

    raw_path = read_path_from_payload(payload)
    if not raw_path:
        return _emit_allow()
    root = Path.cwd().resolve()
    try:
        path = Path(raw_path).expanduser()
    except RuntimeError:
        # The home directory for "~" or "~user" could not be determined.
        path = Path(raw_path)
    if not path.is_absolute():
        path = root / path
    path = normalize_allowed_first_absolute_symlink(path)

    try:
        traverses_symlink = has_symlink_component(path)
    except ValueError:
        # The path cannot be handed to the OS at all; the read will fail on its own.
        return _emit_allow()
    if traverses_symlink:
        return _emit_deny(_symlink_reason(safe_label(path, root)))

    try:
        size = regular_file_size_no_symlink(path)
    except ValueError:
        return _emit_allow()
    except OSError as exc:
        if exc.errno == errno.ELOOP:
            return _emit_deny(_symlink_reason(safe_label(path, root)))
        if exc.errno in {errno.EINVAL, errno.ENOTDIR, errno.ENOENT}:
            # Not a regular file, or missing: nothing for this guard to measure.
            return _emit_allow()
        label = safe_label(path, root)
        detail = compact_hook_text(exc.strerror or exc.__class__.__name__, 80)
        print(f"context-guard-guard-read: could not safely inspect requested file: {detail}", file=sys.stderr)
        return _emit_deny(
            f"[context-guard-kit] Read blocked for {label}: the guard could not safely inspect the file "
            f"({detail}). Use a bounded line range or verify the path locally first."
        )

    limit = max_bytes()
    if size <= limit:
        return _emit_allow()
    if bounded_line_range_requested(payload):
        return _emit_allow()

    label = safe_label(path, root)
    read_symbol = find_read_symbol_command()
    try:
        attempt_count = record_read_guard_attempt(root, read_guard_fingerprint(path, label, size))
    except Exception:
        # The counter is advisory; never let it prevent the deny response.
        attempt_count = 1
    reason = progressive_read_ladder(path, label, size, limit, read_symbol) + repeated_read_hint(attempt_count)
    return _emit_deny(reason)
|
|
687
|
-
|
|
688
|
-
|
|
689
|
-
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|