@ictechgy/context-guard 0.4.9 → 0.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/CHANGELOG.md +16 -0
  2. package/README.ko.md +41 -24
  3. package/README.md +66 -26
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  8. package/docs/distribution.md +10 -7
  9. package/docs/experimental-benchmark-fixtures.md +8 -1
  10. package/package.json +3 -6
  11. package/packaging/homebrew/context-guard.rb.template +1 -1
  12. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  13. package/plugins/context-guard/README.ko.md +9 -6
  14. package/plugins/context-guard/README.md +21 -13
  15. package/plugins/context-guard/bin/context-guard +113 -26
  16. package/plugins/context-guard/bin/context-guard-artifact +542 -46
  17. package/plugins/context-guard/bin/context-guard-cache-score +380 -0
  18. package/plugins/context-guard/bin/context-guard-compress +146 -1
  19. package/plugins/context-guard/bin/context-guard-cost +783 -4
  20. package/plugins/context-guard/bin/context-guard-experiments +99 -18
  21. package/plugins/context-guard/bin/context-guard-failed-nudge +3 -0
  22. package/plugins/context-guard/bin/context-guard-filter +163 -7
  23. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  24. package/plugins/context-guard/bin/context-guard-pack +602 -43
  25. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  26. package/plugins/context-guard/bin/context-guard-setup +165 -31
  27. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  28. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  29. package/plugins/context-guard/bin/context-guard-tool-prune +241 -1
  30. package/plugins/context-guard/lib/context_guard_commands.py +206 -0
  31. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  32. package/context-guard-kit/README.md +0 -91
  33. package/context-guard-kit/benchmark_runner.py +0 -2401
  34. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  35. package/context-guard-kit/context_compress.py +0 -695
  36. package/context-guard-kit/context_escrow.py +0 -935
  37. package/context-guard-kit/context_filter.py +0 -637
  38. package/context-guard-kit/context_guard_cli.py +0 -325
  39. package/context-guard-kit/context_guard_diet.py +0 -1711
  40. package/context-guard-kit/context_pack.py +0 -2713
  41. package/context-guard-kit/cost_guard.py +0 -2349
  42. package/context-guard-kit/experimental_registry.py +0 -4348
  43. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  44. package/context-guard-kit/guard_large_read.py +0 -690
  45. package/context-guard-kit/hook_secret_patterns.py +0 -43
  46. package/context-guard-kit/read_symbol.py +0 -483
  47. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  48. package/context-guard-kit/sanitize_output.py +0 -725
  49. package/context-guard-kit/settings.example.json +0 -67
  50. package/context-guard-kit/setup_wizard.py +0 -2515
  51. package/context-guard-kit/statusline.sh +0 -362
  52. package/context-guard-kit/statusline_merged.sh +0 -157
  53. package/context-guard-kit/tool_schema_pruner.py +0 -837
  54. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -1,690 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Claude Code PreToolUse hook: block large whole-file Read calls.
3
-
4
- The hook nudges Claude toward symbol-scoped reads before a huge file is inserted
5
- into the conversation. It is opt-in through project settings and can be disabled
6
- with CONTEXT_GUARD_READ_GUARD=0. Legacy CLAUDE_TOKEN_* environment variables
7
- remain supported for existing project settings.
8
- """
9
- from __future__ import annotations
10
-
11
- import errno
12
- import hashlib
13
- import importlib.util
14
- import json
15
- import os
16
- import re
17
- import secrets
18
- import shlex
19
- import stat
20
- import sys
21
- from pathlib import Path
22
- from typing import Any
23
-
24
# Directory that contains this script (symlinks resolved); the helper-module
# search in _load_hook_secret_patterns starts from here.
SCRIPT_DIR = Path(__file__).resolve().parent
25
-
26
-
27
def _load_hook_secret_patterns():
    """Load ``hook_secret_patterns.py`` as a module and return it.

    The helper is looked up next to this script first and then in the sibling
    ``lib`` directory of the script's parent. The first candidate that exists
    and yields a usable import spec is executed and returned.

    Raises:
        ImportError: when no candidate location provided a loadable helper.
    """
    candidates = [
        base / "hook_secret_patterns.py"
        for base in (SCRIPT_DIR, SCRIPT_DIR.parent / "lib")
    ]
    for candidate in candidates:
        if candidate.is_file():
            spec = importlib.util.spec_from_file_location(
                "_claude_token_hook_secret_patterns", candidate
            )
            if spec is not None and spec.loader is not None:
                loaded = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(loaded)
                return loaded
    raise ImportError(
        "hook_secret_patterns.py not found in "
        + ", ".join(str(candidate) for candidate in candidates)
    )
41
-
42
-
43
# Helper module loaded at import time; it supplies the control-character regex
# and the sensitive-label detector used when building path labels.
_hook_secret_patterns = _load_hook_secret_patterns()
CONTROL_CHAR_RE = _hook_secret_patterns.CONTROL_CHAR_RE
hook_label_has_sensitive_evidence = _hook_secret_patterns.hook_label_has_sensitive_evidence

# Default and upper bound for the file-size threshold (see max_bytes()).
DEFAULT_MAX_BYTES = 48_000
# Default and upper bound for an acceptable Read `limit` (see max_line_range()).
DEFAULT_MAX_LINE_RANGE = 400
MAX_BYTES_LIMIT = 1_000_000
MAX_LINE_RANGE_LIMIT = 20_000
# Bounds for the outline shown in the deny message: bytes read from the file
# prefix and number of outline entries reported.
OUTLINE_MAX_BYTES = 200_000
OUTLINE_MAX_ITEMS = 12
# Location (relative to the working directory) and size cap of the
# repeated-attempt cache.
READ_GUARD_STATE_DIR = Path(".context-guard")
READ_GUARD_STATE_FILE = "read-guard-cache.json"
READ_GUARD_STATE_MAX_ITEMS = 20
# Environment variables; each current name has a legacy CLAUDE_TOKEN_* fallback.
GUARD_ENV = "CONTEXT_GUARD_READ_GUARD"
LEGACY_GUARD_ENV = "CLAUDE_TOKEN_READ_GUARD"
MAX_BYTES_ENV = "CONTEXT_GUARD_READ_GUARD_MAX_BYTES"
LEGACY_MAX_BYTES_ENV = "CLAUDE_TOKEN_READ_GUARD_MAX_BYTES"
MAX_LINE_RANGE_ENV = "CONTEXT_GUARD_READ_GUARD_MAX_LINES"
LEGACY_MAX_LINE_RANGE_ENV = "CLAUDE_TOKEN_READ_GUARD_MAX_LINES"
# Default character budget for compact_hook_text().
PATH_LABEL_MAX_CHARS = 160
# First-level absolute symlinks that may be rewritten to their targets before
# the no-follow traversal (presumably the macOS /tmp and /var aliases — confirm).
ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
    "tmp": Path("/private/tmp"),
    "var": Path("/private/var"),
}
67
-
68
-
69
def truthy_disabled(value: str | None) -> bool:
    """Return True when *value* spells out an explicit "off" setting.

    Recognised spellings (case-insensitive, surrounding whitespace ignored):
    ``0``, ``false``, ``no``, ``off`` and ``disabled``. Missing or empty
    values are not treated as disabled.
    """
    if not value:
        return False
    normalized = str(value).strip().lower()
    return normalized in ("0", "false", "no", "off", "disabled")
71
-
72
-
73
def env_value(name: str, legacy_name: str | None = None) -> str | None:
    """Read an environment variable, falling back to its legacy name.

    The legacy variable is consulted only when *name* is entirely unset; an
    empty string stored under *name* still wins over the legacy value.
    """
    primary = os.environ.get(name)
    if primary is None and legacy_name is not None:
        return os.environ.get(legacy_name)
    return primary
78
-
79
-
80
def bounded_env_int(name: str, legacy_name: str | None, default: int, minimum: int, maximum: int) -> int:
    """Parse an integer setting from the environment and clamp it.

    Returns *default* when the variable (or its legacy fallback) is unset,
    empty, or not parseable as an integer; otherwise the parsed value clamped
    to the inclusive range ``[minimum, maximum]``.
    """
    raw = env_value(name, legacy_name)
    if raw:
        try:
            return min(max(int(raw), minimum), maximum)
        except (TypeError, ValueError, OverflowError):
            pass
    return default
89
-
90
-
91
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
    """Coerce *value* to an int clamped to ``[minimum, maximum]``.

    Anything that ``int()`` rejects (wrong type, bad text, infinity) yields
    *default* unchanged.
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    if parsed < minimum:
        parsed = minimum
    return parsed if parsed < maximum else maximum
97
-
98
-
99
def max_bytes() -> int:
    """Return the configured size threshold in bytes, clamped to 1..MAX_BYTES_LIMIT."""
    return bounded_env_int(
        name=MAX_BYTES_ENV,
        legacy_name=LEGACY_MAX_BYTES_ENV,
        default=DEFAULT_MAX_BYTES,
        minimum=1,
        maximum=MAX_BYTES_LIMIT,
    )
101
-
102
-
103
def max_line_range() -> int:
    """Return the largest Read ``limit`` treated as bounded, clamped to 1..MAX_LINE_RANGE_LIMIT."""
    return bounded_env_int(
        name=MAX_LINE_RANGE_ENV,
        legacy_name=LEGACY_MAX_LINE_RANGE_ENV,
        default=DEFAULT_MAX_LINE_RANGE,
        minimum=1,
        maximum=MAX_LINE_RANGE_LIMIT,
    )
111
-
112
-
113
def tool_input(payload: dict[str, Any]) -> dict[str, Any]:
    """Return the tool-input mapping from a hook payload.

    Both the ``tool_input`` and ``toolInput`` spellings are accepted; the
    first truthy one is used. A non-dict value (or nothing at all) yields an
    empty dict.
    """
    for key in ("tool_input", "toolInput"):
        candidate = payload.get(key)
        if candidate:
            return candidate if isinstance(candidate, dict) else {}
    return {}
116
-
117
-
118
def read_path_from_payload(payload: dict[str, Any]) -> str:
    """Return the requested file path from the payload, or "" when absent.

    The keys ``file_path``, ``path`` and ``filePath`` are tried in that order;
    the first non-blank string wins and is returned unmodified.
    """
    data = tool_input(payload)
    candidates = (data.get(key) for key in ("file_path", "path", "filePath"))
    return next(
        (value for value in candidates if isinstance(value, str) and value.strip()),
        "",
    )
125
-
126
-
127
def tool_name(payload: dict[str, Any]) -> str:
    """Return the tool name from a hook payload, or "" when missing or not a string.

    Both the ``tool_name`` and ``toolName`` spellings are accepted; the first
    truthy one is used.
    """
    for key in ("tool_name", "toolName"):
        candidate = payload.get(key)
        if candidate:
            return candidate if isinstance(candidate, str) else ""
    return ""
130
-
131
-
132
def compact_hook_text(value: str, limit: int = PATH_LABEL_MAX_CHARS) -> str:
    """Collapse *value* to a single line and cap its length.

    Control characters matched by ``CONTROL_CHAR_RE`` become spaces, runs of
    whitespace collapse to one space, and text longer than *limit* is cut and
    suffixed with ``...[truncated]``.
    """
    without_controls = CONTROL_CHAR_RE.sub(" ", value.strip())
    collapsed = " ".join(without_controls.split())
    if len(collapsed) <= limit:
        return collapsed
    # Reserve 15 characters so the marker fits inside the limit.
    return collapsed[: limit - 15].rstrip() + "...[truncated]"
137
-
138
-
139
def anonymized_path_label(path: Path) -> str:
    """Return a label that identifies *path* without revealing any of it.

    The label is ``redacted-path#path:<digest>`` where ``<digest>`` is the
    first 12 hex characters of the SHA-256 of the resolved path string.

    If the path cannot be resolved, the unresolved path string is hashed
    instead. ``RuntimeError`` is handled alongside ``OSError`` because
    ``Path.resolve()`` reports symlink loops that way before Python 3.13;
    without it a looping symlink would crash the hook.
    """
    try:
        raw = str(path.resolve())
    except (OSError, RuntimeError):
        raw = str(path)
    digest = hashlib.sha256(raw.encode("utf-8", "replace")).hexdigest()[:12]
    return f"redacted-path#path:{digest}"
146
-
147
-
148
def bounded_line_range_requested(payload: dict[str, Any]) -> bool:
    """Return True when the Read call already asks for a small line window.

    A request counts as bounded when ``limit`` is an integer in
    ``1..max_line_range()`` and ``offset``, if given, is a non-negative
    integer. Missing, malformed, or out-of-range values return False so the
    size guard still applies.

    ``OverflowError`` is caught as well as ``TypeError``/``ValueError``:
    ``json.load`` accepts ``Infinity``, and ``int(float("inf"))`` raises
    ``OverflowError``, which previously escaped and crashed the hook.
    """
    data = tool_input(payload)
    raw_limit = data.get("limit")
    if raw_limit is None:
        return False
    try:
        limit = int(raw_limit)
    except (TypeError, ValueError, OverflowError):
        return False
    if limit <= 0 or limit > max_line_range():
        return False
    raw_offset = data.get("offset")
    if raw_offset is not None:
        try:
            if int(raw_offset) < 0:
                return False
        except (TypeError, ValueError, OverflowError):
            return False
    return True
167
-
168
-
169
def safe_label(path: Path, root: Path) -> str:
    """Return a display label for *path* that is safe to put in hook output.

    Paths inside *root* are shown relative to it. Paths outside *root* are
    shown as ``<file name>#path:<digest>``. Whenever the sensitive-label
    detector flags the text that would be shown, it is replaced with
    ``redacted-path`` and only the digest identifies the file. All visible
    text goes through ``compact_hook_text``.

    ``Path.resolve()`` raises ``RuntimeError`` for symlink loops before
    Python 3.13, and this function is called for symlinked paths, so that
    error is handled like ``OSError`` by falling back to the unresolved path.
    """
    try:
        resolved = path.resolve()
    except (OSError, RuntimeError):
        resolved = path
    try:
        resolved_root = root.resolve()
    except (OSError, RuntimeError):
        resolved_root = root
    # One digest serves both redaction branches; hashing the already-resolved
    # string avoids resolving the path a second time.
    digest = hashlib.sha256(str(resolved).encode("utf-8", "replace")).hexdigest()[:12]
    try:
        label = resolved.relative_to(resolved_root).as_posix()
    except ValueError:
        # Outside the project root: never print the full path.
        name = path.name or "path"
        if hook_label_has_sensitive_evidence(name):
            name = "redacted-path"
        else:
            name = compact_hook_text(name)
        return f"{name or 'path'}#path:{digest}"
    if hook_label_has_sensitive_evidence(label):
        return f"redacted-path#path:{digest}"
    return compact_hook_text(label) or "path"
191
-
192
-
193
def has_symlink_component(path: Path) -> bool:
    """Report whether *path* itself or any of its leading components is a symlink.

    The path is rebuilt one component at a time (starting from the anchor for
    absolute paths) and every intermediate prefix is tested.
    """
    if path.is_symlink():
        return True
    absolute = path.is_absolute()
    prefix = Path(path.anchor) if absolute else Path()
    # The anchor is the starting point, not a component to test.
    names = [part for part in path.parts if not (absolute and part == path.anchor)]
    for name in names:
        prefix = prefix / name
        if prefix.is_symlink():
            return True
    return False
205
-
206
-
207
def base_open_flags() -> int:
    """Return read-only open flags plus O_CLOEXEC and O_NONBLOCK where the platform defines them."""
    optional = getattr(os, "O_CLOEXEC", 0) | getattr(os, "O_NONBLOCK", 0)
    return os.O_RDONLY | optional
212
-
213
-
214
def no_follow_flag() -> int:
    """Return ``os.O_NOFOLLOW`` when the platform defines it, otherwise 0."""
    return os.O_NOFOLLOW if hasattr(os, "O_NOFOLLOW") else 0
216
-
217
-
218
def directory_flag() -> int:
    """Return ``os.O_DIRECTORY`` when the platform defines it, otherwise 0."""
    return os.O_DIRECTORY if hasattr(os, "O_DIRECTORY") else 0
220
-
221
-
222
def normalized_link_target(parent: Path, raw_target: str) -> Path:
    """Turn a raw ``readlink`` result into a normalised path.

    Relative targets are interpreted relative to *parent*; the result is
    lexically normalised with ``os.path.normpath`` (no filesystem access).
    """
    candidate = Path(raw_target)
    joined = candidate if candidate.is_absolute() else parent / candidate
    return Path(os.path.normpath(str(joined)))
227
-
228
-
229
def normalize_allowed_first_absolute_symlink(path: Path) -> Path:
    """Replace an allow-listed first-level symlink with its real target.

    Only absolute paths whose first component appears in
    ``ALLOWED_FIRST_ABSOLUTE_SYMLINKS`` are considered, and only when that
    component really is a symlink pointing at the expected target. In every
    other case (including any OS error while checking) *path* is returned
    unchanged.
    """
    parts = path.parts
    if len(parts) < 2 or not path.is_absolute():
        return path
    anchor, head, tail = Path(path.anchor), parts[1], parts[2:]
    canonical = ALLOWED_FIRST_ABSOLUTE_SYMLINKS.get(head)
    if canonical is None:
        return path
    alias = anchor / head
    try:
        is_link = stat.S_ISLNK(os.lstat(alias).st_mode)
        actual = normalized_link_target(anchor, os.readlink(alias)) if is_link else None
    except OSError:
        return path
    if actual != canonical:
        return path
    return canonical.joinpath(*tail)
246
-
247
-
248
def open_directory_at(parent_fd: int, component: str, full_path: Path) -> int:
    """Open the directory *component* relative to *parent_fd* without following symlinks.

    Args:
        parent_fd: open descriptor of the directory that contains *component*.
        component: single path component to open.
        full_path: the complete requested path; used only in error messages.

    Returns:
        A new file descriptor for the directory. The caller must close it.

    Raises:
        OSError: ``ELOOP`` when the component is a symlink or changed between
            the pre-open stat and the open; ``ENOTDIR`` when it is not a
            directory; any other error from ``os.open`` is propagated.
    """
    # Pre-open lstat (None when the platform lacks dir_fd/no-follow stat support).
    component_stat = lstat_at_no_follow(parent_fd, component)
    if component_stat is not None:
        if stat.S_ISLNK(component_stat.st_mode):
            raise OSError(errno.ELOOP, "path component must not be a symlink", str(full_path))
        if not stat.S_ISDIR(component_stat.st_mode):
            raise OSError(errno.ENOTDIR, "path component is not a directory", str(full_path))
    try:
        fd = os.open(component, base_open_flags() | directory_flag() | no_follow_flag(), dir_fd=parent_fd)
    except OSError as exc:
        # The stat said "real directory" a moment ago, so these errnos mean the
        # entry was swapped in between; report that uniformly as ELOOP.
        if component_stat is not None and exc.errno in {errno.ELOOP, errno.ENOTDIR, errno.ENOENT, errno.EINVAL}:
            raise OSError(errno.ELOOP, "path component changed while opening", str(full_path)) from exc
        raise
    try:
        opened = os.fstat(fd)
        if component_stat is not None:
            # The opened object must be the same directory that was stat'ed.
            if not stat.S_ISDIR(opened.st_mode) or not os.path.samestat(component_stat, opened):
                raise OSError(errno.ELOOP, "path component changed while opening", str(full_path))
        elif not stat.S_ISDIR(opened.st_mode):
            raise OSError(errno.ENOTDIR, "path component is not a directory", str(full_path))
        return fd
    except Exception:
        # Never leak the descriptor when validation fails.
        os.close(fd)
        raise
272
-
273
-
274
def lstat_no_symlink_components(path: Path) -> os.stat_result:
    """Walk *path* component by component with ``lstat`` and return the final result.

    Raises:
        OSError: ``EINVAL`` when the path has no components beyond its anchor,
            ``ELOOP`` when any component is a symlink, ``ENOTDIR`` when a
            non-final component is not a directory, or whatever ``lstat``
            itself raises (for example for a missing entry).
    """
    absolute = path.is_absolute()
    names = path.parts[1:] if absolute else path.parts
    if not names:
        raise OSError(errno.EINVAL, "requested path is not a regular file", str(path))

    walked = Path(path.anchor) if absolute else Path()
    final_index = len(names) - 1
    info = None
    for position, name in enumerate(names):
        walked = walked / name
        info = walked.lstat()
        mode = info.st_mode
        if stat.S_ISLNK(mode):
            raise OSError(errno.ELOOP, "requested path must not traverse symlinks", str(path))
        if position != final_index and not stat.S_ISDIR(mode):
            raise OSError(errno.ENOTDIR, "path component is not a directory", str(path))
    assert info is not None
    return info
294
-
295
-
296
def lstat_at_no_follow(dir_fd: int, component: str) -> os.stat_result | None:
    """``lstat`` *component* relative to *dir_fd*, or return None if unsupported.

    None is returned when ``os.stat`` does not support ``dir_fd`` or
    ``follow_symlinks`` on this platform, so callers can fall back to
    post-open validation only.
    """
    dir_fd_ok = os.stat in getattr(os, "supports_dir_fd", set())
    no_follow_ok = os.stat in getattr(os, "supports_follow_symlinks", set())
    if not (dir_fd_ok and no_follow_ok):
        return None
    return os.stat(component, dir_fd=dir_fd, follow_symlinks=False)
302
-
303
-
304
def open_regular_no_symlink(path: Path) -> int:
    """Open a regular file after no-follow traversal of every path component.

    Returns:
        A read-only file descriptor for the file. The caller must close it.

    Raises:
        OSError: ``ELOOP`` when a component is a symlink or the file changed
            between the pre-open stat and the open; ``EINVAL`` when the
            target is not a regular file or the path has no components;
            ``ENOTDIR`` and other errors come from the component walk or
            from ``os.open``.
    """
    # Rewrite an allow-listed first-level alias so the walk below does not
    # reject it as a symlink.
    path = normalize_allowed_first_absolute_symlink(path)
    if os.open not in getattr(os, "supports_dir_fd", set()):
        # Fallback without dir_fd support: lstat every component by path, open
        # with O_NOFOLLOW, then confirm the opened file is the one stat'ed.
        before = lstat_no_symlink_components(path)
        if not stat.S_ISREG(before.st_mode):
            raise OSError(errno.EINVAL, "requested path must be a regular file", str(path))
        flags = base_open_flags() | no_follow_flag()
        fd = os.open(path, flags)
        try:
            opened = os.fstat(fd)
            if not stat.S_ISREG(opened.st_mode) or not os.path.samestat(before, opened):
                raise OSError(errno.ELOOP, "requested path changed while opening", str(path))
            return fd
        except Exception:
            os.close(fd)
            raise

    # dir_fd path: descend one directory descriptor at a time.
    components = list(path.parts)
    if path.is_absolute() and components:
        components = components[1:]  # drop the anchor; it is opened as the root below
    if not components:
        raise OSError(errno.EINVAL, "requested path is not a regular file", str(path))
    root = path.anchor if path.is_absolute() else "."
    dir_fd = os.open(root or ".", base_open_flags() | directory_flag())
    try:
        for component in components[:-1]:
            next_fd = open_directory_at(dir_fd, component, path)
            # Only the newest directory descriptor is kept open.
            os.close(dir_fd)
            dir_fd = next_fd
        # None when the platform cannot lstat relative to a descriptor.
        before = lstat_at_no_follow(dir_fd, components[-1])
        if before is not None:
            if stat.S_ISLNK(before.st_mode):
                raise OSError(errno.ELOOP, "requested path must not be a symlink", str(path))
            if not stat.S_ISREG(before.st_mode):
                raise OSError(errno.EINVAL, "requested path must be a regular file", str(path))
        fd = os.open(components[-1], base_open_flags() | no_follow_flag(), dir_fd=dir_fd)
        try:
            st = os.fstat(fd)
            if before is not None:
                # The opened file must be the same regular file that was stat'ed.
                if not stat.S_ISREG(st.st_mode) or not os.path.samestat(before, st):
                    raise OSError(errno.ELOOP, "requested path changed while opening", str(path))
            elif not stat.S_ISREG(st.st_mode):
                raise OSError(errno.EINVAL, "requested path must be a regular file", str(path))
            return fd
        except Exception:
            os.close(fd)
            raise
    finally:
        # The last directory descriptor is always released, on success or failure.
        os.close(dir_fd)
354
-
355
-
356
def regular_file_size_no_symlink(path: Path) -> int:
    """Return the byte size of a regular file opened without following symlinks.

    Any ``OSError`` raised by ``open_regular_no_symlink`` propagates; the
    descriptor is always closed before returning.
    """
    descriptor = open_regular_no_symlink(path)
    try:
        size = os.fstat(descriptor).st_size
    finally:
        os.close(descriptor)
    return size
363
-
364
-
365
def find_read_symbol_command() -> str:
    """Return the command string to suggest for symbol-scoped reads.

    The plugin wrapper name is preferred when it sits next to this script;
    the ``python3 context-guard-kit/read_symbol.py`` form is used only when
    the wrapper is absent and ``read_symbol.py`` is present. With neither
    file present the wrapper name is returned.
    """
    here = Path(__file__).resolve().parent
    wrapper = "context-guard-read-symbol"
    wrapper_present = (here / wrapper).exists()
    if not wrapper_present and (here / "read_symbol.py").exists():
        return "python3 context-guard-kit/read_symbol.py"
    return wrapper
372
-
373
-
374
def suggested_commands(label: str, read_symbol: str) -> tuple[str, str]:
    """Build the shell-quoted search and symbol-read commands for *label*.

    Returns:
        ``(rg_command, read_symbol_command)``; both contain placeholder
        arguments for the reader to fill in.
    """
    search_argv = ["rg", "-n", "<symbol-or-error>", "--", label]
    symbol_argv = [*shlex.split(read_symbol), label, "<SymbolName>"]
    return shlex.join(search_argv), shlex.join(symbol_argv)
378
-
379
-
380
def read_prefix_for_outline(path: Path, max_bytes: int = OUTLINE_MAX_BYTES) -> tuple[str, bool]:
    """Read up to *max_bytes* from the start of *path* for outline extraction.

    The file is opened through ``open_regular_no_symlink``. One extra byte is
    requested so truncation can be detected without a separate size check.

    Returns:
        ``(text, truncated)`` where *text* is the prefix decoded as UTF-8 with
        replacement characters, and *truncated* tells whether the file holds
        more than *max_bytes*. Any ``OSError`` yields ``("", False)``.
    """
    # An explicit sentinel tracks descriptor ownership; this replaces the
    # earlier `"fd" in locals()` introspection with the same behaviour.
    fd = -1
    try:
        fd = open_regular_no_symlink(path)
        with os.fdopen(fd, "rb") as handle:
            fd = -1  # the file object now owns and closes the descriptor
            data = handle.read(max_bytes + 1)
    except OSError:
        return "", False
    finally:
        if fd != -1:
            os.close(fd)
    truncated = len(data) > max_bytes
    if truncated:
        data = data[:max_bytes]
    return data.decode("utf-8", errors="replace"), truncated
395
-
396
-
397
def outline_kind_for_suffix(path: Path) -> str:
    """Map a file's extension (case-insensitive) to an outline kind.

    Unknown or missing extensions map to ``"text"``.
    """
    suffix = path.suffix.lower()
    kinds_by_suffix = (
        ("python", (".py",)),
        ("javascript", (".js", ".jsx", ".ts", ".tsx", ".mjs", ".cjs")),
        ("go", (".go",)),
        ("rust", (".rs",)),
        ("markdown", (".md", ".mdx", ".markdown")),
    )
    for kind, suffixes in kinds_by_suffix:
        if suffix in suffixes:
            return kind
    return "text"
410
-
411
-
412
# Outline kind -> ordered (label, regex) pairs. outline_items() matches each
# pattern against a stripped line, first hit wins; kinds with no entry here
# (such as "text") produce no outline.
OUTLINE_PATTERNS: dict[str, tuple[tuple[str, str], ...]] = {
    "python": (
        ("class", r"^class\s+([A-Za-z_][A-Za-z0-9_]*)\b"),
        ("function", r"^(?:async\s+def|def)\s+([A-Za-z_][A-Za-z0-9_]*)\b"),
    ),
    "javascript": (
        ("class", r"^(?:export\s+)?class\s+([A-Za-z_$][A-Za-z0-9_$]*)\b"),
        (
            "function",
            r"^(?:export\s+)?(?:async\s+)?function\s+([A-Za-z_$][A-Za-z0-9_$]*)\b",
        ),
        (
            "const",
            r"^(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][A-Za-z0-9_$]*)\s*=",
        ),
    ),
    "go": (
        # The optional parenthesised group skips a method receiver.
        ("function", r"^func\s+(?:\([^)]*\)\s*)?([A-Za-z_][A-Za-z0-9_]*)\b"),
        ("type", r"^type\s+([A-Za-z_][A-Za-z0-9_]*)\b"),
    ),
    "rust": (
        ("function", r"^(?:pub\s+)?(?:async\s+)?fn\s+([A-Za-z_][A-Za-z0-9_]*)\b"),
        ("type", r"^(?:pub\s+)?(?:struct|enum|trait)\s+([A-Za-z_][A-Za-z0-9_]*)\b"),
    ),
    "markdown": (
        # Heading levels 1-3 only; outline_items() reports a fixed placeholder
        # instead of the captured heading text.
        ("heading", r"^(#{1,3})\s+(.+?)\s*$"),
    ),
}
440
-
441
-
442
def outline_items(path: Path, text: str, *, limit: int = OUTLINE_MAX_ITEMS) -> list[str]:
    """Extract up to *limit* top-level outline entries from *text*.

    The patterns are chosen from the file extension of *path*. Blank lines
    are skipped, and for every kind except markdown so are lines that start
    with whitespace. Each entry reads ``line <n>: <label> <name>``; markdown
    headings are reported with the fixed placeholder ``<heading>`` rather
    than their text.
    """
    kind = outline_kind_for_suffix(path)
    matchers = [
        (label, re.compile(regex)) for label, regex in OUTLINE_PATTERNS.get(kind, ())
    ]
    if not matchers:
        return []

    is_markdown = kind == "markdown"
    found: list[str] = []
    for number, raw_line in enumerate(text.splitlines(), start=1):
        candidate = raw_line.strip()
        if not candidate:
            continue
        if raw_line[:1].isspace() and not is_markdown:
            continue
        for label, regex in matchers:
            hit = regex.match(candidate)
            if hit:
                name = "<heading>" if is_markdown else hit.group(1)
                found.append(f"line {number}: {label} {compact_hook_text(name, 80)}")
                break
        if len(found) >= limit:
            break
    return found
465
-
466
-
467
def line_estimate(prefix: str, size: int, truncated: bool) -> str:
    """Describe the file's line count, exact or extrapolated.

    When the whole file was read (*truncated* is False) or *prefix* is empty,
    the exact line count of *prefix* is returned as a string. Otherwise the
    count is extrapolated from the average bytes per line in *prefix* and the
    total *size*, formatted as ``~N (estimated from first M)``.
    """
    has_partial_last_line = bool(prefix) and not prefix.endswith("\n")
    lines = prefix.count("\n") + int(has_partial_last_line)
    if prefix and truncated:
        prefix_bytes = len(prefix.encode("utf-8", errors="replace"))
        avg = max(1.0, prefix_bytes / max(1, lines))
        estimated = int(size / avg)
        return f"~{estimated} (estimated from first {lines})"
    return str(lines)
474
-
475
-
476
def progressive_read_ladder(path: Path, label: str, size: int, limit: int, read_symbol: str) -> str:
    """Compose the deny message that steers the model toward smaller reads.

    The message lists three steps (search, symbol slice, bounded line range),
    then an estimated line count and a short top-level outline taken from the
    file prefix, and ends with the override hint. All sentences are joined
    with single spaces.
    """
    prefix, prefix_truncated = read_prefix_for_outline(path)
    outline = outline_items(path, prefix)
    rg_cmd, symbol_cmd = suggested_commands(label, read_symbol)
    range_limit = min(max_line_range(), 120)

    if outline:
        # Entries look like "line N: <label> <name>"; keep only the name.
        first_name = outline[0].split(" ", 3)[-1].split(" ", 1)[-1]
        concrete_cmd = shlex.join(shlex.split(read_symbol) + [label, first_name])
        symbol_step = f"2) Read a symbol slice: `{concrete_cmd}` (or `{symbol_cmd}`)"
        outline_step = "Top-level outline: " + "; ".join(outline)
    else:
        symbol_step = f"2) Read a symbol slice when you know the name: `{symbol_cmd}`"
        outline_step = "Top-level outline: unavailable from the bounded prefix; search first."

    sentences = (
        f"[context-guard-kit] Large Read blocked for {label} ({size} bytes > {limit} byte guard).",
        "Progressive read ladder:",
        f"1) Search names/errors: `{rg_cmd}`",
        symbol_step,
        "Plugin installs can use `context-guard-read-symbol` directly.",
        f"3) If no symbol fits, use Read with offset=0 limit={range_limit} and then narrow further.",
        f"File outline: estimated_lines={line_estimate(prefix, size, prefix_truncated)}",
        outline_step,
        "Use full-file Read only after these smaller queries fail.",
        f"Set {GUARD_ENV}=0 only for a deliberate local override.",
    )
    return " ".join(sentences)
502
-
503
-
504
def read_guard_fingerprint(path: Path, label: str, size: int) -> str:
    """Return a 16-hex-character fingerprint for one version of a file.

    The fingerprint hashes the display label, the size and the modification
    time in nanoseconds; a file that cannot be stat'ed contributes mtime 0.
    """
    try:
        info = path.stat()
    except OSError:
        mtime = 0
    else:
        mtime = getattr(info, "st_mtime_ns", int(info.st_mtime * 1_000_000_000))
    basis = "\0".join((f"{label}", f"{size}", f"{mtime}"))
    return hashlib.sha256(basis.encode("utf-8", errors="replace")).hexdigest()[:16]
512
-
513
-
514
def load_read_guard_state(root: Path) -> dict[str, Any]:
    """Load the repeated-attempt cache stored under *root*.

    An empty dict is returned when the state directory or file is a symlink,
    the file is missing, it cannot be read or decoded, or its JSON content is
    not an object.
    """
    directory = root / READ_GUARD_STATE_DIR
    cache_path = directory / READ_GUARD_STATE_FILE
    try:
        symlinked = directory.is_symlink() or cache_path.is_symlink()
        if symlinked or not cache_path.is_file():
            return {}
        parsed = json.loads(cache_path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError, UnicodeDecodeError):
        return {}
    if isinstance(parsed, dict):
        return parsed
    return {}
524
-
525
-
526
def save_read_guard_state(root: Path, state: dict[str, Any]) -> None:
    """Persist the repeated-attempt cache under *root*, best effort.

    The state is written to a uniquely named temporary file inside the state
    directory and then moved over the cache file with ``os.replace``. Every
    ``OSError`` is swallowed: failing to save only loses the attempt counter.
    Nothing is written when the state directory exists but is not a
    directory, or when the directory or the cache file is a symlink.
    """
    state_dir = root / READ_GUARD_STATE_DIR
    state_file = state_dir / READ_GUARD_STATE_FILE
    try:
        if state_dir.exists() and not state_dir.is_dir():
            return
        # Refuse to write through symlinks.
        if state_dir.is_symlink() or state_file.is_symlink():
            return
        state_dir.mkdir(mode=0o700, exist_ok=True)
        try:
            # Tighten permissions even when the directory already existed.
            os.chmod(state_dir, 0o700)
        except OSError:
            pass
        # Random suffix plus O_EXCL below: the temp file must be newly created.
        tmp = state_file.with_name(f".read-guard-{os.getpid()}-{secrets.token_hex(16)}.tmp")
        flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL
        if hasattr(os, "O_NOFOLLOW"):
            flags |= os.O_NOFOLLOW
        if hasattr(os, "O_CLOEXEC"):
            flags |= os.O_CLOEXEC
        fd = -1
        try:
            fd = os.open(str(tmp), flags, 0o600)
            with os.fdopen(fd, "w", encoding="utf-8") as handle:
                fd = -1  # the file object now owns the descriptor
                json.dump(state, handle, ensure_ascii=False)
            os.replace(tmp, state_file)
        except OSError:
            # Close a descriptor that never reached fdopen, then remove the
            # temp file if it was created.
            if fd != -1:
                try:
                    os.close(fd)
                except OSError:
                    pass
            try:
                tmp.unlink()
            except OSError:
                pass
            return
        try:
            os.chmod(state_file, 0o600)
        except OSError:
            pass
    except OSError:
        return
569
-
570
-
571
def record_read_guard_attempt(root: Path, fp: str) -> int:
    """Increment and persist the block counter for fingerprint *fp*.

    The entry is re-inserted at the end of the mapping so the oldest entries
    are the ones evicted once more than ``READ_GUARD_STATE_MAX_ITEMS`` are
    stored.

    Returns:
        The updated number of times this fingerprint has been blocked.
    """
    state = load_read_guard_state(root)
    attempts = state.get("attempts")
    if not isinstance(attempts, dict):
        attempts = {}

    # Pop then re-add: moves the fingerprint to the newest position.
    previous = attempts.pop(fp, None)
    prior_count = previous.get("count", 0) if isinstance(previous, dict) else 0
    count = bounded_int(prior_count, 0, 0, 1_000_000) + 1
    attempts[fp] = {"count": count}

    overflow = len(attempts) - READ_GUARD_STATE_MAX_ITEMS
    if overflow > 0:
        for stale in list(attempts)[:overflow]:
            attempts.pop(stale, None)

    state["attempts"] = attempts
    save_read_guard_state(root, state)
    return count
588
-
589
-
590
def repeated_read_hint(count: int) -> str:
    """Return the extra sentence appended when the same file was blocked before.

    The first block (``count`` below 2) adds nothing; later blocks report how
    many times the same fingerprint has been denied.
    """
    if count >= 2:
        return (
            f" Repeated-read dedup: this same oversized file fingerprint has been blocked {count} times; "
            "reuse the previous ladder and query a symbol or line range instead of retrying full-file Read."
        )
    return ""
597
-
598
-
599
def deny_response(reason: str) -> dict[str, Any]:
    """Build the PreToolUse hook response that denies the call with *reason*."""
    decision = {
        "hookEventName": "PreToolUse",
        "permissionDecision": "deny",
        "permissionDecisionReason": reason,
    }
    return {"hookSpecificOutput": decision}
607
-
608
-
609
def _emit_allow() -> int:
    """Print the empty hook response that lets the tool call proceed."""
    print("{}")
    return 0


def _emit_deny(reason: str) -> int:
    """Print a PreToolUse deny response carrying *reason*."""
    print(json.dumps(deny_response(reason), ensure_ascii=False))
    return 0


def _symlink_deny_reason(label: str) -> str:
    """Return the deny message used whenever the path traverses a symlink."""
    return (
        f"[context-guard-kit] Read blocked for {label}: requested path traverses a symlink. "
        "Use a real project file path before reading or extracting symbols."
    )


def main() -> int:
    """Run the PreToolUse hook: read the payload from stdin, print a decision.

    The decision is a JSON object on stdout: ``{}`` lets the call through and
    a ``deny`` response blocks it. The call is allowed when the guard is
    disabled, the tool is not ``Read``, no path is given, the path is not an
    existing regular file, the file is within the byte limit, or a bounded
    line range was requested. It is denied for malformed payloads, symlinked
    paths, files that cannot be inspected safely, and oversized whole-file
    reads.

    Returns:
        Always 0; the decision is carried by stdout, not the exit status.
    """
    if truthy_disabled(env_value(GUARD_ENV, LEGACY_GUARD_ENV)):
        return _emit_allow()
    try:
        payload = json.load(sys.stdin)
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # Undecodable stdin is as invalid as malformed JSON; previously the
        # UnicodeDecodeError escaped as a traceback.
        print(f"context-guard-guard-read: invalid hook JSON: {exc}", file=sys.stderr)
        return _emit_deny(
            "[context-guard-kit] Read blocked because the hook payload was invalid JSON. Retry the tool call."
        )
    if not isinstance(payload, dict):
        return _emit_deny(
            "[context-guard-kit] Read blocked because the hook payload was not a JSON object. Retry the tool call."
        )
    current_tool = tool_name(payload)
    if current_tool and current_tool != "Read":
        return _emit_allow()

    raw_path = read_path_from_payload(payload)
    if not raw_path:
        return _emit_allow()
    root = Path.cwd().resolve()
    try:
        path = Path(raw_path).expanduser()
    except RuntimeError:
        # expanduser() raises when the home directory of "~" or "~user" cannot
        # be determined; treat the text as a literal path instead of crashing.
        path = Path(raw_path)
    if not path.is_absolute():
        path = root / path
    path = normalize_allowed_first_absolute_symlink(path)
    try:
        traverses_symlink = has_symlink_component(path)
    except OSError:
        # Older Python versions let errors such as EACCES escape from
        # is_symlink(); the descriptor-based inspection below reports the
        # real error with a proper deny message.
        traverses_symlink = False
    if traverses_symlink:
        return _emit_deny(_symlink_deny_reason(safe_label(path, root)))
    try:
        size = regular_file_size_no_symlink(path)
    except ValueError:
        # For example an embedded NUL byte: the text cannot name a file, so
        # there is nothing to guard; the Read tool reports the bad path.
        return _emit_allow()
    except OSError as exc:
        if exc.errno == errno.ELOOP:
            return _emit_deny(_symlink_deny_reason(safe_label(path, root)))
        if exc.errno in {errno.EINVAL, errno.ENOTDIR, errno.ENOENT}:
            # Missing or non-regular target: not this guard's concern.
            return _emit_allow()
        label = safe_label(path, root)
        detail = compact_hook_text(exc.strerror or exc.__class__.__name__, 80)
        print(f"context-guard-guard-read: could not safely inspect requested file: {detail}", file=sys.stderr)
        return _emit_deny(
            f"[context-guard-kit] Read blocked for {label}: the guard could not safely inspect the file "
            f"({detail}). Use a bounded line range or verify the path locally first."
        )

    limit = max_bytes()
    if size <= limit:
        return _emit_allow()
    if bounded_line_range_requested(payload):
        return _emit_allow()

    label = safe_label(path, root)
    read_symbol = find_read_symbol_command()
    try:
        attempt_count = record_read_guard_attempt(root, read_guard_fingerprint(path, label, size))
    except Exception:
        # Deliberately broad: the attempt counter is advisory and must never
        # prevent the deny response from being produced.
        attempt_count = 1
    reason = progressive_read_ladder(path, label, size, limit, read_symbol) + repeated_read_hint(attempt_count)
    return _emit_deny(reason)
687
-
688
-
689
# Script entry point: run the hook and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())