@ictechgy/context-guard 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +49 -0
- package/LICENSE +201 -0
- package/NOTICE +4 -0
- package/README.ko.md +353 -0
- package/README.md +353 -0
- package/context-guard-kit/README.md +76 -0
- package/context-guard-kit/benchmark_runner.py +1898 -0
- package/context-guard-kit/claude_transcript_cost_audit.py +1591 -0
- package/context-guard-kit/context_compress.py +543 -0
- package/context-guard-kit/context_escrow.py +919 -0
- package/context-guard-kit/context_guard_cli.py +149 -0
- package/context-guard-kit/context_guard_diet.py +1036 -0
- package/context-guard-kit/context_pack.py +929 -0
- package/context-guard-kit/failed_attempt_nudge.py +567 -0
- package/context-guard-kit/guard_large_read.py +690 -0
- package/context-guard-kit/hook_secret_patterns.py +43 -0
- package/context-guard-kit/read_symbol.py +483 -0
- package/context-guard-kit/rewrite_bash_for_token_budget.py +501 -0
- package/context-guard-kit/sanitize_output.py +725 -0
- package/context-guard-kit/settings.example.json +67 -0
- package/context-guard-kit/setup_wizard.py +1724 -0
- package/context-guard-kit/statusline.sh +362 -0
- package/context-guard-kit/statusline_merged.sh +157 -0
- package/context-guard-kit/tool_schema_pruner.py +837 -0
- package/context-guard-kit/trim_command_output.py +1098 -0
- package/docs/distribution.md +55 -0
- package/package.json +70 -0
- package/packaging/homebrew/context-guard.rb.template +34 -0
- package/plugins/context-guard/.claude-plugin/plugin.json +41 -0
- package/plugins/context-guard/LICENSE +201 -0
- package/plugins/context-guard/NOTICE +4 -0
- package/plugins/context-guard/README.ko.md +135 -0
- package/plugins/context-guard/README.md +135 -0
- package/plugins/context-guard/bin/claude-read-symbol +6 -0
- package/plugins/context-guard/bin/claude-sanitize-output +6 -0
- package/plugins/context-guard/bin/claude-token-artifact +6 -0
- package/plugins/context-guard/bin/claude-token-audit +6 -0
- package/plugins/context-guard/bin/claude-token-bench +6 -0
- package/plugins/context-guard/bin/claude-token-diet +6 -0
- package/plugins/context-guard/bin/claude-token-failed-nudge +6 -0
- package/plugins/context-guard/bin/claude-token-guard-read +6 -0
- package/plugins/context-guard/bin/claude-token-rewrite-bash +6 -0
- package/plugins/context-guard/bin/claude-token-setup +6 -0
- package/plugins/context-guard/bin/claude-token-statusline +6 -0
- package/plugins/context-guard/bin/claude-token-statusline-merged +6 -0
- package/plugins/context-guard/bin/claude-trim-output +6 -0
- package/plugins/context-guard/bin/context-guard +149 -0
- package/plugins/context-guard/bin/context-guard-artifact +919 -0
- package/plugins/context-guard/bin/context-guard-audit +1591 -0
- package/plugins/context-guard/bin/context-guard-bench +1898 -0
- package/plugins/context-guard/bin/context-guard-compress +543 -0
- package/plugins/context-guard/bin/context-guard-diet +1036 -0
- package/plugins/context-guard/bin/context-guard-failed-nudge +567 -0
- package/plugins/context-guard/bin/context-guard-guard-read +690 -0
- package/plugins/context-guard/bin/context-guard-pack +929 -0
- package/plugins/context-guard/bin/context-guard-read-symbol +483 -0
- package/plugins/context-guard/bin/context-guard-rewrite-bash +501 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +725 -0
- package/plugins/context-guard/bin/context-guard-setup +1724 -0
- package/plugins/context-guard/bin/context-guard-statusline +362 -0
- package/plugins/context-guard/bin/context-guard-statusline-merged +157 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +837 -0
- package/plugins/context-guard/bin/context-guard-trim-output +1098 -0
- package/plugins/context-guard/brief/README.md +65 -0
- package/plugins/context-guard/brief/brief-mode.lite.md +29 -0
- package/plugins/context-guard/brief/brief-mode.standard.md +31 -0
- package/plugins/context-guard/brief/brief-mode.ultra.md +32 -0
- package/plugins/context-guard/lib/hook_secret_patterns.py +43 -0
- package/plugins/context-guard/skills/audit/SKILL.md +39 -0
- package/plugins/context-guard/skills/optimize/SKILL.md +48 -0
- package/plugins/context-guard/skills/setup/SKILL.md +40 -0
|
@@ -0,0 +1,1036 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Scan a project for Claude Code token-diet configuration gaps.
|
|
3
|
+
|
|
4
|
+
The scanner is intentionally local, read-only, and heuristic. It looks for
|
|
5
|
+
large always-in-context instruction files, missing read deny rules for bulky or
|
|
6
|
+
sensitive paths, and missing helper hooks/statusline settings that reduce token
|
|
7
|
+
burn during noisy command runs.
|
|
8
|
+
"""
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import errno
|
|
13
|
+
import hashlib
|
|
14
|
+
import json
|
|
15
|
+
import os
|
|
16
|
+
import re
|
|
17
|
+
import stat
|
|
18
|
+
import sys
|
|
19
|
+
from dataclasses import dataclass, field
|
|
20
|
+
from pathlib import Path
|
|
21
|
+
from typing import Any, Iterable
|
|
22
|
+
|
|
23
|
+
# Base names of agent instruction files (see context_surface_for_rel for the
# Claude / Codex / Gemini mapping of these names).
CONTEXT_FILE_NAMES = {"AGENTS.md", "CLAUDE.md", "GEMINI.md"}

# Project-relative rule files of other agents (exact relative paths).
CONTEXT_EXACT_REL_FILES = {
    ".clinerules",
    ".cursorrules",
    ".github/copilot-instructions.md",
    ".windsurfrules",
}

# Project-relative directories holding agent rule/command files.
# NOTE(review): the consumers of these two sets are outside this section.
CONTEXT_MD_DIRS = {
    ".claude/agents",
    ".claude/commands",
    ".claude/skills",
    ".clinerules",
    ".cursor/rules",
    ".windsurf/rules",
}

# Human-readable label for every surface key returned by context_surface_for_rel.
CONTEXT_SURFACE_LABELS = {
    "claude": "Claude Code instructions",
    "codex": "OpenAI Codex AGENTS.md",
    "gemini": "Gemini CLI instructions",
    "cursor": "Cursor rules",
    "windsurf": "Windsurf rules",
    "cline": "Cline rules",
    "copilot": "GitHub Copilot instructions",
}

# Directory names excluded by the scanner (presumably skipped while walking the
# project tree; the walker itself is not part of this section).
EXCLUDED_DIR_NAMES = {
    # VCS / tool state
    ".git",
    ".hg",
    ".omx",
    ".serena",
    ".vscode",
    # caches and virtual environments
    ".cache",
    ".mypy_cache",
    ".pytest_cache",
    ".ruff_cache",
    ".tox",
    ".venv",
    "__pycache__",
    # build output and third-party code
    ".next",
    "build",
    "coverage",
    "dist",
    "node_modules",
    "target",
    "vendor",
}

# Size limits and thresholds (bytes unless the name says lines).
MAX_CONTEXT_READ_BYTES = 512_000
MAX_SECRET_SCAN_BYTES = 5_000_000
MAX_SETTINGS_READ_BYTES = 256_000  # settings files above this size are rejected
DEFAULT_LARGE_CONTEXT_BYTES = 16_000
DEFAULT_HUGE_CONTEXT_BYTES = 64_000
DEFAULT_LONG_CONTEXT_LINES = 300

# (label, project-relative path, recommended permissions.deny entry) for bulky
# generated/cache directories.  Tuple order is the order findings are emitted in.
HEAVY_PROJECT_DENIES: tuple[tuple[str, str, str], ...] = tuple(
    (name, name, f"Read(./{name}/**)")
    for name in (
        "node_modules",
        "dist",
        "build",
        "coverage",
        "logs",
        "tmp",
        "target",
        ".next",
        ".venv",
        "vendor",
        ".context-guard",
        ".claude-token-optimizer",
    )
)

# Same triple layout for sensitive project-level files.
SENSITIVE_PROJECT_DENIES: tuple[tuple[str, str, str], ...] = tuple(
    (name, name, f"Read(./{name})")
    for name in (".env", ".env.*", ".npmrc", ".pypirc", ".netrc")
)

# (home path label, recommended permissions.deny entry) for credential dirs.
SENSITIVE_HOME_DENIES: tuple[tuple[str, str], ...] = tuple(
    (f"~/{name}", f"Read(~/{name}/**)")
    for name in (".ssh", ".aws", ".gnupg", ".kube", ".docker")
)

# Heuristic detector for secret-looking text.  The alternatives are joined into
# one case-insensitive, dot-matches-newline pattern.
SECRET_CONTENT_RE = re.compile(
    r"(?is)("
    + "|".join(
        (
            # PEM private key header
            r"-----BEGIN [A-Z0-9 ]*PRIVATE KEY-----",
            # AWS access key id shape
            r"AKIA[0-9A-Z]{16}",
            # GitHub token prefixes (ghp_, gho_, ghu_, ghs_, ghr_)
            r"gh[pousr]_[A-Za-z0-9_]{20,}",
            # Slack token prefixes
            r"xox[abprs]-[A-Za-z0-9-]{10,}",
            # Google API key prefix
            r"AIza[0-9A-Za-z_\-]{20,}",
            # HTTP Authorization header with Bearer/Basic credentials
            r"(?i:Authorization)\s*:\s*(?:Bearer|Basic)\s+[A-Za-z0-9._~+/=-]+",
            # key=value / key: value assignments with secret-like key names
            r"(?<![A-Za-z0-9])(?:api[_-]?key|token|secret|password|client[_-]?secret)\s*[:=]\s*[^\s]+",
        )
    )
    + r")"
)

# Replacement text for a path component that matches SECRET_CONTENT_RE.
REDACTED_PATH_COMPONENT = "[REDACTED-PATH-COMPONENT]"

# Substrings that identify the helper commands inside hook/statusline settings.
BASH_TRIM_COMMAND_MARKERS = (
    "context-guard-rewrite-bash",
    "claude-token-rewrite-bash",
    "rewrite_bash_for_token_budget.py",
)
LARGE_READ_GUARD_COMMAND_MARKERS = (
    "context-guard-guard-read",
    "claude-token-guard-read",
    "guard_large_read.py",
)
STATUSLINE_COMMAND_MARKERS = (
    "context-guard-statusline",
    "claude-token-statusline",
    "statusline.sh",
    "statusline_merged.sh",
)
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
@dataclass
class Finding:
    """A single scanner result.

    ``rule_id`` and ``instance_id`` are optional; when unset (or empty) the
    serialized form falls back to ``id`` for both.
    """

    id: str
    severity: str
    path: str
    message: str
    action: str
    evidence: dict[str, Any] = field(default_factory=dict)
    rule_id: str | None = None
    instance_id: str | None = None

    def as_dict(self) -> dict[str, Any]:
        """Return the finding as a plain dict, with id fallbacks applied."""
        fallback = self.id
        return dict(
            id=fallback,
            rule_id=self.rule_id or fallback,
            instance_id=self.instance_id or fallback,
            severity=self.severity,
            path=self.path,
            message=self.message,
            action=self.action,
            evidence=self.evidence,
        )
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def path_hash(path: Path) -> str:
    """Return the first 12 hex digits of the SHA-256 of ``str(path)``."""
    encoded = str(path).encode("utf-8", "replace")
    digest = hashlib.sha256(encoded).hexdigest()
    return digest[:12]
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def text_hash(text: str) -> str:
    """Return the first 12 hex digits of the SHA-256 of *text* (UTF-8 encoded)."""
    hasher = hashlib.sha256(text.encode("utf-8", "replace"))
    return hasher.hexdigest()[:12]
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def safe_id_part(text: str) -> str:
    """Turn *text* into a lowercase ``a-z0-9`` slug usable inside a finding id.

    ``*`` is spelled out as the word "star"; every other run of characters
    outside ``a-z0-9`` collapses to a single ``-``; edge dashes are trimmed.
    """
    spelled_out = " star ".join(text.lower().split("*"))
    slug = re.sub(r"[^a-z0-9]+", "-", spelled_out)
    return slug.strip("-")
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def safe_resolve(path: Path) -> Path:
    """Resolve *path*, falling back to ``path.absolute()`` if resolution fails.

    ``OSError`` and ``RuntimeError`` raised by ``Path.resolve`` are absorbed.
    """
    try:
        resolved = path.resolve()
    except (OSError, RuntimeError):
        resolved = path.absolute()
    return resolved
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def path_component_contains_secret(component: str) -> bool:
    """Tell whether one path component matches ``SECRET_CONTENT_RE``.

    Empty components and the ``.`` / ``..`` placeholders are never flagged.
    """
    if not component or component in {".", ".."}:
        return False
    return SECRET_CONTENT_RE.search(component) is not None
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
def sanitize_path_component(component: str) -> str:
    """Return *component* unchanged unless it looks like a secret.

    Secret-looking components are replaced by ``REDACTED_PATH_COMPONENT``;
    empty components and ``.`` / ``..`` always pass through untouched.
    """
    is_placeholder = not component or component in {".", ".."}
    if is_placeholder or not path_component_contains_secret(component):
        return component
    return REDACTED_PATH_COMPONENT
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def sanitize_rel_path(path: str) -> str:
    """Sanitize every ``/``-separated component of *path* and rejoin them."""
    cleaned_parts = [sanitize_path_component(part) for part in path.split("/")]
    return "/".join(cleaned_parts)
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def sanitize_path_text(path: str) -> str:
    """Sanitize a native path string component by component.

    ``os.sep`` is first normalized to ``/``; the result is always
    ``/``-separated.
    """
    posix_text = path.replace(os.sep, "/")
    cleaned_parts = []
    for part in posix_text.split("/"):
        cleaned_parts.append(sanitize_path_component(part))
    return "/".join(cleaned_parts)
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def display_path_hash(path: Path) -> str:
    """Hash the sanitized text of the resolved *path* (12 hex digits)."""
    resolved_text = str(safe_resolve(path))
    sanitized_text = sanitize_path_text(resolved_text)
    return text_hash(sanitized_text)
|
|
199
|
+
|
|
200
|
+
|
|
201
|
+
def path_label(path: Path, show_paths: bool) -> str:
    """Return a display label for *path*.

    With ``show_paths`` the raw path string is returned as-is.  Otherwise
    the label is ``<sanitized basename>#path:<hash>``, where the basename
    falls back to ``"path"`` when *path* has no final component.
    """
    if not show_paths:
        basename = sanitize_path_component(path.name or "path")
        return f"{basename}#path:{display_path_hash(path)}"
    return str(path)
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
def context_finding(
    rule_id: str,
    severity: str,
    path: str,
    message: str,
    action: str,
    evidence: dict[str, Any] | None = None,
) -> Finding:
    """Build a ``Finding`` whose id is unique per (rule, path).

    The instance id is ``<rule_id>-<text_hash(path)>`` and is used both as
    the finding's ``id`` and as its ``instance_id``.
    """
    unique_id = f"{rule_id}-{text_hash(path)}"
    return Finding(
        id=unique_id,
        severity=severity,
        path=path,
        message=message,
        action=action,
        evidence=evidence or {},
        rule_id=rule_id,
        instance_id=unique_id,
    )
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def root_label(root: Path, show_paths: bool) -> str:
    """Return a display label for the project root.

    With ``show_paths`` the raw path string is returned.  Otherwise the
    label is ``<sanitized basename>#path:<hash>``, using ``"project"`` when
    *root* has no final component.
    """
    if not show_paths:
        basename = sanitize_path_component(root.name or "project")
        return f"{basename}#path:{display_path_hash(root)}"
    return str(root)
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def rel_path(path: Path, root: Path) -> str:
    """Return *path* relative to *root* as a sanitized POSIX string.

    If either path cannot be resolved, or *path* is not under *root*, the
    result falls back to a ``<sanitized basename>#path:<hash>`` label.
    """
    try:
        relative_text = path.resolve().relative_to(root.resolve()).as_posix()
        return sanitize_rel_path(relative_text)
    except (OSError, RuntimeError, ValueError):
        pass
    basename = sanitize_path_component(path.name or "path")
    return f"{basename}#path:{display_path_hash(path)}"
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def raw_rel_path(path: Path, root: Path) -> str | None:
    """Return *path* relative to *root* as an unsanitized POSIX string.

    Returns ``None`` when resolution fails or *path* is not under *root*.
    """
    try:
        resolved_path = path.resolve()
        relative = resolved_path.relative_to(root.resolve())
    except (OSError, RuntimeError, ValueError):
        return None
    return relative.as_posix()
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def context_surface_for_rel(raw_rel: str, name: str) -> dict[str, str] | None:
    """Classify a context file by the agent surface it belongs to.

    *raw_rel* is the project-relative POSIX path and *name* the file's base
    name.  Returns a dict with ``surface``, ``surface_label`` and
    ``surface_kind`` keys, or ``None`` when no rule matches.  Rules are
    checked in order and the first hit wins.
    """
    ordered_rules = (
        ("claude", name == "CLAUDE.md" or raw_rel.startswith(".claude/")),
        ("codex", name == "AGENTS.md"),
        ("gemini", name == "GEMINI.md"),
        ("cursor", raw_rel == ".cursorrules" or raw_rel.startswith(".cursor/rules/")),
        ("windsurf", raw_rel == ".windsurfrules" or raw_rel.startswith(".windsurf/rules/")),
        ("cline", raw_rel == ".clinerules" or raw_rel.startswith(".clinerules/")),
        ("copilot", raw_rel == ".github/copilot-instructions.md"),
    )
    surface = next((key for key, matched in ordered_rules if matched), None)
    if surface is None:
        return None
    return {
        "surface": surface,
        "surface_label": CONTEXT_SURFACE_LABELS.get(surface, surface),
        "surface_kind": "agent_rule",
    }
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
class SettingsFileTooLargeError(ValueError):
    """Raised when a settings file exceeds ``MAX_SETTINGS_READ_BYTES``."""
|
|
268
|
+
|
|
269
|
+
|
|
270
|
+
def load_json(path: Path, root: Path) -> tuple[dict[str, Any] | None, str | None]:
    """Read and parse a settings JSON file located under *root*.

    Returns ``(data, None)`` on success, or ``(None, reason)`` where
    *reason* is a short description: ``"missing"``, an invalid-JSON or
    invalid-UTF-8 note, the too-large message, an ``"unreadable: ..."``
    note for other OS errors, or a complaint that the top-level value is
    not a JSON object.
    """
    try:
        raw = read_settings_json_bytes_no_follow(path, root)
        parsed = json.loads(raw.decode("utf-8"))
    except FileNotFoundError:
        # Must come before the generic OSError handler below.
        return None, "missing"
    except OSError as exc:
        return None, f"unreadable: {format_os_error(exc)}"
    except SettingsFileTooLargeError as exc:
        return None, str(exc)
    except UnicodeDecodeError as exc:
        return None, f"invalid UTF-8 near byte {exc.start}"
    except json.JSONDecodeError as exc:
        return None, f"invalid JSON at line {exc.lineno}: {exc.msg}"
    if isinstance(parsed, dict):
        return parsed, None
    return None, "settings root must be a JSON object"
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def _open_regular_under_root_no_follow(root: Path, path: Path, *, path_kind: str = "settings"):
|
|
289
|
+
root_resolved = root.resolve()
|
|
290
|
+
nofollow = getattr(os, "O_NOFOLLOW", 0)
|
|
291
|
+
if not nofollow:
|
|
292
|
+
raise OSError(errno.ENOTSUP, "safe no-follow open is unavailable")
|
|
293
|
+
if os.open not in getattr(os, "supports_dir_fd", set()):
|
|
294
|
+
raise OSError(errno.ENOTSUP, "safe directory-relative open is unavailable")
|
|
295
|
+
try:
|
|
296
|
+
relative = path.relative_to(root_resolved)
|
|
297
|
+
except ValueError:
|
|
298
|
+
try:
|
|
299
|
+
relative = path.relative_to(root)
|
|
300
|
+
except ValueError as exc:
|
|
301
|
+
raise OSError(f"{path_kind} path is outside project root") from exc
|
|
302
|
+
parts = relative.parts
|
|
303
|
+
if not parts:
|
|
304
|
+
raise OSError(errno.EINVAL, f"{path_kind} path is missing a file name")
|
|
305
|
+
for component in parts:
|
|
306
|
+
if component in {"", "."} or component == "..":
|
|
307
|
+
raise OSError(errno.EINVAL, f"invalid {path_kind} path component")
|
|
308
|
+
dir_flags = os.O_RDONLY | getattr(os, "O_DIRECTORY", 0) | nofollow
|
|
309
|
+
if hasattr(os, "O_CLOEXEC"):
|
|
310
|
+
dir_flags |= os.O_CLOEXEC
|
|
311
|
+
dir_fd = os.open(root_resolved, dir_flags)
|
|
312
|
+
try:
|
|
313
|
+
if not stat.S_ISDIR(os.fstat(dir_fd).st_mode):
|
|
314
|
+
raise OSError(errno.ENOTDIR, f"{path_kind} root is not a directory")
|
|
315
|
+
for component in parts[:-1]:
|
|
316
|
+
try:
|
|
317
|
+
next_fd = os.open(component, dir_flags, dir_fd=dir_fd)
|
|
318
|
+
except OSError as exc:
|
|
319
|
+
if exc.errno in {errno.ENOTDIR, errno.ELOOP}:
|
|
320
|
+
raise OSError(exc.errno, f"{path_kind} parent is not a directory") from exc
|
|
321
|
+
raise
|
|
322
|
+
try:
|
|
323
|
+
if not stat.S_ISDIR(os.fstat(next_fd).st_mode):
|
|
324
|
+
raise OSError(errno.ENOTDIR, f"{path_kind} parent is not a directory")
|
|
325
|
+
except Exception:
|
|
326
|
+
os.close(next_fd)
|
|
327
|
+
raise
|
|
328
|
+
old_fd = dir_fd
|
|
329
|
+
dir_fd = next_fd
|
|
330
|
+
os.close(old_fd)
|
|
331
|
+
file_flags = os.O_RDONLY
|
|
332
|
+
if hasattr(os, "O_CLOEXEC"):
|
|
333
|
+
file_flags |= os.O_CLOEXEC
|
|
334
|
+
if hasattr(os, "O_NONBLOCK"):
|
|
335
|
+
file_flags |= os.O_NONBLOCK
|
|
336
|
+
if nofollow:
|
|
337
|
+
file_flags |= nofollow
|
|
338
|
+
try:
|
|
339
|
+
fd = os.open(parts[-1], file_flags, dir_fd=dir_fd)
|
|
340
|
+
except OSError as exc:
|
|
341
|
+
if exc.errno == errno.ELOOP:
|
|
342
|
+
raise OSError(errno.ELOOP, "not a regular file") from exc
|
|
343
|
+
raise
|
|
344
|
+
try:
|
|
345
|
+
opened = os.fstat(fd)
|
|
346
|
+
if not stat.S_ISREG(opened.st_mode):
|
|
347
|
+
raise OSError(errno.EINVAL, "not a regular file")
|
|
348
|
+
handle = os.fdopen(fd, "rb")
|
|
349
|
+
fd = -1
|
|
350
|
+
return handle
|
|
351
|
+
except Exception:
|
|
352
|
+
if fd != -1:
|
|
353
|
+
os.close(fd)
|
|
354
|
+
raise
|
|
355
|
+
finally:
|
|
356
|
+
if dir_fd != -1:
|
|
357
|
+
os.close(dir_fd)
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def read_settings_json_bytes_no_follow(path: Path, root: Path) -> bytes:
    """Return the raw bytes of a settings file, enforcing the size limit.

    The file is opened through ``_open_regular_under_root_no_follow``.  The
    size is checked twice: once from ``fstat`` before reading, and once from
    the number of bytes actually read (at most limit + 1 bytes are read).

    Raises:
        SettingsFileTooLargeError: if either check exceeds
            ``MAX_SETTINGS_READ_BYTES``.
        OSError: propagated from opening or reading the file.
    """
    limit = MAX_SETTINGS_READ_BYTES
    with _open_regular_under_root_no_follow(root, path) as handle:
        reported_size = os.fstat(handle.fileno()).st_size
        if reported_size > limit:
            raise SettingsFileTooLargeError(
                f"settings file is too large ({reported_size} bytes > {limit})"
            )
        payload = handle.read(limit + 1)
        if len(payload) > limit:
            raise SettingsFileTooLargeError(f"settings file is too large (> {limit} bytes)")
        return payload
|
|
371
|
+
|
|
372
|
+
|
|
373
|
+
def iter_values(value: Any) -> Iterable[Any]:
    """Yield every leaf of a nested dict/list structure, depth first.

    Dict values and list items are descended into; anything else (including
    tuples) is yielded as a leaf.  Dict keys are not yielded.
    """
    if isinstance(value, dict):
        children = value.values()
    elif isinstance(value, list):
        children = value
    else:
        yield value
        return
    for child in children:
        yield from iter_values(child)
|
|
382
|
+
|
|
383
|
+
|
|
384
|
+
def string_values(value: Any) -> list[str]:
    """Collect every string leaf found by ``iter_values(value)``, in order."""
    found: list[str] = []
    for leaf in iter_values(value):
        if isinstance(leaf, str):
            found.append(leaf)
    return found
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def collect_settings(root: Path) -> tuple[list[dict[str, Any]], list[Finding]]:
    """Load the project's Claude settings files and report load problems.

    Looks at ``.claude/settings.json`` and ``.claude/settings.local.json``
    under *root*.  A file counts as present if it exists or is a symlink
    (even a dangling one).

    Returns:
        ``(settings, findings)`` where each settings item is
        ``{"path": <relative label>, "data": <parsed dict>}``.  Findings
        contain one ``settings-unreadable`` entry per file that could not
        be used, plus ``missing-project-settings`` when nothing loaded or
        the shared ``settings.json`` is absent.
    """
    claude_dir = root / ".claude"
    shared_file = claude_dir / "settings.json"
    local_file = claude_dir / "settings.local.json"

    def is_present(candidate: Path) -> bool:
        return candidate.exists() or candidate.is_symlink()

    has_project_settings = is_present(shared_file)
    loaded: list[dict[str, Any]] = []
    findings: list[Finding] = []

    for candidate in (shared_file, local_file):
        if not is_present(candidate):
            continue
        rel = rel_path(candidate, root)
        data, error = load_json(candidate, root)
        if error:
            escalate = "outside project" in error or "invalid JSON" in error
            findings.append(
                Finding(
                    "settings-unreadable",
                    "high" if escalate else "medium",
                    rel,
                    f"Claude settings could not be used: {error}.",
                    "Fix or remove the settings file so token-budget hooks and deny rules are predictable.",
                )
            )
            continue
        assert data is not None
        loaded.append({"path": rel, "data": data})

    if not (loaded and has_project_settings):
        findings.append(
            Finding(
                "missing-project-settings",
                "medium",
                ".claude/settings.json",
                "No shared project Claude settings file was found.",
                "Add an opt-in project .claude/settings.json with read deny rules, statusline, and Bash output trimming hook.",
            )
        )
    return loaded, findings
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def merged_settings(settings: list[dict[str, Any]]) -> dict[str, Any]:
    """Fold the loaded settings files into one effective settings dict.

    Items are processed in list order:

    * ``permissions.deny`` / ``permissions.allow`` string entries are
      concatenated;
    * hook lists are concatenated per event, while a non-list hook value
      replaces whatever was stored for that event;
    * ``statusLine`` (dict only), ``model`` and ``effortLevel`` are
      overridden by later items;
    * ``mcpServers`` dicts are merged with later keys winning.
    """
    deny: list[str] = []
    allow: list[str] = []
    hooks_by_event: dict[str, Any] = {}
    mcp_servers: dict[str, Any] = {}
    merged: dict[str, Any] = {
        "permissions": {"deny": deny, "allow": allow},
        "hooks": hooks_by_event,
        "mcpServers": mcp_servers,
    }

    for item in settings:
        data = item["data"]

        permissions = data.get("permissions")
        if isinstance(permissions, dict):
            for bucket, key in ((deny, "deny"), (allow, "allow")):
                entries = permissions.get(key)
                if isinstance(entries, list):
                    bucket.extend(str(entry) for entry in entries if isinstance(entry, str))

        raw_hooks = data.get("hooks")
        if isinstance(raw_hooks, dict):
            for event, handlers in raw_hooks.items():
                if isinstance(handlers, list):
                    current = hooks_by_event.setdefault(event, [])
                    if isinstance(current, list):
                        current.extend(handlers)
                        continue
                # Non-list on either side: the newer value simply replaces.
                hooks_by_event[event] = handlers

        status_line = data.get("statusLine")
        if isinstance(status_line, dict):
            merged["statusLine"] = status_line
        for scalar_key in ("model", "effortLevel"):
            if scalar_key in data:
                merged[scalar_key] = data[scalar_key]

        servers = data.get("mcpServers")
        if isinstance(servers, dict):
            mcp_servers.update(servers)

    return merged
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
# Matches a whole permission entry of the form ``Read(<target>)``
# (case-insensitive, surrounding whitespace allowed) and captures everything
# between the outermost parentheses as the ``target`` group.
READ_TARGET_RE = re.compile(r"(?i)^\s*Read\((?P<target>.*)\)\s*$")
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def normalize_read_target(value: str) -> str:
    """Canonicalize the target of a ``Read(...)`` rule for comparison.

    Strips whitespace and surrounding quotes, converts backslashes to
    ``/``, drops any leading ``./`` repetitions, collapses runs of ``/``
    and removes trailing slashes.  An empty result becomes ``"."``.
    """
    unquoted = value.strip().strip('"').strip("'")
    posix_text = unquoted.replace("\\", "/")
    without_dot_prefix = re.sub(r"^(?:\./)+", "", posix_text)
    collapsed = re.sub(r"/+", "/", without_dot_prefix).rstrip("/")
    return collapsed if collapsed else "."
|
|
459
|
+
|
|
460
|
+
|
|
461
|
+
def parse_read_targets(deny_entries: list[str]) -> list[str]:
    """Extract the normalized targets of all ``Read(...)`` entries.

    Entries that do not match ``READ_TARGET_RE`` are skipped; the order of
    the remaining entries is preserved.
    """
    matches = (READ_TARGET_RE.match(entry) for entry in deny_entries)
    return [normalize_read_target(found.group("target")) for found in matches if found]
|
|
469
|
+
|
|
470
|
+
|
|
471
|
+
def path_target_denied(deny_entries: list[str], recommended: str) -> bool:
    """Return True only for exact/equivalent or intentionally broader Read denies.

    *recommended* is a ``Read(...)`` rule.  It counts as covered when some
    entry in *deny_entries* normalizes to the same target, is a catch-all
    (``**``, ``*``, ``.``), or is a ``<base>/**`` rule whose base equals or
    is a directory prefix of the recommended target.  A recommendation that
    is not a ``Read(...)`` rule, or is itself a bare wildcard, is never
    considered covered.
    """
    wanted = parse_read_targets([recommended])
    if not wanted or wanted[0] in {"**", "*"}:
        return False
    required_target = wanted[0]
    catch_alls = {"**", "*", "./**", "."}

    def covers(target: str) -> bool:
        if target in catch_alls or target == required_target:
            return True
        if target.endswith("/**"):
            base = target[:-3].rstrip("/")
            if required_target == base or required_target.startswith(base + "/"):
                return True
        return target == "~/**" and required_target.startswith("~/")

    return any(covers(target) for target in parse_read_targets(deny_entries))
|
|
493
|
+
|
|
494
|
+
|
|
495
|
+
def project_path_exists(root: Path, rel: str) -> bool:
    """Tell whether the project contains the path described by *rel*.

    *rel* is a project-relative path, or the special pattern ``".env.*"``,
    which matches any existing direct child of *root* whose name starts
    with ``".env."``.

    A root that is missing, unreadable, or not a directory yields ``False``
    for the pattern case instead of raising, in line with the plain-path
    case where ``Path.exists`` already reports ``False`` for a missing path.
    """
    if rel == ".env.*":
        try:
            # Test the name first so only matching entries are stat'ed.
            return any(
                entry.name.startswith(".env.") and entry.exists()
                for entry in root.iterdir()
            )
        except OSError:
            return False
    return (root / rel).exists()
|
|
501
|
+
|
|
502
|
+
|
|
503
|
+
def generic_context_pattern(rel: str) -> str:
    """Turn a project-relative path into a generic ignore-style pattern.

    Known dotfiles and patterns ending in ``.*`` are returned unchanged;
    other wildcard patterns have every ``./`` removed; anything else is
    treated as a directory and gets a ``/**`` suffix.
    """
    keep_as_is = rel in {".env", ".npmrc", ".pypirc", ".netrc"} or rel.endswith(".*")
    if keep_as_is:
        return rel
    if "*" in rel:
        return rel.replace("./", "")
    return rel.rstrip("/") + "/**"
|
|
511
|
+
|
|
512
|
+
|
|
513
|
+
def context_exclusion_recommendation(
    *,
    label: str,
    rel: str,
    recommended: str,
    category: str,
    severity: str,
    deny_entries: list[str],
) -> dict[str, Any]:
    """Build one context-exclusion recommendation record.

    ``status`` is ``"already_denied"`` when *deny_entries* already cover
    *recommended* (per ``path_target_denied``) and ``"missing"`` otherwise.
    ``reason`` depends only on whether *category* is ``"sensitive"``.
    """
    if path_target_denied(deny_entries, recommended):
        status = "already_denied"
    else:
        status = "missing"

    if category == "sensitive":
        reason = "Sensitive local file should not be read into AI-agent context."
    else:
        reason = "Bulky generated/cache path should stay out of AI-agent context."

    return {
        "id": f"context-exclude-{safe_id_part(label)}",
        "severity": severity,
        "path": rel,
        "category": category,
        "status": status,
        "reason": reason,
        "recommended_deny": recommended,
        "generic_pattern": generic_context_pattern(rel),
        "applies_to": ["claude-permissions.deny", "agent-ignore-advisory"],
        "surfaces": ["Claude Code permissions.deny", "generic agent ignore/exclude rules"],
    }
|
|
539
|
+
|
|
540
|
+
|
|
541
|
+
def build_context_exclusion_recommendations(root: Path, deny_entries: list[str]) -> list[dict[str, Any]]:
    """List exclusion recommendations for heavy and sensitive paths in *root*.

    Only paths that exist in the project produce a record.  Heavy paths are
    categorised ``generated_cache`` / ``medium``; sensitive paths are
    ``sensitive`` / ``high``.  The result is sorted by severity rank
    (``SEVERITY_ORDER``, unknown severities last) and then by id.
    """
    groups = (
        (HEAVY_PROJECT_DENIES, "generated_cache", "medium"),
        (SENSITIVE_PROJECT_DENIES, "sensitive", "high"),
    )
    recommendations: list[dict[str, Any]] = []
    for table, category, severity in groups:
        for label, rel, recommended in table:
            if not project_path_exists(root, rel):
                continue
            recommendations.append(
                context_exclusion_recommendation(
                    label=label,
                    rel=rel,
                    recommended=recommended,
                    category=category,
                    severity=severity,
                    deny_entries=deny_entries,
                )
            )

    def sort_key(item: dict[str, Any]) -> tuple[Any, Any]:
        return SEVERITY_ORDER.get(str(item["severity"]), 99), item["id"]

    recommendations.sort(key=sort_key)
    return recommendations
|
|
565
|
+
|
|
566
|
+
|
|
567
|
+
def scan_settings(root: Path, settings: list[dict[str, Any]]) -> tuple[dict[str, Any], list[Finding]]:
    """Audit the merged Claude settings for token-diet gaps.

    Findings are produced in this order: missing Read denies for heavy
    paths that exist, missing denies for sensitive project files that
    exist, missing home-credential denies, missing helper hooks and
    statusline, a broad ``Read`` allow rule (first one only), an Opus
    default model, a high default effort level, and six or more MCP
    servers.

    Returns:
        ``(summary, findings)`` where *summary* describes the merged
        settings (source files, rule counts, helper presence, model,
        effort level, MCP server count).
    """
    settings_path = ".claude/settings.json"
    merged = merged_settings(settings)
    deny_entries = merged["permissions"]["deny"]
    allow_entries = merged["permissions"]["allow"]
    findings: list[Finding] = []

    def exists_but_not_denied(rel: str, recommended: str) -> bool:
        return project_path_exists(root, rel) and not path_target_denied(deny_entries, recommended)

    # --- Read deny coverage ------------------------------------------------
    for label, rel, recommended in HEAVY_PROJECT_DENIES:
        if not exists_but_not_denied(rel, recommended):
            continue
        findings.append(
            Finding(
                f"missing-deny-{safe_id_part(label)}",
                "medium",
                rel,
                f"Bulky generated/cache path `{rel}` exists but is not denied from Read.",
                f"Add `{recommended}` to permissions.deny to avoid accidental large reads.",
                {"recommended_deny": recommended},
            )
        )

    for label, rel, recommended in SENSITIVE_PROJECT_DENIES:
        if not exists_but_not_denied(rel, recommended):
            continue
        findings.append(
            Finding(
                f"missing-sensitive-deny-{safe_id_part(label)}",
                "high",
                rel,
                f"Sensitive project path `{rel}` exists but is not denied from Read.",
                f"Add `{recommended}` to permissions.deny; do not send secrets to Claude context.",
                {"recommended_deny": recommended},
            )
        )

    # Home credential paths are recommended whether or not they exist.
    for label, recommended in SENSITIVE_HOME_DENIES:
        if path_target_denied(deny_entries, recommended):
            continue
        findings.append(
            Finding(
                f"missing-home-deny-{safe_id_part(label)}",
                "low",
                label,
                f"Home credential path `{label}` is not explicitly denied.",
                f"Add `{recommended}` to permissions.deny as a guardrail against accidental credential reads.",
                {"recommended_deny": recommended},
            )
        )

    # --- Helper hooks / statusline ------------------------------------------
    bash_trim_present = has_bash_trim_hook(merged)
    read_guard_present = has_large_read_guard(merged)
    statusline_present = has_statusline(merged)
    helper_checks = (
        (
            bash_trim_present,
            "missing-bash-trim-hook",
            "medium",
            "No PreToolUse Bash hook for trimming noisy test/build/lint output was detected.",
            "Install the example hook using context-guard-rewrite-bash or rewrite_bash_for_token_budget.py.",
        ),
        (
            read_guard_present,
            "missing-large-read-guard",
            "medium",
            "No PreToolUse Read hook for blocking large whole-file reads was detected.",
            "Install context-guard-guard-read so Claude is nudged toward context-guard-read-symbol or line-range reads before large files enter context.",
        ),
        (
            statusline_present,
            "missing-token-statusline",
            "low",
            "No token/cost/context statusline command was detected.",
            "Add context-guard-statusline so context and cost pressure stay visible during a session.",
        ),
    )
    for present, finding_id, severity, message, action in helper_checks:
        if not present:
            findings.append(Finding(finding_id, severity, settings_path, message, action))

    # --- Overly broad allow rule (reported once) ----------------------------
    for entry in allow_entries:
        if any(target in {"**", "*", "."} for target in parse_read_targets([entry])):
            findings.append(
                Finding(
                    "broad-read-allow",
                    "medium",
                    settings_path,
                    "A broad Read allow rule can make accidental large reads more likely.",
                    "Prefer narrow allow rules plus explicit deny entries for generated and secret paths.",
                    {"allow_entry": entry},
                )
            )
            break

    # --- Model / effort defaults ---------------------------------------------
    configured_model = merged.get("model")
    if "opus" in str(merged.get("model", "")).lower():
        findings.append(
            Finding(
                "opus-default-model",
                "medium",
                settings_path,
                "Default model appears to be Opus, which can burn scarce premium tokens on routine work.",
                "Use Sonnet as the default and reserve Opus/opusplan for planning or high-risk reasoning.",
                {"model": configured_model},
            )
        )

    configured_effort = merged.get("effortLevel")
    if str(merged.get("effortLevel", "")).lower() in {"high", "max", "maximum"}:
        findings.append(
            Finding(
                "high-default-effort",
                "low",
                settings_path,
                "Default effort is high, which can increase token burn on routine edits.",
                "Use medium/low by default and raise effort only for hard design/debugging work.",
                {"effortLevel": configured_effort},
            )
        )

    # --- MCP server count ------------------------------------------------------
    raw_mcp = merged.get("mcpServers")
    mcp_servers = raw_mcp if isinstance(raw_mcp, dict) else {}
    mcp_count = len(mcp_servers)
    if mcp_count >= 6:
        findings.append(
            Finding(
                "many-mcp-servers",
                "low",
                settings_path,
                "Many MCP servers are configured; tool schemas and discovery can add startup/context overhead.",
                "Disable unused MCP servers for Claude sessions that do not need them.",
                {"mcp_server_count": mcp_count, "mcp_servers": sorted(mcp_servers)[:20]},
            )
        )

    settings_summary = {
        "files": [item["path"] for item in settings],
        "deny_count": len(deny_entries),
        "allow_count": len(allow_entries),
        "has_bash_trim_hook": bash_trim_present,
        "has_large_read_guard": read_guard_present,
        "has_statusline": statusline_present,
        "mcp_server_count": mcp_count,
        "model": configured_model,
        "effortLevel": configured_effort,
    }
    return settings_summary, findings
|
|
690
|
+
|
|
691
|
+
|
|
692
|
+
def has_bash_trim_hook(settings: dict[str, Any]) -> bool:
    """Report whether a Bash-applicable PreToolUse hook runs a trim command.

    Looks at ``settings["hooks"]["PreToolUse"]`` and returns True as soon as
    one entry references any of ``BASH_TRIM_COMMAND_MARKERS`` in the strings
    that ``string_values`` extracts from its ``hooks``, ``command`` or
    ``commands`` fields. Entries with a string matcher that does not apply to
    Bash are ignored; entries without a string matcher are always inspected.
    Malformed structures (non-dict hooks, non-list PreToolUse, non-dict
    entries) simply yield False / are skipped.
    """
    hook_config = settings.get("hooks")
    if not isinstance(hook_config, dict):
        return False
    pre_tool_entries = hook_config.get("PreToolUse")
    if not isinstance(pre_tool_entries, list):
        return False

    for hook_entry in pre_tool_entries:
        if not isinstance(hook_entry, dict):
            continue
        tool_matcher = hook_entry.get("matcher")
        # Only a string matcher can rule an entry out; a missing one cannot.
        if isinstance(tool_matcher, str) and not matcher_applies_to_bash(tool_matcher):
            continue
        candidates = string_values(hook_entry.get("hooks"))
        candidates = candidates + string_values(hook_entry.get("command"))
        candidates = candidates + string_values(hook_entry.get("commands"))
        for candidate in candidates:
            for marker in BASH_TRIM_COMMAND_MARKERS:
                if marker in candidate:
                    return True
    return False
|
|
713
|
+
|
|
714
|
+
|
|
715
|
+
def matcher_applies_to_bash(matcher: str) -> bool:
    """Return True when a ``|``-separated hook matcher covers the Bash tool.

    Each alternative is trimmed and lower-cased; an empty alternative, ``*``,
    or ``bash`` counts as applying to Bash.
    """
    accepted = {"", "*", "bash"}
    for alternative in matcher.split("|"):
        if alternative.strip().lower() in accepted:
            return True
    return False
|
|
718
|
+
|
|
719
|
+
|
|
720
|
+
def has_large_read_guard(settings: dict[str, Any]) -> bool:
    """Report whether a Read-applicable PreToolUse hook runs a read guard.

    Looks at ``settings["hooks"]["PreToolUse"]`` and returns True as soon as
    one entry references any of ``LARGE_READ_GUARD_COMMAND_MARKERS`` in the
    strings that ``string_values`` extracts from its ``hooks``, ``command`` or
    ``commands`` fields. Entries with a string matcher that does not apply to
    Read are ignored; entries without a string matcher are always inspected.
    """
    hook_config = settings.get("hooks")
    if not isinstance(hook_config, dict):
        return False
    pre_tool_entries = hook_config.get("PreToolUse")
    if not isinstance(pre_tool_entries, list):
        return False

    for hook_entry in pre_tool_entries:
        if not isinstance(hook_entry, dict):
            continue
        tool_matcher = hook_entry.get("matcher")
        # Only a string matcher can rule an entry out; a missing one cannot.
        if isinstance(tool_matcher, str) and not matcher_applies_to_read(tool_matcher):
            continue
        candidates = string_values(hook_entry.get("hooks"))
        candidates = candidates + string_values(hook_entry.get("command"))
        candidates = candidates + string_values(hook_entry.get("commands"))
        for candidate in candidates:
            for marker in LARGE_READ_GUARD_COMMAND_MARKERS:
                if marker in candidate:
                    return True
    return False
|
|
741
|
+
|
|
742
|
+
|
|
743
|
+
def matcher_applies_to_read(matcher: str) -> bool:
    """Return True when a ``|``-separated hook matcher covers the Read tool.

    Each alternative is trimmed and lower-cased; an empty alternative, ``*``,
    or ``read`` counts as applying to Read.
    """
    accepted = {"", "*", "read"}
    for alternative in matcher.split("|"):
        if alternative.strip().lower() in accepted:
            return True
    return False
|
|
746
|
+
|
|
747
|
+
|
|
748
|
+
def has_statusline(settings: dict[str, Any]) -> bool:
    """Report whether ``settings["statusLine"]["command"]`` names a known statusline.

    True only when ``statusLine`` is a dict, its ``command`` is a string, and
    that string contains at least one of ``STATUSLINE_COMMAND_MARKERS``.
    """
    status_config = settings.get("statusLine")
    if isinstance(status_config, dict):
        status_command = status_config.get("command")
        if isinstance(status_command, str):
            return any(marker in status_command for marker in STATUSLINE_COMMAND_MARKERS)
    return False
|
|
754
|
+
|
|
755
|
+
|
|
756
|
+
def should_scan_context_file(path: Path, root: Path) -> bool:
    """Decide whether ``path`` should be treated as a context-like file.

    A file qualifies when its name is in ``CONTEXT_FILE_NAMES``, when its raw
    root-relative path is in ``CONTEXT_EXACT_REL_FILES``, or when it is a
    ``.md`` file whose sanitized relative path lies under one of the
    ``CONTEXT_MD_DIRS`` prefixes. A path for which ``raw_rel_path`` returns
    None never qualifies by relative path.
    """
    if path.name in CONTEXT_FILE_NAMES:
        return True

    relative_raw = raw_rel_path(path, root)
    if relative_raw is None:
        return False
    if relative_raw in CONTEXT_EXACT_REL_FILES:
        return True

    relative_clean = sanitize_rel_path(relative_raw)
    for md_dir in CONTEXT_MD_DIRS:
        if relative_clean.startswith(md_dir + "/") and path.suffix.lower() == ".md":
            return True
    return False
|
|
766
|
+
|
|
767
|
+
|
|
768
|
+
def iter_context_files(root: Path) -> Iterable[Path]:
    """Yield context-like files found by walking ``root``.

    Directories named in ``EXCLUDED_DIR_NAMES`` and symlinked directories are
    pruned from the walk; symlinked files are skipped. Every remaining file
    accepted by ``should_scan_context_file`` is yielded.
    """
    for walk_dir, subdirs, files in os.walk(root, followlinks=False):
        base = Path(walk_dir)

        kept = []
        for subdir in subdirs:
            if subdir in EXCLUDED_DIR_NAMES:
                continue
            if (base / subdir).is_symlink():
                continue
            kept.append(subdir)
        # In-place assignment is what tells os.walk not to descend further.
        subdirs[:] = kept

        for filename in files:
            candidate = base / filename
            if not candidate.is_symlink() and should_scan_context_file(candidate, root):
                yield candidate
|
|
782
|
+
|
|
783
|
+
|
|
784
|
+
def read_text_prefix(path: Path, limit: int = MAX_CONTEXT_READ_BYTES, *, root: Path | None = None) -> tuple[str, bool]:
    """Read at most ``limit`` bytes of ``path`` and decode them as UTF-8.

    The file is opened through ``_open_regular_under_root_no_follow`` when
    ``root`` is given, otherwise through ``open_regular_no_follow``.

    Returns:
        ``(text, truncated)`` where ``truncated`` is True when the file holds
        more than ``limit`` bytes. Undecodable bytes are replaced.
    """
    if root is None:
        stream = open_regular_no_follow(path)
    else:
        stream = _open_regular_under_root_no_follow(root, path, path_kind="context")

    with stream as source:
        # One extra byte is enough to learn whether the file exceeds the limit.
        raw = source.read(limit + 1)

    was_truncated = len(raw) > limit
    prefix = raw[:limit] if was_truncated else raw
    return prefix.decode("utf-8", "replace"), was_truncated
|
|
796
|
+
|
|
797
|
+
|
|
798
|
+
def file_contains_secret(
    path: Path,
    chunk_bytes: int = 64_000,
    *,
    root: Path | None = None,
    max_total_bytes: int = MAX_SECRET_SCAN_BYTES,
) -> bool:
    """Scan ``path`` in chunks for text matching ``SECRET_CONTENT_RE``.

    Reads at most ``max_total_bytes`` bytes, ``chunk_bytes`` at a time. The
    last 512 decoded characters of each window are carried into the next one
    so a match straddling a chunk boundary can still be found.

    Returns:
        True on the first match; False when the file ends or the byte budget
        is exhausted without a match.
    """
    if root is None:
        stream = open_regular_no_follow(path)
    else:
        stream = _open_regular_under_root_no_follow(root, path, path_kind="context")

    overlap = ""
    consumed = 0
    with stream as source:
        while consumed < max_total_bytes:
            chunk = source.read(min(chunk_bytes, max_total_bytes - consumed))
            if not chunk:
                break
            consumed += len(chunk)
            window = overlap + chunk.decode("utf-8", "replace")
            if SECRET_CONTENT_RE.search(window):
                return True
            overlap = window[-512:]
    return False
|
|
825
|
+
|
|
826
|
+
|
|
827
|
+
def open_regular_no_follow(path: Path):
    """Open ``path`` for binary reading only if it is a stable regular file.

    The path is ``lstat``-ed before opening, the descriptor is ``fstat``-ed
    after opening, and the path is ``lstat``-ed once more; all three must
    describe the same regular file. ``O_NOFOLLOW`` and ``O_NONBLOCK`` are
    added to the open flags when the platform defines them.

    Returns:
        A binary file object that owns the descriptor.

    Raises:
        OSError: if the path is not a regular file, changes identity while
            being opened, or any underlying OS call fails.
    """
    before = os.lstat(path)
    if not stat.S_ISREG(before.st_mode):
        raise OSError("not a regular file")

    flags = os.O_RDONLY
    if hasattr(os, "O_NONBLOCK"):
        flags |= os.O_NONBLOCK
    # Falls back to 0 (a no-op) on platforms without O_NOFOLLOW.
    flags |= getattr(os, "O_NOFOLLOW", 0)

    fd = os.open(path, flags)
    try:
        opened = os.fstat(fd)
        after = os.lstat(path)
        if (
            not stat.S_ISREG(opened.st_mode)
            or not stat.S_ISREG(after.st_mode)
            or not os.path.samestat(before, opened)
            or not os.path.samestat(after, opened)
        ):
            raise OSError("not a regular file")
        handle = os.fdopen(fd, "rb")
    except BaseException:
        # BaseException (not just Exception) so the descriptor is also
        # released when e.g. KeyboardInterrupt arrives before fdopen owns it.
        os.close(fd)
        raise
    return handle
|
|
853
|
+
|
|
854
|
+
|
|
855
|
+
def format_os_error(exc: OSError) -> str:
    """Render an ``OSError`` as a short reason string.

    Uses ``exc.strerror`` when it is non-empty, otherwise the exception class
    name, and appends ``(errno N)`` when an errno is present.
    """
    description = exc.strerror if exc.strerror else type(exc).__name__
    if exc.errno is None:
        return description
    return f"{description} (errno {exc.errno})"
|
|
860
|
+
|
|
861
|
+
|
|
862
|
+
def scan_context(root: Path, large_bytes: int, huge_bytes: int, long_lines: int) -> tuple[list[dict[str, Any]], list[Finding]]:
    """Inventory context-like files under ``root`` and collect findings.

    Files come from ``iter_context_files`` and are processed in ``rel_path``
    order. Each readable regular file yields one inventory record (path,
    size, sampled line count, truncation flag, code-fence count, plus any
    surface metadata) and may yield findings for size, code fences, or
    secret-shaped content.

    Args:
        root: Directory being scanned.
        large_bytes: Size at or above which a file is reported as large.
        huge_bytes: Size at or above which a file is reported as huge.
        long_lines: Sampled line count at or above which a file is reported
            as large.

    Returns:
        ``(context_files, findings)``.
    """
    inventory: list[dict[str, Any]] = []
    findings: list[Finding] = []
    candidates = sorted(iter_context_files(root), key=lambda p: rel_path(p, root))

    for candidate in candidates:
        rel = rel_path(candidate, root)
        surface = context_surface_for_rel(raw_rel_path(candidate, root) or rel, candidate.name)
        try:
            info = candidate.lstat()
            if not stat.S_ISREG(info.st_mode):
                findings.append(context_finding(
                    "context-not-regular",
                    "medium",
                    rel,
                    "Context-like path is not a regular file.",
                    "Replace it with a regular markdown file or remove it from always-loaded context.",
                ))
                continue
            size = info.st_size
            sample, sample_truncated = read_text_prefix(candidate, root=root)
            has_secret = file_contains_secret(candidate, root=root)
        except OSError as exc:
            findings.append(context_finding(
                "context-unreadable",
                "low",
                rel,
                f"Context-like file could not be read: {format_os_error(exc)}.",
                "Check file permissions or remove stale symlinks.",
            ))
            continue

        # Counts are taken over the sampled prefix only, not the whole file.
        line_count = (sample.count("\n") + 1) if sample else 0
        fence_count = sample.count("```")

        record = {
            "path": rel,
            "bytes": size,
            "sampled_lines": line_count,
            "sample_truncated": sample_truncated,
            "code_fences": fence_count,
        }
        if surface is not None:
            record.update(surface)
        inventory.append(record)

        if size >= huge_bytes:
            huge_evidence = {"bytes": size, "threshold_bytes": huge_bytes}
            if surface is not None:
                huge_evidence.update(surface)
            findings.append(context_finding(
                "huge-context-file",
                "high",
                rel,
                f"Context-like file is very large ({size} bytes).",
                "Move long procedures/logs/examples into opt-in skills or commands and keep only a short index in always-loaded context.",
                huge_evidence,
            ))
        elif size >= large_bytes or line_count >= long_lines:
            large_evidence = {
                "bytes": size,
                "large_bytes": large_bytes,
                "sampled_lines": line_count,
                "long_lines": long_lines,
            }
            if surface is not None:
                large_evidence.update(surface)
            findings.append(context_finding(
                "large-context-file",
                "medium",
                rel,
                f"Context-like file is large ({size} bytes, sampled {line_count} lines).",
                "Trim stable instructions, move volatile or lengthy material to skills/custom commands, and keep examples short.",
                large_evidence,
            ))

        if fence_count >= 12:
            findings.append(context_finding(
                "context-heavy-code-fences",
                "low",
                rel,
                "Context-like file contains many code fences, which can inflate startup context.",
                "Replace long embedded examples with links or opt-in command/skill files.",
                {"code_fences": fence_count},
            ))

        if has_secret:
            findings.append(context_finding(
                "secret-like-context-content",
                "high",
                rel,
                "Context-like file contains credential-shaped text.",
                "Remove secrets from prompt context and rotate exposed credentials if this file was shared.",
            ))

    return inventory, findings
|
|
946
|
+
|
|
947
|
+
|
|
948
|
+
# Sort rank per severity label; a lower rank sorts first.
SEVERITY_ORDER = {level: rank for rank, level in enumerate(("high", "medium", "low"))}
|
|
949
|
+
|
|
950
|
+
|
|
951
|
+
def build_report(args: argparse.Namespace) -> dict[str, Any]:
    """Run the settings and context scans and assemble the report dict.

    Args:
        args: Parsed CLI arguments; reads ``path``, ``show_paths``, ``top``,
            ``large_context_bytes``, ``huge_context_bytes`` and
            ``long_context_lines``.

    Returns:
        A dict with the tool name, root label, settings summary, the largest
        context files, exclusion recommendations, and all findings ordered by
        severity, id, then path.

    Raises:
        SystemExit: if the resolved scan path is not an existing directory.
    """
    root = safe_resolve(Path(args.path).expanduser())
    try:
        usable_root = root.is_dir() if root.exists() else False
    except OSError:
        usable_root = False
    if not usable_root:
        raise SystemExit(f"context-guard-diet: scan path is not a directory: {path_label(root, args.show_paths)}")

    settings, settings_findings = collect_settings(root)
    settings_summary, config_findings = scan_settings(root, settings)
    context_files, context_findings = scan_context(
        root,
        args.large_context_bytes,
        args.huge_context_bytes,
        args.long_context_lines,
    )
    deny_rules = merged_settings(settings)["permissions"]["deny"]
    recommendations = build_context_exclusion_recommendations(root, deny_rules)

    ordered_findings = sorted(
        settings_findings + config_findings + context_findings,
        key=lambda item: (SEVERITY_ORDER.get(item.severity, 99), item.id, item.path),
    )

    label = root_label(root, args.show_paths)
    largest_first = sorted(context_files, key=lambda item: item["bytes"], reverse=True)
    return {
        "tool": "context-guard-diet",
        "root": label,
        "settings": settings_summary,
        "context_files": largest_first[: args.top],
        "context_exclusion_recommendations": recommendations[: args.top],
        "finding_count": len(ordered_findings),
        "findings": [item.as_dict() for item in ordered_findings],
    }
|
|
975
|
+
|
|
976
|
+
|
|
977
|
+
def print_text(report: dict[str, Any]) -> None:
    """Print a human-readable rendering of a scan report to stdout.

    Emits the header, a one-line settings summary, the optional context-file
    and exclusion-recommendation sections, and finally the findings (or
    ``- none`` when there are none).
    """
    print("Claude token diet scan")
    print(f"root: {report['root']}")

    summary = report["settings"]
    print(
        f"settings: files={len(summary['files'])} deny={summary['deny_count']} "
        f"trim_hook={'yes' if summary['has_bash_trim_hook'] else 'no'} "
        f"read_guard={'yes' if summary['has_large_read_guard'] else 'no'} "
        f"statusline={'yes' if summary['has_statusline'] else 'no'} "
        f"mcp={summary['mcp_server_count']}"
    )

    context_files = report["context_files"]
    if context_files:
        print("\nTop context-like files:")
        for entry in context_files:
            if entry.get("surface"):
                surface_note = f", surface={entry['surface']}"
            else:
                surface_note = ""
            print(f"- {entry['path']} ({entry['bytes']} bytes, sampled_lines={entry['sampled_lines']}{surface_note})")

    if report.get("context_exclusion_recommendations"):
        print("\nContext exclusion recommendations:")
        for rec in report["context_exclusion_recommendations"]:
            state = rec.get("status", "missing")
            print(f"- [{rec['severity'].upper()}] {rec['id']} @ {rec['path']} ({state})")
            print(f" claude: {rec['recommended_deny']}")
            print(f" generic: {rec['generic_pattern']}")

    print("\nFindings:")
    if report["findings"]:
        for entry in report["findings"]:
            print(f"- [{entry['severity'].upper()}] {entry['id']} @ {entry['path']}")
            print(f" why: {entry['message']}")
            print(f" fix: {entry['action']}")
    else:
        print("- none")
|
|
1009
|
+
|
|
1010
|
+
|
|
1011
|
+
def main() -> int:
    """Parse the command line and run the ``scan`` subcommand.

    Returns:
        0 after a scan has been printed (as JSON with ``--json``, otherwise
        as text). The trailing ``return 2`` follows ``parser.error`` and only
        matters if that call ever returns.
    """
    parser = argparse.ArgumentParser(prog="context-guard-diet")
    subcommands = parser.add_subparsers(dest="command", required=True)

    scan_parser = subcommands.add_parser("scan", help="scan project settings and context files for token-diet gaps")
    scan_parser.add_argument("path", nargs="?", default=".")
    scan_parser.add_argument("--json", action="store_true", help="emit machine-readable JSON")
    scan_parser.add_argument("--show-paths", action="store_true", help="show raw absolute root path instead of a stable anonymized root label; local debugging only because private paths may be exposed")
    scan_parser.add_argument("--top", type=int, default=20, help="maximum context-like files and context-exclusion recommendations to list")
    threshold_options = (
        ("--large-context-bytes", DEFAULT_LARGE_CONTEXT_BYTES),
        ("--huge-context-bytes", DEFAULT_HUGE_CONTEXT_BYTES),
        ("--long-context-lines", DEFAULT_LONG_CONTEXT_LINES),
    )
    for flag, default_value in threshold_options:
        scan_parser.add_argument(flag, type=int, default=default_value)

    args = parser.parse_args()

    if args.command != "scan":
        parser.error("unknown command")
        return 2

    report = build_report(args)
    if args.json:
        print(json.dumps(report, indent=2, sort_keys=True, ensure_ascii=False))
    else:
        print_text(report)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|