@ictechgy/context-guard 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +49 -0
- package/LICENSE +201 -0
- package/NOTICE +4 -0
- package/README.ko.md +353 -0
- package/README.md +353 -0
- package/context-guard-kit/README.md +76 -0
- package/context-guard-kit/benchmark_runner.py +1898 -0
- package/context-guard-kit/claude_transcript_cost_audit.py +1591 -0
- package/context-guard-kit/context_compress.py +543 -0
- package/context-guard-kit/context_escrow.py +919 -0
- package/context-guard-kit/context_guard_cli.py +149 -0
- package/context-guard-kit/context_guard_diet.py +1036 -0
- package/context-guard-kit/context_pack.py +929 -0
- package/context-guard-kit/failed_attempt_nudge.py +567 -0
- package/context-guard-kit/guard_large_read.py +690 -0
- package/context-guard-kit/hook_secret_patterns.py +43 -0
- package/context-guard-kit/read_symbol.py +483 -0
- package/context-guard-kit/rewrite_bash_for_token_budget.py +501 -0
- package/context-guard-kit/sanitize_output.py +725 -0
- package/context-guard-kit/settings.example.json +67 -0
- package/context-guard-kit/setup_wizard.py +1724 -0
- package/context-guard-kit/statusline.sh +362 -0
- package/context-guard-kit/statusline_merged.sh +157 -0
- package/context-guard-kit/tool_schema_pruner.py +837 -0
- package/context-guard-kit/trim_command_output.py +1098 -0
- package/docs/distribution.md +55 -0
- package/package.json +70 -0
- package/packaging/homebrew/context-guard.rb.template +34 -0
- package/plugins/context-guard/.claude-plugin/plugin.json +41 -0
- package/plugins/context-guard/LICENSE +201 -0
- package/plugins/context-guard/NOTICE +4 -0
- package/plugins/context-guard/README.ko.md +135 -0
- package/plugins/context-guard/README.md +135 -0
- package/plugins/context-guard/bin/claude-read-symbol +6 -0
- package/plugins/context-guard/bin/claude-sanitize-output +6 -0
- package/plugins/context-guard/bin/claude-token-artifact +6 -0
- package/plugins/context-guard/bin/claude-token-audit +6 -0
- package/plugins/context-guard/bin/claude-token-bench +6 -0
- package/plugins/context-guard/bin/claude-token-diet +6 -0
- package/plugins/context-guard/bin/claude-token-failed-nudge +6 -0
- package/plugins/context-guard/bin/claude-token-guard-read +6 -0
- package/plugins/context-guard/bin/claude-token-rewrite-bash +6 -0
- package/plugins/context-guard/bin/claude-token-setup +6 -0
- package/plugins/context-guard/bin/claude-token-statusline +6 -0
- package/plugins/context-guard/bin/claude-token-statusline-merged +6 -0
- package/plugins/context-guard/bin/claude-trim-output +6 -0
- package/plugins/context-guard/bin/context-guard +149 -0
- package/plugins/context-guard/bin/context-guard-artifact +919 -0
- package/plugins/context-guard/bin/context-guard-audit +1591 -0
- package/plugins/context-guard/bin/context-guard-bench +1898 -0
- package/plugins/context-guard/bin/context-guard-compress +543 -0
- package/plugins/context-guard/bin/context-guard-diet +1036 -0
- package/plugins/context-guard/bin/context-guard-failed-nudge +567 -0
- package/plugins/context-guard/bin/context-guard-guard-read +690 -0
- package/plugins/context-guard/bin/context-guard-pack +929 -0
- package/plugins/context-guard/bin/context-guard-read-symbol +483 -0
- package/plugins/context-guard/bin/context-guard-rewrite-bash +501 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +725 -0
- package/plugins/context-guard/bin/context-guard-setup +1724 -0
- package/plugins/context-guard/bin/context-guard-statusline +362 -0
- package/plugins/context-guard/bin/context-guard-statusline-merged +157 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +837 -0
- package/plugins/context-guard/bin/context-guard-trim-output +1098 -0
- package/plugins/context-guard/brief/README.md +65 -0
- package/plugins/context-guard/brief/brief-mode.lite.md +29 -0
- package/plugins/context-guard/brief/brief-mode.standard.md +31 -0
- package/plugins/context-guard/brief/brief-mode.ultra.md +32 -0
- package/plugins/context-guard/lib/hook_secret_patterns.py +43 -0
- package/plugins/context-guard/skills/audit/SKILL.md +39 -0
- package/plugins/context-guard/skills/optimize/SKILL.md +48 -0
- package/plugins/context-guard/skills/setup/SKILL.md +40 -0
|
@@ -0,0 +1,929 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Build a deterministic, budgeted local context pack from prioritized files.
|
|
3
|
+
|
|
4
|
+
The packer is local-only and intentionally conservative. It assembles selected
|
|
5
|
+
file slices into a Markdown body whose rendered UTF-8 byte length is bounded by
|
|
6
|
+
``--budget-bytes``. It redacts before building the pack/receipt, records why
|
|
7
|
+
lower-priority sources were omitted, and emits exact local slice commands for
|
|
8
|
+
retrieval when the path is safe to display.
|
|
9
|
+
"""
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import argparse
|
|
13
|
+
import copy
|
|
14
|
+
import hashlib
|
|
15
|
+
import importlib.machinery
|
|
16
|
+
import importlib.util
|
|
17
|
+
import json
|
|
18
|
+
import os
|
|
19
|
+
from pathlib import Path
|
|
20
|
+
import re
|
|
21
|
+
import shlex
|
|
22
|
+
import stat
|
|
23
|
+
import sys
|
|
24
|
+
import time
|
|
25
|
+
from dataclasses import dataclass
|
|
26
|
+
from typing import Any
|
|
27
|
+
|
|
28
|
+
# Tool identifier. Not referenced in the visible part of this file;
# presumably reported in output payloads -- TODO confirm.
TOOL_NAME = "context-guard-pack"
# Manifest schema version; read_manifest rejects manifests declaring another value.
VERSION = 1
# Budget limits in bytes. Presumably the default/min/max for ``--budget-bytes``
# mentioned in the module docstring -- the consumer is outside this view.
DEFAULT_BUDGET_BYTES = 12_000
MIN_BUDGET_BYTES = 0
MAX_BUDGET_BYTES = 2_000_000
# Upper bound on the serialized receipt; see shrink_receipt_for_write.
MAX_RECEIPT_BYTES = 64_000
# Manifests larger than this are refused by read_manifest.
MAX_MANIFEST_BYTES = 1_000_000
# Default truncation limit used by cap_label.
MAX_LABEL_CHARS = 160
# Truncation limit applied to omission reasons when a receipt is shrunk.
MAX_REASON_CHARS = 120
# Divisor used by token_proxy to turn a character count into a rough token estimate.
TOKEN_PROXY_CHARS_PER_TOKEN = 4
# Receipt directory relative to the root (created by ensure_private_pack_dir).
PACK_DIR = ".context-guard/packs"
# Placeholder substituted for path components that look like secrets.
REDACTED_PATH_COMPONENT = "[REDACTED-PATH-COMPONENT]"
# Single alternation matching secret-looking text. The leading ``(?is)`` makes
# the WHOLE pattern case-insensitive and lets ``.`` match newlines, so the
# upper-case literals below also match lower-case input.
SECRET_CONTENT_RE = re.compile(
    r"(?is)("
    # PEM private-key header line.
    r"-----BEGIN [A-Z0-9 ]*PRIVATE KEY-----|"
    # "AKIA" followed by 16 alphanumerics (AWS access-key-id shape).
    r"AKIA[0-9A-Z]{16}|"
    # GitHub-style token prefixes (ghp_, gho_, ghu_, ghs_, ghr_, github_pat_).
    r"gh[pousr]_[A-Za-z0-9_]{20,}|"
    r"github_pat_[A-Za-z0-9_]{20,}|"
    # Slack-style "xox?-" tokens.
    r"xox[abprs]-[A-Za-z0-9-]{10,}|"
    # "sk-" prefixed API keys (explicit ant/proj variants first, then generic).
    r"sk-(?:ant|proj)-[A-Za-z0-9_-]{12,}|"
    r"sk-[A-Za-z0-9][A-Za-z0-9_-]{20,}|"
    # "AIza" prefixed keys.
    r"AIza[0-9A-Za-z_\-]{20,}|"
    # HTTP Authorization header with a Bearer/Basic credential; note the
    # credential character class includes "=".
    r"(?i:Authorization)\s*:\s*(?:Bearer|Basic)\s+[A-Za-z0-9._~+/=-]+|"
    # Generic ``key = value`` / ``key: value`` assignments; the value runs to
    # the next whitespace. The lookbehind stops matches that start mid-word.
    r"(?<![A-Za-z0-9])(?:api[_-]?key|token|secret|password|client[_-]?secret)\s*[:=]\s*[^\s]+"
    r")"
)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass(frozen=True)
class LineRange:
    """Immutable ``start``..``end`` line span.

    Ranges produced by ``parse_line_range`` are 1-based and inclusive; the
    parsers in this file also use ``LineRange(-1, -1)`` as an "invalid" marker.
    """

    start: int
    end: int

    def as_dict(self) -> dict[str, int]:
        """Return the span as a ``{"start": ..., "end": ...}`` mapping."""
        return dict(start=self.start, end=self.end)

    def identity(self) -> str:
        """Return the span formatted as ``"start:end"``."""
        return "{}:{}".format(self.start, self.end)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
@dataclass
class SourceSpec:
    """One requested source as parsed from ``--source`` or a manifest entry."""

    # Raw path text exactly as supplied; validated later by lexical_rel.
    path: str
    # Bounded to [-1_000_000, 1_000_000] by the parsers in this file.
    priority: int = 0
    # None means "whole file". The parsers store LineRange(-1, -1) when the
    # supplied range was invalid; resolve_source turns that into an omission.
    lines: LineRange | None = None
    # Optional display label, already normalised by cap_label.
    label: str | None = None
    # Position of the spec in the input; not assigned by the parsers visible
    # here -- presumably set by the caller (TODO confirm).
    input_index: int = 0
    # "cli" for --source values, "manifest" for manifest entries.
    origin: str = "cli"
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
class ResolvedSource:
    """A source that was opened, read and sanitized by ``resolve_source``."""

    # The spec this result was resolved from.
    spec: SourceSpec
    # ``root / rel`` (built lexically, not re-resolved).
    abs_path: Path
    # Root-relative POSIX path with secret-looking components replaced.
    display_path: str
    # True when at least one component of display_path was replaced.
    redacted_path: bool
    # The requested range, or the whole-file range when the spec had none.
    requested_lines: LineRange | None
    # Sanitized lines (line endings kept) selected by the range.
    selected_lines: list[str]
    # Number of lines in the sanitized file.
    total_lines: int
    # Number of lines in the whole file on which the sanitizer reported a redaction.
    redacted_lines: int
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
class PackError(ValueError):
    """Raised by this module for invalid or unsafe pack inputs."""
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class FallbackLineSanitizer:
    """Regex-only line redactor.

    ``load_line_sanitizer`` returns an instance of this class when none of the
    sibling sanitizer scripts exist next to this file.
    """

    def __init__(self, *, show_paths: bool = False) -> None:
        # Stored so the constructor signature matches the way
        # load_line_sanitizer builds the external sanitizer; not consulted here.
        self.show_paths = show_paths
        # Counts LINES on which at least one redaction happened (not matches).
        self.redactions = 0

    def sanitize(self, raw_line: str) -> tuple[str, bool]:
        """Redact secret-looking spans in ``raw_line``.

        Returns ``(sanitized_line, did_redact)``. For ``key=value`` and
        ``key: value`` matches the key is kept and only the value is replaced;
        every other match is replaced wholesale with ``[REDACTED]``.
        """

        def repl(match: re.Match[str]) -> str:
            text = match.group(0)
            # Decide on the FIRST separator only. The value part may itself
            # contain "=" or ":" (base64 padding in ``Authorization: Basic
            # dXNl...==``, ``token: a=b``); splitting on a later separator
            # would copy part of the secret into the "key" that is kept.
            separator = re.search(r"[:=]", text)
            if separator is None:
                return "[REDACTED]"
            key = text[: separator.start()]
            if separator.group(0) == "=":
                return key + "=[REDACTED]"
            if re.search(r"(?i)(api|token|secret|password|authorization)", key):
                return key + ": [REDACTED]"
            return "[REDACTED]"

        line, count = SECRET_CONTENT_RE.subn(repl, raw_line)
        if count:
            self.redactions += 1
        return line, bool(count)
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def load_line_sanitizer(show_paths: bool = False) -> object:
    """Return a line sanitizer, preferring a sibling sanitizer script.

    The directory containing this file is searched, in order, for
    ``sanitize_output.py``, ``context-guard-sanitize-output`` and
    ``claude-sanitize-output``. The first one that exists is executed as a
    module and its ``LineSanitizer`` is instantiated. If it exists but cannot
    be loaded, ``RuntimeError`` is raised rather than silently falling back.
    ``FallbackLineSanitizer`` is used only when none of the scripts exist.
    """
    here = Path(__file__).resolve().parent
    for script_name in ("sanitize_output.py", "context-guard-sanitize-output", "claude-sanitize-output"):
        script_path = here / script_name
        if script_path.exists():
            try:
                # SourceFileLoader is used directly so scripts without a .py
                # suffix can be loaded as Python source.
                module_name = f"_context_guard_pack_sanitize_{os.getpid()}"
                file_loader = importlib.machinery.SourceFileLoader(module_name, str(script_path))
                module_spec = importlib.util.spec_from_loader(file_loader.name, file_loader)
                if module_spec is None:
                    raise RuntimeError("import spec unavailable")
                loaded = importlib.util.module_from_spec(module_spec)
                file_loader.exec_module(loaded)
                return loaded.LineSanitizer(show_paths=show_paths)
            except Exception as exc:
                raise RuntimeError(f"could not load sanitizer {script_path}: {exc}") from exc
    return FallbackLineSanitizer(show_paths=show_paths)
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def sanitize_text(text: str, *, show_paths: bool = False) -> tuple[str, int]:
    """Sanitize ``text`` line by line.

    Returns the sanitized text (line endings preserved) together with the
    number of lines on which the sanitizer reported a redaction.
    """
    sanitizer = load_line_sanitizer(show_paths)
    pieces: list[str] = []
    redacted_line_count = 0
    for raw_line in text.splitlines(True):
        clean_line, changed = sanitizer.sanitize(raw_line)  # type: ignore[attr-defined]
        pieces.append(clean_line)
        redacted_line_count += 1 if changed else 0
    return "".join(pieces), redacted_line_count
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def byte_len(text: str) -> int:
    """Return the UTF-8 encoded size of ``text`` in bytes.

    Unencodable code points (e.g. lone surrogates) are counted as their
    replacement character instead of raising.
    """
    encoded = text.encode("utf-8", "replace")
    return len(encoded)
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def token_proxy(text: str) -> int:
    """Estimate a token count from the character length of ``text``.

    Empty text is 0; any non-empty text is at least 1. The estimate is
    ``len(text) / TOKEN_PROXY_CHARS_PER_TOKEN`` rounded with ``round``.
    """
    if text:
        estimate = round(len(text) / TOKEN_PROXY_CHARS_PER_TOKEN)
        return estimate if estimate > 1 else 1
    return 0
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
def sha256_text(text: str) -> str:
    """Return the hex SHA-256 digest of ``text`` encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(text.encode("utf-8", errors="replace"))
    return digest.hexdigest()
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def path_hash(path: Path) -> str:
    """Return the first 12 hex characters of the SHA-256 of ``str(path)``."""
    encoded = str(path).encode("utf-8", "replace")
    full_digest = hashlib.sha256(encoded).hexdigest()
    return full_digest[:12]
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
def sanitize_path_component(component: str) -> tuple[str, bool]:
    """Return ``(component, False)``, or the redaction placeholder and ``True``.

    A component is replaced when ``SECRET_CONTENT_RE`` matches anywhere in it.
    """
    looks_secret = SECRET_CONTENT_RE.search(component) is not None
    if looks_secret:
        return REDACTED_PATH_COMPONENT, True
    return component, False
|
|
169
|
+
|
|
170
|
+
|
|
171
|
+
def display_root(root: Path) -> str:
    """Return a display name for ``root``: ``<name>#path:<hash>``.

    The name is the final path component ("project" when that is empty or
    looks like a secret); the hash is ``path_hash(root)``.
    """
    base_name, was_redacted = sanitize_path_component(root.name or "project")
    shown_name = "project" if was_redacted else base_name
    return f"{shown_name}#path:{path_hash(root)}"
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def display_rel_path(rel: str) -> tuple[str, bool]:
    """Return ``rel`` as a ``/``-joined display path plus a redaction flag.

    Backslashes are treated as separators, empty components are dropped, and
    each remaining component goes through ``sanitize_path_component``. The
    flag is True when any component was replaced.
    """
    checked = [
        sanitize_path_component(piece)
        for piece in rel.replace("\\", "/").split("/")
        if piece
    ]
    shown = "/".join(safe for safe, _ in checked)
    any_redacted = any(flag for _, flag in checked)
    return shown, any_redacted
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def parse_line_range(value: object) -> LineRange | None:
    """Parse a line-range value into a ``LineRange``.

    Accepted forms:
      * ``None`` or ``""`` -> ``None`` (no range requested);
      * a mapping with ``start`` and ``end`` entries;
      * a ``"start:end"`` string.

    Raises:
        PackError: with the message ``"invalid_lines"`` when the value has
            another type, a bound cannot be converted to ``int``, ``start`` is
            below 1, or ``end`` is below ``start``.
    """
    if value is None or value == "":
        return None
    if isinstance(value, dict):
        raw_start, raw_end = value.get("start"), value.get("end")
    elif isinstance(value, str):
        raw_start, separator, raw_end = value.partition(":")
        if not separator:
            raise PackError("invalid_lines")
    else:
        raise PackError("invalid_lines")
    try:
        start = int(raw_start)
        end = int(raw_end)
    except (TypeError, ValueError, OverflowError) as exc:
        # OverflowError: int(float("inf")). The json module parses both
        # ``Infinity`` and out-of-range literals such as ``1e999`` to inf, so a
        # manifest can deliver it; bounded_int already tolerates the same case.
        raise PackError("invalid_lines") from exc
    if start < 1 or end < start:
        raise PackError("invalid_lines")
    return LineRange(start, end)
|
|
213
|
+
|
|
214
|
+
|
|
215
|
+
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
    """Convert ``value`` to ``int`` and clamp it to ``[minimum, maximum]``.

    ``default`` is returned unclamped when the conversion fails.
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    if parsed < minimum:
        parsed = minimum
    return maximum if parsed > maximum else parsed
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def cap_label(value: object, default: str | None = None, limit: int = MAX_LABEL_CHARS) -> str | None:
    """Normalise a free-text label for display.

    Whitespace runs are collapsed to single spaces, secret-looking spans are
    replaced with ``[REDACTED]``, and text longer than ``limit`` is cut and
    suffixed with `` ...[truncated]``. ``default`` is returned when ``value``
    is ``None`` or nothing is left after normalisation.
    """
    if value is None:
        return default
    words = str(value).strip().split()
    cleaned = SECRET_CONTENT_RE.sub("[REDACTED]", " ".join(words))
    if not cleaned:
        return default
    if len(cleaned) <= limit:
        return cleaned
    # 15 == len(" ...[truncated]"), so the result fits ``limit`` whenever limit >= 15.
    keep = max(0, limit - 15)
    return cleaned[:keep].rstrip() + " ...[truncated]"
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def read_manifest(path: Path) -> list[SourceSpec]:
    """Load source specs from a JSON manifest.

    The manifest must be a JSON object with a ``sources`` list of objects, each
    with a ``path``; ``version`` (when present) must equal ``VERSION``. Per
    source, ``priority`` is clamped, ``label`` is normalised by ``cap_label``,
    and an unparsable ``lines`` value is recorded as ``LineRange(-1, -1)`` so it
    is reported as an omission later instead of aborting the whole manifest.

    Raises:
        PackError: if the file cannot be read, exceeds ``MAX_MANIFEST_BYTES``,
            is not valid UTF-8 JSON, or does not have the shape above.
    """
    try:
        with path.open("rb") as handle:
            # Read at most one byte past the cap so an oversized manifest is
            # detected without loading the whole file into memory.
            raw = handle.read(MAX_MANIFEST_BYTES + 1)
            size = len(raw)
            if size > MAX_MANIFEST_BYTES:
                try:
                    # Report the real on-disk size when the OS can provide it.
                    size = max(size, os.fstat(handle.fileno()).st_size)
                except OSError:
                    pass
    except OSError as exc:
        raise PackError(f"could not read manifest: {exc.strerror or exc.__class__.__name__}") from exc
    if size > MAX_MANIFEST_BYTES:
        raise PackError(f"manifest exceeds trusted size cap: {size} > {MAX_MANIFEST_BYTES}")
    try:
        data = json.loads(raw.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        raise PackError(f"invalid manifest JSON: {exc}") from exc
    # A non-object top level yields version None and fails the check below.
    version = data.get("version", VERSION) if isinstance(data, dict) else None
    if version != VERSION:
        raise PackError(f"unsupported manifest version: {version}")
    sources = data.get("sources") if isinstance(data, dict) else None
    if not isinstance(sources, list):
        raise PackError("manifest sources must be a list")
    out: list[SourceSpec] = []
    for item in sources:
        if not isinstance(item, dict):
            raise PackError("manifest sources must be objects")
        if "path" not in item:
            raise PackError("manifest source missing path")
        try:
            lines = parse_line_range(item.get("lines"))
        except PackError:
            # Sentinel: resolve_source reports this source as "invalid_lines".
            lines = LineRange(-1, -1)
        out.append(SourceSpec(
            path=str(item.get("path", "")),
            priority=bounded_int(item.get("priority"), 0, -1_000_000, 1_000_000),
            lines=lines,
            label=cap_label(item.get("label")),
            origin="manifest",
        ))
    return out
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def parse_source_spec(raw: str) -> SourceSpec:
    """Parse one ``--source`` value into a ``SourceSpec``.

    Two forms are accepted: a bare path, or comma-separated ``key=value``
    pairs (chosen when the text before the first comma contains ``=``). The
    keys read are ``path``, ``priority``, ``lines`` and ``label``. An
    unparsable ``lines`` value is stored as ``LineRange(-1, -1)``.

    Raises:
        PackError: for an empty value, a part without ``=`` in key/value
            form, or a missing/empty ``path``.
    """
    spec_text = raw.strip()
    if not spec_text:
        raise PackError("empty --source")
    fields: dict[str, str] = {}
    first_segment = spec_text.split(",", 1)[0]
    if "=" in first_segment:
        for chunk in spec_text.split(","):
            if not chunk:
                continue
            name, separator, content = chunk.partition("=")
            if not separator:
                raise PackError(f"invalid --source part: {chunk}")
            fields[name.strip()] = content.strip()
    else:
        fields["path"] = spec_text
    if not fields.get("path"):
        raise PackError("--source missing path")
    try:
        line_range = parse_line_range(fields.get("lines"))
    except PackError:
        # Sentinel: resolve_source reports this source as "invalid_lines".
        line_range = LineRange(-1, -1)
    return SourceSpec(
        path=fields["path"],
        priority=bounded_int(fields.get("priority"), 0, -1_000_000, 1_000_000),
        lines=line_range,
        label=cap_label(fields.get("label")),
        origin="cli",
    )
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def normalize_root(raw_root: Path) -> Path:
    """Expand ``~`` in ``raw_root``, resolve it, and require a real directory.

    Raises:
        PackError: if the expanded path is itself a symlink, cannot be
            resolved, or does not resolve to a directory.
    """
    candidate = raw_root.expanduser()
    try:
        if candidate.is_symlink():
            raise PackError("root must not be a symlink")
        resolved = candidate.resolve()
    except OSError as exc:
        detail = exc.strerror or exc.__class__.__name__
        raise PackError(f"could not resolve root: {detail}") from exc
    if resolved.is_dir():
        return resolved
    raise PackError("root must be a directory")
|
|
313
|
+
|
|
314
|
+
|
|
315
|
+
def omission(spec: SourceSpec, reason: str, *, path: str | None = None, redacted_path: bool = False) -> dict[str, Any]:
    """Build the metadata record for a source that was left out of the pack.

    ``path`` is the display path to report; when it is ``None`` a sanitized
    form of the raw spec path is used. ``requested_lines`` is included only
    for a valid range (``start > 0``).
    """
    shown_path = safe_raw_path_label(spec.path) if path is None else path
    record: dict[str, Any] = {
        "path": shown_path,
        "status": "omitted",
        "priority": spec.priority,
        "reason": reason,
        "input_index": spec.input_index,
    }
    if spec.label:
        record["label"] = spec.label
    requested = spec.lines
    if requested and requested.start > 0:
        record["requested_lines"] = requested.as_dict()
    if redacted_path:
        record["retrieval_omitted_reason"] = "redacted_path"
    return record
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def safe_raw_path_label(raw: str) -> str:
    """Return a display-safe version of an unvalidated path string.

    Backslashes are treated as separators, empty and ``.`` components are
    dropped, and secret-looking components are replaced. ``"path"`` is
    returned when nothing remains.
    """
    pieces = [
        sanitize_path_component(piece)[0]
        for piece in raw.replace("\\", "/").split("/")
        if piece not in {"", "."}
    ]
    return "/".join(pieces) or "path"
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def lexical_rel(raw_path: str) -> tuple[Path | None, str]:
|
|
344
|
+
path = Path(raw_path)
|
|
345
|
+
if path.is_absolute():
|
|
346
|
+
return None, "outside_root"
|
|
347
|
+
parts = path.parts
|
|
348
|
+
if not parts or any(part in {"..", ""} for part in parts):
|
|
349
|
+
return None, "outside_root"
|
|
350
|
+
cleaned = Path(*[part for part in parts if part != "."])
|
|
351
|
+
if not cleaned.parts:
|
|
352
|
+
return None, "outside_root"
|
|
353
|
+
return cleaned, ""
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def open_dir_no_follow(path: Path | str, *, dir_fd: int | None = None) -> int:
|
|
357
|
+
flags = os.O_RDONLY
|
|
358
|
+
if hasattr(os, "O_DIRECTORY"):
|
|
359
|
+
flags |= os.O_DIRECTORY
|
|
360
|
+
if hasattr(os, "O_NOFOLLOW"):
|
|
361
|
+
flags |= os.O_NOFOLLOW
|
|
362
|
+
if hasattr(os, "O_CLOEXEC"):
|
|
363
|
+
flags |= os.O_CLOEXEC
|
|
364
|
+
if dir_fd is None:
|
|
365
|
+
fd = os.open(path, flags)
|
|
366
|
+
else:
|
|
367
|
+
fd = os.open(path, flags, dir_fd=dir_fd)
|
|
368
|
+
try:
|
|
369
|
+
st = os.fstat(fd)
|
|
370
|
+
if not stat.S_ISDIR(st.st_mode):
|
|
371
|
+
raise PackError("not a directory")
|
|
372
|
+
return fd
|
|
373
|
+
except Exception:
|
|
374
|
+
os.close(fd)
|
|
375
|
+
raise
|
|
376
|
+
|
|
377
|
+
|
|
378
|
+
def open_regular_under_root(root: Path, rel: Path) -> tuple[Any | None, str]:
    """Open the regular file ``rel`` beneath ``root`` without following symlinks.

    The path is walked one component at a time with ``dir_fd``-relative opens,
    so each intermediate directory and the final file are opened with
    ``O_NOFOLLOW`` (where available).

    Returns:
        ``(handle, "")`` on success, where ``handle`` is a UTF-8 text file
        object (undecodable bytes replaced, newlines untranslated) that the
        caller must close. Otherwise ``(None, reason)`` with ``reason`` one of:

        * ``"outside_root"`` -- a component is empty, ``.`` or ``..``;
        * ``"missing"`` -- a component does not exist or a parent is not a
          directory;
        * ``"empty_source"`` -- the final component is a directory or another
          non-regular file;
        * ``"unsafe_path"`` -- any other ``OSError`` (presumably including the
          error ``O_NOFOLLOW`` produces for a symlink), or an empty ``rel``.
    """
    current_fd: int | None = None
    try:
        current_fd = open_dir_no_follow(root)
        for index, part in enumerate(rel.parts):
            if part in {"", ".", ".."}:
                return None, "outside_root"
            is_final = index == len(rel.parts) - 1
            if not is_final:
                try:
                    next_fd = open_dir_no_follow(part, dir_fd=current_fd)
                except FileNotFoundError:
                    return None, "missing"
                except NotADirectoryError:
                    return None, "missing"
                except OSError:
                    return None, "unsafe_path"
                # Descend: the parent descriptor is no longer needed.
                os.close(current_fd)
                current_fd = next_fd
                continue
            flags = os.O_RDONLY
            # O_NONBLOCK: a read-only open of a FIFO otherwise blocks until a
            # writer appears, and the regular-file check below can only run
            # after the open returns. The flag has no effect on regular files.
            for optional_flag in ("O_NOFOLLOW", "O_CLOEXEC", "O_NONBLOCK"):
                flags |= getattr(os, optional_flag, 0)
            file_fd = -1
            try:
                file_fd = os.open(part, flags, dir_fd=current_fd)
                st = os.fstat(file_fd)
                if not stat.S_ISREG(st.st_mode):
                    os.close(file_fd)
                    file_fd = -1
                    return None, "empty_source"
                handle = os.fdopen(file_fd, "r", encoding="utf-8", errors="replace", newline="")
                # The file object owns the descriptor from here on.
                file_fd = -1
                return handle, ""
            except FileNotFoundError:
                return None, "missing"
            except IsADirectoryError:
                return None, "empty_source"
            except NotADirectoryError:
                return None, "missing"
            except OSError:
                return None, "unsafe_path"
            finally:
                # Only still >= 0 when a failure happened after os.open.
                if file_fd >= 0:
                    try:
                        os.close(file_fd)
                    except OSError:
                        pass
    except OSError:
        return None, "unsafe_path"
    finally:
        if current_fd is not None:
            try:
                os.close(current_fd)
            except OSError:
                pass
    # Reached only when ``rel`` has no components.
    return None, "unsafe_path"
|
|
437
|
+
|
|
438
|
+
|
|
439
|
+
def resolve_source(root: Path, spec: SourceSpec) -> tuple[ResolvedSource | None, dict[str, Any] | None]:
    """Read, sanitize and slice the file described by ``spec``.

    Returns ``(resolved, None)`` on success or ``(None, omission_record)``
    when the source is left out. The whole file is sanitized before the
    requested range is applied; a range ending past the last line is clipped.
    """
    wanted = spec.lines
    if wanted is not None and wanted.start < 1:
        return None, omission(spec, "invalid_lines")
    rel, why = lexical_rel(spec.path)
    if rel is None:
        return None, omission(spec, why)
    shown, path_redacted = display_rel_path(rel.as_posix())

    def skipped(reason: str) -> tuple[None, dict[str, Any]]:
        return None, omission(spec, reason, path=shown, redacted_path=path_redacted)

    handle, why = open_regular_under_root(root, rel)
    if handle is None:
        return skipped(why)
    try:
        with handle:
            contents = handle.read()
    except OSError:
        return skipped("unsafe_path")
    clean_text, redacted_count = sanitize_text(contents)
    file_lines = clean_text.splitlines(True)
    if not file_lines:
        return skipped("empty_source")
    line_count = len(file_lines)
    span = wanted or LineRange(1, line_count)
    if span.start > line_count:
        return skipped("empty_source")
    last = min(span.end, line_count)
    chosen = file_lines[span.start - 1:last]
    if not chosen:
        return skipped("empty_source")
    resolved = ResolvedSource(
        spec=spec,
        abs_path=root / rel,
        display_path=shown,
        redacted_path=path_redacted,
        requested_lines=span,
        selected_lines=chosen,
        total_lines=line_count,
        redacted_lines=redacted_count,
    )
    return resolved, None
|
|
476
|
+
|
|
477
|
+
|
|
478
|
+
def retrieval_cli(root_arg: str, display_path: str, lines: LineRange) -> str:
    """Return the shell command that re-reads ``lines`` of ``display_path``.

    Root and path are shell-quoted with ``shlex.quote``.
    """
    pieces = [
        "context-guard-pack slice",
        "--root",
        shlex.quote(root_arg),
        "--path",
        shlex.quote(display_path),
        "--lines",
        f"{lines.start}:{lines.end}",
        "--json",
    ]
    return " ".join(pieces)
|
|
483
|
+
|
|
484
|
+
|
|
485
|
+
def safe_root_arg_for_retrieval(root_arg: str) -> str | None:
    """Return ``root_arg`` as text if it is safe to echo, else ``None``.

    The root is rejected when the whole string, or any single path component,
    matches the secret pattern.
    """
    text = str(root_arg)
    if SECRET_CONTENT_RE.search(text):
        return None
    components = (piece for piece in text.replace("\\", "/").split("/") if piece)
    if any(sanitize_path_component(piece)[1] for piece in components):
        return None
    return text
|
|
496
|
+
|
|
497
|
+
|
|
498
|
+
def retrieval_for(root_arg: str, display_path: str, lines: LineRange, *, redacted_path: bool) -> tuple[str | None, str | None]:
    """Return ``(command, None)`` or ``(None, reason)`` for a source slice.

    No command is produced when the display path was redacted
    (``"redacted_path"``) or the root is not safe to echo
    (``"unsafe_root_path"``).
    """
    if redacted_path:
        return None, "redacted_path"
    echoable_root = safe_root_arg_for_retrieval(root_arg)
    if echoable_root is not None:
        return retrieval_cli(echoable_root, display_path, lines), None
    return None, "unsafe_root_path"
|
|
505
|
+
|
|
506
|
+
|
|
507
|
+
def render_block(source: ResolvedSource, lines: list[str], *, root_arg: str, status: str, included: LineRange) -> str:
    """Render one source as a Markdown section with a fenced ``text`` block.

    The header lists source path, priority, status, included and requested
    line ranges, and either the retrieval command or the reason it was
    omitted. A newline is appended to the body when the last line lacks one so
    the closing fence starts on its own line.
    """
    heading = source.spec.label or source.display_path
    asked = source.requested_lines or LineRange(1, source.total_lines)
    command, no_command_reason = retrieval_for(
        root_arg, source.display_path, included, redacted_path=source.redacted_path
    )
    header_lines = [
        f"## {heading}",
        f"Source: `{source.display_path}`",
        f"Priority: {source.spec.priority}",
        f"Status: {status}",
        f"Included lines: {included.start}:{included.end}",
        f"Requested lines: {asked.start}:{asked.end}",
    ]
    if command:
        header_lines.append(f"Retrieval: `{command}`")
    elif no_command_reason:
        header_lines.append(f"Retrieval omitted: {no_command_reason}")
    body = "".join(lines)
    if lines and not lines[-1].endswith("\n"):
        body += "\n"
    return "\n".join(header_lines) + "\n\n```text\n" + body + "```\n\n"
|
|
524
|
+
|
|
525
|
+
|
|
526
|
+
def source_metadata(source: ResolvedSource, *, status: str, lines: list[str], included: LineRange, root_arg: str) -> dict[str, Any]:
    """Build the metadata record for a source that is (partly) in the pack.

    ``bytes`` is the UTF-8 size of the included lines only. A ``partial``
    status adds ``reason = "budget_exhausted"``.
    """
    asked = source.requested_lines or LineRange(1, source.total_lines)
    record: dict[str, Any] = {
        "path": source.display_path,
        "status": status,
        "priority": source.spec.priority,
        "input_index": source.spec.input_index,
        "requested_lines": asked.as_dict(),
        "included_lines": included.as_dict(),
        "bytes": byte_len("".join(lines)),
    }
    label = source.spec.label
    if label:
        record["label"] = label
    command, no_command_reason = retrieval_for(
        root_arg, source.display_path, included, redacted_path=source.redacted_path
    )
    if command:
        record["retrieval_cli"] = command
    elif no_command_reason:
        record["retrieval_omitted_reason"] = no_command_reason
    if status == "partial":
        record["reason"] = "budget_exhausted"
    return record
|
|
547
|
+
|
|
548
|
+
|
|
549
|
+
def budget_omission(source: ResolvedSource, *, root_arg: str) -> dict[str, Any]:
    """Build the omission record for a resolved source that did not fit the budget.

    Extends the generic omission record with the requested range, the file's
    total line count, and a retrieval command for the full requested range
    when one can be shown.
    """
    asked = source.requested_lines or LineRange(1, source.total_lines)
    record = omission(
        source.spec,
        "budget_exhausted",
        path=source.display_path,
        redacted_path=source.redacted_path,
    )
    record["requested_lines"] = asked.as_dict()
    record["total_lines"] = source.total_lines
    command, no_command_reason = retrieval_for(
        root_arg, source.display_path, asked, redacted_path=source.redacted_path
    )
    if command:
        record["retrieval_cli"] = command
        record.pop("retrieval_omitted_reason", None)
    elif no_command_reason:
        record["retrieval_omitted_reason"] = no_command_reason
    return record
|
|
561
|
+
|
|
562
|
+
|
|
563
|
+
def fit_partial_lines(source: ResolvedSource, remaining: int, *, root_arg: str) -> tuple[list[str], str | None, LineRange | None]:
    """Pick the longest leading run of selected lines whose block fits ``remaining`` bytes.

    Returns ``(lines, rendered_block, included_range)``, or ``([], None, None)``
    when not even the first line fits. Each candidate is rendered in full
    because the header (included range, retrieval command) changes as lines
    are added.
    """
    if remaining <= 0:
        return [], None, None
    first_line = source.requested_lines.start if source.requested_lines else 1
    kept: list[str] = []
    kept_block: str | None = None
    kept_span: LineRange | None = None
    for line in source.selected_lines:
        trial = kept + [line]
        trial_span = LineRange(first_line, first_line + len(trial) - 1)
        trial_block = render_block(source, trial, root_arg=root_arg, status="partial", included=trial_span)
        if byte_len(trial_block) > remaining:
            break
        kept, kept_block, kept_span = trial, trial_block, trial_span
    if not kept:
        return [], None, None
    return kept, kept_block, kept_span
|
|
579
|
+
|
|
580
|
+
|
|
581
|
+
def metadata_size(data: dict[str, Any]) -> int:
    """Return the UTF-8 size of ``data`` serialized as indented, key-sorted JSON.

    One extra byte is added for the trailing newline written after the JSON
    (see ``write_private_json_at``).
    """
    rendered = json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True)
    return len(rendered.encode("utf-8", errors="replace")) + 1
|
|
583
|
+
|
|
584
|
+
|
|
585
|
+
def artifact_failure(error: str, *, bytes_count: int = 0, capped: bool = False) -> dict[str, Any]:
    """Return the ``artifact`` record describing a receipt that was not stored."""
    return dict(
        stored=False,
        path=None,
        bytes=bytes_count,
        capped=capped,
        error=error,
        cap_bytes=MAX_RECEIPT_BYTES,
    )
|
|
594
|
+
|
|
595
|
+
|
|
596
|
+
def ensure_private_pack_dir(root: Path) -> tuple[Path | None, int | None, str | None]:
    """Create/verify the receipt directory by walking from a no-follow root fd.

    Walks ``root/.context-guard/packs`` one component at a time, creating a
    missing component with mode 0o700 and opening each one relative to its
    parent's descriptor via ``open_dir_no_follow``.

    Returns:
        ``(path, dir_fd, None)`` on success; the caller owns ``dir_fd`` and
        must close it. On failure ``(None, None, code)`` where ``code`` is
        ``"artifact_dir_unavailable"`` (mkdir failed) or
        ``"unsafe_artifact_dir"`` (a component could not be opened as a
        directory).
    """
    current_fd: int | None = None
    try:
        current_fd = open_dir_no_follow(root)
        for part in (".context-guard", "packs"):
            # Open-or-create loop: retried after a successful mkdir, and after
            # FileExistsError (something created the entry in between).
            while True:
                try:
                    next_fd = open_dir_no_follow(part, dir_fd=current_fd)
                    break
                except FileNotFoundError:
                    try:
                        os.mkdir(part, 0o700, dir_fd=current_fd)
                    except FileExistsError:
                        continue
                    except (OSError, NotImplementedError):
                        return None, None, "artifact_dir_unavailable"
                except NotADirectoryError:
                    return None, None, "unsafe_artifact_dir"
                except (OSError, NotImplementedError):
                    return None, None, "unsafe_artifact_dir"
            # Best effort: tighten permissions on a directory that already existed.
            try:
                os.fchmod(next_fd, 0o700)
            except (AttributeError, OSError):
                pass
            os.close(current_fd)
            current_fd = next_fd
        # Hand ownership to the caller; clearing current_fd keeps the finally
        # block below from closing the descriptor being returned.
        dir_fd = current_fd
        current_fd = None
        return root / PACK_DIR, dir_fd, None
    except OSError:
        return None, None, "unsafe_artifact_dir"
    finally:
        if current_fd is not None:
            try:
                os.close(current_fd)
            except OSError:
                pass
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
def write_private_json_at(dir_fd: int, filename: str, data: dict[str, Any]) -> None:
    """Write ``data`` as JSON to ``filename`` inside the directory ``dir_fd``.

    The file is created or truncated with mode 0o600, opened with
    ``O_NOFOLLOW`` where available, and written as indented, key-sorted JSON
    followed by a newline.

    Raises:
        PackError: if ``filename`` is empty, ``.``, ``..`` or contains ``/``.
        OSError: from opening or writing the file.
    """
    # The name must be a single component so the write stays inside dir_fd.
    if "/" in filename or filename in {"", ".", ".."}:
        raise PackError("unsafe_artifact_path")
    flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
    if hasattr(os, "O_NOFOLLOW"):
        flags |= os.O_NOFOLLOW
    if hasattr(os, "O_CLOEXEC"):
        flags |= os.O_CLOEXEC
    fd = os.open(filename, flags, 0o600, dir_fd=dir_fd)
    try:
        # The file object takes ownership of fd and closes it on exit.
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            json.dump(data, handle, ensure_ascii=False, indent=2, sort_keys=True)
            handle.write("\n")
    except Exception:
        # Best-effort close for the case where fdopen failed before taking
        # ownership. NOTE(review): when the with-block already closed fd this
        # second close fails with an OSError that is swallowed here.
        try:
            os.close(fd)
        except OSError:
            pass
        raise
    # The 0o600 passed to os.open only applies to newly created files (and is
    # subject to the umask); re-apply it for a file that already existed.
    # Best effort only.
    try:
        os.chmod(filename, 0o600, dir_fd=dir_fd, follow_symlinks=False)
    except (OSError, TypeError, NotImplementedError):
        pass
|
|
659
|
+
|
|
660
|
+
|
|
661
|
+
def finalize_receipt_size(receipt: dict[str, Any]) -> int:
    """Store the receipt's own serialized size in ``receipt["artifact"]["bytes"]``.

    Writing the size changes the size (the number of digits may change), so
    the value is recomputed until it stops changing, for at most four rounds.
    Returns the serialized size of the receipt.
    """
    artifact = receipt.setdefault("artifact", {})
    current = metadata_size(receipt)
    rounds_left = 4
    while rounds_left > 0:
        artifact["bytes"] = current
        measured = metadata_size(receipt)
        if measured == current:
            return current
        current = measured
        rounds_left -= 1
    # No fixed point within the round limit: record the last measurement.
    artifact["bytes"] = current
    return metadata_size(receipt)
|
|
672
|
+
|
|
673
|
+
|
|
674
|
+
def shrink_receipt_for_write(data: dict[str, Any]) -> tuple[dict[str, Any], bool]:
    """Return a deep copy of ``data`` trimmed toward ``MAX_RECEIPT_BYTES``.

    The input is never mutated. Trimming happens in stages and stops as soon
    as ``metadata_size`` reports a size within the cap:

    1. Omitted sources lose ``preview``; ``label`` and ``reason`` are capped.
    2. Included sources lose ``preview``; ``label`` is capped.
    3. The ``pack`` body is dropped and ``pack_omitted_from_receipt`` is set.

    The size is not re-checked after stage 3.

    Returns:
        ``(receipt, capped)`` where ``capped`` is ``False`` only when the copy
        already fit without any trimming.
    """
    receipt = copy.deepcopy(data)
    if metadata_size(receipt) <= MAX_RECEIPT_BYTES:
        return receipt, False

    artifact_meta = receipt.setdefault("artifact", {})
    artifact_meta["capped"] = True
    artifact_meta["cap_bytes"] = MAX_RECEIPT_BYTES

    # Stage 1: slim down the entries describing omitted sources.
    for entry in receipt.get("omitted_sources", []):
        if not isinstance(entry, dict):
            continue
        entry.pop("preview", None)
        if "label" in entry:
            entry["label"] = cap_label(entry.get("label"), limit=80)
        if "reason" in entry:
            current_reason = entry.get("reason")
            entry["reason"] = cap_label(current_reason, default=str(current_reason), limit=MAX_REASON_CHARS)
    if metadata_size(receipt) <= MAX_RECEIPT_BYTES:
        return receipt, True

    # Stage 2: slim down the entries describing included sources.
    for entry in receipt.get("included_sources", []):
        if not isinstance(entry, dict):
            continue
        entry.pop("preview", None)
        if "label" in entry:
            entry["label"] = cap_label(entry.get("label"), limit=80)
    if metadata_size(receipt) <= MAX_RECEIPT_BYTES:
        return receipt, True

    # Stage 3: the stdout payload remains authoritative for the full pack
    # body, so the receipt may omit it to stay readable.
    receipt["pack_omitted_from_receipt"] = True
    receipt.pop("pack", None)
    return receipt, True
|
|
702
|
+
|
|
703
|
+
|
|
704
|
+
def store_receipt(root: Path, result: dict[str, Any]) -> dict[str, Any]:
    """Write a size-capped receipt for ``result`` and report how it went.

    Args:
        root: Project root passed to ``ensure_private_pack_dir``.
        result: Pack result; its ``"pack_id"`` names the receipt file.

    Returns:
        On success, a dict with ``stored``, ``path``, ``bytes``, ``capped`` and
        ``cap_bytes``. Otherwise whatever ``artifact_failure`` returns for the
        failure reason (unusable directory, oversized metadata, or write error).
    """
    pack_dir, dir_fd, dir_error = ensure_private_pack_dir(root)
    if pack_dir is None or dir_fd is None:
        return artifact_failure(dir_error or "unsafe_artifact_dir")

    receipt_bytes = 0
    was_capped = False
    try:
        receipt, was_capped = shrink_receipt_for_write(result)
        receipt_bytes = metadata_size(receipt)
        if receipt_bytes > MAX_RECEIPT_BYTES:
            return artifact_failure("receipt_metadata_too_large", bytes_count=receipt_bytes, capped=True)

        pack_id = str(result["pack_id"])
        filename = f"{pack_id}.json"
        artifact_meta = receipt.setdefault("artifact", {})
        artifact_meta["stored"] = True
        artifact_meta["path"] = f"{PACK_DIR}/{pack_id}.json"
        artifact_meta["capped"] = was_capped

        # The artifact fields just added change the size, so measure again.
        receipt_bytes = finalize_receipt_size(receipt)
        if receipt_bytes > MAX_RECEIPT_BYTES:
            return artifact_failure("receipt_metadata_too_large", bytes_count=receipt_bytes, capped=True)

        write_private_json_at(dir_fd, filename, receipt)
    except (OSError, PackError, NotImplementedError):
        return artifact_failure("artifact_write_failed", bytes_count=receipt_bytes, capped=was_capped)
    finally:
        # The directory descriptor is released on every exit path.
        try:
            os.close(dir_fd)
        except OSError:
            pass

    return {
        "stored": True,
        "path": f"{PACK_DIR}/{pack_id}.json",
        "bytes": receipt_bytes,
        "capped": was_capped,
        "cap_bytes": MAX_RECEIPT_BYTES,
    }
|
|
738
|
+
|
|
739
|
+
|
|
740
|
+
def build_pack(root: Path, specs: list[SourceSpec], *, budget_bytes: int, root_arg: str, store_artifact: bool) -> dict[str, Any]:
    """Assemble a byte-budgeted context pack from ``specs`` and describe it.

    Steps, in order:

    1. Classify each spec. Line ranges starting below 1 and repeated
       ``(path, lines)`` requests become omissions; everything else goes
       through ``resolve_source``, which yields a candidate or an omission.
    2. Sort candidates by descending priority, then input order, then
       display path.
    3. Fill the budget: the header goes in if it fits; each candidate is
       added whole if it fits, else as the partial block returned by
       ``fit_partial_lines``, else recorded as a budget omission.
    4. Derive ``pack_id`` from a SHA-256 over a canonical description of the
       request and the pack text (first 20 hex characters).

    Args:
        root: Project root used to resolve sources.
        specs: Requested sources.
        budget_bytes: Byte budget for the rendered pack.
        root_arg: Root exactly as given by the caller; forwarded to rendering helpers.
        store_artifact: When true, ``store_receipt`` is called and its return
            value replaces the default ``"artifact"`` entry.

    Returns:
        The result payload (pack text, counts, per-source metadata, redaction
        and artifact information).
    """
    seen_identities: set[tuple[str, str]] = set()
    candidates: list[ResolvedSource] = []
    omitted: list[dict[str, Any]] = []
    canonical_specs: list[dict[str, Any]] = []

    # --- 1. classify every requested source -------------------------------
    for spec in specs:
        rel, _reason = lexical_rel(spec.path)

        if spec.lines is not None and spec.lines.start < 1:
            invalid = omission(spec, "invalid_lines")
            omitted.append(invalid)
            canonical_specs.append(
                {"path": invalid.get("path"), "priority": spec.priority, "lines": "invalid", "status": "invalid_lines"}
            )
            continue

        if rel is None:
            identity_lines = "invalid"
        elif spec.lines is not None and spec.lines.start > 0:
            identity_lines = spec.lines.identity()
        else:
            identity_lines = "all"
        identity = (spec.path if rel is None else rel.as_posix(), identity_lines)

        # Duplicate detection only applies to paths that normalized cleanly.
        if rel is not None:
            if identity in seen_identities:
                display, redacted = display_rel_path(rel.as_posix())
                omitted.append(omission(spec, "duplicate_source", path=display, redacted_path=redacted))
                canonical_specs.append(
                    {"path": display, "priority": spec.priority, "lines": identity_lines, "status": "duplicate_source"}
                )
                continue
            seen_identities.add(identity)

        source, rejected = resolve_source(root, spec)
        if rejected is not None:
            omitted.append(rejected)
            canonical_specs.append(
                {"path": rejected.get("path"), "priority": spec.priority, "lines": identity_lines, "status": rejected.get("reason")}
            )
            continue
        assert source is not None
        candidates.append(source)
        canonical_specs.append(
            {"path": source.display_path, "priority": spec.priority, "lines": identity_lines, "status": "candidate"}
        )

    # --- 2. order candidates ----------------------------------------------
    candidates.sort(key=lambda cand: (-cand.spec.priority, cand.spec.input_index, cand.display_path))

    # --- 3. fill the byte budget ------------------------------------------
    header = "# Context Pack\n\nGenerated by context-guard-pack. Token counts are estimated proxies; byte counts are observed.\n\n"
    parts: list[str] = []
    included: list[dict[str, Any]] = []
    used_bytes = 0
    header_bytes = byte_len(header)
    if header_bytes <= budget_bytes:
        parts.append(header)
        used_bytes += header_bytes

    for source in candidates:
        first_line = source.requested_lines.start if source.requested_lines else 1
        whole_range = LineRange(first_line, first_line + len(source.selected_lines) - 1)
        whole_block = render_block(source, source.selected_lines, root_arg=root_arg, status="included", included=whole_range)
        whole_bytes = byte_len(whole_block)
        room = budget_bytes - used_bytes

        if whole_bytes <= room:
            parts.append(whole_block)
            used_bytes += whole_bytes
            included.append(
                source_metadata(source, status="included", lines=source.selected_lines, included=whole_range, root_arg=root_arg)
            )
            continue

        partial_lines, partial_block, partial_range = fit_partial_lines(source, room, root_arg=root_arg)
        if partial_block is None or partial_range is None:
            omitted.append(budget_omission(source, root_arg=root_arg))
            continue
        parts.append(partial_block)
        used_bytes += byte_len(partial_block)
        included.append(
            source_metadata(source, status="partial", lines=partial_lines, included=partial_range, root_arg=root_arg)
        )

    pack = "".join(parts)
    pack_bytes = used_bytes
    redacted_lines = sum(cand.redacted_lines for cand in candidates)
    partial_count = sum(1 for entry in included if entry.get("status") == "partial")
    omitted_sorted = sorted(
        omitted,
        key=lambda entry: (entry.get("input_index", 0), str(entry.get("path", "")), str(entry.get("reason", ""))),
    )

    # --- 4. derive the pack id --------------------------------------------
    canonical = {
        "version": VERSION,
        "root": display_root(root),
        "budget_bytes": budget_bytes,
        "sources": canonical_specs,
        "pack_sha256": sha256_text(pack),
        "omission_summary": sorted({str(entry.get("reason")) for entry in omitted_sorted}),
    }
    canonical_json = json.dumps(canonical, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    pack_id = hashlib.sha256(canonical_json.encode("utf-8")).hexdigest()[:20]

    result: dict[str, Any] = {
        "tool": TOOL_NAME,
        "version": VERSION,
        "pack_id": pack_id,
        "root": display_root(root),
        "budget_bytes": budget_bytes,
        "pack_bytes": pack_bytes,
        "pack": pack,
        "token_proxy": {"measurement": "estimated", "method": f"chars_div_{TOKEN_PROXY_CHARS_PER_TOKEN}", "pack": token_proxy(pack)},
        "sources": {"total": len(specs), "included": len(included) - partial_count, "partial": partial_count, "omitted": len(omitted_sorted)},
        "included_sources": included,
        "omitted_sources": omitted_sorted,
        "redaction": {"redacted_lines": redacted_lines, "redacted_before_pack": True},
        "artifact": {"stored": False, "path": None, "bytes": 0, "capped": False, "cap_bytes": MAX_RECEIPT_BYTES},
        "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }
    if store_artifact:
        result["artifact"] = store_receipt(root, result)
    return result
|
|
835
|
+
|
|
836
|
+
|
|
837
|
+
def parse_all_sources(args: argparse.Namespace) -> list[SourceSpec]:
    """Collect source specs from ``--manifest`` and ``--source``, in that order.

    Manifest entries come first, followed by each ``--source`` value parsed
    with ``parse_source_spec``. Every spec then gets ``input_index`` set to
    its position in the combined list.

    Args:
        args: Parsed arguments providing ``manifest`` and ``source``.

    Returns:
        The combined list; empty when neither option was supplied.
    """
    collected: list[SourceSpec] = list(read_manifest(Path(args.manifest))) if args.manifest else []
    collected.extend(parse_source_spec(raw) for raw in (args.source or []))
    for position, spec in enumerate(collected):
        spec.input_index = position
    return collected
|
|
846
|
+
|
|
847
|
+
|
|
848
|
+
def slice_source(root: Path, *, raw_path: str, lines: LineRange) -> tuple[dict[str, Any], int]:
    """Resolve one file slice and return ``(payload, exit_code)``.

    Args:
        root: Project root the path is resolved against.
        raw_path: Path as supplied by the caller.
        lines: Requested line range.

    Returns:
        ``(payload, 0)`` with the joined slice content on success, or
        ``(payload, 1)`` with ``status="error"`` and the reason and path
        reported by ``resolve_source`` when the source is rejected.
    """
    source, rejected = resolve_source(root, SourceSpec(path=raw_path, lines=lines))
    if rejected is not None:
        failure = {
            "tool": TOOL_NAME,
            "status": "error",
            "reason": rejected.get("reason"),
            "path": rejected.get("path"),
        }
        return failure, 1

    assert source is not None
    body = "".join(source.selected_lines)
    success = {
        "tool": TOOL_NAME,
        "version": VERSION,
        "status": "ok",
        "path": source.display_path,
        "query": {
            "type": "lines",
            "start": lines.start,
            # Report the end actually available, not the one asked for.
            "end": min(lines.end, source.total_lines),
            "returned_lines": len(source.selected_lines),
        },
        "content": body,
        "bytes": byte_len(body),
        "redaction": {"redacted_lines": source.redacted_lines, "redacted_before_pack": True},
    }
    return success, 0
|
|
867
|
+
|
|
868
|
+
|
|
869
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser with its ``build`` and ``slice`` subcommands.

    A subcommand is required. ``build`` accepts a root, a manifest, repeated
    ``--source`` specs, a byte budget, ``--json`` and ``--no-artifact``;
    ``slice`` accepts a root plus the required ``--path`` and ``--lines``,
    and ``--json``.
    """
    root_help = "project root; must not be a symlink"
    json_help = "emit JSON payload"

    parser = argparse.ArgumentParser(description="Build budgeted local context packs with exact retrieval hints.")
    commands = parser.add_subparsers(dest="command", required=True)

    build_cmd = commands.add_parser("build", help="assemble a prioritized context pack")
    build_cmd.add_argument("--root", default=".", help=root_help)
    build_cmd.add_argument("--manifest", help="JSON manifest with version/sources")
    build_cmd.add_argument(
        "--source",
        action="append",
        help="source spec: path=REL[,priority=N][,lines=A:B][,label=TEXT]",
    )
    build_cmd.add_argument("--budget-bytes", type=int, default=DEFAULT_BUDGET_BYTES)
    build_cmd.add_argument("--json", action="store_true", help=json_help)
    build_cmd.add_argument("--no-artifact", action="store_true", help="do not write .context-guard/packs receipt")

    slice_cmd = commands.add_parser("slice", help="retrieve an exact sanitized file slice")
    slice_cmd.add_argument("--root", default=".", help=root_help)
    slice_cmd.add_argument("--path", required=True, help="relative file path under root")
    slice_cmd.add_argument("--lines", required=True, help="inclusive 1-indexed START:END")
    slice_cmd.add_argument("--json", action="store_true", help=json_help)

    return parser
|
|
885
|
+
|
|
886
|
+
|
|
887
|
+
def main(argv: list[str] | None = None) -> int:
    """Run the command-line interface and return the process exit code.

    Args:
        argv: Argument list; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` after a ``build``, the code from ``slice_source`` after a
        ``slice``, or ``2`` when a ``PackError`` is raised (its message is
        printed to stderr).
    """

    def emit_json(payload: dict[str, Any]) -> None:
        # Shared JSON output format for both subcommands.
        json.dump(payload, sys.stdout, ensure_ascii=False, indent=2, sort_keys=True)
        sys.stdout.write("\n")

    args = build_parser().parse_args(argv)
    try:
        root = normalize_root(Path(args.root))

        if args.command == "build":
            specs = parse_all_sources(args)
            if not specs:
                raise PackError("provide --manifest or --source")
            budget = bounded_int(args.budget_bytes, DEFAULT_BUDGET_BYTES, MIN_BUDGET_BYTES, MAX_BUDGET_BYTES)
            result = build_pack(
                root,
                specs,
                budget_bytes=budget,
                root_arg=str(args.root),
                store_artifact=not args.no_artifact,
            )
            if args.json:
                emit_json(result)
            else:
                # Pack text goes to stdout; the one-line summary goes to stderr.
                sys.stdout.write(str(result["pack"]))
                counts = result["sources"]
                summary = (
                    f"[context-guard-pack] pack_id={result['pack_id']} bytes={result['pack_bytes']}/{result['budget_bytes']} "
                    f"included={counts['included']} partial={counts['partial']} omitted={counts['omitted']}"
                )
                print(summary, file=sys.stderr)
            return 0

        if args.command == "slice":
            requested = parse_line_range(args.lines)
            if requested is None:
                raise PackError("invalid_lines")
            payload, exit_code = slice_source(root, raw_path=args.path, lines=requested)
            if args.json:
                emit_json(payload)
            elif exit_code == 0:
                sys.stdout.write(str(payload.get("content", "")))
            else:
                print(f"context-guard-pack: {payload.get('reason')}", file=sys.stderr)
            return exit_code

        raise PackError("unknown command")
    except PackError as exc:
        print(f"context-guard-pack: {exc}", file=sys.stderr)
        return 2
|
|
926
|
+
|
|
927
|
+
|
|
928
|
+
if __name__ == "__main__":
    # Script entry point: exit with the status code returned by main().
    sys.exit(main())
|