@ictechgy/context-guard 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. package/CHANGELOG.md +49 -0
  2. package/LICENSE +201 -0
  3. package/NOTICE +4 -0
  4. package/README.ko.md +353 -0
  5. package/README.md +353 -0
  6. package/context-guard-kit/README.md +76 -0
  7. package/context-guard-kit/benchmark_runner.py +1898 -0
  8. package/context-guard-kit/claude_transcript_cost_audit.py +1591 -0
  9. package/context-guard-kit/context_compress.py +543 -0
  10. package/context-guard-kit/context_escrow.py +919 -0
  11. package/context-guard-kit/context_guard_cli.py +149 -0
  12. package/context-guard-kit/context_guard_diet.py +1036 -0
  13. package/context-guard-kit/context_pack.py +929 -0
  14. package/context-guard-kit/failed_attempt_nudge.py +567 -0
  15. package/context-guard-kit/guard_large_read.py +690 -0
  16. package/context-guard-kit/hook_secret_patterns.py +43 -0
  17. package/context-guard-kit/read_symbol.py +483 -0
  18. package/context-guard-kit/rewrite_bash_for_token_budget.py +501 -0
  19. package/context-guard-kit/sanitize_output.py +725 -0
  20. package/context-guard-kit/settings.example.json +67 -0
  21. package/context-guard-kit/setup_wizard.py +1724 -0
  22. package/context-guard-kit/statusline.sh +362 -0
  23. package/context-guard-kit/statusline_merged.sh +157 -0
  24. package/context-guard-kit/tool_schema_pruner.py +837 -0
  25. package/context-guard-kit/trim_command_output.py +1098 -0
  26. package/docs/distribution.md +55 -0
  27. package/package.json +70 -0
  28. package/packaging/homebrew/context-guard.rb.template +34 -0
  29. package/plugins/context-guard/.claude-plugin/plugin.json +41 -0
  30. package/plugins/context-guard/LICENSE +201 -0
  31. package/plugins/context-guard/NOTICE +4 -0
  32. package/plugins/context-guard/README.ko.md +135 -0
  33. package/plugins/context-guard/README.md +135 -0
  34. package/plugins/context-guard/bin/claude-read-symbol +6 -0
  35. package/plugins/context-guard/bin/claude-sanitize-output +6 -0
  36. package/plugins/context-guard/bin/claude-token-artifact +6 -0
  37. package/plugins/context-guard/bin/claude-token-audit +6 -0
  38. package/plugins/context-guard/bin/claude-token-bench +6 -0
  39. package/plugins/context-guard/bin/claude-token-diet +6 -0
  40. package/plugins/context-guard/bin/claude-token-failed-nudge +6 -0
  41. package/plugins/context-guard/bin/claude-token-guard-read +6 -0
  42. package/plugins/context-guard/bin/claude-token-rewrite-bash +6 -0
  43. package/plugins/context-guard/bin/claude-token-setup +6 -0
  44. package/plugins/context-guard/bin/claude-token-statusline +6 -0
  45. package/plugins/context-guard/bin/claude-token-statusline-merged +6 -0
  46. package/plugins/context-guard/bin/claude-trim-output +6 -0
  47. package/plugins/context-guard/bin/context-guard +149 -0
  48. package/plugins/context-guard/bin/context-guard-artifact +919 -0
  49. package/plugins/context-guard/bin/context-guard-audit +1591 -0
  50. package/plugins/context-guard/bin/context-guard-bench +1898 -0
  51. package/plugins/context-guard/bin/context-guard-compress +543 -0
  52. package/plugins/context-guard/bin/context-guard-diet +1036 -0
  53. package/plugins/context-guard/bin/context-guard-failed-nudge +567 -0
  54. package/plugins/context-guard/bin/context-guard-guard-read +690 -0
  55. package/plugins/context-guard/bin/context-guard-pack +929 -0
  56. package/plugins/context-guard/bin/context-guard-read-symbol +483 -0
  57. package/plugins/context-guard/bin/context-guard-rewrite-bash +501 -0
  58. package/plugins/context-guard/bin/context-guard-sanitize-output +725 -0
  59. package/plugins/context-guard/bin/context-guard-setup +1724 -0
  60. package/plugins/context-guard/bin/context-guard-statusline +362 -0
  61. package/plugins/context-guard/bin/context-guard-statusline-merged +157 -0
  62. package/plugins/context-guard/bin/context-guard-tool-prune +837 -0
  63. package/plugins/context-guard/bin/context-guard-trim-output +1098 -0
  64. package/plugins/context-guard/brief/README.md +65 -0
  65. package/plugins/context-guard/brief/brief-mode.lite.md +29 -0
  66. package/plugins/context-guard/brief/brief-mode.standard.md +31 -0
  67. package/plugins/context-guard/brief/brief-mode.ultra.md +32 -0
  68. package/plugins/context-guard/lib/hook_secret_patterns.py +43 -0
  69. package/plugins/context-guard/skills/audit/SKILL.md +39 -0
  70. package/plugins/context-guard/skills/optimize/SKILL.md +48 -0
  71. package/plugins/context-guard/skills/setup/SKILL.md +40 -0
@@ -0,0 +1,929 @@
1
+ #!/usr/bin/env python3
2
+ """Build a deterministic, budgeted local context pack from prioritized files.
3
+
4
+ The packer is local-only and intentionally conservative. It assembles selected
5
+ file slices into a Markdown body whose rendered UTF-8 byte length is bounded by
6
+ ``--budget-bytes``. It redacts before building the pack/receipt, records why
7
+ lower-priority sources were omitted, and emits exact local slice commands for
8
+ retrieval when the path is safe to display.
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import argparse
13
+ import copy
14
+ import hashlib
15
+ import importlib.machinery
16
+ import importlib.util
17
+ import json
18
+ import os
19
+ from pathlib import Path
20
+ import re
21
+ import shlex
22
+ import stat
23
+ import sys
24
+ import time
25
+ from dataclasses import dataclass
26
+ from typing import Any
27
+
28
# Name used in generated retrieval commands and the pack header text.
TOOL_NAME = "context-guard-pack"
# Manifest schema version; read_manifest() rejects any other value.
VERSION = 1
# Budget default and clamp limits in bytes.
# NOTE(review): presumably wired to --budget-bytes; the argument parser is not visible here.
DEFAULT_BUDGET_BYTES = 12_000
MIN_BUDGET_BYTES = 0
MAX_BUDGET_BYTES = 2_000_000
# Largest serialized receipt (bytes) that store_receipt() will write to disk.
MAX_RECEIPT_BYTES = 64_000
# read_manifest() refuses manifests whose raw size exceeds this many bytes.
MAX_MANIFEST_BYTES = 1_000_000
# Default character limit applied by cap_label().
MAX_LABEL_CHARS = 160
# Character limit applied to omission reasons when a receipt is shrunk.
MAX_REASON_CHARS = 120
# Divisor used by token_proxy() to turn a character count into a token estimate.
TOKEN_PROXY_CHARS_PER_TOKEN = 4
# Receipt directory, relative to the pack root.
PACK_DIR = ".context-guard/packs"
# Placeholder substituted for a path component that matches SECRET_CONTENT_RE.
REDACTED_PATH_COMPONENT = "[REDACTED-PATH-COMPONENT]"
# Secret detector used for file content, labels and path components.
# The leading (?is) makes every alternative case-insensitive and lets "." match
# newlines. Alternatives, in order: PEM private-key header, "AKIA" + 16
# characters, gh?_ and github_pat_ tokens, xox?- tokens, sk- keys, "AIza" keys,
# Authorization Bearer/Basic headers, and generic key[:=]value assignments
# (api key, token, secret, password, client secret).
SECRET_CONTENT_RE = re.compile(
    r"(?is)("
    r"-----BEGIN [A-Z0-9 ]*PRIVATE KEY-----|"
    r"AKIA[0-9A-Z]{16}|"
    r"gh[pousr]_[A-Za-z0-9_]{20,}|"
    r"github_pat_[A-Za-z0-9_]{20,}|"
    r"xox[abprs]-[A-Za-z0-9-]{10,}|"
    r"sk-(?:ant|proj)-[A-Za-z0-9_-]{12,}|"
    r"sk-[A-Za-z0-9][A-Za-z0-9_-]{20,}|"
    r"AIza[0-9A-Za-z_\-]{20,}|"
    r"(?i:Authorization)\s*:\s*(?:Bearer|Basic)\s+[A-Za-z0-9._~+/=-]+|"
    r"(?<![A-Za-z0-9])(?:api[_-]?key|token|secret|password|client[_-]?secret)\s*[:=]\s*[^\s]+"
    r")"
)
54
+
55
+
56
@dataclass(frozen=True)
class LineRange:
    """Immutable start/end pair of line numbers.

    Ranges produced by ``parse_line_range`` are 1-based and inclusive. The
    parsers also use ``LineRange(-1, -1)`` as a marker for an unparsable range.
    """

    start: int
    end: int

    def as_dict(self) -> dict[str, int]:
        """Return the range as a JSON-friendly mapping."""
        return dict(start=self.start, end=self.end)

    def identity(self) -> str:
        """Return the ``start:end`` string used to detect duplicate sources."""
        return "{}:{}".format(self.start, self.end)
66
+
67
+
68
@dataclass
class SourceSpec:
    # One requested source, as parsed from a --source argument or a manifest entry.

    # Raw path text exactly as supplied; validated later by lexical_rel().
    path: str
    # Higher values sort first when build_pack() orders resolved sources.
    priority: int = 0
    # None means the whole file; LineRange(-1, -1) marks an unparsable request.
    lines: LineRange | None = None
    # Optional display title, already capped and redacted by cap_label().
    label: str | None = None
    # Tie-breaker after priority when sources are sorted.
    # NOTE(review): assigned outside the parsers shown here — confirm against the caller.
    input_index: int = 0
    # "cli" or "manifest", depending on which parser created the spec.
    origin: str = "cli"
76
+
77
+
78
@dataclass
class ResolvedSource:
    # A source that was opened, read and sanitized successfully by resolve_source().

    # The spec this source was resolved from.
    spec: SourceSpec
    # root joined with the cleaned relative path.
    abs_path: Path
    # Relative POSIX-style path with secret-looking components replaced.
    display_path: str
    # True when at least one component of display_path was replaced.
    redacted_path: bool
    # The requested range, or 1..total_lines when the spec had none.
    requested_lines: LineRange | None
    # Sanitized lines inside the requested range, line endings kept.
    selected_lines: list[str]
    # Number of lines in the sanitized file.
    total_lines: int
    # Number of lines in the whole file for which the sanitizer reported a redaction.
    redacted_lines: int
88
+
89
+
90
class PackError(ValueError):
    # Raised for invalid user input (bad manifest, bad --source, bad root) and
    # for unsafe artifact paths; a ValueError subclass, so it is not an OSError.
    pass
92
+
93
+
94
class FallbackLineSanitizer:
    """Regex-only redactor used when no external sanitizer script is found.

    It exposes the ``sanitize(line) -> (text, did_redact)`` call that
    ``sanitize_text`` relies on.
    """

    def __init__(self, *, show_paths: bool = False) -> None:
        # Stored but never read by this class.
        self.show_paths = show_paths
        # Counts lines that had at least one redaction, not individual matches.
        self.redactions = 0

    def sanitize(self, raw_line: str) -> tuple[str, bool]:
        """Redact every ``SECRET_CONTENT_RE`` match in ``raw_line``.

        Returns the sanitized line and whether anything was replaced. For
        ``key=value`` and ``key: value`` matches the key is kept so the reader
        can still see which setting was redacted.
        """

        def repl(match: re.Match[str]) -> str:
            text = match.group(0)
            # Split on whichever separator comes FIRST. Looking for "=" anywhere
            # in the match would treat "Authorization: Bearer abc==" or
            # "password: a=b" as key "<...secret...>" and leak the value.
            positions = [pos for pos in (text.find("="), text.find(":")) if pos >= 0]
            if not positions:
                return "[REDACTED]"
            sep_index = min(positions)
            key = text[:sep_index]
            if text[sep_index] == "=":
                return key + "=[REDACTED]"
            if re.search(r"(?i)(api|token|secret|password|authorization)", key):
                return key + ": [REDACTED]"
            return "[REDACTED]"

        line, count = SECRET_CONTENT_RE.subn(repl, raw_line)
        if count:
            self.redactions += 1
        return line, bool(count)
114
+
115
+
116
def load_line_sanitizer(show_paths: bool = False) -> object:
    """Return a line sanitizer, preferring a sibling sanitize-output script.

    The first existing candidate next to this file is executed as a module and
    its ``LineSanitizer`` is instantiated. If that candidate fails to load, a
    ``RuntimeError`` is raised; later candidates are not tried. When no
    candidate exists, ``FallbackLineSanitizer`` is returned.
    """
    here = Path(__file__).resolve().parent
    names = ("sanitize_output.py", "context-guard-sanitize-output", "claude-sanitize-output")
    candidate = next((path for path in (here / name for name in names) if path.exists()), None)
    if candidate is None:
        return FallbackLineSanitizer(show_paths=show_paths)
    try:
        module_name = f"_context_guard_pack_sanitize_{os.getpid()}"
        loader = importlib.machinery.SourceFileLoader(module_name, str(candidate))
        spec = importlib.util.spec_from_loader(loader.name, loader)
        if spec is None:
            raise RuntimeError("import spec unavailable")
        module = importlib.util.module_from_spec(spec)
        loader.exec_module(module)
        return module.LineSanitizer(show_paths=show_paths)
    except Exception as exc:
        raise RuntimeError(f"could not load sanitizer {candidate}: {exc}") from exc
133
+
134
+
135
def sanitize_text(text: str, *, show_paths: bool = False) -> tuple[str, int]:
    """Sanitize ``text`` line by line.

    Returns the sanitized text (line endings preserved) and the number of
    lines for which the sanitizer reported a redaction.
    """
    sanitizer = load_line_sanitizer(show_paths)
    pieces: list[str] = []
    hits = 0
    for raw in text.splitlines(keepends=True):
        clean, changed = sanitizer.sanitize(raw)  # type: ignore[attr-defined]
        pieces.append(clean)
        hits += 1 if changed else 0
    return "".join(pieces), hits
145
+
146
+
147
def byte_len(text: str) -> int:
    """Return the UTF-8 encoded size of ``text`` in bytes.

    Unencodable code points (lone surrogates) are replaced rather than raising.
    """
    encoded = text.encode("utf-8", errors="replace")
    return len(encoded)
149
+
150
+
151
def token_proxy(text: str) -> int:
    """Estimate a token count from the character count.

    Empty text yields 0; any non-empty text yields at least 1.
    """
    if text:
        estimate = round(len(text) / TOKEN_PROXY_CHARS_PER_TOKEN)
        return estimate if estimate > 1 else 1
    return 0
155
+
156
+
157
def sha256_text(text: str) -> str:
    """Return the hex SHA-256 digest of ``text`` encoded as UTF-8."""
    digest = hashlib.sha256()
    digest.update(text.encode("utf-8", errors="replace"))
    return digest.hexdigest()
159
+
160
+
161
def path_hash(path: Path) -> str:
    """Return the first 12 hex characters of the SHA-256 of ``str(path)``."""
    encoded = str(path).encode("utf-8", "replace")
    full_digest = hashlib.sha256(encoded).hexdigest()
    return full_digest[:12]
163
+
164
+
165
def sanitize_path_component(component: str) -> tuple[str, bool]:
    """Replace a secret-looking path component with a placeholder.

    Returns ``(text, redacted)`` where ``redacted`` is True when the component
    matched ``SECRET_CONTENT_RE`` and was replaced.
    """
    looks_secret = SECRET_CONTENT_RE.search(component) is not None
    return (REDACTED_PATH_COMPONENT, True) if looks_secret else (component, False)
169
+
170
+
171
def display_root(root: Path) -> str:
    """Return a display label for ``root``: its final name plus a short path hash.

    A missing or secret-looking name is shown as ``project``.
    """
    candidate = root.name or "project"
    safe_name, was_redacted = sanitize_path_component(candidate)
    label = "project" if was_redacted else safe_name
    return f"{label}#path:{path_hash(root)}"
176
+
177
+
178
def display_rel_path(rel: str) -> tuple[str, bool]:
    """Return a displayable form of a relative path and whether any part was redacted.

    Backslashes are treated as separators and empty segments are dropped.
    """
    segments = [seg for seg in rel.replace("\\", "/").split("/") if seg]
    cleaned = [sanitize_path_component(seg) for seg in segments]
    shown = "/".join(safe for safe, _ in cleaned)
    return shown, any(hit for _, hit in cleaned)
188
+
189
+
190
def parse_line_range(value: object) -> LineRange | None:
    """Parse a line-range request into a ``LineRange``.

    Accepts ``None`` or ``""`` (no range, returns ``None``), a mapping with
    ``start`` and ``end`` keys, or a ``"start:end"`` string.

    Raises:
        PackError: with message ``invalid_lines`` when the value has another
            type, cannot be converted to integers, or does not satisfy
            ``1 <= start <= end``.
    """
    if value is None or value == "":
        return None
    if isinstance(value, dict):
        raw_start, raw_end = value.get("start"), value.get("end")
    elif isinstance(value, str) and ":" in value:
        raw_start, raw_end = value.split(":", 1)
    else:
        raise PackError("invalid_lines")
    try:
        start = int(raw_start)
        end = int(raw_end)
    except (TypeError, ValueError, OverflowError) as exc:
        # OverflowError: json.loads accepts Infinity, and int(float("inf"))
        # raises it; report that as an invalid range instead of crashing.
        raise PackError("invalid_lines") from exc
    if start < 1 or end < start:
        raise PackError("invalid_lines")
    return LineRange(start, end)
213
+
214
+
215
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
    """Convert ``value`` to an int clamped to ``[minimum, maximum]``.

    Returns ``default`` unchanged (not clamped) when the conversion fails.
    """
    try:
        parsed = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    if parsed < minimum:
        parsed = minimum
    return maximum if parsed > maximum else parsed
221
+
222
+
223
def cap_label(value: object, default: str | None = None, limit: int = MAX_LABEL_CHARS) -> str | None:
    """Normalize, redact and length-cap a human-readable label.

    Whitespace runs collapse to single spaces, secret-looking text becomes
    ``[REDACTED]``, and text longer than ``limit`` is cut and suffixed with
    `` ...[truncated]``. Returns ``default`` for ``None`` or empty results.
    """
    if value is None:
        return default
    collapsed = " ".join(str(value).strip().split())
    scrubbed = SECRET_CONTENT_RE.sub("[REDACTED]", collapsed)
    if not scrubbed:
        return default
    if len(scrubbed) <= limit:
        return scrubbed
    # Reserve 15 characters for the truncation suffix.
    keep = max(0, limit - 15)
    return scrubbed[:keep].rstrip() + " ...[truncated]"
233
+
234
+
235
def read_manifest(path: Path) -> list[SourceSpec]:
    """Load a JSON manifest and return its sources as ``SourceSpec`` objects.

    The manifest must be a JSON object with a ``sources`` list of objects that
    each carry a ``path``; ``version`` defaults to the supported version. An
    unparsable ``lines`` value is recorded as ``LineRange(-1, -1)`` rather than
    rejected here.

    Raises:
        PackError: if the file is unreadable, too large, not valid UTF-8 JSON,
            has an unsupported version, or has malformed ``sources``.
    """
    try:
        payload = path.read_bytes()
    except OSError as exc:
        raise PackError(f"could not read manifest: {exc.strerror or exc.__class__.__name__}") from exc
    if len(payload) > MAX_MANIFEST_BYTES:
        raise PackError(f"manifest exceeds trusted size cap: {len(payload)} > {MAX_MANIFEST_BYTES}")
    try:
        data = json.loads(payload.decode("utf-8"))
    except (UnicodeDecodeError, json.JSONDecodeError) as exc:
        raise PackError(f"invalid manifest JSON: {exc}") from exc

    if isinstance(data, dict):
        version = data.get("version", VERSION)
        sources = data.get("sources")
    else:
        # A non-object document has no version, so it fails the version check.
        version = sources = None
    if version != VERSION:
        raise PackError(f"unsupported manifest version: {version}")
    if not isinstance(sources, list):
        raise PackError("manifest sources must be a list")

    specs: list[SourceSpec] = []
    for entry in sources:
        if not isinstance(entry, dict):
            raise PackError("manifest sources must be objects")
        if "path" not in entry:
            raise PackError("manifest source missing path")
        try:
            wanted = parse_line_range(entry.get("lines"))
        except PackError:
            wanted = LineRange(-1, -1)
        specs.append(
            SourceSpec(
                path=str(entry.get("path", "")),
                priority=bounded_int(entry.get("priority"), 0, -1_000_000, 1_000_000),
                lines=wanted,
                label=cap_label(entry.get("label")),
                origin="manifest",
            )
        )
    return specs
270
+
271
+
272
def parse_source_spec(raw: str) -> SourceSpec:
    """Parse one ``--source`` value into a ``SourceSpec``.

    A value whose first comma-separated part has no ``=`` is taken verbatim as
    the path. Otherwise the value is a comma-separated list of ``key=value``
    parts (``path``, ``priority``, ``lines``, ``label``). An unparsable
    ``lines`` value is recorded as ``LineRange(-1, -1)``.

    Raises:
        PackError: for an empty value, a part without ``=``, or a missing path.
    """
    text = raw.strip()
    if not text:
        raise PackError("empty --source")
    fields: dict[str, str]
    if "=" in text.partition(",")[0]:
        fields = {}
        for chunk in text.split(","):
            if not chunk:
                continue
            key, sep, val = chunk.partition("=")
            if not sep:
                raise PackError(f"invalid --source part: {chunk}")
            fields[key.strip()] = val.strip()
    else:
        fields = {"path": text}
    if not fields.get("path"):
        raise PackError("--source missing path")
    try:
        wanted = parse_line_range(fields.get("lines"))
    except PackError:
        wanted = LineRange(-1, -1)
    return SourceSpec(
        path=fields["path"],
        priority=bounded_int(fields.get("priority"), 0, -1_000_000, 1_000_000),
        lines=wanted,
        label=cap_label(fields.get("label")),
        origin="cli",
    )
300
+
301
+
302
def normalize_root(raw_root: Path) -> Path:
    """Expand and resolve the pack root.

    Raises:
        PackError: if the expanded path is itself a symlink, cannot be
            resolved, or does not resolve to a directory.
    """
    candidate = raw_root.expanduser()
    try:
        if candidate.is_symlink():
            raise PackError("root must not be a symlink")
        resolved = candidate.resolve()
    except OSError as err:
        raise PackError(f"could not resolve root: {err.strerror or err.__class__.__name__}") from err
    if resolved.is_dir():
        return resolved
    raise PackError("root must be a directory")
313
+
314
+
315
def omission(spec: SourceSpec, reason: str, *, path: str | None = None, redacted_path: bool = False) -> dict[str, Any]:
    """Build the receipt entry for a source that was left out of the pack.

    ``path`` overrides the displayed path; without it the raw spec path is
    shown after component-level redaction. Requested lines are included only
    when the spec carries a range with a positive start.
    """
    shown = safe_raw_path_label(spec.path) if path is None else path
    entry: dict[str, Any] = {
        "path": shown,
        "status": "omitted",
        "priority": spec.priority,
        "reason": reason,
        "input_index": spec.input_index,
    }
    if spec.label:
        entry["label"] = spec.label
    wanted = spec.lines
    if wanted and wanted.start > 0:
        entry["requested_lines"] = wanted.as_dict()
    if redacted_path:
        entry["retrieval_omitted_reason"] = "redacted_path"
    return entry
330
+
331
+
332
def safe_raw_path_label(raw: str) -> str:
    """Return a displayable version of an unvalidated path string.

    Backslashes become ``/``, empty and ``.`` segments are dropped, and
    secret-looking segments are replaced. Falls back to ``path`` when nothing
    remains.
    """
    segments = raw.replace("\\", "/").split("/")
    pieces = [sanitize_path_component(seg)[0] for seg in segments if seg not in {"", "."}]
    return "/".join(pieces) or "path"
341
+
342
+
343
def lexical_rel(raw_path: str) -> tuple[Path | None, str]:
    """Validate ``raw_path`` lexically as a path relative to the root.

    Returns ``(path, "")`` on success, or ``(None, "outside_root")`` for
    absolute paths, empty paths, and paths containing ``..``. The filesystem
    is not touched.
    """
    rejected: tuple[Path | None, str] = (None, "outside_root")
    candidate = Path(raw_path)
    if candidate.is_absolute():
        return rejected
    segments = candidate.parts
    if not segments:
        return rejected
    for seg in segments:
        if seg == ".." or seg == "":
            return rejected
    cleaned = Path(*[seg for seg in segments if seg != "."])
    if not cleaned.parts:
        return rejected
    return cleaned, ""
354
+
355
+
356
def open_dir_no_follow(path: Path | str, *, dir_fd: int | None = None) -> int:
    """Open a directory read-only and return its file descriptor.

    ``O_DIRECTORY``, ``O_NOFOLLOW`` and ``O_CLOEXEC`` are added when the
    platform defines them. With ``dir_fd`` the path is opened relative to that
    descriptor. The caller owns the returned descriptor.

    Raises:
        OSError: from ``os.open`` / ``os.fstat``.
        PackError: if the opened object is not a directory.
    """
    flags = os.O_RDONLY
    for optional in ("O_DIRECTORY", "O_NOFOLLOW", "O_CLOEXEC"):
        flags |= getattr(os, optional, 0)
    fd = os.open(path, flags) if dir_fd is None else os.open(path, flags, dir_fd=dir_fd)
    try:
        if not stat.S_ISDIR(os.fstat(fd).st_mode):
            raise PackError("not a directory")
    except Exception:
        # Do not leak the descriptor when verification fails.
        os.close(fd)
        raise
    return fd
376
+
377
+
378
def open_regular_under_root(root: Path, rel: Path) -> tuple[Any | None, str]:
    """Open ``rel`` beneath ``root`` without following symlinks.

    The path is walked one component at a time from a directory descriptor for
    ``root``; each intermediate directory and the final file are opened
    relative to the previous descriptor.

    Returns:
        ``(handle, "")`` with a UTF-8 text handle (undecodable bytes replaced,
        newlines untranslated) on success, otherwise ``(None, reason)`` where
        reason is ``missing``, ``empty_source`` (not a regular file),
        ``outside_root`` or ``unsafe_path``.
    """
    current_fd: int | None = None
    try:
        current_fd = open_dir_no_follow(root)
        last_index = len(rel.parts) - 1
        for index, part in enumerate(rel.parts):
            if part in {"", ".", ".."}:
                return None, "outside_root"
            if index != last_index:
                try:
                    next_fd = open_dir_no_follow(part, dir_fd=current_fd)
                except (FileNotFoundError, NotADirectoryError):
                    return None, "missing"
                except OSError:
                    return None, "unsafe_path"
                os.close(current_fd)
                current_fd = next_fd
                continue
            flags = os.O_RDONLY
            # O_NONBLOCK: the file type is only known after open(); without it,
            # a FIFO at this path would block forever waiting for a writer.
            # It has no effect on reads from regular files.
            for optional in ("O_NOFOLLOW", "O_CLOEXEC", "O_NONBLOCK"):
                flags |= getattr(os, optional, 0)
            file_fd = -1
            try:
                file_fd = os.open(part, flags, dir_fd=current_fd)
                if not stat.S_ISREG(os.fstat(file_fd).st_mode):
                    os.close(file_fd)
                    file_fd = -1
                    return None, "empty_source"
                handle = os.fdopen(file_fd, "r", encoding="utf-8", errors="replace", newline="")
                # The handle owns the descriptor from here on.
                file_fd = -1
                return handle, ""
            except FileNotFoundError:
                return None, "missing"
            except IsADirectoryError:
                return None, "empty_source"
            except NotADirectoryError:
                return None, "missing"
            except OSError:
                # Includes the error raised when the final component is a symlink.
                return None, "unsafe_path"
            finally:
                if file_fd >= 0:
                    try:
                        os.close(file_fd)
                    except OSError:
                        pass
    except OSError:
        return None, "unsafe_path"
    finally:
        if current_fd is not None:
            try:
                os.close(current_fd)
            except OSError:
                pass
    # Reached only when ``rel`` has no components.
    return None, "unsafe_path"
437
+
438
+
439
def resolve_source(root: Path, spec: SourceSpec) -> tuple[ResolvedSource | None, dict[str, Any] | None]:
    """Read, sanitize and slice one source.

    Returns ``(source, None)`` on success or ``(None, omission_entry)`` when
    the source cannot be used (invalid range, path outside the root, file not
    openable, or nothing left inside the requested range).
    """
    if spec.lines is not None and spec.lines.start < 1:
        return None, omission(spec, "invalid_lines")
    rel, reason = lexical_rel(spec.path)
    if rel is None:
        return None, omission(spec, reason)
    display, redacted_path = display_rel_path(rel.as_posix())

    def skipped(why: str) -> tuple[None, dict[str, Any]]:
        return None, omission(spec, why, path=display, redacted_path=redacted_path)

    handle, reason = open_regular_under_root(root, rel)
    if handle is None:
        return skipped(reason)
    try:
        with handle:
            raw_text = handle.read()
    except OSError:
        return skipped("unsafe_path")

    # Redaction runs on the whole file before any slicing.
    sanitized, redacted_lines = sanitize_text(raw_text)
    all_lines = sanitized.splitlines(True)
    if not all_lines:
        return skipped("empty_source")
    total_lines = len(all_lines)
    requested = spec.lines or LineRange(1, total_lines)
    if requested.start > total_lines:
        return skipped("empty_source")
    last = min(requested.end, total_lines)
    selected = all_lines[requested.start - 1:last]
    if not selected:
        return skipped("empty_source")
    resolved = ResolvedSource(
        spec=spec,
        abs_path=root / rel,
        display_path=display,
        redacted_path=redacted_path,
        requested_lines=requested,
        selected_lines=selected,
        total_lines=total_lines,
        redacted_lines=redacted_lines,
    )
    return resolved, None
476
+
477
+
478
def retrieval_cli(root_arg: str, display_path: str, lines: LineRange) -> str:
    """Return the shell command that re-reads ``lines`` of ``display_path``.

    The root and path arguments are shell-quoted.
    """
    words = [
        "context-guard-pack",
        "slice",
        "--root",
        shlex.quote(root_arg),
        "--path",
        shlex.quote(display_path),
        "--lines",
        f"{lines.start}:{lines.end}",
        "--json",
    ]
    return " ".join(words)
483
+
484
+
485
def safe_root_arg_for_retrieval(root_arg: str) -> str | None:
    """Return ``root_arg`` as text if it is safe to print, else ``None``.

    The value is rejected when the whole string, or any single path
    component, matches the secret pattern.
    """
    text = str(root_arg)
    if SECRET_CONTENT_RE.search(text):
        return None
    segments = (seg for seg in text.replace("\\", "/").split("/") if seg)
    if any(sanitize_path_component(seg)[1] for seg in segments):
        return None
    return text
496
+
497
+
498
def retrieval_for(root_arg: str, display_path: str, lines: LineRange, *, redacted_path: bool) -> tuple[str | None, str | None]:
    """Return ``(command, None)`` or ``(None, reason)`` for a retrieval hint.

    No command is produced when the source path was redacted
    (``redacted_path``) or the root argument looks like it contains a secret
    (``unsafe_root_path``).
    """
    if redacted_path:
        return None, "redacted_path"
    safe_root = safe_root_arg_for_retrieval(root_arg)
    if safe_root is None:
        return None, "unsafe_root_path"
    command = retrieval_cli(safe_root, display_path, lines)
    return command, None
505
+
506
+
507
def render_block(source: ResolvedSource, lines: list[str], *, root_arg: str, status: str, included: LineRange) -> str:
    """Render one source as a Markdown section with a fenced text body.

    The section has a heading (label or display path), metadata lines, an
    optional retrieval hint, and ``lines`` inside a ``text`` fence. A newline
    is appended when the last line lacks one so the closing fence starts on
    its own line.
    """
    requested = source.requested_lines or LineRange(1, source.total_lines)
    command, skip_reason = retrieval_for(root_arg, source.display_path, included, redacted_path=source.redacted_path)
    heading = source.spec.label or source.display_path
    meta = [
        f"## {heading}",
        f"Source: `{source.display_path}`",
        f"Priority: {source.spec.priority}",
        f"Status: {status}",
        f"Included lines: {included.start}:{included.end}",
        f"Requested lines: {requested.start}:{requested.end}",
    ]
    if command:
        meta.append(f"Retrieval: `{command}`")
    elif skip_reason:
        meta.append(f"Retrieval omitted: {skip_reason}")
    body = "".join(lines)
    if lines and not lines[-1].endswith("\n"):
        body += "\n"
    return "\n".join(meta) + "\n\n```text\n" + body + "```\n\n"
524
+
525
+
526
def source_metadata(source: ResolvedSource, *, status: str, lines: list[str], included: LineRange, root_arg: str) -> dict[str, Any]:
    """Build the receipt entry for a source that made it into the pack.

    ``bytes`` is the UTF-8 size of the included lines only. A ``partial``
    status additionally records ``budget_exhausted`` as the reason.
    """
    requested = source.requested_lines or LineRange(1, source.total_lines)
    entry: dict[str, Any] = {
        "path": source.display_path,
        "status": status,
        "priority": source.spec.priority,
        "input_index": source.spec.input_index,
        "requested_lines": requested.as_dict(),
        "included_lines": included.as_dict(),
        "bytes": byte_len("".join(lines)),
    }
    if source.spec.label:
        entry["label"] = source.spec.label
    command, skip_reason = retrieval_for(root_arg, source.display_path, included, redacted_path=source.redacted_path)
    if command:
        entry["retrieval_cli"] = command
    elif skip_reason:
        entry["retrieval_omitted_reason"] = skip_reason
    if status == "partial":
        entry["reason"] = "budget_exhausted"
    return entry
547
+
548
+
549
def budget_omission(source: ResolvedSource, *, root_arg: str) -> dict[str, Any]:
    """Build the omission entry for a resolved source dropped for lack of budget.

    Extends the generic omission entry with the requested range, the file's
    total line count and, when safe, a retrieval command for the full range.
    """
    requested = source.requested_lines or LineRange(1, source.total_lines)
    entry = omission(source.spec, "budget_exhausted", path=source.display_path, redacted_path=source.redacted_path)
    entry["requested_lines"] = requested.as_dict()
    entry["total_lines"] = source.total_lines
    command, skip_reason = retrieval_for(root_arg, source.display_path, requested, redacted_path=source.redacted_path)
    if command:
        entry["retrieval_cli"] = command
        entry.pop("retrieval_omitted_reason", None)
    elif skip_reason:
        entry["retrieval_omitted_reason"] = skip_reason
    return entry
561
+
562
+
563
def fit_partial_lines(source: ResolvedSource, remaining: int, *, root_arg: str) -> tuple[list[str], str | None, LineRange | None]:
    """Find the longest leading run of lines whose rendered block fits ``remaining`` bytes.

    Returns ``(lines, rendered_block, included_range)``, or
    ``([], None, None)`` when not even the first line fits.
    """
    if remaining <= 0:
        return [], None, None
    first = source.requested_lines.start if source.requested_lines else 1

    def render(count: int) -> tuple[LineRange, str]:
        span = LineRange(first, first + count - 1)
        block = render_block(source, source.selected_lines[:count], root_arg=root_arg, status="partial", included=span)
        return span, block

    fitted = 0
    for count in range(1, len(source.selected_lines) + 1):
        _, block = render(count)
        if byte_len(block) > remaining:
            break
        fitted = count
    if not fitted:
        return [], None, None
    span, block = render(fitted)
    return source.selected_lines[:fitted], block, span
579
+
580
+
581
def metadata_size(data: dict[str, Any]) -> int:
    """Return the on-disk size of ``data`` as written by ``write_private_json_at``.

    That is the UTF-8 length of the indented, key-sorted JSON plus one byte
    for the trailing newline.
    """
    rendered = json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True)
    return len(rendered.encode("utf-8", errors="replace")) + 1
583
+
584
+
585
def artifact_failure(error: str, *, bytes_count: int = 0, capped: bool = False) -> dict[str, Any]:
    """Return the ``artifact`` status mapping for a receipt that was not stored."""
    return dict(
        stored=False,
        path=None,
        bytes=bytes_count,
        capped=capped,
        error=error,
        cap_bytes=MAX_RECEIPT_BYTES,
    )
594
+
595
+
596
def ensure_private_pack_dir(root: Path) -> tuple[Path | None, int | None, str | None]:
    """Create/verify the receipt directory by walking from a no-follow root fd.

    Each of ``.context-guard`` and ``packs`` is opened relative to its parent
    descriptor, created with mode 0o700 if missing, and chmod-ed to 0o700 on a
    best-effort basis.

    Returns:
        ``(path, dir_fd, None)`` on success; the caller must close ``dir_fd``.
        ``(None, None, reason)`` on failure, where reason is
        ``artifact_dir_unavailable`` or ``unsafe_artifact_dir``.
    """
    walk_fd: int | None = None
    try:
        walk_fd = open_dir_no_follow(root)
        for component in (".context-guard", "packs"):
            while True:
                try:
                    child_fd = open_dir_no_follow(component, dir_fd=walk_fd)
                    break
                except FileNotFoundError:
                    try:
                        os.mkdir(component, 0o700, dir_fd=walk_fd)
                    except FileExistsError:
                        # Someone else created it in the meantime; retry the open.
                        continue
                    except (OSError, NotImplementedError):
                        return None, None, "artifact_dir_unavailable"
                except (OSError, NotImplementedError):
                    return None, None, "unsafe_artifact_dir"
            try:
                os.fchmod(child_fd, 0o700)
            except (AttributeError, OSError):
                pass
            os.close(walk_fd)
            walk_fd = child_fd
        # Hand the final descriptor to the caller; the finally block must not close it.
        owned_fd, walk_fd = walk_fd, None
        return root / PACK_DIR, owned_fd, None
    except OSError:
        return None, None, "unsafe_artifact_dir"
    finally:
        if walk_fd is not None:
            try:
                os.close(walk_fd)
            except OSError:
                pass
634
+
635
+
636
+ def write_private_json_at(dir_fd: int, filename: str, data: dict[str, Any]) -> None:
637
+ if "/" in filename or filename in {"", ".", ".."}:
638
+ raise PackError("unsafe_artifact_path")
639
+ flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
640
+ if hasattr(os, "O_NOFOLLOW"):
641
+ flags |= os.O_NOFOLLOW
642
+ if hasattr(os, "O_CLOEXEC"):
643
+ flags |= os.O_CLOEXEC
644
+ fd = os.open(filename, flags, 0o600, dir_fd=dir_fd)
645
+ try:
646
+ with os.fdopen(fd, "w", encoding="utf-8") as handle:
647
+ json.dump(data, handle, ensure_ascii=False, indent=2, sort_keys=True)
648
+ handle.write("\n")
649
+ except Exception:
650
+ try:
651
+ os.close(fd)
652
+ except OSError:
653
+ pass
654
+ raise
655
+ try:
656
+ os.chmod(filename, 0o600, dir_fd=dir_fd, follow_symlinks=False)
657
+ except (OSError, TypeError, NotImplementedError):
658
+ pass
659
+
660
+
661
def finalize_receipt_size(receipt: dict[str, Any]) -> int:
    """Store the receipt's own serialized size in ``receipt["artifact"]["bytes"]``.

    Writing the size can change the size (digit count), so the value is
    re-measured up to four times until it is stable. Returns the final size.
    """
    artifact = receipt.setdefault("artifact", {})
    current = metadata_size(receipt)
    attempts = 0
    while attempts < 4:
        artifact["bytes"] = current
        measured = metadata_size(receipt)
        if measured == current:
            return current
        current = measured
        attempts += 1
    artifact["bytes"] = current
    return metadata_size(receipt)
672
+
673
+
674
def shrink_receipt_for_write(data: dict[str, Any]) -> tuple[dict[str, Any], bool]:
    """Return a deep copy of ``data`` reduced toward ``MAX_RECEIPT_BYTES``.

    Steps are applied in order and stop as soon as the receipt fits: trim
    omitted-source entries, trim included-source entries, then drop the pack
    body. The second element is True when any shrinking was needed. The result
    may still exceed the cap; the caller checks.
    """
    receipt = copy.deepcopy(data)

    def fits() -> bool:
        return metadata_size(receipt) <= MAX_RECEIPT_BYTES

    if fits():
        return receipt, False

    artifact = receipt.setdefault("artifact", {})
    artifact["capped"] = True
    artifact["cap_bytes"] = MAX_RECEIPT_BYTES

    for entry in receipt.get("omitted_sources", []):
        if not isinstance(entry, dict):
            continue
        entry.pop("preview", None)
        if "label" in entry:
            entry["label"] = cap_label(entry.get("label"), limit=80)
        if "reason" in entry:
            entry["reason"] = cap_label(entry.get("reason"), default=str(entry.get("reason")), limit=MAX_REASON_CHARS)
    if fits():
        return receipt, True

    for entry in receipt.get("included_sources", []):
        if not isinstance(entry, dict):
            continue
        entry.pop("preview", None)
        if "label" in entry:
            entry["label"] = cap_label(entry.get("label"), limit=80)
    if fits():
        return receipt, True

    # The stdout payload remains authoritative for the full pack body. Receipts may omit it to stay readable.
    receipt["pack_omitted_from_receipt"] = True
    receipt.pop("pack", None)
    return receipt, True
702
+
703
+
704
def store_receipt(root: Path, result: dict[str, Any]) -> dict[str, Any]:
    """Write the pack receipt under the root's pack directory.

    Returns the ``artifact`` status mapping: ``stored`` True with the relative
    path and final size on success, otherwise a failure mapping naming the
    error (directory unavailable/unsafe, receipt too large, or write failed).
    """
    out_dir, dir_fd, dir_error = ensure_private_pack_dir(root)
    if out_dir is None or dir_fd is None:
        return artifact_failure(dir_error or "unsafe_artifact_dir")
    size = 0
    capped = False
    try:
        receipt, capped = shrink_receipt_for_write(result)
        size = metadata_size(receipt)
        if size > MAX_RECEIPT_BYTES:
            return artifact_failure("receipt_metadata_too_large", bytes_count=size, capped=True)
        pack_id = str(result["pack_id"])
        relative_path = f"{PACK_DIR}/{pack_id}.json"
        artifact = receipt.setdefault("artifact", {})
        artifact["stored"] = True
        artifact["path"] = relative_path
        artifact["capped"] = capped
        # Adding the artifact fields changes the size, so measure again.
        size = finalize_receipt_size(receipt)
        if size > MAX_RECEIPT_BYTES:
            return artifact_failure("receipt_metadata_too_large", bytes_count=size, capped=True)
        write_private_json_at(dir_fd, f"{pack_id}.json", receipt)
    except (OSError, PackError, NotImplementedError):
        return artifact_failure("artifact_write_failed", bytes_count=size, capped=capped)
    finally:
        try:
            os.close(dir_fd)
        except OSError:
            pass
    return {
        "stored": True,
        "path": relative_path,
        "bytes": size,
        "capped": capped,
        "cap_bytes": MAX_RECEIPT_BYTES,
    }
738
+
739
+
740
def build_pack(root: Path, specs: list[SourceSpec], *, budget_bytes: int, root_arg: str, store_artifact: bool) -> dict[str, Any]:
    """Assemble a byte-budgeted context pack and return its result payload.

    Processing happens in three stages:

    1. Each spec is classified. Specs whose line range starts below 1 are
       omitted as ``invalid_lines``; specs repeating an already seen
       (relative path, line identity) pair are omitted as
       ``duplicate_source``; the rest go through ``resolve_source`` and
       become candidates unless that call reports an omission.
    2. Candidates are ordered by descending priority, then input index, then
       display path, and rendered into the pack. A source is included whole
       when its rendered block fits the remaining budget; otherwise
       ``fit_partial_lines`` is tried, and failing that the source is
       recorded through ``budget_omission``.
    3. A ``pack_id`` (first 20 hex characters of a SHA-256 over a canonical
       JSON summary) is derived and the result dictionary is built. When
       ``store_artifact`` is true, the value returned by ``store_receipt``
       replaces the default ``artifact`` entry.

    Args:
        root: Project root handed to ``resolve_source`` and ``store_receipt``.
        specs: Requested sources, in input order.
        budget_bytes: Upper bound for the rendered pack, in bytes.
        root_arg: Root string forwarded to the rendering/metadata helpers.
        store_artifact: Whether to call ``store_receipt`` for the result.

    Returns:
        The result payload, including the pack text and per-source metadata.
    """
    seen_identities: set[tuple[str, str]] = set()
    candidates: list[ResolvedSource] = []
    omitted: list[dict[str, Any]] = []
    canonical_specs: list[dict[str, Any]] = []

    def note_spec(path: Any, priority: Any, lines: str, status: Any) -> None:
        # One canonical entry per input spec; these feed the pack_id hash.
        canonical_specs.append({"path": path, "priority": priority, "lines": lines, "status": status})

    for spec in specs:
        rel, _reason = lexical_rel(spec.path)
        requested = spec.lines
        if requested is not None and requested.start < 1:
            rejected = omission(spec, "invalid_lines")
            omitted.append(rejected)
            note_spec(rejected.get("path"), spec.priority, "invalid", "invalid_lines")
            continue

        if rel is None:
            identity_lines = "invalid"
        elif requested is not None and requested.start > 0:
            identity_lines = requested.identity()
        else:
            identity_lines = "all"

        # Duplicate detection only applies when the path has a lexical
        # relative form; other specs fall through to resolve_source.
        if rel is not None:
            identity = (rel.as_posix(), identity_lines)
            if identity in seen_identities:
                display, redacted = display_rel_path(rel.as_posix())
                omitted.append(omission(spec, "duplicate_source", path=display, redacted_path=redacted))
                note_spec(display, spec.priority, identity_lines, "duplicate_source")
                continue
            seen_identities.add(identity)

        source, rejected = resolve_source(root, spec)
        if rejected is not None:
            omitted.append(rejected)
            note_spec(rejected.get("path"), spec.priority, identity_lines, rejected.get("reason"))
            continue
        assert source is not None
        candidates.append(source)
        note_spec(source.display_path, spec.priority, identity_lines, "candidate")

    def pack_order(item: ResolvedSource) -> tuple[Any, Any, Any]:
        # Highest priority first; ties keep input order, then display path.
        return (-item.spec.priority, item.spec.input_index, item.display_path)

    candidates.sort(key=pack_order)

    header = "# Context Pack\n\nGenerated by context-guard-pack. Token counts are estimated proxies; byte counts are observed.\n\n"
    parts: list[str] = []
    included: list[dict[str, Any]] = []
    used_bytes = 0
    header_bytes = byte_len(header)
    if header_bytes <= budget_bytes:
        parts.append(header)
        used_bytes += header_bytes

    # NOTE(review): nesting was reconstructed from a flattened listing; this
    # loop is assumed to run even when the header did not fit — confirm.
    for source in candidates:
        first_line = source.requested_lines.start if source.requested_lines else 1
        whole_range = LineRange(first_line, first_line + len(source.selected_lines) - 1)
        whole_block = render_block(source, source.selected_lines, root_arg=root_arg, status="included", included=whole_range)
        whole_bytes = byte_len(whole_block)
        remaining = budget_bytes - used_bytes
        if whole_bytes > remaining:
            # The whole block does not fit: try a partial slice, else omit.
            partial_lines, partial_block, partial_range = fit_partial_lines(source, remaining, root_arg=root_arg)
            if partial_block is None or partial_range is None:
                omitted.append(budget_omission(source, root_arg=root_arg))
            else:
                parts.append(partial_block)
                used_bytes += byte_len(partial_block)
                included.append(source_metadata(source, status="partial", lines=partial_lines, included=partial_range, root_arg=root_arg))
            continue
        parts.append(whole_block)
        used_bytes += whole_bytes
        included.append(source_metadata(source, status="included", lines=source.selected_lines, included=whole_range, root_arg=root_arg))

    pack = "".join(parts)
    pack_bytes = used_bytes
    redacted_lines = sum(source.redacted_lines for source in candidates)
    partial_count = len([item for item in included if item.get("status") == "partial"])

    def omission_order(item: dict[str, Any]) -> tuple[Any, str, str]:
        return (item.get("input_index", 0), str(item.get("path", "")), str(item.get("reason", "")))

    omitted_sorted = sorted(omitted, key=omission_order)
    root_display = display_root(root)

    # The pack_id is derived from a canonical, key-sorted JSON summary.
    canonical = {
        "version": VERSION,
        "root": root_display,
        "budget_bytes": budget_bytes,
        "sources": canonical_specs,
        "pack_sha256": sha256_text(pack),
        "omission_summary": sorted({str(item.get("reason")) for item in omitted_sorted}),
    }
    canonical_json = json.dumps(canonical, ensure_ascii=False, sort_keys=True, separators=(",", ":"))
    pack_id = hashlib.sha256(canonical_json.encode("utf-8")).hexdigest()[:20]

    result: dict[str, Any] = {
        "tool": TOOL_NAME,
        "version": VERSION,
        "pack_id": pack_id,
        "root": root_display,
        "budget_bytes": budget_bytes,
        "pack_bytes": pack_bytes,
        "pack": pack,
        "token_proxy": {"measurement": "estimated", "method": f"chars_div_{TOKEN_PROXY_CHARS_PER_TOKEN}", "pack": token_proxy(pack)},
        "sources": {"total": len(specs), "included": len(included) - partial_count, "partial": partial_count, "omitted": len(omitted_sorted)},
        "included_sources": included,
        "omitted_sources": omitted_sorted,
        "redaction": {"redacted_lines": redacted_lines, "redacted_before_pack": True},
        "artifact": {"stored": False, "path": None, "bytes": 0, "capped": False, "cap_bytes": MAX_RECEIPT_BYTES},
        "created_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
    }
    if store_artifact:
        result["artifact"] = store_receipt(root, result)
    return result
835
+
836
+
837
def parse_all_sources(args: argparse.Namespace) -> list[SourceSpec]:
    """Collect source specs from the manifest and ``--source`` flags.

    Manifest entries (when ``args.manifest`` is set) come first, followed by
    each ``--source`` value parsed with ``parse_source_spec``. Every spec is
    then stamped with its position in the combined list as ``input_index``.
    """
    collected: list[SourceSpec] = list(read_manifest(Path(args.manifest))) if args.manifest else []
    collected.extend(parse_source_spec(raw) for raw in args.source or [])
    for position, spec in enumerate(collected):
        spec.input_index = position
    return collected
846
+
847
+
848
def slice_source(root: Path, *, raw_path: str, lines: LineRange) -> tuple[dict[str, Any], int]:
    """Resolve one file slice and return ``(payload, exit_code)``.

    The request is resolved through ``resolve_source``. If that reports an
    omission, an error payload carrying its reason and path is returned with
    exit code 1. Otherwise the selected lines are joined into the payload's
    ``content`` and the exit code is 0.
    """
    source, rejected = resolve_source(root, SourceSpec(path=raw_path, lines=lines))
    if rejected is not None:
        failure = {"tool": TOOL_NAME, "status": "error", "reason": rejected.get("reason"), "path": rejected.get("path")}
        return failure, 1
    assert source is not None

    text = "".join(source.selected_lines)
    # The reported end is clamped to the file's total line count.
    query = {
        "type": "lines",
        "start": lines.start,
        "end": min(lines.end, source.total_lines),
        "returned_lines": len(source.selected_lines),
    }
    redaction = {"redacted_lines": source.redacted_lines, "redacted_before_pack": True}
    return {
        "tool": TOOL_NAME,
        "version": VERSION,
        "status": "ok",
        "path": source.display_path,
        "query": query,
        "content": text,
        "bytes": byte_len(text),
        "redaction": redaction,
    }, 0
867
+
868
+
869
def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser with its ``build`` and ``slice`` subcommands."""
    root_help = "project root; must not be a symlink"
    json_help = "emit JSON payload"

    top = argparse.ArgumentParser(description="Build budgeted local context packs with exact retrieval hints.")
    commands = top.add_subparsers(dest="command", required=True)

    # Options are registered in table order, which is also the help order.
    build_cmd = commands.add_parser("build", help="assemble a prioritized context pack")
    build_options = (
        ("--root", {"default": ".", "help": root_help}),
        ("--manifest", {"help": "JSON manifest with version/sources"}),
        ("--source", {"action": "append", "help": "source spec: path=REL[,priority=N][,lines=A:B][,label=TEXT]"}),
        ("--budget-bytes", {"type": int, "default": DEFAULT_BUDGET_BYTES}),
        ("--json", {"action": "store_true", "help": json_help}),
        ("--no-artifact", {"action": "store_true", "help": "do not write .context-guard/packs receipt"}),
    )
    for flag, options in build_options:
        build_cmd.add_argument(flag, **options)

    slice_cmd = commands.add_parser("slice", help="retrieve an exact sanitized file slice")
    slice_options = (
        ("--root", {"default": ".", "help": root_help}),
        ("--path", {"required": True, "help": "relative file path under root"}),
        ("--lines", {"required": True, "help": "inclusive 1-indexed START:END"}),
        ("--json", {"action": "store_true", "help": json_help}),
    )
    for flag, options in slice_options:
        slice_cmd.add_argument(flag, **options)

    return top
885
+
886
+
887
def main(argv: list[str] | None = None) -> int:
    """Run the command-line interface and return the process exit code.

    Returns 0 after a ``build``, the code reported by ``slice_source`` after
    a ``slice``, and 2 when a ``PackError`` is raised (its message is printed
    to stderr).
    """
    args = build_parser().parse_args(argv)

    def emit_json(document: dict[str, Any]) -> None:
        # Both subcommands share the same JSON formatting on stdout.
        json.dump(document, sys.stdout, ensure_ascii=False, indent=2, sort_keys=True)
        sys.stdout.write("\n")

    try:
        root = normalize_root(Path(args.root))
        command = args.command

        if command == "build":
            specs = parse_all_sources(args)
            if not specs:
                raise PackError("provide --manifest or --source")
            budget = bounded_int(args.budget_bytes, DEFAULT_BUDGET_BYTES, MIN_BUDGET_BYTES, MAX_BUDGET_BYTES)
            result = build_pack(root, specs, budget_bytes=budget, root_arg=str(args.root), store_artifact=not args.no_artifact)
            if args.json:
                emit_json(result)
            else:
                sys.stdout.write(str(result["pack"]))
                # NOTE(review): nesting was reconstructed from a flattened
                # listing; confirm this summary belongs to the non-JSON
                # branch only.
                counts = result["sources"]
                print(
                    f"[context-guard-pack] pack_id={result['pack_id']} bytes={result['pack_bytes']}/{result['budget_bytes']} "
                    f"included={counts['included']} partial={counts['partial']} omitted={counts['omitted']}",
                    file=sys.stderr,
                )
            return 0

        if command == "slice":
            line_range = parse_line_range(args.lines)
            if line_range is None:
                raise PackError("invalid_lines")
            payload, rc = slice_source(root, raw_path=args.path, lines=line_range)
            if args.json:
                emit_json(payload)
            elif rc == 0:
                sys.stdout.write(str(payload.get("content", "")))
            else:
                print(f"context-guard-pack: {payload.get('reason')}", file=sys.stderr)
            return rc

        raise PackError("unknown command")
    except PackError as exc:
        print(f"context-guard-pack: {exc}", file=sys.stderr)
        return 2
926
+
927
+
928
# Script entry point: exit with the status code returned by main().
if __name__ == "__main__":
    sys.exit(main())