@ictechgy/context-guard 0.4.9 → 0.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/CHANGELOG.md +28 -0
  2. package/README.ko.md +59 -31
  3. package/README.md +85 -36
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  8. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  9. package/docs/benchmark-workflow-examples.md +3 -0
  10. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
  11. package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
  12. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
  13. package/docs/distribution.md +10 -7
  14. package/docs/experimental-benchmark-fixtures.md +30 -6
  15. package/package.json +4 -6
  16. package/packaging/homebrew/context-guard.rb.template +1 -1
  17. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  18. package/plugins/context-guard/README.ko.md +20 -14
  19. package/plugins/context-guard/README.md +26 -17
  20. package/plugins/context-guard/bin/context-guard +147 -25
  21. package/plugins/context-guard/bin/context-guard-artifact +884 -79
  22. package/plugins/context-guard/bin/context-guard-audit +33 -2
  23. package/plugins/context-guard/bin/context-guard-bench +1542 -31
  24. package/plugins/context-guard/bin/context-guard-cache-score +665 -0
  25. package/plugins/context-guard/bin/context-guard-compress +146 -1
  26. package/plugins/context-guard/bin/context-guard-cost +790 -6
  27. package/plugins/context-guard/bin/context-guard-experiments +463 -26
  28. package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
  29. package/plugins/context-guard/bin/context-guard-filter +163 -7
  30. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  31. package/plugins/context-guard/bin/context-guard-pack +892 -49
  32. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  33. package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
  34. package/plugins/context-guard/bin/context-guard-setup +165 -31
  35. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  36. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  37. package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
  38. package/plugins/context-guard/bin/context-guard-trim-output +288 -41
  39. package/plugins/context-guard/brief/README.md +5 -5
  40. package/plugins/context-guard/lib/context_guard_commands.py +230 -0
  41. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  42. package/context-guard-kit/README.md +0 -91
  43. package/context-guard-kit/benchmark_runner.py +0 -2401
  44. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  45. package/context-guard-kit/context_compress.py +0 -695
  46. package/context-guard-kit/context_escrow.py +0 -935
  47. package/context-guard-kit/context_filter.py +0 -637
  48. package/context-guard-kit/context_guard_cli.py +0 -325
  49. package/context-guard-kit/context_guard_diet.py +0 -1711
  50. package/context-guard-kit/context_pack.py +0 -2713
  51. package/context-guard-kit/cost_guard.py +0 -2349
  52. package/context-guard-kit/experimental_registry.py +0 -4348
  53. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  54. package/context-guard-kit/guard_large_read.py +0 -690
  55. package/context-guard-kit/hook_secret_patterns.py +0 -43
  56. package/context-guard-kit/read_symbol.py +0 -483
  57. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  58. package/context-guard-kit/sanitize_output.py +0 -725
  59. package/context-guard-kit/settings.example.json +0 -67
  60. package/context-guard-kit/setup_wizard.py +0 -2515
  61. package/context-guard-kit/statusline.sh +0 -362
  62. package/context-guard-kit/statusline_merged.sh +0 -157
  63. package/context-guard-kit/tool_schema_pruner.py +0 -837
  64. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -10,6 +10,8 @@ import json
10
10
  import os
11
11
  from pathlib import Path
12
12
  import re
13
+ import secrets
14
+ import shlex
13
15
  import stat
14
16
  import sys
15
17
  import time
@@ -30,6 +32,18 @@ MAX_COMMAND_PREVIEW_BYTES = 2_048
30
32
  MAX_TOP_ERROR_RECEIPTS = 12
31
33
  MAX_DUPLICATE_GROUPS = 12
32
34
  MAX_SUGGESTED_QUERIES = 12
35
+ SEARCH_SCHEMA_VERSION = "contextguard.artifact.search.v1"
36
+ OUTPUT_SANDBOX_SCHEMA_VERSION = "contextguard.artifact.output-sandbox.v1"
37
+ DEFAULT_SEARCH_MAX_ARTIFACTS = 100
38
+ MAX_SEARCH_MAX_ARTIFACTS = 1_000
39
+ DEFAULT_SEARCH_MAX_MATCHES = 40
40
+ MAX_SEARCH_MAX_MATCHES = 1_000
41
+ DEFAULT_SEARCH_CONTEXT_LINES = 1
42
+ MAX_SEARCH_CONTEXT_LINES = 20
43
+ DEFAULT_SEARCH_SNIPPET_CHARS = 360
44
+ MAX_SEARCH_SNIPPET_CHARS = 2_000
45
+ MAX_SEARCH_PATTERN_BYTES = 512
46
+ SEARCH_TRUNCATED_COUNT_UNKNOWN = "lower_bound"
33
47
  ARTIFACT_ID_RE = re.compile(r"^[a-f0-9]{16,64}$")
34
48
  ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
35
49
  "tmp": Path("/private/tmp"),
@@ -183,15 +197,50 @@ def sanitize_one_line(text: str, *, show_paths: bool = False) -> str:
183
197
  return cap_utf8_bytes(cap_line(" ".join(sanitized.strip().split())), MAX_COMMAND_PREVIEW_BYTES)
184
198
 
185
199
 
200
+ NO_FOLLOW_SUPPORTED = hasattr(os, "O_NOFOLLOW")
201
+ DIR_FD_OPEN_SUPPORTED = bool(os.supports_dir_fd and os.open in os.supports_dir_fd)
202
+ DIR_FD_MKDIR_SUPPORTED = bool(os.supports_dir_fd and os.mkdir in os.supports_dir_fd)
203
+ DIR_FD_STAT_SUPPORTED = bool(os.supports_dir_fd and os.stat in os.supports_dir_fd)
204
+ DIR_FD_UNLINK_SUPPORTED = bool(os.supports_dir_fd and os.unlink in os.supports_dir_fd)
205
+
206
+
207
def dir_fd_replace_supported() -> bool:
    """Report whether ``os.replace`` accepts ``src_dir_fd`` and ``dst_dir_fd``.

    ``os.supports_dir_fd`` membership is deliberately not consulted: some
    Python builds accept these keywords without listing ``os.replace`` in
    that set, so the callable's signature is inspected instead. When the
    signature cannot be introspected, the answer is ``True``.
    """
    import inspect

    try:
        parameters = inspect.signature(os.replace).parameters
    except (TypeError, ValueError):
        # No introspectable signature available: assume support.
        return True
    return all(name in parameters for name in ("src_dir_fd", "dst_dir_fd"))
218
+
219
+
220
+ DIR_FD_REPLACE_SUPPORTED = dir_fd_replace_supported()
221
+
222
+
223
def os_error_detail(exc: OSError) -> str:
    """Render *exc* as a short message, appending the errno when one is set.

    The message text is the first non-empty value among ``exc.strerror``,
    ``str(exc)``, and the exception's class name.
    """
    message = exc.strerror
    if not message:
        message = str(exc)
    if not message:
        message = type(exc).__name__
    if exc.errno is None:
        return message
    return f"{message} (errno {exc.errno})"
228
+
229
+
230
def reject_parent_traversal(path: Path, *, label: str) -> None:
    """Raise ``ValueError`` when *path*, after ``~`` expansion, has a ``..`` part.

    *label* names the offending input in the error message.
    """
    if ".." in path.expanduser().parts:
        raise ValueError(f"{label} must not contain parent traversal")
233
+
234
+
186
235
  def ensure_private_dir(path: Path) -> None:
187
- path = normalize_allowed_first_absolute_symlink(path)
188
- reject_symlink_components(path)
189
- path.mkdir(parents=True, exist_ok=True)
190
- reject_symlink_components(path)
236
+ fd = open_private_directory_no_follow(path, label="artifact directory", create=True)
191
237
  try:
192
- os.chmod(path, 0o700)
193
- except OSError:
194
- pass
238
+ try:
239
+ os.fchmod(fd, 0o700)
240
+ except OSError:
241
+ pass
242
+ finally:
243
+ os.close(fd)
195
244
 
196
245
 
197
246
  def reject_symlink_components(path: Path) -> None:
@@ -213,22 +262,38 @@ def reject_symlink_components(path: Path) -> None:
213
262
 
214
263
  def regular_private_file_size(path: Path) -> int:
215
264
  path = normalize_allowed_first_absolute_symlink(path)
216
- reject_symlink_components(path.parent)
217
- st = os.lstat(path)
218
- if stat.S_ISLNK(st.st_mode):
219
- raise ValueError(f"artifact file must not be a symlink: {path.name}")
220
- if not stat.S_ISREG(st.st_mode):
221
- raise ValueError(f"artifact file must be a regular file: {path.name}")
222
- return int(st.st_size)
265
+ parent_fd = open_private_directory_no_follow(path.parent, label="artifact directory", create=False)
266
+ try:
267
+ leaf = path.name
268
+ if leaf in {"", ".", ".."}:
269
+ raise ValueError("artifact file must name a regular file")
270
+ if not DIR_FD_STAT_SUPPORTED:
271
+ raise RuntimeError("artifact reads require dir_fd stat support")
272
+ st = os.stat(leaf, dir_fd=parent_fd, follow_symlinks=False)
273
+ if stat.S_ISLNK(st.st_mode):
274
+ raise ValueError(f"artifact file must not be a symlink: {path.name}")
275
+ if not stat.S_ISREG(st.st_mode):
276
+ raise ValueError(f"artifact file must be a regular file: {path.name}")
277
+ return int(st.st_size)
278
+ finally:
279
+ os.close(parent_fd)
223
280
 
224
281
 
225
282
  def read_bounded_private_text(path: Path, max_bytes: int) -> str:
226
283
  path = normalize_allowed_first_absolute_symlink(path)
227
- size = regular_private_file_size(path)
228
- if size > max_bytes:
229
- raise ValueError(f"artifact file exceeds trusted size cap: {path.name}: {size} > {max_bytes}")
230
- flags = os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0)
231
- fd = os.open(str(path), flags)
284
+ parent_fd = open_private_directory_no_follow(path.parent, label="artifact directory", create=False)
285
+ flags = os.O_RDONLY | os.O_NOFOLLOW
286
+ if hasattr(os, "O_CLOEXEC"):
287
+ flags |= os.O_CLOEXEC
288
+ leaf = path.name
289
+ if leaf in {"", ".", ".."}:
290
+ os.close(parent_fd)
291
+ raise ValueError("artifact file must name a regular file")
292
+ try:
293
+ fd = os.open(leaf, flags, dir_fd=parent_fd)
294
+ except OSError:
295
+ os.close(parent_fd)
296
+ raise
232
297
  try:
233
298
  st = os.fstat(fd)
234
299
  if not stat.S_ISREG(st.st_mode):
@@ -241,35 +306,161 @@ def read_bounded_private_text(path: Path, max_bytes: int) -> str:
241
306
  return data.decode("utf-8", errors="replace")
242
307
  finally:
243
308
  os.close(fd)
309
+ os.close(parent_fd)
310
+
311
+
312
def no_follow_dir_flags() -> int:
    """Return ``os.open`` flags for opening a directory without following symlinks.

    The result is always read-only with ``O_NOFOLLOW``; ``O_CLOEXEC`` and
    ``O_DIRECTORY`` are OR-ed in when the ``os`` module defines them.

    Raises:
        RuntimeError: if ``NO_FOLLOW_SUPPORTED`` is false.
    """
    if not NO_FOLLOW_SUPPORTED:
        raise RuntimeError("artifact writes require O_NOFOLLOW support")
    optional = getattr(os, "O_CLOEXEC", 0) | getattr(os, "O_DIRECTORY", 0)
    return os.O_RDONLY | os.O_NOFOLLOW | optional
321
+
322
+
323
def temp_file_flags() -> int:
    """Return ``os.open`` flags for exclusively creating a temporary file.

    The result is write-only with ``O_CREAT | O_EXCL | O_NOFOLLOW``;
    ``O_CLOEXEC`` and ``O_NOCTTY`` are OR-ed in when the ``os`` module
    defines them.

    Raises:
        RuntimeError: if ``NO_FOLLOW_SUPPORTED`` is false.
    """
    if not NO_FOLLOW_SUPPORTED:
        raise RuntimeError("artifact writes require O_NOFOLLOW support")
    required = os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_NOFOLLOW
    return required | getattr(os, "O_CLOEXEC", 0) | getattr(os, "O_NOCTTY", 0)
332
+
333
+
334
def open_private_directory_no_follow(path: Path, *, label: str, create: bool) -> int:
    """Open *path* one component at a time and return a directory descriptor.

    Each component is opened relative to the descriptor of its parent using
    the flags from ``no_follow_dir_flags()``, so a symlink at any component
    makes the open fail instead of being followed.

    Args:
        path: Directory to open. ``~`` is expanded, and the result is passed
            through ``normalize_allowed_first_absolute_symlink``.
        label: Name used for this directory in error messages.
        create: When true, missing components are created with mode
            ``0o700``; when false, a missing component raises.

    Returns:
        An open file descriptor for the final directory. The caller owns it
        and must close it.

    Raises:
        ValueError: if *path* contains a ``..`` part.
        FileNotFoundError: if a component is missing and *create* is false.
        RuntimeError: if the platform lacks the required ``dir_fd`` support,
            a component is not a directory, or any other ``OSError`` occurs
            while walking the components.
    """
    reject_parent_traversal(path, label=label)
    path = normalize_allowed_first_absolute_symlink(path.expanduser())
    if not DIR_FD_OPEN_SUPPORTED:
        raise RuntimeError(f"{label} requires dir_fd open support")
    if create and not DIR_FD_MKDIR_SUPPORTED:
        raise RuntimeError(f"{label} requires dir_fd mkdir support")
    flags = no_follow_dir_flags()
    if path.is_absolute():
        # The filesystem root is opened plainly; every later component is
        # opened relative to its parent with the no-follow flags.
        current_fd = os.open(path.anchor or os.sep, os.O_RDONLY | (os.O_CLOEXEC if hasattr(os, "O_CLOEXEC") else 0))
        parts = path.parts[1:]
    else:
        current_fd = os.open(".", flags)
        parts = path.parts
    try:
        for part in parts:
            if part in {"", "."}:
                continue
            if part == "..":
                raise RuntimeError(f"{label} must not contain parent traversal")
            try:
                next_fd = os.open(part, flags, dir_fd=current_fd)
            except FileNotFoundError:
                if not create:
                    raise
                try:
                    os.mkdir(part, 0o700, dir_fd=current_fd)
                except FileExistsError:
                    # Another process created the entry between the failed
                    # open and this mkdir. The re-open below still applies
                    # the no-follow flags, so whatever now exists is
                    # validated the same way as a pre-existing component.
                    pass
                next_fd = os.open(part, flags, dir_fd=current_fd)
            try:
                if not stat.S_ISDIR(os.fstat(next_fd).st_mode):
                    raise RuntimeError(f"{label} must not traverse non-directory components")
            except Exception:
                os.close(next_fd)
                raise
            os.close(current_fd)
            current_fd = next_fd
        # Hand ownership to the caller; the sentinel stops the ``finally``
        # clause from closing the descriptor being returned.
        owned_fd = current_fd
        current_fd = -1
        return owned_fd
    except FileNotFoundError:
        raise
    except OSError as exc:
        raise RuntimeError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
    finally:
        if current_fd >= 0:
            os.close(current_fd)
244
379
 
245
380
 
246
- def write_private_text(path: Path, text: str) -> None:
247
- path = normalize_allowed_first_absolute_symlink(path)
248
- ensure_private_dir(path.parent)
249
- tmp = path.with_name(path.name + f".tmp-{os.getpid()}-{time.time_ns()}")
250
- flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, "O_NOFOLLOW", 0)
251
- fd = os.open(str(tmp), flags, 0o600)
381
+ def precheck_artifact_leaf(parent_fd: int, leaf: str, *, label: str) -> None:
382
+ if not DIR_FD_STAT_SUPPORTED:
383
+ raise RuntimeError(f"{label} requires dir_fd stat support")
252
384
  try:
253
- with os.fdopen(fd, "w", encoding="utf-8", newline="") as handle:
254
- handle.write(text)
255
- except Exception:
256
- try:
257
- tmp.unlink()
258
- except FileNotFoundError:
259
- pass
260
- raise
385
+ st = os.stat(leaf, dir_fd=parent_fd, follow_symlinks=False)
386
+ except FileNotFoundError:
387
+ return
388
+ except OSError as exc:
389
+ raise RuntimeError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
390
+ if not stat.S_ISREG(st.st_mode):
391
+ raise RuntimeError(f"{label} must be missing or a regular file")
392
+
393
+
394
def write_all_fd(fd: int, data: bytes) -> None:
    """Write every byte of *data* to *fd*, looping over partial writes.

    Raises:
        OSError: with the message ``"short write"`` if ``os.write`` reports
            that no bytes were written. Errors raised by ``os.write``
            itself propagate unchanged.
    """
    remaining = memoryview(data)
    while len(remaining) > 0:
        count = os.write(fd, remaining)
        if count <= 0:
            raise OSError("short write")
        remaining = remaining[count:]
402
+
403
+
404
+ def fsync_required(fd: int, *, label: str, committed: bool = False) -> None:
261
405
  try:
262
- os.replace(tmp, path)
263
- except Exception:
264
- try:
265
- tmp.unlink()
266
- except FileNotFoundError:
267
- pass
268
- raise
406
+ os.fsync(fd)
407
+ except OSError as exc:
408
+ if committed:
409
+ raise RuntimeError(f"committed_but_parent_fsync_failed: {os_error_detail(exc)}") from exc
410
+ raise RuntimeError(f"could not fsync {label}: {os_error_detail(exc)}") from exc
411
+
412
+
413
+ def write_private_text(path: Path, text: str) -> None:
414
+ reject_parent_traversal(path, label="artifact file")
415
+ path = normalize_allowed_first_absolute_symlink(path.expanduser())
416
+ if not DIR_FD_REPLACE_SUPPORTED:
417
+ raise RuntimeError("artifact writes require dir_fd replace support")
418
+ if not DIR_FD_UNLINK_SUPPORTED:
419
+ raise RuntimeError("artifact writes require dir_fd unlink support")
420
+ parent_fd = open_private_directory_no_follow(path.parent, label="artifact directory", create=True)
269
421
  try:
270
- os.chmod(path, 0o600)
422
+ os.fchmod(parent_fd, 0o700)
271
423
  except OSError:
272
424
  pass
425
+ fd = -1
426
+ temp_leaf: str | None = None
427
+ try:
428
+ leaf = path.name
429
+ if leaf in {"", ".", ".."}:
430
+ raise RuntimeError("artifact file must name a regular file")
431
+ precheck_artifact_leaf(parent_fd, leaf, label="artifact file")
432
+ for _attempt in range(20):
433
+ candidate = f".{leaf}.{os.getpid()}.{secrets.token_hex(8)}.tmp"
434
+ try:
435
+ fd = os.open(candidate, temp_file_flags(), 0o600, dir_fd=parent_fd)
436
+ temp_leaf = candidate
437
+ break
438
+ except FileExistsError:
439
+ continue
440
+ if fd < 0 or temp_leaf is None:
441
+ raise RuntimeError("could not create temporary artifact file")
442
+ if not stat.S_ISREG(os.fstat(fd).st_mode):
443
+ raise RuntimeError("temporary artifact file must be a regular file")
444
+ os.fchmod(fd, 0o600)
445
+ write_all_fd(fd, text.encode("utf-8"))
446
+ fsync_required(fd, label="artifact temp file")
447
+ os.close(fd)
448
+ fd = -1
449
+ fsync_required(parent_fd, label="artifact directory before replace")
450
+ os.replace(temp_leaf, leaf, src_dir_fd=parent_fd, dst_dir_fd=parent_fd)
451
+ temp_leaf = None
452
+ fsync_required(parent_fd, label="artifact directory after replace", committed=True)
453
+ except OSError as exc:
454
+ raise RuntimeError(f"could not write artifact file: {os_error_detail(exc)}") from exc
455
+ finally:
456
+ if fd >= 0:
457
+ os.close(fd)
458
+ if temp_leaf is not None:
459
+ try:
460
+ os.unlink(temp_leaf, dir_fd=parent_fd)
461
+ except OSError:
462
+ pass
463
+ os.close(parent_fd)
273
464
 
274
465
 
275
466
  def read_bounded_stdin(max_bytes: int) -> tuple[str, bool, int]:
@@ -283,6 +474,7 @@ def read_bounded_stdin(max_bytes: int) -> tuple[str, bool, int]:
283
474
  def artifact_paths(directory: Path, artifact_id: str) -> tuple[Path, Path]:
284
475
  if not ARTIFACT_ID_RE.fullmatch(artifact_id):
285
476
  raise ValueError("artifact id must be 16-64 lowercase hex chars")
477
+ reject_parent_traversal(directory, label="artifact directory")
286
478
  directory = normalize_allowed_first_absolute_symlink(directory)
287
479
  return directory / f"{artifact_id}.txt", directory / f"{artifact_id}.json"
288
480
 
@@ -295,15 +487,21 @@ def artifact_read_directories(raw_dir: str) -> list[Path]:
295
487
  default. Reads and listings include that legacy default so old receipts keep
296
488
  working; stores intentionally continue to use only the new path.
297
489
  """
298
- primary = normalize_allowed_first_absolute_symlink(Path(raw_dir).expanduser())
490
+ raw_path = Path(raw_dir).expanduser()
491
+ reject_parent_traversal(raw_path, label="artifact directory")
492
+ primary = normalize_allowed_first_absolute_symlink(raw_path)
299
493
  directories = [primary]
300
- if Path(raw_dir).expanduser() == Path(DEFAULT_ARTIFACT_DIR):
494
+ if default_artifact_dir_requested(raw_dir):
301
495
  legacy = normalize_allowed_first_absolute_symlink(Path(LEGACY_ARTIFACT_DIR).expanduser())
302
496
  if legacy != primary:
303
497
  directories.append(legacy)
304
498
  return directories
305
499
 
306
500
 
501
def default_artifact_dir_requested(raw_dir: str) -> bool:
    """Return whether *raw_dir*, after ``~`` expansion, equals ``DEFAULT_ARTIFACT_DIR``."""
    requested = Path(raw_dir).expanduser()
    default = Path(DEFAULT_ARTIFACT_DIR)
    return requested == default
503
+
504
+
307
505
  CONTENT_TYPE_VALUES = ("json", "diff", "log", "search", "code", "prose", "text")
308
506
  # Recommended retrieval strategy per content type. Pattern-oriented payloads
309
507
  # (logs, search hits, diffs) are best sliced by `--pattern`; structured or
@@ -396,6 +594,8 @@ def build_retrieval_hints(
396
594
  content_type: str,
397
595
  strategy: str,
398
596
  total_lines: int,
597
+ raw_dir: str | None = None,
598
+ show_paths: bool = False,
399
599
  ) -> list[dict[str, object]]:
400
600
  """Build deterministic, machine-readable retrieval hints for bounded round-trip.
401
601
 
@@ -413,8 +613,8 @@ def build_retrieval_hints(
413
613
  lines_hint: dict[str, object] = {
414
614
  "type": "lines",
415
615
  "selector": {"start": 1, "end": end_line},
416
- "cli": line_query_cli(artifact_id, 1, end_line),
417
- "exact": total_lines <= MAX_QUERY_LINES,
616
+ "cli": line_query_cli(artifact_id, 1, end_line, raw_dir=raw_dir, show_paths=show_paths),
617
+ "exact": total_lines <= MAX_QUERY_LINES and artifact_dir_cli_is_exact(raw_dir, show_paths=show_paths),
418
618
  }
419
619
  if end_line > DEFAULT_MAX_LINES:
420
620
  lines_hint["max_lines"] = end_line
@@ -436,37 +636,69 @@ def build_retrieval_hints(
436
636
  {
437
637
  "type": "pattern",
438
638
  "selector": {"pattern": anchor},
439
- "cli": f"context-guard-artifact get {artifact_id} --pattern '{anchor}'",
639
+ "cli": f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --pattern {shlex.quote(anchor)}",
440
640
  }
441
641
  )
442
642
  hints.append(
443
643
  {
444
644
  "type": "head",
445
645
  "selector": {"max_lines": DEFAULT_MAX_LINES},
446
- "cli": f"context-guard-artifact get {artifact_id} --max-lines {DEFAULT_MAX_LINES}",
646
+ "cli": f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --max-lines {DEFAULT_MAX_LINES}",
447
647
  }
448
648
  )
449
649
  return hints
450
650
 
451
651
 
452
- def line_query_cli(artifact_id: str, start: int, end: int) -> str:
453
- cli = f"context-guard-artifact get {artifact_id} --lines {start}:{end}"
652
def artifact_dir_cli_prefix(raw_dir: str | None, *, show_paths: bool = False) -> str:
    """Return the command prefix used in generated retrieval CLI strings.

    With no *raw_dir*, or with the default artifact directory, the bare
    command name is returned. For any other directory a ``--dir`` argument
    is appended: the shell-quoted path when *show_paths* is true, otherwise
    the ``<artifact_dir>`` placeholder.
    """
    if not raw_dir or default_artifact_dir_requested(raw_dir):
        return "context-guard-artifact"
    if show_paths:
        return f"context-guard-artifact --dir {shlex.quote(raw_dir)}"
    return "context-guard-artifact --dir <artifact_dir>"
658
+
659
+
660
def artifact_dir_cli_is_exact(raw_dir: str | None, *, show_paths: bool = False) -> bool:
    """Return whether generated CLI strings can be run exactly as printed.

    This holds when no directory was given, when the default artifact
    directory is in use, or when *show_paths* is set.
    """
    if not raw_dir:
        return True
    if default_artifact_dir_requested(raw_dir):
        return True
    return show_paths
662
+
663
+
664
def line_query_cli(
    artifact_id: str,
    start: int,
    end: int,
    *,
    raw_dir: str | None = None,
    show_paths: bool = False,
) -> str:
    """Build the ``get --lines START:END`` command string for an artifact.

    When the requested span is longer than ``DEFAULT_MAX_LINES``, a
    ``--max-lines`` argument is appended, capped at ``MAX_QUERY_LINES``.
    """
    prefix = artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)
    command = f"{prefix} get {artifact_id} --lines {start}:{end}"
    span = end - start + 1
    if span <= DEFAULT_MAX_LINES:
        return command
    return command + f" --max-lines {min(span, MAX_QUERY_LINES)}"
458
677
 
459
678
 
460
- def line_receipt(artifact_id: str, line_number: int, text: str) -> dict[str, object]:
679
+ def line_receipt(
680
+ artifact_id: str,
681
+ line_number: int,
682
+ text: str,
683
+ *,
684
+ raw_dir: str | None = None,
685
+ show_paths: bool = False,
686
+ ) -> dict[str, object]:
461
687
  return {
462
688
  "line": line_number,
463
689
  "text": cap_digest_text(text.strip()),
464
690
  "selector": {"type": "lines", "start": line_number, "end": line_number},
465
- "cli": line_query_cli(artifact_id, line_number, line_number),
691
+ "cli": line_query_cli(artifact_id, line_number, line_number, raw_dir=raw_dir, show_paths=show_paths),
466
692
  }
467
693
 
468
694
 
469
- def build_top_error_receipts(artifact_id: str, lines: list[str]) -> list[dict[str, object]]:
695
+ def build_top_error_receipts(
696
+ artifact_id: str,
697
+ lines: list[str],
698
+ *,
699
+ raw_dir: str | None = None,
700
+ show_paths: bool = False,
701
+ ) -> list[dict[str, object]]:
470
702
  receipts: list[dict[str, object]] = []
471
703
  seen: set[str] = set()
472
704
  for line_number, line in enumerate(lines, start=1):
@@ -475,7 +707,7 @@ def build_top_error_receipts(artifact_id: str, lines: list[str]) -> list[dict[st
475
707
  text = cap_digest_text(line.strip())
476
708
  if not text or text in seen:
477
709
  continue
478
- receipt = line_receipt(artifact_id, line_number, text)
710
+ receipt = line_receipt(artifact_id, line_number, text, raw_dir=raw_dir, show_paths=show_paths)
479
711
  receipts.append(receipt)
480
712
  seen.add(text)
481
713
  if len(receipts) >= MAX_TOP_ERROR_RECEIPTS:
@@ -483,7 +715,14 @@ def build_top_error_receipts(artifact_id: str, lines: list[str]) -> list[dict[st
483
715
  return receipts
484
716
 
485
717
 
486
- def build_duplicate_line_groups(artifact_id: str, lines: list[str], *, limit: int = MAX_DUPLICATE_GROUPS) -> list[dict[str, object]]:
718
+ def build_duplicate_line_groups(
719
+ artifact_id: str,
720
+ lines: list[str],
721
+ *,
722
+ limit: int = MAX_DUPLICATE_GROUPS,
723
+ raw_dir: str | None = None,
724
+ show_paths: bool = False,
725
+ ) -> list[dict[str, object]]:
487
726
  counts: dict[str, int] = {}
488
727
  first_line: dict[str, int] = {}
489
728
  for line_number, line in enumerate(lines, start=1):
@@ -506,13 +745,20 @@ def build_duplicate_line_groups(artifact_id: str, lines: list[str], *, limit: in
506
745
  "first_line": line_number,
507
746
  "text": text,
508
747
  "selector": {"type": "lines", "start": line_number, "end": line_number},
509
- "cli": line_query_cli(artifact_id, line_number, line_number),
748
+ "cli": line_query_cli(artifact_id, line_number, line_number, raw_dir=raw_dir, show_paths=show_paths),
510
749
  }
511
750
  )
512
751
  return groups
513
752
 
514
753
 
515
- def build_digest(sanitized_text: str, *, artifact_id: str, redacted_lines: int) -> dict[str, object]:
754
+ def build_digest(
755
+ sanitized_text: str,
756
+ *,
757
+ artifact_id: str,
758
+ redacted_lines: int,
759
+ raw_dir: str | None = None,
760
+ show_paths: bool = False,
761
+ ) -> dict[str, object]:
516
762
  lines = sanitized_text.splitlines()
517
763
  top_errors = compact_items(
518
764
  (line for line in lines if ERROR_RE.search(line)),
@@ -528,8 +774,8 @@ def build_digest(sanitized_text: str, *, artifact_id: str, redacted_lines: int)
528
774
  "markers": sanitized_text.count("[REDACTED]"),
529
775
  },
530
776
  "top_error_lines": top_errors,
531
- "top_error_receipts": build_top_error_receipts(artifact_id, lines),
532
- "duplicate_line_groups": build_duplicate_line_groups(artifact_id, lines),
777
+ "top_error_receipts": build_top_error_receipts(artifact_id, lines, raw_dir=raw_dir, show_paths=show_paths),
778
+ "duplicate_line_groups": build_duplicate_line_groups(artifact_id, lines, raw_dir=raw_dir, show_paths=show_paths),
533
779
  "representative_head": compact_items(
534
780
  lines,
535
781
  limit=8,
@@ -572,7 +818,198 @@ def suggested_queries_for(metadata: dict[str, object]) -> list[str]:
572
818
  return queries[:MAX_SUGGESTED_QUERIES]
573
819
 
574
820
 
575
- def receipt_for(metadata: dict[str, object]) -> dict[str, object]:
821
def artifact_handle(artifact_id: str) -> str:
    """Return the ``contextguard-artifact:<id>`` handle string for *artifact_id*."""
    scheme = "contextguard-artifact"
    return f"{scheme}:{artifact_id}"
823
+
824
+
825
def compact_stored_output(metadata: dict[str, object]) -> dict[str, object]:
    """Return a reduced copy of ``metadata["stored_output"]``.

    Only a fixed set of keys is copied, in a fixed order, and only when
    present. ``content_type`` is added from the top level of *metadata* when
    it is a string. An empty dict is returned when ``stored_output`` is not a
    dict.
    """
    stored = metadata.get("stored_output")
    if not isinstance(stored, dict):
        return {}
    wanted = ("scope", "bytes", "lines", "sha256", "content_file", "metadata_file")
    result: dict[str, object] = {field: stored[field] for field in wanted if field in stored}
    kind = metadata.get("content_type")
    if isinstance(kind, str):
        result["content_type"] = kind
    return result
837
+
838
+
839
def digest_count(digest: dict[str, object], key: str) -> int:
    """Return the length of ``digest[key]`` when it is a list, otherwise ``0``."""
    entries = digest.get(key)
    if isinstance(entries, list):
        return len(entries)
    return 0
842
+
843
+
844
def build_output_sandbox_summary(metadata: dict[str, object]) -> dict[str, object]:
    """Summarize ``metadata["digest"]`` as counts rather than content.

    Returns ``{"status": "stored"}`` when the digest is not a dict. Otherwise
    the summary holds the digest status (defaulting to ``"stored"``), the
    list lengths of the digest sections, redaction information, and the
    ``capped_for_metadata`` flag when it is a bool.
    """
    digest = metadata.get("digest")
    if not isinstance(digest, dict):
        return {"status": "stored"}

    summary: dict[str, object] = {"status": digest.get("status") or "stored"}
    count_fields = (
        ("top_error_count", "top_error_lines"),
        ("top_error_receipt_count", "top_error_receipts"),
        ("duplicate_line_group_count", "duplicate_line_groups"),
        ("representative_head_count", "representative_head"),
        ("representative_tail_count", "representative_tail"),
    )
    for summary_key, digest_key in count_fields:
        summary[summary_key] = digest_count(digest, digest_key)

    redaction_counts = digest.get("redaction_counts")
    if isinstance(redaction_counts, dict):
        # Only scalar values (or None) are carried over; keys become strings.
        scalar_types = (int, float, str, bool)
        summary["redaction_counts"] = {
            str(name): count
            for name, count in redaction_counts.items()
            if isinstance(count, scalar_types) or count is None
        }
    elif "redacted_lines" in digest:
        summary["redacted_lines"] = digest.get("redacted_lines")

    capped = digest.get("capped_for_metadata")
    if isinstance(capped, bool):
        summary["capped_for_metadata"] = capped
    return summary
869
+
870
+
871
def rehydration_command_record(
    *,
    kind: str,
    cli: str,
    selector: dict[str, object],
    exact: bool,
    note: str | None = None,
) -> dict[str, object]:
    """Build one rehydration command entry.

    The entry always has ``type``, ``selector``, ``cli`` and ``exact`` keys,
    in that order; ``note`` is appended only when it is truthy.
    """
    optional: dict[str, object] = {"note": note} if note else {}
    return {
        "type": kind,
        "selector": selector,
        "cli": cli,
        "exact": exact,
        **optional,
    }
888
+
889
+
890
def build_output_sandbox_rehydration(
    metadata: dict[str, object],
    *,
    raw_dir: str | None = None,
    show_paths: bool = False,
) -> dict[str, object]:
    """Build the list of commands that re-read stored output for a receipt.

    The list starts with a ``receipt`` command, then adds one command per
    usable retrieval hint in ``metadata["retrieval"]["hints"]``, and finally
    a literal ``search`` command when an error anchor is found in the
    digest's top error lines. Hint processing stops once five commands have
    been collected, and the search command is added only below that limit.

    Commands for ``lines``, ``pattern`` and ``head`` hints are rebuilt from
    the hint selector for the current *raw_dir* / *show_paths*; any other
    hint falls back to the ``cli`` string stored in the hint.
    """
    command_limit = 5
    artifact_id = str(metadata["artifact_id"])
    cli_exact = artifact_dir_cli_is_exact(raw_dir, show_paths=show_paths)
    prefix = artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)
    if cli_exact:
        note = None
    else:
        note = "custom artifact directory is redacted; rerun with the same --dir value or pass --show-paths for a directly executable local command"

    def rebuilt_cli(hint_type: object, selector: dict[str, object]) -> str | None:
        # Regenerate the command from the selector; None means "not rebuildable".
        if hint_type == "lines":
            start = selector.get("start")
            end = selector.get("end")
            if isinstance(start, int) and isinstance(end, int):
                return line_query_cli(artifact_id, start, end, raw_dir=raw_dir, show_paths=show_paths)
        elif hint_type == "pattern":
            pattern = selector.get("pattern")
            if isinstance(pattern, str) and pattern:
                return f"{prefix} get {artifact_id} --pattern {shlex.quote(pattern)}"
        elif hint_type == "head":
            max_lines = selector.get("max_lines")
            if isinstance(max_lines, int) and max_lines > 0:
                return f"{prefix} get {artifact_id} --max-lines {max_lines}"
        return None

    commands: list[dict[str, object]] = [
        rehydration_command_record(
            kind="metadata",
            selector={"type": "receipt"},
            cli=f"{prefix} receipt {artifact_id} --json",
            exact=cli_exact,
            note=note,
        )
    ]

    retrieval = metadata.get("retrieval")
    hints = retrieval.get("hints") if isinstance(retrieval, dict) else None
    for hint in hints if isinstance(hints, list) else ():
        if not isinstance(hint, dict):
            continue
        hint_type = hint.get("type")
        selector = hint.get("selector")
        if not isinstance(selector, dict):
            selector = {}
        exact = bool(hint.get("exact", True)) and cli_exact
        cli = rebuilt_cli(hint_type, selector)
        if cli is None:
            stored_cli = hint.get("cli")
            if isinstance(stored_cli, str) and stored_cli:
                cli = stored_cli
        if cli:
            if not cli_exact:
                hint_note = note
            else:
                hint_note = str(hint.get("note") or "") or None
            commands.append(
                rehydration_command_record(
                    kind=str(hint_type or "query"),
                    selector=selector,
                    cli=cli,
                    exact=exact,
                    note=hint_note,
                )
            )
        if len(commands) >= command_limit:
            break

    digest = metadata.get("digest")
    top_error_lines = digest.get("top_error_lines") if isinstance(digest, dict) else None
    if isinstance(top_error_lines, list):
        anchor = first_error_anchor("\n".join(str(line) for line in top_error_lines))
        if anchor and len(commands) < command_limit:
            commands.append(
                rehydration_command_record(
                    kind="search",
                    selector={"type": "literal", "pattern": anchor},
                    cli=f"{prefix} search {shlex.quote(anchor)} --json",
                    exact=cli_exact,
                    note=note,
                )
            )

    if default_artifact_dir_requested(raw_dir or DEFAULT_ARTIFACT_DIR):
        dir_argument = "default"
    elif show_paths:
        dir_argument = "included"
    else:
        dir_argument = "redacted"

    return {
        "commands": commands,
        "dir_argument": dir_argument,
        "exact_commands": cli_exact,
        "note": note,
    }
976
+
977
+
978
def build_output_sandbox_envelope(
    metadata: dict[str, object],
    *,
    raw_dir: str | None = None,
    show_paths: bool = False,
) -> dict[str, object]:
    """Assemble the ``output_sandbox`` envelope for an artifact receipt.

    The envelope combines the artifact handle, the compact stored-output
    record, the digest summary, the rehydration commands, fixed agent
    guidance text, and a fixed ``claim_boundary`` record.
    """
    artifact_id = str(metadata["artifact_id"])
    handle = artifact_handle(artifact_id)
    stored_output = compact_stored_output(metadata)
    summary = build_output_sandbox_summary(metadata)
    rehydration = build_output_sandbox_rehydration(metadata, raw_dir=raw_dir, show_paths=show_paths)
    agent_guidance = [
        "Keep this compact receipt in agent context instead of pasting the full output.",
        "Before relying on omitted details, rehydrate the exact sanitized slice with one of rehydration.commands[].cli.",
        "For repeated diagnostics, query narrower lines or literal matches instead of rerunning broad commands unchanged.",
    ]
    claim_boundary = {
        "local_only": True,
        "stored_content_is_sanitized_copy": True,
        "hosted_api_token_or_cost_savings_claim_allowed": False,
        "exact_rehydration_required_before_relying_on_omitted_detail": True,
    }
    return {
        "schema_version": OUTPUT_SANDBOX_SCHEMA_VERSION,
        "mode": "local_artifact_receipt",
        "handle": handle,
        "artifact_id": artifact_id,
        "stored_output": stored_output,
        "summary": summary,
        "rehydration": rehydration,
        "agent_guidance": agent_guidance,
        "claim_boundary": claim_boundary,
    }
1005
+
1006
+
1007
+ def receipt_for(
1008
+ metadata: dict[str, object],
1009
+ *,
1010
+ raw_dir: str | None = None,
1011
+ show_paths: bool = False,
1012
+ ) -> dict[str, object]:
576
1013
  artifact_id = str(metadata["artifact_id"])
577
1014
  return {
578
1015
  "artifact_id": artifact_id,
@@ -585,11 +1022,12 @@ def receipt_for(metadata: dict[str, object]) -> dict[str, object]:
585
1022
  "digest": metadata.get("digest"),
586
1023
  "retrieval": metadata.get("retrieval"),
587
1024
  "available_queries": [
588
- f"context-guard-artifact get {artifact_id} --lines 1:80",
589
- f"context-guard-artifact get {artifact_id} --pattern ERROR --max-lines 40",
590
- f"context-guard-artifact get {artifact_id} --json --lines 1:20",
1025
+ line_query_cli(artifact_id, 1, 80, raw_dir=raw_dir, show_paths=show_paths),
1026
+ f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --pattern ERROR --max-lines 40",
1027
+ f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --json --lines 1:20",
591
1028
  ],
592
1029
  "suggested_queries": suggested_queries_for(metadata),
1030
+ "output_sandbox": build_output_sandbox_envelope(metadata, raw_dir=raw_dir, show_paths=show_paths),
593
1031
  }
594
1032
 
595
1033
 
@@ -699,7 +1137,13 @@ def store_command(args: argparse.Namespace) -> int:
699
1137
  "content_file": content_path.name,
700
1138
  "metadata_file": meta_path.name,
701
1139
  },
702
- "digest": build_digest(sanitized_text, artifact_id=artifact_id, redacted_lines=redacted_lines),
1140
+ "digest": build_digest(
1141
+ sanitized_text,
1142
+ artifact_id=artifact_id,
1143
+ redacted_lines=redacted_lines,
1144
+ raw_dir=args.dir,
1145
+ show_paths=args.show_paths,
1146
+ ),
703
1147
  "retrieval": {
704
1148
  "strategy": strategy,
705
1149
  "deterministic": True,
@@ -709,17 +1153,22 @@ def store_command(args: argparse.Namespace) -> int:
709
1153
  content_type=content_type,
710
1154
  strategy=strategy,
711
1155
  total_lines=total_lines,
1156
+ raw_dir=args.dir,
1157
+ show_paths=args.show_paths,
712
1158
  ),
713
1159
  },
714
1160
  }
715
1161
  shrink_digest_for_metadata_cap(metadata)
716
1162
  write_private_text(content_path, sanitized_text)
717
1163
  write_private_text(meta_path, metadata_json_text(metadata))
718
- receipt = receipt_for(metadata)
1164
+ receipt = receipt_for(metadata, raw_dir=args.dir, show_paths=args.show_paths)
719
1165
  if args.json:
720
1166
  print(json.dumps(receipt, ensure_ascii=False, indent=2, sort_keys=True))
721
1167
  else:
722
1168
  print(f"artifact_id={artifact_id}")
1169
+ sandbox = receipt.get("output_sandbox")
1170
+ handle = sandbox.get("handle") if isinstance(sandbox, dict) else artifact_handle(artifact_id)
1171
+ print(f"handle={handle}")
723
1172
  stored = receipt["stored_output"]
724
1173
  if isinstance(stored, dict):
725
1174
  print(f"stored_output={stored.get('lines')} lines/{stored.get('bytes')} bytes")
@@ -728,7 +1177,16 @@ def store_command(args: argparse.Namespace) -> int:
728
1177
  print("top_error_lines:")
729
1178
  for line in digest["top_error_lines"]: # type: ignore[index]
730
1179
  print(f"- {line}")
731
- print(f"query=context-guard-artifact get {artifact_id} --lines 1:80")
1180
+ available_queries = receipt.get("available_queries")
1181
+ if isinstance(available_queries, list) and available_queries:
1182
+ print(f"query={available_queries[0]}")
1183
+ rehydration = sandbox.get("rehydration") if isinstance(sandbox, dict) else None
1184
+ commands = rehydration.get("commands") if isinstance(rehydration, dict) else None
1185
+ if isinstance(commands, list):
1186
+ for command in commands:
1187
+ if isinstance(command, dict) and command.get("type") != "metadata" and isinstance(command.get("cli"), str):
1188
+ print(f"rehydrate={command['cli']}")
1189
+ break
732
1190
  return 0
733
1191
 
734
1192
 
@@ -745,6 +1203,26 @@ def load_metadata(directory: Path, artifact_id: str) -> dict[str, object]:
745
1203
  return data
746
1204
 
747
1205
 
1206
def load_verified_artifact(directory: Path, artifact_id: str) -> tuple[dict[str, object], Path, str]:
    """Load an artifact's metadata and content, verifying size and SHA-256.

    The expected digest and byte count are read from the metadata's
    ``stored_output`` mapping and compared with the content file on disk.

    Args:
        directory: Directory that holds the artifact files.
        artifact_id: Identifier of the artifact to load.

    Returns:
        A ``(metadata, content_path, content)`` tuple.

    Raises:
        ValueError: If the recorded sha256 or byte count is missing or
            malformed, or if the on-disk size or digest does not match.
        Other exceptions raised by the loading helpers propagate unchanged.
    """
    metadata = load_metadata(directory, artifact_id)
    content_path, _ = artifact_paths(directory, artifact_id)

    recorded = metadata.get("stored_output")
    # A non-dict "stored_output" is treated as having no recorded fields.
    fields = recorded if isinstance(recorded, dict) else {}

    expected_sha = fields.get("sha256")
    sha_is_valid = isinstance(expected_sha, str) and re.fullmatch(r"[a-f0-9]{64}", expected_sha)
    if not sha_is_valid:
        raise ValueError(f"artifact metadata missing stored_output sha256: {artifact_id}")

    expected_bytes = fields.get("bytes")
    if not isinstance(expected_bytes, int) or not 0 <= expected_bytes <= MAX_MAX_BYTES:
        raise ValueError(f"artifact metadata has invalid stored_output bytes: {artifact_id}")

    # Compare the size first so a wrong-sized file is rejected before it is read.
    if regular_private_file_size(content_path) != expected_bytes:
        raise ValueError(f"artifact content checksum mismatch: {artifact_id}")

    content = read_bounded_private_text(content_path, expected_bytes)
    digest = hashlib.sha256(content.encode("utf-8", errors="replace")).hexdigest()
    if digest != expected_sha:
        raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
    return metadata, content_path, content
1224
+
1225
+
748
1226
  def parse_line_range(value: str | None) -> tuple[int, int] | None:
749
1227
  if not value:
750
1228
  return None
@@ -766,6 +1244,149 @@ def cap_text(text: str, max_chars: int) -> tuple[str, bool]:
766
1244
  return text[:keep].rstrip() + marker, True
767
1245
 
768
1246
 
1247
def search_literal(value: str) -> str:
    """Validate a literal search pattern and return it unchanged.

    Raises:
        ValueError: If the pattern is empty, contains a NUL character, or
            its UTF-8 encoding is longer than ``MAX_SEARCH_PATTERN_BYTES``.
    """
    problem: str | None = None
    if not value:
        problem = "search pattern must not be empty"
    elif "\x00" in value:
        problem = "search pattern must not contain NUL bytes"
    elif len(value.encode("utf-8", errors="replace")) > MAX_SEARCH_PATTERN_BYTES:
        problem = f"search pattern exceeds {MAX_SEARCH_PATTERN_BYTES} bytes"

    if problem is not None:
        raise ValueError(problem)
    return value
1256
+
1257
+
1258
def safe_query_label(value: str) -> str:
    """Return ``value`` passed through ``sanitize_one_line`` with paths hidden.

    Used as the display label for a search query in place of the raw pattern.
    """
    label = sanitize_one_line(value, show_paths=False)
    return label
1260
+
1261
+
1262
def artifact_dir_label(raw_dir: str) -> str:
    """Return a display label for the artifact directory argument.

    Yields the fixed string ``"default"`` when ``raw_dir`` denotes the default
    artifact directory; otherwise the value is passed through
    ``sanitize_one_line`` with paths hidden.
    """
    uses_default = default_artifact_dir_requested(raw_dir)
    return "default" if uses_default else sanitize_one_line(raw_dir, show_paths=False)
1266
+
1267
+
1268
def metadata_text_field(metadata: dict[str, object], key: str) -> str | None:
    """Return the string stored under ``key``, sanitized to one line.

    Returns ``None`` when the key is absent or its value is not a string.
    """
    raw = metadata.get(key)
    return sanitize_one_line(raw, show_paths=False) if isinstance(raw, str) else None
1273
+
1274
+
1275
def metadata_content_type(metadata: dict[str, object]) -> str:
    """Return the metadata's content type, falling back to ``"text"``.

    The recorded value is used only when it is a string that appears in
    ``CONTENT_TYPE_VALUES``.
    """
    declared = metadata.get("content_type")
    if not isinstance(declared, str):
        return "text"
    if declared not in CONTENT_TYPE_VALUES:
        return "text"
    return declared
1278
+
1279
+
1280
def metadata_candidate_paths(directory: Path, limit: int) -> tuple[list[Path], int, int]:
    """Collect up to ``limit`` metadata-file candidates from ``directory``.

    Only entries whose name ends in ``.json`` are considered. An entry is
    counted as skipped when its stem does not match ``ARTIFACT_ID_RE``, when
    it is not a regular file (symlinks are not followed), or when checking
    it raises ``OSError``.

    Returns:
        ``(candidates, skipped, truncated_lower_bound)``. ``candidates`` is
        sorted on the normal path. ``truncated_lower_bound`` is 1 when the
        scan stopped because one more valid candidate was found after the
        limit was reached, else 0. If scanning the directory itself raises
        ``OSError``, whatever was gathered so far is returned (unsorted) with
        the skipped count increased by one.
    """
    found: list[Path] = []
    rejected = 0
    overflow = 0
    if limit <= 0:
        return found, rejected, 0
    try:
        with os.scandir(directory) as scanner:
            for item in scanner:
                filename = item.name
                if not filename.endswith(".json"):
                    continue
                # Strip the ".json" suffix before validating the artifact id.
                usable = ARTIFACT_ID_RE.fullmatch(filename[:-5]) is not None
                if usable:
                    try:
                        usable = item.is_file(follow_symlinks=False)
                    except OSError:
                        usable = False
                if not usable:
                    rejected += 1
                    continue
                if len(found) >= limit:
                    # Stop at the first surplus candidate; the true surplus is unknown.
                    overflow += 1
                    break
                found.append(directory / filename)
    except OSError:
        return found, rejected + 1, overflow
    found.sort()
    return found, rejected, overflow
1309
+
1310
+
1311
def search_match_record(
    *,
    artifact_id: str,
    line_number: int,
    lines: list[str],
    context_lines: int,
    snippet_chars: int,
    metadata: dict[str, object],
    raw_dir: str,
    show_paths: bool,
) -> dict[str, object]:
    """Build the result record for one matching line of an artifact.

    Args:
        artifact_id: Identifier of the artifact containing the match.
        line_number: 1-based index of the matching line within ``lines``.
        lines: All lines of the artifact content.
        context_lines: Number of lines to include before and after the match.
        snippet_chars: Per-line cap passed to ``cap_line`` as ``limit``.
        metadata: Artifact metadata (content type and command preview are read).
        raw_dir: Artifact directory argument used for the search.
        show_paths: Whether the raw directory may appear in the emitted CLI.

    Returns:
        A dictionary with the matched line, its context, and a ``retrieval``
        section holding a line-range selector and the matching CLI command.
    """
    # Clamp the context window to the bounds of the artifact.
    first = max(1, line_number - context_lines)
    last = min(len(lines), line_number + context_lines)
    exact = artifact_dir_cli_is_exact(raw_dir, show_paths=show_paths)

    def snippet(number: int) -> str:
        return cap_line(lines[number - 1].rstrip("\n"), limit=snippet_chars)

    def numbered(number: int) -> dict[str, object]:
        return {"line": number, "text": snippet(number)}

    matched_text = snippet(line_number)
    before = [numbered(number) for number in range(first, line_number)]
    after = [numbered(number) for number in range(line_number + 1, last + 1)]
    content_type = metadata_content_type(metadata)
    command_preview = metadata_text_field(metadata, "command_preview")

    rehydrate_cli = line_query_cli(artifact_id, first, last, raw_dir=raw_dir, show_paths=show_paths)
    if default_artifact_dir_requested(raw_dir):
        dir_argument = "default"
    elif show_paths:
        dir_argument = "included"
    else:
        dir_argument = "redacted"

    # Explain why the CLI is not directly runnable when the directory is hidden.
    note = None
    if not exact:
        note = "custom artifact directory is redacted; rerun with the same --dir used for search, or pass search --show-paths to emit a directly executable local CLI"

    return {
        "artifact_id": artifact_id,
        "line": line_number,
        "text": matched_text,
        "context_before": before,
        "context_after": after,
        "content_type": content_type,
        "command_preview": command_preview,
        "retrieval": {
            "selector": {"type": "lines", "start": first, "end": last},
            "cli": rehydrate_cli,
            "exact": exact,
            "dir_argument": dir_argument,
            "note": note,
        },
    }
1349
+
1350
+
1351
def search_artifact_content(
    *,
    artifact_id: str,
    metadata: dict[str, object],
    content: str,
    literal: str,
    ignore_case: bool,
    context_lines: int,
    snippet_chars: int,
    remaining_matches: int,
    raw_dir: str,
    show_paths: bool,
) -> tuple[list[dict[str, object]], int]:
    """Find lines of ``content`` containing ``literal`` as a substring.

    Every matching line is counted, but match records are built only for
    the first ``remaining_matches`` of them. With ``ignore_case`` set, both
    the pattern and each line are compared in case-folded form.

    Returns:
        ``(records, matched_line_count)`` where ``records`` holds at most
        ``remaining_matches`` entries.
    """
    rows = content.splitlines()
    target = literal.casefold() if ignore_case else literal

    hit_numbers = [
        number
        for number, row in enumerate(rows, start=1)
        if target in (row.casefold() if ignore_case else row)
    ]

    # A non-positive budget yields no records while still counting every hit.
    budget = max(remaining_matches, 0)
    records: list[dict[str, object]] = [
        search_match_record(
            artifact_id=artifact_id,
            line_number=number,
            lines=rows,
            context_lines=context_lines,
            snippet_chars=snippet_chars,
            metadata=metadata,
            raw_dir=raw_dir,
            show_paths=show_paths,
        )
        for number in hit_numbers[:budget]
    ]
    return records, len(hit_numbers)
1388
+
1389
+
769
1390
  def query_content(
770
1391
  content: str,
771
1392
  *,
@@ -805,8 +1426,7 @@ def get_command(args: argparse.Namespace) -> int:
805
1426
  last_missing: FileNotFoundError | None = None
806
1427
  for directory in artifact_read_directories(args.dir):
807
1428
  try:
808
- metadata = load_metadata(directory, artifact_id)
809
- content_path, _meta_path = artifact_paths(directory, artifact_id)
1429
+ metadata, _content_path, content = load_verified_artifact(directory, artifact_id)
810
1430
  break
811
1431
  except FileNotFoundError as exc:
812
1432
  last_missing = exc
@@ -815,19 +1435,9 @@ def get_command(args: argparse.Namespace) -> int:
815
1435
  raise last_missing
816
1436
  raise FileNotFoundError(f"artifact not found: {artifact_id}")
817
1437
  stored_output = metadata.get("stored_output")
818
- expected_sha = stored_output.get("sha256") if isinstance(stored_output, dict) else None
819
- if not isinstance(expected_sha, str) or not re.fullmatch(r"[a-f0-9]{64}", expected_sha):
820
- raise ValueError(f"artifact metadata missing stored_output sha256: {artifact_id}")
821
1438
  expected_bytes = stored_output.get("bytes") if isinstance(stored_output, dict) else None
822
- if not isinstance(expected_bytes, int) or expected_bytes < 0 or expected_bytes > MAX_MAX_BYTES:
1439
+ if not isinstance(expected_bytes, int):
823
1440
  raise ValueError(f"artifact metadata has invalid stored_output bytes: {artifact_id}")
824
- actual_size = regular_private_file_size(content_path)
825
- if actual_size != expected_bytes:
826
- raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
827
- content = read_bounded_private_text(content_path, expected_bytes)
828
- actual_sha = hashlib.sha256(content.encode("utf-8", errors="replace")).hexdigest()
829
- if actual_sha != expected_sha:
830
- raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
831
1441
  default_max_chars = max(DEFAULT_MAX_CHARS, expected_bytes) if full else DEFAULT_MAX_CHARS
832
1442
  max_chars = bounded_int(args.max_chars, default_max_chars, 1, MAX_MAX_BYTES)
833
1443
  line_range = parse_line_range(args.lines)
@@ -856,6 +1466,176 @@ def get_command(args: argparse.Namespace) -> int:
856
1466
  return 0
857
1467
 
858
1468
 
1469
def receipt_command(args: argparse.Namespace) -> int:
    """Handle the ``receipt`` subcommand: print a receipt for a stored artifact.

    Each directory from ``artifact_read_directories(args.dir)`` is tried in
    turn until the artifact loads and verifies. The receipt is printed as
    JSON when ``args.json`` is set, otherwise as ``key=value`` lines.

    Returns:
        ``0`` on success; ``1`` after reporting a lookup, validation or I/O
        error on stderr.
    """
    artifact_id = args.artifact_id
    try:
        located = False
        missing: FileNotFoundError | None = None
        for directory in artifact_read_directories(args.dir):
            try:
                metadata, _path, _text = load_verified_artifact(directory, artifact_id)
            except FileNotFoundError as exc:
                # Remember the miss and keep trying the remaining directories.
                missing = exc
                continue
            located = True
            break
        if not located:
            if missing is not None:
                raise missing
            raise FileNotFoundError(f"artifact not found: {artifact_id}")
        receipt = receipt_for(metadata, raw_dir=args.dir, show_paths=bool(getattr(args, "show_paths", False)))
    except (FileNotFoundError, ValueError, OSError, json.JSONDecodeError) as exc:
        print(f"context-guard-artifact: {exc}", file=sys.stderr)
        return 1

    if args.json:
        print(json.dumps(receipt, ensure_ascii=False, indent=2, sort_keys=True))
        return 0

    sandbox = receipt.get("output_sandbox")
    sandbox_is_dict = isinstance(sandbox, dict)
    handle = sandbox.get("handle") if sandbox_is_dict else artifact_handle(artifact_id)
    print(f"artifact_id={artifact_id}")
    print(f"handle={handle}")

    stored = receipt.get("stored_output")
    if isinstance(stored, dict):
        print(f"stored_output={stored.get('lines')} lines/{stored.get('bytes')} bytes")

    rehydration = sandbox.get("rehydration") if sandbox_is_dict else None
    commands = rehydration.get("commands") if isinstance(rehydration, dict) else None
    if isinstance(commands, list):
        # Show at most the first four rehydration commands that carry a CLI.
        for command in commands[:4]:
            if isinstance(command, dict) and command.get("cli"):
                print(f"rehydrate={command.get('cli')}")
    print("claim_boundary=local sanitized artifact; no hosted token/cost savings claim")
    return 0
1505
+
1506
+
1507
def search_command(args: argparse.Namespace) -> int:
    """Handle the ``search`` subcommand: literal search across stored artifacts.

    Metadata candidates are gathered from each readable artifact directory,
    every distinct artifact is loaded and verified, and its content is
    searched line by line. Numeric limits are normalised with ``bounded_int``.
    The result is printed as JSON when ``args.json`` is set, otherwise as
    one line per match followed by its rehydration command.

    Returns:
        ``0`` on success; ``1`` after reporting a validation or I/O error
        on stderr.
    """
    try:
        literal = search_literal(args.pattern)
        artifact_cap = bounded_int(args.max_artifacts, DEFAULT_SEARCH_MAX_ARTIFACTS, 1, MAX_SEARCH_MAX_ARTIFACTS)
        match_cap = bounded_int(args.max_matches, DEFAULT_SEARCH_MAX_MATCHES, 1, MAX_SEARCH_MAX_MATCHES)
        context_lines = bounded_int(args.context_lines, DEFAULT_SEARCH_CONTEXT_LINES, 0, MAX_SEARCH_CONTEXT_LINES)
        snippet_chars = bounded_int(args.max_snippet_chars, DEFAULT_SEARCH_SNIPPET_CHARS, 1, MAX_SEARCH_SNIPPET_CHARS)
        ignore_case = bool(args.ignore_case)
        show_paths = bool(getattr(args, "show_paths", False))

        records: list[dict[str, object]] = []
        known_ids: set[str] = set()
        matched_ids: set[str] = set()
        scanned = 0
        skipped = 0
        matched_line_total = 0
        candidates_seen = 0
        truncated = False
        truncated_count = 0

        for directory in artifact_read_directories(args.dir):
            # The artifact cap is a budget shared across all directories.
            budget = artifact_cap - candidates_seen
            if budget <= 0:
                truncated = True
                break
            try:
                reject_symlink_components(directory)
                usable_dir = directory.is_dir() and not directory.is_symlink()
            except RuntimeError:
                usable_dir = False
            if not usable_dir:
                continue

            meta_paths, skipped_here, truncated_here = metadata_candidate_paths(directory, budget)
            skipped += skipped_here
            if truncated_here:
                truncated = True
                truncated_count += truncated_here

            for meta_path in meta_paths:
                candidates_seen += 1
                try:
                    data = json.loads(read_bounded_private_text(meta_path, MAX_METADATA_BYTES))
                except (OSError, ValueError, RuntimeError, json.JSONDecodeError):
                    skipped += 1
                    continue

                artifact_id = str(data.get("artifact_id", "")) if isinstance(data, dict) else ""
                well_formed = isinstance(data, dict) and ARTIFACT_ID_RE.fullmatch(artifact_id) is not None
                if not well_formed or artifact_id in known_ids:
                    skipped += 1
                    continue
                known_ids.add(artifact_id)

                # Defensive guard kept from the original flow.
                if scanned >= artifact_cap:
                    truncated = True
                    truncated_count += 1
                    continue

                try:
                    metadata, _path, content = load_verified_artifact(directory, artifact_id)
                except (OSError, ValueError, RuntimeError, json.JSONDecodeError):
                    skipped += 1
                    continue
                scanned += 1

                found, found_count = search_artifact_content(
                    artifact_id=artifact_id,
                    metadata=metadata,
                    content=content,
                    literal=literal,
                    ignore_case=ignore_case,
                    context_lines=context_lines,
                    snippet_chars=snippet_chars,
                    remaining_matches=max(0, match_cap - len(records)),
                    raw_dir=args.dir,
                    show_paths=show_paths,
                )
                if found_count:
                    matched_ids.add(artifact_id)
                matched_line_total += found_count
                records.extend(found)

        query_section = {
            "label": safe_query_label(literal),
            "raw_pattern_stored": False,
            "literal": True,
            "ignore_case": ignore_case,
        }
        limits_section = {
            "max_artifacts": artifact_cap,
            "max_matches": match_cap,
            "context_lines": context_lines,
            "max_snippet_chars": snippet_chars,
        }
        payload = {
            "tool": "context-guard-artifact",
            "schema_version": SEARCH_SCHEMA_VERSION,
            "mode": "search",
            "query": query_section,
            "artifact_dir": artifact_dir_label(args.dir),
            "scanned_artifacts": scanned,
            "skipped_artifacts": skipped,
            "matched_artifacts": len(matched_ids),
            "matched_lines": matched_line_total,
            "metadata_candidates_scanned": candidates_seen,
            "matches": records,
            # Matching lines that were counted but not returned as records.
            "matches_truncated_count": max(0, matched_line_total - match_cap),
            "artifact_scan_truncated": truncated,
            "artifact_scan_truncated_count": truncated_count,
            "artifact_scan_truncated_count_mode": SEARCH_TRUNCATED_COUNT_UNKNOWN if truncated else "exact",
            "limits": limits_section,
            "sandbox": {
                "local_only": True,
                "workflow": ["store", "search", "get"],
                "exact_rehydration": "use matches[].retrieval.cli when exact=true; for redacted custom dirs, reuse the same --dir or opt into --show-paths",
            },
            "claim_boundary": {
                "local_only": True,
                "stored_content_is_sanitized_copy": True,
                "hosted_api_token_or_cost_savings_claim_allowed": False,
                "exact_rehydration_required_before_relying_on_omitted_detail": True,
            },
        }
    except (FileNotFoundError, ValueError, OSError, json.JSONDecodeError) as exc:
        print(f"context-guard-artifact: {exc}", file=sys.stderr)
        return 1

    if args.json:
        print(json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True))
        return 0

    shown = payload["matches"]
    for item in shown:
        if not isinstance(item, dict):
            continue
        print(f"{item.get('artifact_id')}:{item.get('line')}: {item.get('text')}")
        retrieval = item.get("retrieval")
        if isinstance(retrieval, dict):
            print(f" rehydrate={retrieval.get('cli')}")
    if not shown:
        print("no matches")
    elif payload["matches_truncated_count"]:
        print(f"matches_truncated_count={payload['matches_truncated_count']}")
    return 0
1637
+
1638
+
859
1639
  def list_command(args: argparse.Namespace) -> int:
860
1640
  items: list[dict[str, object]] = []
861
1641
  seen: set[str] = set()
@@ -874,7 +1654,7 @@ def list_command(args: argparse.Namespace) -> int:
874
1654
  continue
875
1655
  artifact_id = str(data.get("artifact_id", "")) if isinstance(data, dict) else ""
876
1656
  if isinstance(data, dict) and ARTIFACT_ID_RE.fullmatch(artifact_id) and artifact_id not in seen:
877
- items.append(receipt_for(data))
1657
+ items.append(receipt_for(data, raw_dir=args.dir, show_paths=False))
878
1658
  seen.add(artifact_id)
879
1659
  items.sort(key=lambda item: str(item.get("artifact_id", "")))
880
1660
  if args.json:
@@ -915,9 +1695,34 @@ def build_parser() -> argparse.ArgumentParser:
915
1695
  get.add_argument("--json", action="store_true", help="emit query JSON with content")
916
1696
  get.set_defaults(func=get_command)
917
1697
 
1698
+ receipt = subparsers.add_parser("receipt", help="print metadata-only receipt and rehydration handle for a stored artifact")
1699
+ receipt.add_argument("artifact_id")
1700
+ receipt.add_argument(
1701
+ "--show-paths",
1702
+ action="store_true",
1703
+ help="show raw custom --dir values in rehydration commands; local debugging only because private paths may be exposed",
1704
+ )
1705
+ receipt.add_argument("--json", action="store_true", help="emit receipt JSON without artifact content")
1706
+ receipt.set_defaults(func=receipt_command)
1707
+
918
1708
  list_parser = subparsers.add_parser("list", help="list stored artifacts")
919
1709
  list_parser.add_argument("--json", action="store_true", help="emit list JSON")
920
1710
  list_parser.set_defaults(func=list_command)
1711
+
1712
+ search = subparsers.add_parser("search", help="search stored sanitized artifacts by literal text")
1713
+ search.add_argument("pattern", help=f"literal substring to search for (max {MAX_SEARCH_PATTERN_BYTES} UTF-8 bytes)")
1714
+ search.add_argument("--ignore-case", action="store_true", help="case-insensitive literal search")
1715
+ search.add_argument("--context-lines", type=int, default=DEFAULT_SEARCH_CONTEXT_LINES, help=f"context lines around each match (default: {DEFAULT_SEARCH_CONTEXT_LINES})")
1716
+ search.add_argument("--max-artifacts", type=int, default=DEFAULT_SEARCH_MAX_ARTIFACTS, help=f"maximum artifacts to scan (default: {DEFAULT_SEARCH_MAX_ARTIFACTS})")
1717
+ search.add_argument("--max-matches", type=int, default=DEFAULT_SEARCH_MAX_MATCHES, help=f"maximum match records to return (default: {DEFAULT_SEARCH_MAX_MATCHES})")
1718
+ search.add_argument("--max-snippet-chars", type=int, default=DEFAULT_SEARCH_SNIPPET_CHARS, help=f"maximum characters per displayed line (default: {DEFAULT_SEARCH_SNIPPET_CHARS})")
1719
+ search.add_argument(
1720
+ "--show-paths",
1721
+ action="store_true",
1722
+ help="show raw custom --dir values in rehydration commands; local debugging only because private paths may be exposed",
1723
+ )
1724
+ search.add_argument("--json", action="store_true", help="emit sandbox search JSON")
1725
+ search.set_defaults(func=search_command)
921
1726
  return parser
922
1727
 
923
1728