@ictechgy/context-guard 0.4.9 → 0.4.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. package/CHANGELOG.md +16 -0
  2. package/README.ko.md +41 -24
  3. package/README.md +66 -26
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  8. package/docs/distribution.md +10 -7
  9. package/docs/experimental-benchmark-fixtures.md +8 -1
  10. package/package.json +3 -6
  11. package/packaging/homebrew/context-guard.rb.template +1 -1
  12. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  13. package/plugins/context-guard/README.ko.md +9 -6
  14. package/plugins/context-guard/README.md +21 -13
  15. package/plugins/context-guard/bin/context-guard +113 -26
  16. package/plugins/context-guard/bin/context-guard-artifact +542 -46
  17. package/plugins/context-guard/bin/context-guard-cache-score +380 -0
  18. package/plugins/context-guard/bin/context-guard-compress +146 -1
  19. package/plugins/context-guard/bin/context-guard-cost +783 -4
  20. package/plugins/context-guard/bin/context-guard-experiments +99 -18
  21. package/plugins/context-guard/bin/context-guard-failed-nudge +3 -0
  22. package/plugins/context-guard/bin/context-guard-filter +163 -7
  23. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  24. package/plugins/context-guard/bin/context-guard-pack +602 -43
  25. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  26. package/plugins/context-guard/bin/context-guard-setup +165 -31
  27. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  28. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  29. package/plugins/context-guard/bin/context-guard-tool-prune +241 -1
  30. package/plugins/context-guard/lib/context_guard_commands.py +206 -0
  31. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  32. package/context-guard-kit/README.md +0 -91
  33. package/context-guard-kit/benchmark_runner.py +0 -2401
  34. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  35. package/context-guard-kit/context_compress.py +0 -695
  36. package/context-guard-kit/context_escrow.py +0 -935
  37. package/context-guard-kit/context_filter.py +0 -637
  38. package/context-guard-kit/context_guard_cli.py +0 -325
  39. package/context-guard-kit/context_guard_diet.py +0 -1711
  40. package/context-guard-kit/context_pack.py +0 -2713
  41. package/context-guard-kit/cost_guard.py +0 -2349
  42. package/context-guard-kit/experimental_registry.py +0 -4348
  43. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  44. package/context-guard-kit/guard_large_read.py +0 -690
  45. package/context-guard-kit/hook_secret_patterns.py +0 -43
  46. package/context-guard-kit/read_symbol.py +0 -483
  47. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  48. package/context-guard-kit/sanitize_output.py +0 -725
  49. package/context-guard-kit/settings.example.json +0 -67
  50. package/context-guard-kit/setup_wizard.py +0 -2515
  51. package/context-guard-kit/statusline.sh +0 -362
  52. package/context-guard-kit/statusline_merged.sh +0 -157
  53. package/context-guard-kit/tool_schema_pruner.py +0 -837
  54. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -10,6 +10,8 @@ import json
10
10
  import os
11
11
  from pathlib import Path
12
12
  import re
13
+ import secrets
14
+ import shlex
13
15
  import stat
14
16
  import sys
15
17
  import time
@@ -30,6 +32,17 @@ MAX_COMMAND_PREVIEW_BYTES = 2_048
30
32
  MAX_TOP_ERROR_RECEIPTS = 12
31
33
  MAX_DUPLICATE_GROUPS = 12
32
34
  MAX_SUGGESTED_QUERIES = 12
35
+ SEARCH_SCHEMA_VERSION = "contextguard.artifact.search.v1"
36
+ DEFAULT_SEARCH_MAX_ARTIFACTS = 100
37
+ MAX_SEARCH_MAX_ARTIFACTS = 1_000
38
+ DEFAULT_SEARCH_MAX_MATCHES = 40
39
+ MAX_SEARCH_MAX_MATCHES = 1_000
40
+ DEFAULT_SEARCH_CONTEXT_LINES = 1
41
+ MAX_SEARCH_CONTEXT_LINES = 20
42
+ DEFAULT_SEARCH_SNIPPET_CHARS = 360
43
+ MAX_SEARCH_SNIPPET_CHARS = 2_000
44
+ MAX_SEARCH_PATTERN_BYTES = 512
45
+ SEARCH_TRUNCATED_COUNT_UNKNOWN = "lower_bound"
33
46
  ARTIFACT_ID_RE = re.compile(r"^[a-f0-9]{16,64}$")
34
47
  ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
35
48
  "tmp": Path("/private/tmp"),
@@ -183,15 +196,50 @@ def sanitize_one_line(text: str, *, show_paths: bool = False) -> str:
183
196
  return cap_utf8_bytes(cap_line(" ".join(sanitized.strip().split())), MAX_COMMAND_PREVIEW_BYTES)
184
197
 
185
198
 
199
# Platform capability probes, evaluated once when the module is imported.
# True when the os module exposes the O_NOFOLLOW open flag.
NO_FOLLOW_SUPPORTED = hasattr(os, "O_NOFOLLOW")
# Each flag is True when the named os function is listed in
# os.supports_dir_fd, i.e. it accepts a dir_fd= argument on this platform.
DIR_FD_OPEN_SUPPORTED = os.open in os.supports_dir_fd
DIR_FD_MKDIR_SUPPORTED = os.mkdir in os.supports_dir_fd
DIR_FD_STAT_SUPPORTED = os.stat in os.supports_dir_fd
DIR_FD_UNLINK_SUPPORTED = os.unlink in os.supports_dir_fd
204
+
205
+
206
def dir_fd_replace_supported() -> bool:
    """Report whether ``os.replace`` accepts ``src_dir_fd`` and ``dst_dir_fd``.

    Some Python builds support both keyword arguments without listing
    ``os.replace`` in ``os.supports_dir_fd``, so the function signature is
    inspected instead of testing set membership. When the signature cannot be
    introspected the answer is optimistically ``True``.
    """
    import inspect

    try:
        parameters = inspect.signature(os.replace).parameters
    except (TypeError, ValueError):
        return True
    return all(name in parameters for name in ("src_dir_fd", "dst_dir_fd"))


# Probed once at import time; consulted before any dir_fd-relative replace.
DIR_FD_REPLACE_SUPPORTED = dir_fd_replace_supported()
220
+
221
+
222
def os_error_detail(exc: OSError) -> str:
    """Return a short description of ``exc`` for use in error messages.

    The text is the first non-empty value among ``exc.strerror``,
    ``str(exc)`` and the exception class name. ``" (errno N)"`` is appended
    when the exception carries an errno.
    """
    message = exc.strerror or str(exc) or exc.__class__.__name__
    if exc.errno is None:
        return message
    return f"{message} (errno {exc.errno})"
227
+
228
+
229
def reject_parent_traversal(path: Path, *, label: str) -> None:
    """Raise ``ValueError`` if ``path`` (after ``~`` expansion) has a ``..`` part.

    ``label`` names the offending argument in the error message.
    """
    if ".." in path.expanduser().parts:
        raise ValueError(f"{label} must not contain parent traversal")
232
+
233
+
186
234
  def ensure_private_dir(path: Path) -> None:
187
- path = normalize_allowed_first_absolute_symlink(path)
188
- reject_symlink_components(path)
189
- path.mkdir(parents=True, exist_ok=True)
190
- reject_symlink_components(path)
235
+ fd = open_private_directory_no_follow(path, label="artifact directory", create=True)
191
236
  try:
192
- os.chmod(path, 0o700)
193
- except OSError:
194
- pass
237
+ try:
238
+ os.fchmod(fd, 0o700)
239
+ except OSError:
240
+ pass
241
+ finally:
242
+ os.close(fd)
195
243
 
196
244
 
197
245
  def reject_symlink_components(path: Path) -> None:
@@ -243,33 +291,156 @@ def read_bounded_private_text(path: Path, max_bytes: int) -> str:
243
291
  os.close(fd)
244
292
 
245
293
 
246
- def write_private_text(path: Path, text: str) -> None:
247
- path = normalize_allowed_first_absolute_symlink(path)
248
- ensure_private_dir(path.parent)
249
- tmp = path.with_name(path.name + f".tmp-{os.getpid()}-{time.time_ns()}")
250
- flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | getattr(os, "O_NOFOLLOW", 0)
251
- fd = os.open(str(tmp), flags, 0o600)
294
def no_follow_dir_flags() -> int:
    """Return the ``os.open`` flags used to open a directory component.

    The result is always ``O_RDONLY | O_NOFOLLOW``, plus ``O_CLOEXEC`` and
    ``O_DIRECTORY`` where the platform defines them.

    Raises:
        RuntimeError: if ``O_NOFOLLOW`` is unavailable on this platform.
    """
    if not NO_FOLLOW_SUPPORTED:
        raise RuntimeError("artifact writes require O_NOFOLLOW support")
    flags = os.O_RDONLY | os.O_NOFOLLOW
    # Optional flags contribute 0 when the os module does not define them.
    for optional in ("O_CLOEXEC", "O_DIRECTORY"):
        flags |= getattr(os, optional, 0)
    return flags
303
+
304
+
305
def temp_file_flags() -> int:
    """Return the ``os.open`` flags used to create a temporary artifact file.

    The result is always ``O_WRONLY | O_CREAT | O_EXCL | O_NOFOLLOW``, plus
    ``O_CLOEXEC`` and ``O_NOCTTY`` where the platform defines them.

    Raises:
        RuntimeError: if ``O_NOFOLLOW`` is unavailable on this platform.
    """
    if not NO_FOLLOW_SUPPORTED:
        raise RuntimeError("artifact writes require O_NOFOLLOW support")
    flags = os.O_WRONLY | os.O_CREAT | os.O_EXCL | os.O_NOFOLLOW
    # Optional flags contribute 0 when the os module does not define them.
    for optional in ("O_CLOEXEC", "O_NOCTTY"):
        flags |= getattr(os, optional, 0)
    return flags
314
+
315
+
316
+ def open_private_directory_no_follow(path: Path, *, label: str, create: bool) -> int:
317
+ reject_parent_traversal(path, label=label)
318
+ path = normalize_allowed_first_absolute_symlink(path.expanduser())
319
+ if not DIR_FD_OPEN_SUPPORTED:
320
+ raise RuntimeError(f"{label} requires dir_fd open support")
321
+ if create and not DIR_FD_MKDIR_SUPPORTED:
322
+ raise RuntimeError(f"{label} requires dir_fd mkdir support")
323
+ flags = no_follow_dir_flags()
324
+ if path.is_absolute():
325
+ current_fd = os.open(path.anchor or os.sep, os.O_RDONLY | (os.O_CLOEXEC if hasattr(os, "O_CLOEXEC") else 0))
326
+ parts = path.parts[1:]
327
+ else:
328
+ current_fd = os.open(".", flags)
329
+ parts = path.parts
252
330
  try:
253
- with os.fdopen(fd, "w", encoding="utf-8", newline="") as handle:
254
- handle.write(text)
255
- except Exception:
256
- try:
257
- tmp.unlink()
258
- except FileNotFoundError:
259
- pass
260
- raise
331
+ for part in parts:
332
+ if part in {"", "."}:
333
+ continue
334
+ if part == "..":
335
+ raise RuntimeError(f"{label} must not contain parent traversal")
336
+ try:
337
+ next_fd = os.open(part, flags, dir_fd=current_fd)
338
+ except FileNotFoundError:
339
+ if not create:
340
+ raise
341
+ os.mkdir(part, 0o700, dir_fd=current_fd)
342
+ next_fd = os.open(part, flags, dir_fd=current_fd)
343
+ try:
344
+ if not stat.S_ISDIR(os.fstat(next_fd).st_mode):
345
+ raise RuntimeError(f"{label} must not traverse non-directory components")
346
+ except Exception:
347
+ os.close(next_fd)
348
+ raise
349
+ os.close(current_fd)
350
+ current_fd = next_fd
351
+ owned_fd = current_fd
352
+ current_fd = -1
353
+ return owned_fd
354
+ except OSError as exc:
355
+ raise RuntimeError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
356
+ finally:
357
+ if current_fd >= 0:
358
+ os.close(current_fd)
359
+
360
+
361
+ def precheck_artifact_leaf(parent_fd: int, leaf: str, *, label: str) -> None:
362
+ if not DIR_FD_STAT_SUPPORTED:
363
+ raise RuntimeError(f"{label} requires dir_fd stat support")
261
364
  try:
262
- os.replace(tmp, path)
263
- except Exception:
264
- try:
265
- tmp.unlink()
266
- except FileNotFoundError:
267
- pass
268
- raise
365
+ st = os.stat(leaf, dir_fd=parent_fd, follow_symlinks=False)
366
+ except FileNotFoundError:
367
+ return
368
+ except OSError as exc:
369
+ raise RuntimeError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
370
+ if not stat.S_ISREG(st.st_mode):
371
+ raise RuntimeError(f"{label} must be missing or a regular file")
372
+
373
+
374
def write_all_fd(fd: int, data: bytes) -> None:
    """Write all of ``data`` to ``fd``, looping over partial writes.

    Raises:
        OSError: ``"short write"`` if ``os.write`` reports that no bytes were
            written; errors raised by ``os.write`` itself propagate unchanged.
    """
    remaining = memoryview(data)
    while remaining:
        count = os.write(fd, remaining)
        if count <= 0:
            raise OSError("short write")
        # Slicing a memoryview does not copy the unwritten tail.
        remaining = remaining[count:]
382
+
383
+
384
+ def fsync_required(fd: int, *, label: str, committed: bool = False) -> None:
269
385
  try:
270
- os.chmod(path, 0o600)
386
+ os.fsync(fd)
387
+ except OSError as exc:
388
+ if committed:
389
+ raise RuntimeError(f"committed_but_parent_fsync_failed: {os_error_detail(exc)}") from exc
390
+ raise RuntimeError(f"could not fsync {label}: {os_error_detail(exc)}") from exc
391
+
392
+
393
def write_private_text(path: Path, text: str) -> None:
    """Atomically write ``text`` (UTF-8) to ``path`` via a private temp file.

    Steps, all performed relative to a no-follow descriptor for the parent
    directory (which is created if missing):

    1. best-effort ``fchmod`` of the parent directory to ``0o700``;
    2. check that the destination leaf is missing or a regular file;
    3. create ``.<leaf>.<pid>.<random>.tmp`` exclusively with mode ``0o600``
       (up to 20 name attempts), write the data and fsync it;
    4. fsync the directory, ``os.replace`` the temp file over the leaf, then
       fsync the directory again.

    Raises:
        ValueError: if ``path`` contains a ``..`` component.
        RuntimeError: if a required platform capability is missing, the leaf
            name is unusable, or any step above fails. OS errors are wrapped
            as ``"could not write artifact file: ..."``.
    """
    reject_parent_traversal(path, label="artifact file")
    path = normalize_allowed_first_absolute_symlink(path.expanduser())
    if not DIR_FD_REPLACE_SUPPORTED:
        raise RuntimeError("artifact writes require dir_fd replace support")
    if not DIR_FD_UNLINK_SUPPORTED:
        raise RuntimeError("artifact writes require dir_fd unlink support")
    parent_fd = open_private_directory_no_follow(path.parent, label="artifact directory", create=True)
    fd = -1
    temp_leaf: str | None = None
    try:
        try:
            os.fchmod(parent_fd, 0o700)
        except OSError:
            # Best effort: tightening the directory mode is not required to proceed.
            pass
        leaf = path.name
        if leaf in {"", ".", ".."}:
            raise RuntimeError("artifact file must name a regular file")
        precheck_artifact_leaf(parent_fd, leaf, label="artifact file")
        for _attempt in range(20):
            candidate = f".{leaf}.{os.getpid()}.{secrets.token_hex(8)}.tmp"
            try:
                fd = os.open(candidate, temp_file_flags(), 0o600, dir_fd=parent_fd)
            except FileExistsError:
                continue
            temp_leaf = candidate
            break
        if fd < 0 or temp_leaf is None:
            raise RuntimeError("could not create temporary artifact file")
        if not stat.S_ISREG(os.fstat(fd).st_mode):
            raise RuntimeError("temporary artifact file must be a regular file")
        os.fchmod(fd, 0o600)
        write_all_fd(fd, text.encode("utf-8"))
        fsync_required(fd, label="artifact temp file")
        # Forget the descriptor before closing it so that a failing close()
        # can never lead to a second close() of the same number in cleanup.
        closing_fd, fd = fd, -1
        os.close(closing_fd)
        fsync_required(parent_fd, label="artifact directory before replace")
        os.replace(temp_leaf, leaf, src_dir_fd=parent_fd, dst_dir_fd=parent_fd)
        # From here on the temp name no longer exists; nothing to unlink.
        temp_leaf = None
        fsync_required(parent_fd, label="artifact directory after replace", committed=True)
    except OSError as exc:
        raise RuntimeError(f"could not write artifact file: {os_error_detail(exc)}") from exc
    finally:
        try:
            if fd >= 0:
                # Only reached on a failure path; do not let a cleanup error
                # mask the original exception or skip the steps below.
                try:
                    os.close(fd)
                except OSError:
                    pass
            if temp_leaf is not None:
                try:
                    os.unlink(temp_leaf, dir_fd=parent_fd)
                except OSError:
                    pass
        finally:
            # Always release the directory descriptor, whatever happened above.
            os.close(parent_fd)
273
444
 
274
445
 
275
446
  def read_bounded_stdin(max_bytes: int) -> tuple[str, bool, int]:
@@ -283,6 +454,7 @@ def read_bounded_stdin(max_bytes: int) -> tuple[str, bool, int]:
283
454
def artifact_paths(directory: Path, artifact_id: str) -> tuple[Path, Path]:
    """Return the ``(content, metadata)`` file paths for ``artifact_id``.

    The content file is ``<id>.txt`` and the metadata file is ``<id>.json``,
    both inside ``directory`` after first-component symlink normalization.

    Raises:
        ValueError: if ``artifact_id`` does not match ``ARTIFACT_ID_RE`` or
            ``directory`` contains a ``..`` component.
    """
    if ARTIFACT_ID_RE.fullmatch(artifact_id) is None:
        raise ValueError("artifact id must be 16-64 lowercase hex chars")
    reject_parent_traversal(directory, label="artifact directory")
    base = normalize_allowed_first_absolute_symlink(directory)
    content_path = base / f"{artifact_id}.txt"
    metadata_path = base / f"{artifact_id}.json"
    return content_path, metadata_path
288
460
 
@@ -295,15 +467,21 @@ def artifact_read_directories(raw_dir: str) -> list[Path]:
295
467
  default. Reads and listings include that legacy default so old receipts keep
296
468
  working; stores intentionally continue to use only the new path.
297
469
  """
298
- primary = normalize_allowed_first_absolute_symlink(Path(raw_dir).expanduser())
470
+ raw_path = Path(raw_dir).expanduser()
471
+ reject_parent_traversal(raw_path, label="artifact directory")
472
+ primary = normalize_allowed_first_absolute_symlink(raw_path)
299
473
  directories = [primary]
300
- if Path(raw_dir).expanduser() == Path(DEFAULT_ARTIFACT_DIR):
474
+ if default_artifact_dir_requested(raw_dir):
301
475
  legacy = normalize_allowed_first_absolute_symlink(Path(LEGACY_ARTIFACT_DIR).expanduser())
302
476
  if legacy != primary:
303
477
  directories.append(legacy)
304
478
  return directories
305
479
 
306
480
 
481
def default_artifact_dir_requested(raw_dir: str) -> bool:
    """Return True when ``raw_dir`` (after ``~`` expansion) equals ``DEFAULT_ARTIFACT_DIR``."""
    requested = Path(raw_dir).expanduser()
    return requested == Path(DEFAULT_ARTIFACT_DIR)
483
+
484
+
307
485
  CONTENT_TYPE_VALUES = ("json", "diff", "log", "search", "code", "prose", "text")
308
486
  # Recommended retrieval strategy per content type. Pattern-oriented payloads
309
487
  # (logs, search hits, diffs) are best sliced by `--pattern`; structured or
@@ -449,8 +627,27 @@ def build_retrieval_hints(
449
627
  return hints
450
628
 
451
629
 
452
- def line_query_cli(artifact_id: str, start: int, end: int) -> str:
453
- cli = f"context-guard-artifact get {artifact_id} --lines {start}:{end}"
630
def artifact_dir_cli_prefix(raw_dir: str | None, *, show_paths: bool = False) -> str:
    """Return the command prefix to use in suggested rehydration commands.

    No ``--dir`` is emitted for an empty or default directory. A custom
    directory is shell-quoted when ``show_paths`` is true and replaced by the
    ``<artifact_dir>`` placeholder otherwise.
    """
    if not raw_dir or default_artifact_dir_requested(raw_dir):
        return "context-guard-artifact"
    if show_paths:
        return f"context-guard-artifact --dir {shlex.quote(raw_dir)}"
    return "context-guard-artifact --dir <artifact_dir>"
636
+
637
+
638
def artifact_dir_cli_is_exact(raw_dir: str | None, *, show_paths: bool = False) -> bool:
    """Return whether a suggested CLI can be run verbatim.

    That holds when no custom directory is involved or when the raw
    directory is included because ``show_paths`` is set.
    """
    if not raw_dir:
        return True
    return default_artifact_dir_requested(raw_dir) or show_paths
640
+
641
+
642
+ def line_query_cli(
643
+ artifact_id: str,
644
+ start: int,
645
+ end: int,
646
+ *,
647
+ raw_dir: str | None = None,
648
+ show_paths: bool = False,
649
+ ) -> str:
650
+ cli = f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --lines {start}:{end}"
454
651
  requested_lines = end - start + 1
455
652
  if requested_lines > DEFAULT_MAX_LINES:
456
653
  cli += f" --max-lines {min(requested_lines, MAX_QUERY_LINES)}"
@@ -745,6 +942,26 @@ def load_metadata(directory: Path, artifact_id: str) -> dict[str, object]:
745
942
  return data
746
943
 
747
944
 
945
def load_verified_artifact(directory: Path, artifact_id: str) -> tuple[dict[str, object], Path, str]:
    """Load an artifact and verify its content against the stored metadata.

    Returns:
        ``(metadata, content_path, content)``.

    Raises:
        ValueError: if ``stored_output.sha256`` is not a 64-char lowercase hex
            string, ``stored_output.bytes`` is not an int in
            ``0..MAX_MAX_BYTES``, or the on-disk size or SHA-256 of the
            content differs from the recorded values.
    """
    metadata = load_metadata(directory, artifact_id)
    content_path, _meta_path = artifact_paths(directory, artifact_id)
    stored_output = metadata.get("stored_output")
    # A non-dict "stored_output" behaves as if both recorded fields were missing.
    recorded = stored_output if isinstance(stored_output, dict) else {}

    expected_sha = recorded.get("sha256")
    if not (isinstance(expected_sha, str) and re.fullmatch(r"[a-f0-9]{64}", expected_sha)):
        raise ValueError(f"artifact metadata missing stored_output sha256: {artifact_id}")

    expected_bytes = recorded.get("bytes")
    if not isinstance(expected_bytes, int) or not 0 <= expected_bytes <= MAX_MAX_BYTES:
        raise ValueError(f"artifact metadata has invalid stored_output bytes: {artifact_id}")

    # Size is compared first so the read below is bounded by the recorded size.
    if regular_private_file_size(content_path) != expected_bytes:
        raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
    content = read_bounded_private_text(content_path, expected_bytes)
    digest = hashlib.sha256(content.encode("utf-8", errors="replace")).hexdigest()
    if digest != expected_sha:
        raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
    return metadata, content_path, content
963
+
964
+
748
965
  def parse_line_range(value: str | None) -> tuple[int, int] | None:
749
966
  if not value:
750
967
  return None
@@ -766,6 +983,149 @@ def cap_text(text: str, max_chars: int) -> tuple[str, bool]:
766
983
  return text[:keep].rstrip() + marker, True
767
984
 
768
985
 
986
def search_literal(value: str) -> str:
    """Validate a literal search pattern and return it unchanged.

    Raises:
        ValueError: if the pattern is empty, contains a NUL character, or its
            UTF-8 encoding is longer than ``MAX_SEARCH_PATTERN_BYTES``.
    """
    if not value:
        raise ValueError("search pattern must not be empty")
    if "\x00" in value:
        raise ValueError("search pattern must not contain NUL bytes")
    encoded = value.encode("utf-8", errors="replace")
    if len(encoded) > MAX_SEARCH_PATTERN_BYTES:
        raise ValueError(f"search pattern exceeds {MAX_SEARCH_PATTERN_BYTES} bytes")
    return value
995
+
996
+
997
def safe_query_label(value: str) -> str:
    """Return ``value`` passed through ``sanitize_one_line`` with paths hidden."""
    label = sanitize_one_line(value, show_paths=False)
    return label
999
+
1000
+
1001
def artifact_dir_label(raw_dir: str) -> str:
    """Return ``"default"`` for the default artifact directory, else a sanitized label."""
    is_default = default_artifact_dir_requested(raw_dir)
    return "default" if is_default else sanitize_one_line(raw_dir, show_paths=False)
1005
+
1006
+
1007
def metadata_text_field(metadata: dict[str, object], key: str) -> str | None:
    """Return ``metadata[key]`` sanitized to one line, or ``None`` if it is not a string."""
    value = metadata.get(key)
    return sanitize_one_line(value, show_paths=False) if isinstance(value, str) else None
1012
+
1013
+
1014
def metadata_content_type(metadata: dict[str, object]) -> str:
    """Return the metadata's ``content_type`` if it is a known value, else ``"text"``."""
    declared = metadata.get("content_type")
    if isinstance(declared, str) and declared in CONTENT_TYPE_VALUES:
        return declared
    return "text"
1017
+
1018
+
1019
def metadata_candidate_paths(directory: Path, limit: int) -> tuple[list[Path], int, int]:
    """Collect up to ``limit`` artifact metadata paths from ``directory``.

    Only entries named ``<artifact id>.json`` that are regular files (symlinks
    are not followed) are accepted. Other ``*.json`` entries are counted as
    skipped; entries without a ``.json`` suffix are ignored.

    Returns:
        ``(paths, skipped, truncated_lower_bound)``. ``paths`` is sorted.
        ``truncated_lower_bound`` is 1 when at least one further valid
        candidate existed beyond ``limit`` (scanning stops at the first one,
        so it is a lower bound, not a total) and 0 otherwise. If the
        directory cannot be scanned, or scanning fails part-way, the
        candidates gathered so far are returned and ``skipped`` is
        incremented once more.
    """
    candidates: list[Path] = []
    skipped = 0
    truncated_lower_bound = 0
    if limit <= 0:
        return candidates, skipped, truncated_lower_bound
    try:
        with os.scandir(directory) as entries:
            for entry in entries:
                name = entry.name
                if not name.endswith(".json"):
                    continue
                if not ARTIFACT_ID_RE.fullmatch(name[:-5]):
                    skipped += 1
                    continue
                try:
                    is_regular = entry.is_file(follow_symlinks=False)
                except OSError:
                    is_regular = False
                if not is_regular:
                    skipped += 1
                    continue
                if len(candidates) >= limit:
                    truncated_lower_bound += 1
                    break
                candidates.append(directory / name)
    except OSError:
        # Count the failed scan itself as one skipped item.
        skipped += 1
    # os.scandir yields entries in arbitrary order; sort on every path
    # (including the error path) so callers see a deterministic sequence.
    return sorted(candidates), skipped, truncated_lower_bound
1048
+
1049
+
1050
def search_match_record(
    *,
    artifact_id: str,
    line_number: int,
    lines: list[str],
    context_lines: int,
    snippet_chars: int,
    metadata: dict[str, object],
    raw_dir: str,
    show_paths: bool,
) -> dict[str, object]:
    """Build the JSON-ready record for one matching line.

    ``line_number`` is 1-based. The record carries the capped matching line,
    up to ``context_lines`` capped lines on each side, selected metadata, and
    a ``retrieval`` section describing how to fetch the same line window
    with ``get --lines``.
    """
    window_start = max(1, line_number - context_lines)
    window_end = min(len(lines), line_number + context_lines)
    exact = artifact_dir_cli_is_exact(raw_dir, show_paths=show_paths)

    def snippet(number: int) -> str:
        return cap_line(lines[number - 1].rstrip("\n"), limit=snippet_chars)

    def numbered(first: int, last: int) -> list[dict[str, object]]:
        return [{"line": number, "text": snippet(number)} for number in range(first, last + 1)]

    if default_artifact_dir_requested(raw_dir):
        dir_argument = "default"
    elif show_paths:
        dir_argument = "included"
    else:
        dir_argument = "redacted"

    if exact:
        note = None
    else:
        note = "custom artifact directory is redacted; rerun with the same --dir used for search, or pass search --show-paths to emit a directly executable local CLI"

    retrieval = {
        "selector": {"type": "lines", "start": window_start, "end": window_end},
        "cli": line_query_cli(artifact_id, window_start, window_end, raw_dir=raw_dir, show_paths=show_paths),
        "exact": exact,
        "dir_argument": dir_argument,
        "note": note,
    }
    return {
        "artifact_id": artifact_id,
        "line": line_number,
        "text": snippet(line_number),
        "context_before": numbered(window_start, line_number - 1),
        "context_after": numbered(line_number + 1, window_end),
        "content_type": metadata_content_type(metadata),
        "command_preview": metadata_text_field(metadata, "command_preview"),
        "retrieval": retrieval,
    }
1088
+
1089
+
1090
def search_artifact_content(
    *,
    artifact_id: str,
    metadata: dict[str, object],
    content: str,
    literal: str,
    ignore_case: bool,
    context_lines: int,
    snippet_chars: int,
    remaining_matches: int,
    raw_dir: str,
    show_paths: bool,
) -> tuple[list[dict[str, object]], int]:
    """Find lines of ``content`` containing ``literal`` as a substring.

    Returns:
        ``(records, matched_lines)``: records for at most
        ``remaining_matches`` matching lines (earliest first), and the total
        number of matching lines, which keeps counting past that cap.
    """
    lines = content.splitlines()
    needle = literal.casefold() if ignore_case else literal
    if ignore_case:
        hit_numbers = [number for number, line in enumerate(lines, start=1) if needle in line.casefold()]
    else:
        hit_numbers = [number for number, line in enumerate(lines, start=1) if needle in line]
    # Only the first `remaining_matches` hits are expanded into full records.
    reported = hit_numbers[: max(0, remaining_matches)]
    records = [
        search_match_record(
            artifact_id=artifact_id,
            line_number=number,
            lines=lines,
            context_lines=context_lines,
            snippet_chars=snippet_chars,
            metadata=metadata,
            raw_dir=raw_dir,
            show_paths=show_paths,
        )
        for number in reported
    ]
    return records, len(hit_numbers)
1127
+
1128
+
769
1129
  def query_content(
770
1130
  content: str,
771
1131
  *,
@@ -805,8 +1165,7 @@ def get_command(args: argparse.Namespace) -> int:
805
1165
  last_missing: FileNotFoundError | None = None
806
1166
  for directory in artifact_read_directories(args.dir):
807
1167
  try:
808
- metadata = load_metadata(directory, artifact_id)
809
- content_path, _meta_path = artifact_paths(directory, artifact_id)
1168
+ metadata, _content_path, content = load_verified_artifact(directory, artifact_id)
810
1169
  break
811
1170
  except FileNotFoundError as exc:
812
1171
  last_missing = exc
@@ -815,19 +1174,9 @@ def get_command(args: argparse.Namespace) -> int:
815
1174
  raise last_missing
816
1175
  raise FileNotFoundError(f"artifact not found: {artifact_id}")
817
1176
  stored_output = metadata.get("stored_output")
818
- expected_sha = stored_output.get("sha256") if isinstance(stored_output, dict) else None
819
- if not isinstance(expected_sha, str) or not re.fullmatch(r"[a-f0-9]{64}", expected_sha):
820
- raise ValueError(f"artifact metadata missing stored_output sha256: {artifact_id}")
821
1177
  expected_bytes = stored_output.get("bytes") if isinstance(stored_output, dict) else None
822
- if not isinstance(expected_bytes, int) or expected_bytes < 0 or expected_bytes > MAX_MAX_BYTES:
1178
+ if not isinstance(expected_bytes, int):
823
1179
  raise ValueError(f"artifact metadata has invalid stored_output bytes: {artifact_id}")
824
- actual_size = regular_private_file_size(content_path)
825
- if actual_size != expected_bytes:
826
- raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
827
- content = read_bounded_private_text(content_path, expected_bytes)
828
- actual_sha = hashlib.sha256(content.encode("utf-8", errors="replace")).hexdigest()
829
- if actual_sha != expected_sha:
830
- raise ValueError(f"artifact content checksum mismatch: {artifact_id}")
831
1180
  default_max_chars = max(DEFAULT_MAX_CHARS, expected_bytes) if full else DEFAULT_MAX_CHARS
832
1181
  max_chars = bounded_int(args.max_chars, default_max_chars, 1, MAX_MAX_BYTES)
833
1182
  line_range = parse_line_range(args.lines)
@@ -856,6 +1205,138 @@ def get_command(args: argparse.Namespace) -> int:
856
1205
  return 0
857
1206
 
858
1207
 
1208
def search_command(args: argparse.Namespace) -> int:
    """Handle ``search``: literal substring search across stored artifacts.

    Scans the metadata files of every readable artifact directory (bounded by
    ``--max-artifacts``), loads each artifact through
    ``load_verified_artifact``, and collects up to ``--max-matches`` match
    records. Output is a JSON payload with ``--json``, otherwise one line per
    match followed by its rehydration command.

    Returns:
        0 on success; 1 after printing the error to stderr when a
        ``ValueError`` or ``OSError`` escapes the search.
    """
    try:
        literal = search_literal(args.pattern)
        max_artifacts = bounded_int(args.max_artifacts, DEFAULT_SEARCH_MAX_ARTIFACTS, 1, MAX_SEARCH_MAX_ARTIFACTS)
        max_matches = bounded_int(args.max_matches, DEFAULT_SEARCH_MAX_MATCHES, 1, MAX_SEARCH_MAX_MATCHES)
        context_lines = bounded_int(args.context_lines, DEFAULT_SEARCH_CONTEXT_LINES, 0, MAX_SEARCH_CONTEXT_LINES)
        snippet_chars = bounded_int(args.max_snippet_chars, DEFAULT_SEARCH_SNIPPET_CHARS, 1, MAX_SEARCH_SNIPPET_CHARS)
        ignore_case = bool(args.ignore_case)
        show_paths = bool(getattr(args, "show_paths", False))

        matches: list[dict[str, object]] = []
        seen: set[str] = set()
        matched_artifact_ids: set[str] = set()
        scanned_artifacts = 0
        skipped_artifacts = 0
        total_matched_lines = 0
        meta_candidates_seen = 0
        scan_truncated = False
        scan_truncated_count = 0

        for directory in artifact_read_directories(args.dir):
            # The metadata-candidate budget is shared across all directories.
            budget = max_artifacts - meta_candidates_seen
            if budget <= 0:
                scan_truncated = True
                break
            try:
                reject_symlink_components(directory)
                usable = directory.is_dir() and not directory.is_symlink()
            except RuntimeError:
                usable = False
            if not usable:
                continue
            meta_paths, skipped_here, truncated_here = metadata_candidate_paths(directory, budget)
            skipped_artifacts += skipped_here
            if truncated_here:
                scan_truncated = True
                scan_truncated_count += truncated_here
            for meta_path in meta_paths:
                meta_candidates_seen += 1
                try:
                    data = json.loads(read_bounded_private_text(meta_path, MAX_METADATA_BYTES))
                except (OSError, ValueError, RuntimeError, json.JSONDecodeError):
                    skipped_artifacts += 1
                    continue
                if not isinstance(data, dict):
                    skipped_artifacts += 1
                    continue
                artifact_id = str(data.get("artifact_id", ""))
                # Malformed ids and ids already handled in an earlier directory are skipped.
                if not ARTIFACT_ID_RE.fullmatch(artifact_id) or artifact_id in seen:
                    skipped_artifacts += 1
                    continue
                seen.add(artifact_id)
                if scanned_artifacts >= max_artifacts:
                    scan_truncated = True
                    scan_truncated_count += 1
                    continue
                try:
                    metadata, _content_path, content = load_verified_artifact(directory, artifact_id)
                except (OSError, ValueError, RuntimeError, json.JSONDecodeError):
                    skipped_artifacts += 1
                    continue
                scanned_artifacts += 1
                # Once the match budget is used up, artifacts are still
                # searched so the matched-line totals stay complete.
                artifact_matches, artifact_match_count = search_artifact_content(
                    artifact_id=artifact_id,
                    metadata=metadata,
                    content=content,
                    literal=literal,
                    ignore_case=ignore_case,
                    context_lines=context_lines,
                    snippet_chars=snippet_chars,
                    remaining_matches=max(0, max_matches - len(matches)),
                    raw_dir=args.dir,
                    show_paths=show_paths,
                )
                if artifact_match_count:
                    matched_artifact_ids.add(artifact_id)
                    total_matched_lines += artifact_match_count
                    matches.extend(artifact_matches)

        matches_truncated_count = max(0, total_matched_lines - max_matches)
        payload = {
            "tool": "context-guard-artifact",
            "schema_version": SEARCH_SCHEMA_VERSION,
            "mode": "search",
            "query": {
                "label": safe_query_label(literal),
                "raw_pattern_stored": False,
                "literal": True,
                "ignore_case": ignore_case,
            },
            "artifact_dir": artifact_dir_label(args.dir),
            "scanned_artifacts": scanned_artifacts,
            "skipped_artifacts": skipped_artifacts,
            "matched_artifacts": len(matched_artifact_ids),
            "matched_lines": total_matched_lines,
            "metadata_candidates_scanned": meta_candidates_seen,
            "matches": matches,
            "matches_truncated_count": matches_truncated_count,
            "artifact_scan_truncated": scan_truncated,
            "artifact_scan_truncated_count": scan_truncated_count,
            "artifact_scan_truncated_count_mode": SEARCH_TRUNCATED_COUNT_UNKNOWN if scan_truncated else "exact",
            "limits": {
                "max_artifacts": max_artifacts,
                "max_matches": max_matches,
                "context_lines": context_lines,
                "max_snippet_chars": snippet_chars,
            },
            "sandbox": {
                "local_only": True,
                "workflow": ["store", "search", "get"],
                "exact_rehydration": "use matches[].retrieval.cli when exact=true; for redacted custom dirs, reuse the same --dir or opt into --show-paths",
            },
            "claim_boundary": {
                "local_only": True,
                "stored_content_is_sanitized_copy": True,
                "hosted_api_token_or_cost_savings_claim_allowed": False,
                "exact_rehydration_required_before_relying_on_omitted_detail": True,
            },
        }
    except (FileNotFoundError, ValueError, OSError, json.JSONDecodeError) as exc:
        print(f"context-guard-artifact: {exc}", file=sys.stderr)
        return 1

    if args.json:
        print(json.dumps(payload, ensure_ascii=False, indent=2, sort_keys=True))
        return 0

    # Plain-text mode: one line per match plus its rehydration command.
    for item in matches:
        if not isinstance(item, dict):
            continue
        print(f"{item.get('artifact_id')}:{item.get('line')}: {item.get('text')}")
        retrieval = item.get("retrieval")
        if isinstance(retrieval, dict):
            print(f" rehydrate={retrieval.get('cli')}")
    if not matches:
        print("no matches")
    elif matches_truncated_count:
        print(f"matches_truncated_count={matches_truncated_count}")
    return 0
1338
+
1339
+
859
1340
  def list_command(args: argparse.Namespace) -> int:
860
1341
  items: list[dict[str, object]] = []
861
1342
  seen: set[str] = set()
@@ -918,6 +1399,21 @@ def build_parser() -> argparse.ArgumentParser:
918
1399
  list_parser = subparsers.add_parser("list", help="list stored artifacts")
919
1400
  list_parser.add_argument("--json", action="store_true", help="emit list JSON")
920
1401
  list_parser.set_defaults(func=list_command)
1402
+
1403
+ search = subparsers.add_parser("search", help="search stored sanitized artifacts by literal text")
1404
+ search.add_argument("pattern", help=f"literal substring to search for (max {MAX_SEARCH_PATTERN_BYTES} UTF-8 bytes)")
1405
+ search.add_argument("--ignore-case", action="store_true", help="case-insensitive literal search")
1406
+ search.add_argument("--context-lines", type=int, default=DEFAULT_SEARCH_CONTEXT_LINES, help=f"context lines around each match (default: {DEFAULT_SEARCH_CONTEXT_LINES})")
1407
+ search.add_argument("--max-artifacts", type=int, default=DEFAULT_SEARCH_MAX_ARTIFACTS, help=f"maximum artifacts to scan (default: {DEFAULT_SEARCH_MAX_ARTIFACTS})")
1408
+ search.add_argument("--max-matches", type=int, default=DEFAULT_SEARCH_MAX_MATCHES, help=f"maximum match records to return (default: {DEFAULT_SEARCH_MAX_MATCHES})")
1409
+ search.add_argument("--max-snippet-chars", type=int, default=DEFAULT_SEARCH_SNIPPET_CHARS, help=f"maximum characters per displayed line (default: {DEFAULT_SEARCH_SNIPPET_CHARS})")
1410
+ search.add_argument(
1411
+ "--show-paths",
1412
+ action="store_true",
1413
+ help="show raw custom --dir values in rehydration commands; local debugging only because private paths may be exposed",
1414
+ )
1415
+ search.add_argument("--json", action="store_true", help="emit sandbox search JSON")
1416
+ search.set_defaults(func=search_command)
921
1417
  return parser
922
1418
 
923
1419