@ictechgy/context-guard 0.4.10 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32) hide show
  1. package/CHANGELOG.md +17 -1
  2. package/README.ko.md +46 -28
  3. package/README.md +42 -33
  4. package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
  5. package/docs/benchmark-workflow-examples.md +3 -0
  6. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
  7. package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
  8. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
  9. package/docs/experimental-benchmark-fixtures.md +24 -7
  10. package/package.json +2 -1
  11. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  12. package/plugins/context-guard/README.ko.md +14 -11
  13. package/plugins/context-guard/README.md +15 -14
  14. package/plugins/context-guard/bin/context-guard +48 -17
  15. package/plugins/context-guard/bin/context-guard-artifact +342 -33
  16. package/plugins/context-guard/bin/context-guard-audit +36 -5
  17. package/plugins/context-guard/bin/context-guard-bench +1675 -44
  18. package/plugins/context-guard/bin/context-guard-cache-score +347 -35
  19. package/plugins/context-guard/bin/context-guard-compress +89 -27
  20. package/plugins/context-guard/bin/context-guard-cost +7 -2
  21. package/plugins/context-guard/bin/context-guard-experiments +364 -8
  22. package/plugins/context-guard/bin/context-guard-failed-nudge +6 -2
  23. package/plugins/context-guard/bin/context-guard-filter +88 -18
  24. package/plugins/context-guard/bin/context-guard-pack +329 -19
  25. package/plugins/context-guard/bin/context-guard-read-symbol +27 -0
  26. package/plugins/context-guard/bin/context-guard-sanitize-output +245 -18
  27. package/plugins/context-guard/bin/context-guard-setup +21 -5
  28. package/plugins/context-guard/bin/context-guard-tool-prune +287 -62
  29. package/plugins/context-guard/bin/context-guard-trim-output +394 -90
  30. package/plugins/context-guard/brief/README.md +5 -5
  31. package/plugins/context-guard/lib/context_guard_command_manifest_loader.py +123 -0
  32. package/plugins/context-guard/lib/context_guard_commands.py +217 -190
@@ -33,6 +33,7 @@ MAX_TOP_ERROR_RECEIPTS = 12
33
33
  MAX_DUPLICATE_GROUPS = 12
34
34
  MAX_SUGGESTED_QUERIES = 12
35
35
  SEARCH_SCHEMA_VERSION = "contextguard.artifact.search.v1"
36
+ OUTPUT_SANDBOX_SCHEMA_VERSION = "contextguard.artifact.output-sandbox.v1"
36
37
  DEFAULT_SEARCH_MAX_ARTIFACTS = 100
37
38
  MAX_SEARCH_MAX_ARTIFACTS = 1_000
38
39
  DEFAULT_SEARCH_MAX_MATCHES = 40
@@ -261,22 +262,38 @@ def reject_symlink_components(path: Path) -> None:
261
262
 
262
263
  def regular_private_file_size(path: Path) -> int:
263
264
  path = normalize_allowed_first_absolute_symlink(path)
264
- reject_symlink_components(path.parent)
265
- st = os.lstat(path)
266
- if stat.S_ISLNK(st.st_mode):
267
- raise ValueError(f"artifact file must not be a symlink: {path.name}")
268
- if not stat.S_ISREG(st.st_mode):
269
- raise ValueError(f"artifact file must be a regular file: {path.name}")
270
- return int(st.st_size)
265
+ parent_fd = open_private_directory_no_follow(path.parent, label="artifact directory", create=False)
266
+ try:
267
+ leaf = path.name
268
+ if leaf in {"", ".", ".."}:
269
+ raise ValueError("artifact file must name a regular file")
270
+ if not DIR_FD_STAT_SUPPORTED:
271
+ raise RuntimeError("artifact reads require dir_fd stat support")
272
+ st = os.stat(leaf, dir_fd=parent_fd, follow_symlinks=False)
273
+ if stat.S_ISLNK(st.st_mode):
274
+ raise ValueError(f"artifact file must not be a symlink: {path.name}")
275
+ if not stat.S_ISREG(st.st_mode):
276
+ raise ValueError(f"artifact file must be a regular file: {path.name}")
277
+ return int(st.st_size)
278
+ finally:
279
+ os.close(parent_fd)
271
280
 
272
281
 
273
282
  def read_bounded_private_text(path: Path, max_bytes: int) -> str:
274
283
  path = normalize_allowed_first_absolute_symlink(path)
275
- size = regular_private_file_size(path)
276
- if size > max_bytes:
277
- raise ValueError(f"artifact file exceeds trusted size cap: {path.name}: {size} > {max_bytes}")
278
- flags = os.O_RDONLY | getattr(os, "O_NOFOLLOW", 0)
279
- fd = os.open(str(path), flags)
284
+ parent_fd = open_private_directory_no_follow(path.parent, label="artifact directory", create=False)
285
+ flags = os.O_RDONLY | os.O_NOFOLLOW
286
+ if hasattr(os, "O_CLOEXEC"):
287
+ flags |= os.O_CLOEXEC
288
+ leaf = path.name
289
+ if leaf in {"", ".", ".."}:
290
+ os.close(parent_fd)
291
+ raise ValueError("artifact file must name a regular file")
292
+ try:
293
+ fd = os.open(leaf, flags, dir_fd=parent_fd)
294
+ except OSError:
295
+ os.close(parent_fd)
296
+ raise
280
297
  try:
281
298
  st = os.fstat(fd)
282
299
  if not stat.S_ISREG(st.st_mode):
@@ -289,6 +306,7 @@ def read_bounded_private_text(path: Path, max_bytes: int) -> str:
289
306
  return data.decode("utf-8", errors="replace")
290
307
  finally:
291
308
  os.close(fd)
309
+ os.close(parent_fd)
292
310
 
293
311
 
294
312
  def no_follow_dir_flags() -> int:
@@ -351,6 +369,8 @@ def open_private_directory_no_follow(path: Path, *, label: str, create: bool) ->
351
369
  owned_fd = current_fd
352
370
  current_fd = -1
353
371
  return owned_fd
372
+ except FileNotFoundError:
373
+ raise
354
374
  except OSError as exc:
355
375
  raise RuntimeError(f"could not inspect {label}: {os_error_detail(exc)}") from exc
356
376
  finally:
@@ -574,6 +594,8 @@ def build_retrieval_hints(
574
594
  content_type: str,
575
595
  strategy: str,
576
596
  total_lines: int,
597
+ raw_dir: str | None = None,
598
+ show_paths: bool = False,
577
599
  ) -> list[dict[str, object]]:
578
600
  """Build deterministic, machine-readable retrieval hints for bounded round-trip.
579
601
 
@@ -591,8 +613,8 @@ def build_retrieval_hints(
591
613
  lines_hint: dict[str, object] = {
592
614
  "type": "lines",
593
615
  "selector": {"start": 1, "end": end_line},
594
- "cli": line_query_cli(artifact_id, 1, end_line),
595
- "exact": total_lines <= MAX_QUERY_LINES,
616
+ "cli": line_query_cli(artifact_id, 1, end_line, raw_dir=raw_dir, show_paths=show_paths),
617
+ "exact": total_lines <= MAX_QUERY_LINES and artifact_dir_cli_is_exact(raw_dir, show_paths=show_paths),
596
618
  }
597
619
  if end_line > DEFAULT_MAX_LINES:
598
620
  lines_hint["max_lines"] = end_line
@@ -614,14 +636,14 @@ def build_retrieval_hints(
614
636
  {
615
637
  "type": "pattern",
616
638
  "selector": {"pattern": anchor},
617
- "cli": f"context-guard-artifact get {artifact_id} --pattern '{anchor}'",
639
+ "cli": f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --pattern {shlex.quote(anchor)}",
618
640
  }
619
641
  )
620
642
  hints.append(
621
643
  {
622
644
  "type": "head",
623
645
  "selector": {"max_lines": DEFAULT_MAX_LINES},
624
- "cli": f"context-guard-artifact get {artifact_id} --max-lines {DEFAULT_MAX_LINES}",
646
+ "cli": f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --max-lines {DEFAULT_MAX_LINES}",
625
647
  }
626
648
  )
627
649
  return hints
@@ -654,16 +676,29 @@ def line_query_cli(
654
676
  return cli
655
677
 
656
678
 
657
- def line_receipt(artifact_id: str, line_number: int, text: str) -> dict[str, object]:
679
+ def line_receipt(
680
+ artifact_id: str,
681
+ line_number: int,
682
+ text: str,
683
+ *,
684
+ raw_dir: str | None = None,
685
+ show_paths: bool = False,
686
+ ) -> dict[str, object]:
658
687
  return {
659
688
  "line": line_number,
660
689
  "text": cap_digest_text(text.strip()),
661
690
  "selector": {"type": "lines", "start": line_number, "end": line_number},
662
- "cli": line_query_cli(artifact_id, line_number, line_number),
691
+ "cli": line_query_cli(artifact_id, line_number, line_number, raw_dir=raw_dir, show_paths=show_paths),
663
692
  }
664
693
 
665
694
 
666
- def build_top_error_receipts(artifact_id: str, lines: list[str]) -> list[dict[str, object]]:
695
+ def build_top_error_receipts(
696
+ artifact_id: str,
697
+ lines: list[str],
698
+ *,
699
+ raw_dir: str | None = None,
700
+ show_paths: bool = False,
701
+ ) -> list[dict[str, object]]:
667
702
  receipts: list[dict[str, object]] = []
668
703
  seen: set[str] = set()
669
704
  for line_number, line in enumerate(lines, start=1):
@@ -672,7 +707,7 @@ def build_top_error_receipts(artifact_id: str, lines: list[str]) -> list[dict[st
672
707
  text = cap_digest_text(line.strip())
673
708
  if not text or text in seen:
674
709
  continue
675
- receipt = line_receipt(artifact_id, line_number, text)
710
+ receipt = line_receipt(artifact_id, line_number, text, raw_dir=raw_dir, show_paths=show_paths)
676
711
  receipts.append(receipt)
677
712
  seen.add(text)
678
713
  if len(receipts) >= MAX_TOP_ERROR_RECEIPTS:
@@ -680,7 +715,14 @@ def build_top_error_receipts(artifact_id: str, lines: list[str]) -> list[dict[st
680
715
  return receipts
681
716
 
682
717
 
683
- def build_duplicate_line_groups(artifact_id: str, lines: list[str], *, limit: int = MAX_DUPLICATE_GROUPS) -> list[dict[str, object]]:
718
+ def build_duplicate_line_groups(
719
+ artifact_id: str,
720
+ lines: list[str],
721
+ *,
722
+ limit: int = MAX_DUPLICATE_GROUPS,
723
+ raw_dir: str | None = None,
724
+ show_paths: bool = False,
725
+ ) -> list[dict[str, object]]:
684
726
  counts: dict[str, int] = {}
685
727
  first_line: dict[str, int] = {}
686
728
  for line_number, line in enumerate(lines, start=1):
@@ -703,13 +745,20 @@ def build_duplicate_line_groups(artifact_id: str, lines: list[str], *, limit: in
703
745
  "first_line": line_number,
704
746
  "text": text,
705
747
  "selector": {"type": "lines", "start": line_number, "end": line_number},
706
- "cli": line_query_cli(artifact_id, line_number, line_number),
748
+ "cli": line_query_cli(artifact_id, line_number, line_number, raw_dir=raw_dir, show_paths=show_paths),
707
749
  }
708
750
  )
709
751
  return groups
710
752
 
711
753
 
712
- def build_digest(sanitized_text: str, *, artifact_id: str, redacted_lines: int) -> dict[str, object]:
754
+ def build_digest(
755
+ sanitized_text: str,
756
+ *,
757
+ artifact_id: str,
758
+ redacted_lines: int,
759
+ raw_dir: str | None = None,
760
+ show_paths: bool = False,
761
+ ) -> dict[str, object]:
713
762
  lines = sanitized_text.splitlines()
714
763
  top_errors = compact_items(
715
764
  (line for line in lines if ERROR_RE.search(line)),
@@ -725,8 +774,8 @@ def build_digest(sanitized_text: str, *, artifact_id: str, redacted_lines: int)
725
774
  "markers": sanitized_text.count("[REDACTED]"),
726
775
  },
727
776
  "top_error_lines": top_errors,
728
- "top_error_receipts": build_top_error_receipts(artifact_id, lines),
729
- "duplicate_line_groups": build_duplicate_line_groups(artifact_id, lines),
777
+ "top_error_receipts": build_top_error_receipts(artifact_id, lines, raw_dir=raw_dir, show_paths=show_paths),
778
+ "duplicate_line_groups": build_duplicate_line_groups(artifact_id, lines, raw_dir=raw_dir, show_paths=show_paths),
730
779
  "representative_head": compact_items(
731
780
  lines,
732
781
  limit=8,
@@ -769,7 +818,198 @@ def suggested_queries_for(metadata: dict[str, object]) -> list[str]:
769
818
  return queries[:MAX_SUGGESTED_QUERIES]
770
819
 
771
820
 
772
- def receipt_for(metadata: dict[str, object]) -> dict[str, object]:
821
+ def artifact_handle(artifact_id: str) -> str:
822
+ return f"contextguard-artifact:{artifact_id}"
823
+
824
+
825
+ def compact_stored_output(metadata: dict[str, object]) -> dict[str, object]:
826
+ stored = metadata.get("stored_output")
827
+ if not isinstance(stored, dict):
828
+ return {}
829
+ compact: dict[str, object] = {}
830
+ for key in ("scope", "bytes", "lines", "sha256", "content_file", "metadata_file"):
831
+ if key in stored:
832
+ compact[key] = stored[key]
833
+ content_type = metadata.get("content_type")
834
+ if isinstance(content_type, str):
835
+ compact["content_type"] = content_type
836
+ return compact
837
+
838
+
839
+ def digest_count(digest: dict[str, object], key: str) -> int:
840
+ value = digest.get(key)
841
+ return len(value) if isinstance(value, list) else 0
842
+
843
+
844
+ def build_output_sandbox_summary(metadata: dict[str, object]) -> dict[str, object]:
845
+ digest = metadata.get("digest")
846
+ if not isinstance(digest, dict):
847
+ return {"status": "stored"}
848
+ summary: dict[str, object] = {
849
+ "status": digest.get("status") or "stored",
850
+ "top_error_count": digest_count(digest, "top_error_lines"),
851
+ "top_error_receipt_count": digest_count(digest, "top_error_receipts"),
852
+ "duplicate_line_group_count": digest_count(digest, "duplicate_line_groups"),
853
+ "representative_head_count": digest_count(digest, "representative_head"),
854
+ "representative_tail_count": digest_count(digest, "representative_tail"),
855
+ }
856
+ redaction_counts = digest.get("redaction_counts")
857
+ if isinstance(redaction_counts, dict):
858
+ summary["redaction_counts"] = {
859
+ str(key): value
860
+ for key, value in redaction_counts.items()
861
+ if isinstance(value, (int, float, str, bool)) or value is None
862
+ }
863
+ elif "redacted_lines" in digest:
864
+ summary["redacted_lines"] = digest.get("redacted_lines")
865
+ capped = digest.get("capped_for_metadata")
866
+ if isinstance(capped, bool):
867
+ summary["capped_for_metadata"] = capped
868
+ return summary
869
+
870
+
871
+ def rehydration_command_record(
872
+ *,
873
+ kind: str,
874
+ cli: str,
875
+ selector: dict[str, object],
876
+ exact: bool,
877
+ note: str | None = None,
878
+ ) -> dict[str, object]:
879
+ record: dict[str, object] = {
880
+ "type": kind,
881
+ "selector": selector,
882
+ "cli": cli,
883
+ "exact": exact,
884
+ }
885
+ if note:
886
+ record["note"] = note
887
+ return record
888
+
889
+
890
+ def build_output_sandbox_rehydration(
891
+ metadata: dict[str, object],
892
+ *,
893
+ raw_dir: str | None = None,
894
+ show_paths: bool = False,
895
+ ) -> dict[str, object]:
896
+ artifact_id = str(metadata["artifact_id"])
897
+ cli_exact = artifact_dir_cli_is_exact(raw_dir, show_paths=show_paths)
898
+ prefix = artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)
899
+ note = (
900
+ None
901
+ if cli_exact
902
+ else "custom artifact directory is redacted; rerun with the same --dir value or pass --show-paths for a directly executable local command"
903
+ )
904
+ commands: list[dict[str, object]] = [
905
+ rehydration_command_record(
906
+ kind="metadata",
907
+ selector={"type": "receipt"},
908
+ cli=f"{prefix} receipt {artifact_id} --json",
909
+ exact=cli_exact,
910
+ note=note,
911
+ )
912
+ ]
913
+
914
+ retrieval = metadata.get("retrieval")
915
+ hints = retrieval.get("hints") if isinstance(retrieval, dict) else None
916
+ if isinstance(hints, list):
917
+ for hint in hints:
918
+ if not isinstance(hint, dict):
919
+ continue
920
+ hint_type = hint.get("type")
921
+ selector = hint.get("selector")
922
+ if not isinstance(selector, dict):
923
+ selector = {}
924
+ cli: str | None = None
925
+ exact = bool(hint.get("exact", True)) and cli_exact
926
+ if hint_type == "lines":
927
+ start = selector.get("start")
928
+ end = selector.get("end")
929
+ if isinstance(start, int) and isinstance(end, int):
930
+ cli = line_query_cli(artifact_id, start, end, raw_dir=raw_dir, show_paths=show_paths)
931
+ elif hint_type == "pattern":
932
+ pattern = selector.get("pattern")
933
+ if isinstance(pattern, str) and pattern:
934
+ cli = f"{prefix} get {artifact_id} --pattern {shlex.quote(pattern)}"
935
+ elif hint_type == "head":
936
+ max_lines = selector.get("max_lines")
937
+ if isinstance(max_lines, int) and max_lines > 0:
938
+ cli = f"{prefix} get {artifact_id} --max-lines {max_lines}"
939
+ if cli is None:
940
+ raw_cli = hint.get("cli")
941
+ cli = raw_cli if isinstance(raw_cli, str) and raw_cli else None
942
+ if cli:
943
+ commands.append(
944
+ rehydration_command_record(
945
+ kind=str(hint_type or "query"),
946
+ selector=selector,
947
+ cli=cli,
948
+ exact=exact,
949
+ note=note if not cli_exact else str(hint.get("note") or "") or None,
950
+ )
951
+ )
952
+ if len(commands) >= 5:
953
+ break
954
+
955
+ digest = metadata.get("digest")
956
+ top_error_lines = digest.get("top_error_lines") if isinstance(digest, dict) else None
957
+ if isinstance(top_error_lines, list):
958
+ anchor = first_error_anchor("\n".join(str(line) for line in top_error_lines))
959
+ if anchor and len(commands) < 5:
960
+ commands.append(
961
+ rehydration_command_record(
962
+ kind="search",
963
+ selector={"type": "literal", "pattern": anchor},
964
+ cli=f"{prefix} search {shlex.quote(anchor)} --json",
965
+ exact=cli_exact,
966
+ note=note,
967
+ )
968
+ )
969
+
970
+ return {
971
+ "commands": commands,
972
+ "dir_argument": "default" if default_artifact_dir_requested(raw_dir or DEFAULT_ARTIFACT_DIR) else ("included" if show_paths else "redacted"),
973
+ "exact_commands": cli_exact,
974
+ "note": note,
975
+ }
976
+
977
+
978
+ def build_output_sandbox_envelope(
979
+ metadata: dict[str, object],
980
+ *,
981
+ raw_dir: str | None = None,
982
+ show_paths: bool = False,
983
+ ) -> dict[str, object]:
984
+ artifact_id = str(metadata["artifact_id"])
985
+ return {
986
+ "schema_version": OUTPUT_SANDBOX_SCHEMA_VERSION,
987
+ "mode": "local_artifact_receipt",
988
+ "handle": artifact_handle(artifact_id),
989
+ "artifact_id": artifact_id,
990
+ "stored_output": compact_stored_output(metadata),
991
+ "summary": build_output_sandbox_summary(metadata),
992
+ "rehydration": build_output_sandbox_rehydration(metadata, raw_dir=raw_dir, show_paths=show_paths),
993
+ "agent_guidance": [
994
+ "Keep this compact receipt in agent context instead of pasting the full output.",
995
+ "Before relying on omitted details, rehydrate the exact sanitized slice with one of rehydration.commands[].cli.",
996
+ "For repeated diagnostics, query narrower lines or literal matches instead of rerunning broad commands unchanged.",
997
+ ],
998
+ "claim_boundary": {
999
+ "local_only": True,
1000
+ "stored_content_is_sanitized_copy": True,
1001
+ "hosted_api_token_or_cost_savings_claim_allowed": False,
1002
+ "exact_rehydration_required_before_relying_on_omitted_detail": True,
1003
+ },
1004
+ }
1005
+
1006
+
1007
+ def receipt_for(
1008
+ metadata: dict[str, object],
1009
+ *,
1010
+ raw_dir: str | None = None,
1011
+ show_paths: bool = False,
1012
+ ) -> dict[str, object]:
773
1013
  artifact_id = str(metadata["artifact_id"])
774
1014
  return {
775
1015
  "artifact_id": artifact_id,
@@ -782,11 +1022,12 @@ def receipt_for(metadata: dict[str, object]) -> dict[str, object]:
782
1022
  "digest": metadata.get("digest"),
783
1023
  "retrieval": metadata.get("retrieval"),
784
1024
  "available_queries": [
785
- f"context-guard-artifact get {artifact_id} --lines 1:80",
786
- f"context-guard-artifact get {artifact_id} --pattern ERROR --max-lines 40",
787
- f"context-guard-artifact get {artifact_id} --json --lines 1:20",
1025
+ line_query_cli(artifact_id, 1, 80, raw_dir=raw_dir, show_paths=show_paths),
1026
+ f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --pattern ERROR --max-lines 40",
1027
+ f"{artifact_dir_cli_prefix(raw_dir, show_paths=show_paths)} get {artifact_id} --json --lines 1:20",
788
1028
  ],
789
1029
  "suggested_queries": suggested_queries_for(metadata),
1030
+ "output_sandbox": build_output_sandbox_envelope(metadata, raw_dir=raw_dir, show_paths=show_paths),
790
1031
  }
791
1032
 
792
1033
 
@@ -896,7 +1137,13 @@ def store_command(args: argparse.Namespace) -> int:
896
1137
  "content_file": content_path.name,
897
1138
  "metadata_file": meta_path.name,
898
1139
  },
899
- "digest": build_digest(sanitized_text, artifact_id=artifact_id, redacted_lines=redacted_lines),
1140
+ "digest": build_digest(
1141
+ sanitized_text,
1142
+ artifact_id=artifact_id,
1143
+ redacted_lines=redacted_lines,
1144
+ raw_dir=args.dir,
1145
+ show_paths=args.show_paths,
1146
+ ),
900
1147
  "retrieval": {
901
1148
  "strategy": strategy,
902
1149
  "deterministic": True,
@@ -906,17 +1153,22 @@ def store_command(args: argparse.Namespace) -> int:
906
1153
  content_type=content_type,
907
1154
  strategy=strategy,
908
1155
  total_lines=total_lines,
1156
+ raw_dir=args.dir,
1157
+ show_paths=args.show_paths,
909
1158
  ),
910
1159
  },
911
1160
  }
912
1161
  shrink_digest_for_metadata_cap(metadata)
913
1162
  write_private_text(content_path, sanitized_text)
914
1163
  write_private_text(meta_path, metadata_json_text(metadata))
915
- receipt = receipt_for(metadata)
1164
+ receipt = receipt_for(metadata, raw_dir=args.dir, show_paths=args.show_paths)
916
1165
  if args.json:
917
1166
  print(json.dumps(receipt, ensure_ascii=False, indent=2, sort_keys=True))
918
1167
  else:
919
1168
  print(f"artifact_id={artifact_id}")
1169
+ sandbox = receipt.get("output_sandbox")
1170
+ handle = sandbox.get("handle") if isinstance(sandbox, dict) else artifact_handle(artifact_id)
1171
+ print(f"handle={handle}")
920
1172
  stored = receipt["stored_output"]
921
1173
  if isinstance(stored, dict):
922
1174
  print(f"stored_output={stored.get('lines')} lines/{stored.get('bytes')} bytes")
@@ -925,7 +1177,16 @@ def store_command(args: argparse.Namespace) -> int:
925
1177
  print("top_error_lines:")
926
1178
  for line in digest["top_error_lines"]: # type: ignore[index]
927
1179
  print(f"- {line}")
928
- print(f"query=context-guard-artifact get {artifact_id} --lines 1:80")
1180
+ available_queries = receipt.get("available_queries")
1181
+ if isinstance(available_queries, list) and available_queries:
1182
+ print(f"query={available_queries[0]}")
1183
+ rehydration = sandbox.get("rehydration") if isinstance(sandbox, dict) else None
1184
+ commands = rehydration.get("commands") if isinstance(rehydration, dict) else None
1185
+ if isinstance(commands, list):
1186
+ for command in commands:
1187
+ if isinstance(command, dict) and command.get("type") != "metadata" and isinstance(command.get("cli"), str):
1188
+ print(f"rehydrate={command['cli']}")
1189
+ break
929
1190
  return 0
930
1191
 
931
1192
 
@@ -1205,6 +1466,44 @@ def get_command(args: argparse.Namespace) -> int:
1205
1466
  return 0
1206
1467
 
1207
1468
 
1469
+ def receipt_command(args: argparse.Namespace) -> int:
1470
+ artifact_id = args.artifact_id
1471
+ try:
1472
+ last_missing: FileNotFoundError | None = None
1473
+ for directory in artifact_read_directories(args.dir):
1474
+ try:
1475
+ metadata, _content_path, _content = load_verified_artifact(directory, artifact_id)
1476
+ break
1477
+ except FileNotFoundError as exc:
1478
+ last_missing = exc
1479
+ else:
1480
+ if last_missing is not None:
1481
+ raise last_missing
1482
+ raise FileNotFoundError(f"artifact not found: {artifact_id}")
1483
+ receipt = receipt_for(metadata, raw_dir=args.dir, show_paths=bool(getattr(args, "show_paths", False)))
1484
+ except (FileNotFoundError, ValueError, OSError, json.JSONDecodeError) as exc:
1485
+ print(f"context-guard-artifact: {exc}", file=sys.stderr)
1486
+ return 1
1487
+ if args.json:
1488
+ print(json.dumps(receipt, ensure_ascii=False, indent=2, sort_keys=True))
1489
+ else:
1490
+ sandbox = receipt.get("output_sandbox")
1491
+ handle = sandbox.get("handle") if isinstance(sandbox, dict) else artifact_handle(artifact_id)
1492
+ print(f"artifact_id={artifact_id}")
1493
+ print(f"handle={handle}")
1494
+ stored = receipt.get("stored_output")
1495
+ if isinstance(stored, dict):
1496
+ print(f"stored_output={stored.get('lines')} lines/{stored.get('bytes')} bytes")
1497
+ rehydration = sandbox.get("rehydration") if isinstance(sandbox, dict) else None
1498
+ commands = rehydration.get("commands") if isinstance(rehydration, dict) else None
1499
+ if isinstance(commands, list):
1500
+ for command in commands[:4]:
1501
+ if isinstance(command, dict) and command.get("cli"):
1502
+ print(f"rehydrate={command.get('cli')}")
1503
+ print("claim_boundary=local sanitized artifact; no hosted token/cost savings claim")
1504
+ return 0
1505
+
1506
+
1208
1507
  def search_command(args: argparse.Namespace) -> int:
1209
1508
  try:
1210
1509
  literal = search_literal(args.pattern)
@@ -1355,7 +1654,7 @@ def list_command(args: argparse.Namespace) -> int:
1355
1654
  continue
1356
1655
  artifact_id = str(data.get("artifact_id", "")) if isinstance(data, dict) else ""
1357
1656
  if isinstance(data, dict) and ARTIFACT_ID_RE.fullmatch(artifact_id) and artifact_id not in seen:
1358
- items.append(receipt_for(data))
1657
+ items.append(receipt_for(data, raw_dir=args.dir, show_paths=False))
1359
1658
  seen.add(artifact_id)
1360
1659
  items.sort(key=lambda item: str(item.get("artifact_id", "")))
1361
1660
  if args.json:
@@ -1396,6 +1695,16 @@ def build_parser() -> argparse.ArgumentParser:
1396
1695
  get.add_argument("--json", action="store_true", help="emit query JSON with content")
1397
1696
  get.set_defaults(func=get_command)
1398
1697
 
1698
+ receipt = subparsers.add_parser("receipt", help="print metadata-only receipt and rehydration handle for a stored artifact")
1699
+ receipt.add_argument("artifact_id")
1700
+ receipt.add_argument(
1701
+ "--show-paths",
1702
+ action="store_true",
1703
+ help="show raw custom --dir values in rehydration commands; local debugging only because private paths may be exposed",
1704
+ )
1705
+ receipt.add_argument("--json", action="store_true", help="emit receipt JSON without artifact content")
1706
+ receipt.set_defaults(func=receipt_command)
1707
+
1399
1708
  list_parser = subparsers.add_parser("list", help="list stored artifacts")
1400
1709
  list_parser.add_argument("--json", action="store_true", help="emit list JSON")
1401
1710
  list_parser.set_defaults(func=list_command)
@@ -56,8 +56,10 @@ JSON_PARSE_RECURSION_LIMIT = 10_000
56
56
  READ_CHUNK_BYTES = 64 * 1024
57
57
  DEFAULT_MAX_FILE_BYTES = 50 * 1024 * 1024
58
58
  DEFAULT_MAX_LINE_BYTES = 2 * 1024 * 1024
59
+ DEFAULT_MAX_SCAN_FILES = 100_000
59
60
  MAX_FILE_BYTES_LIMIT = 2 * 1024 * 1024 * 1024
60
61
  MAX_LINE_BYTES_LIMIT = 128 * 1024 * 1024
62
+ MAX_SCAN_FILES_LIMIT = 1_000_000
61
63
  SECRET_VALUE_RE = re.compile(
62
64
  r"(?i)(gh[pousr]_[A-Za-z0-9_]{8,}|github_pat_[A-Za-z0-9_]{20,}|"
63
65
  r"xox[abprs]-[A-Za-z0-9-]{8,}|(?:AKIA|ASIA)[0-9A-Z]{8,}|"
@@ -143,14 +145,14 @@ class PromptCacheAudit:
143
145
 
144
146
  def observe(self, root: Any) -> None:
145
147
  self.sampled_records += 1
148
+ if len(self.samples) >= PROMPT_AUDIT_MAX_RECORDS:
149
+ self.capped_records += 1
150
+ return
146
151
  segments, bytes_sampled, redactions, collection_capped = prompt_segments_for_record(root)
147
152
  if collection_capped:
148
153
  self.prompt_collection_capped_records += 1
149
154
  if not segments:
150
155
  return
151
- if len(self.samples) >= PROMPT_AUDIT_MAX_RECORDS:
152
- self.capped_records += 1
153
- return
154
156
  self.analyzed_prompt_records += 1
155
157
  self.total_segments += len(segments)
156
158
  self.total_bytes_sampled += bytes_sampled
@@ -169,6 +171,8 @@ class UsageSummary:
169
171
  files: int = 0
170
172
  records: int = 0
171
173
  skipped_files: int = 0
174
+ unscanned_files_lower_bound: int = 0
175
+ scan_truncated: bool = False
172
176
  skipped_records: int = 0
173
177
  parse_errors: list[str] = field(default_factory=list)
174
178
  tokens: Counter[str] = field(default_factory=Counter)
@@ -618,6 +622,7 @@ def os_error_summary(exc: OSError) -> str:
618
622
  class ScanLimits:
619
623
  max_file_bytes: int = DEFAULT_MAX_FILE_BYTES
620
624
  max_line_bytes: int = DEFAULT_MAX_LINE_BYTES
625
+ max_files: int = DEFAULT_MAX_SCAN_FILES
621
626
 
622
627
 
623
628
  def open_regular_no_symlink(file: Path):
@@ -809,6 +814,15 @@ def scan(
809
814
  limits = limits or ScanLimits()
810
815
  summary = UsageSummary()
811
816
  for file in iter_jsonl_files(paths):
817
+ if summary.files >= limits.max_files:
818
+ summary.skipped_files += 1
819
+ summary.unscanned_files_lower_bound += 1
820
+ summary.scan_truncated = True
821
+ summary.note_error(
822
+ f"transcript scan file limit reached ({limits.max_files}); "
823
+ "rerun with narrower paths or --max-files if more evidence is required"
824
+ )
825
+ break
812
826
  summary.files += 1
813
827
  try:
814
828
  with open_regular_no_symlink(file) as handle:
@@ -925,6 +939,8 @@ def scan_integrity(summary: UsageSummary) -> dict[str, Any]:
925
939
  "files_scanned": summary.files,
926
940
  "records_scanned": summary.records,
927
941
  "skipped_files": summary.skipped_files,
942
+ "unscanned_files_lower_bound": summary.unscanned_files_lower_bound,
943
+ "scan_truncated": summary.scan_truncated,
928
944
  "skipped_records": summary.skipped_records,
929
945
  "parse_error_count": len(summary.parse_errors),
930
946
  "complete": complete,
@@ -2151,11 +2167,14 @@ def summary_json(
2151
2167
  "files": summary.files,
2152
2168
  "records": summary.records,
2153
2169
  "skipped_files": summary.skipped_files,
2170
+ "unscanned_files_lower_bound": summary.unscanned_files_lower_bound,
2171
+ "scan_truncated": summary.scan_truncated,
2154
2172
  "skipped_records": summary.skipped_records,
2155
2173
  "parse_errors": summary.parse_errors,
2156
2174
  "scan_limits": {
2157
2175
  "max_file_bytes": limits.max_file_bytes,
2158
2176
  "max_line_bytes": limits.max_line_bytes,
2177
+ "max_files": limits.max_files,
2159
2178
  },
2160
2179
  "total_tokens": summary.total_tokens,
2161
2180
  "tokens": dict(summary.tokens),
@@ -2221,10 +2240,17 @@ def main() -> int:
2221
2240
  default=DEFAULT_MAX_LINE_BYTES,
2222
2241
  help="skip individual JSONL records larger than this many bytes (default: 2 MiB)",
2223
2242
  )
2243
+ parser.add_argument(
2244
+ "--max-files",
2245
+ type=int,
2246
+ default=DEFAULT_MAX_SCAN_FILES,
2247
+ help=f"stop after this many transcript files (default: {DEFAULT_MAX_SCAN_FILES})",
2248
+ )
2224
2249
  args = parser.parse_args()
2225
2250
  limits = ScanLimits(
2226
2251
  max_file_bytes=require_scan_limit(parser, "--max-file-bytes", args.max_file_bytes, MAX_FILE_BYTES_LIMIT),
2227
2252
  max_line_bytes=require_scan_limit(parser, "--max-line-bytes", args.max_line_bytes, MAX_LINE_BYTES_LIMIT),
2253
+ max_files=require_scan_limit(parser, "--max-files", args.max_files, MAX_SCAN_FILES_LIMIT),
2228
2254
  )
2229
2255
 
2230
2256
  summary = scan(args.paths, show_paths=args.show_paths, show_commands=args.show_commands, limits=limits)
@@ -2248,9 +2274,14 @@ def main() -> int:
2248
2274
  print("Claude Code transcript usage audit")
2249
2275
  print(
2250
2276
  f"files_scanned={summary.files} records={summary.records} "
2251
- f"skipped_files={summary.skipped_files} skipped_records={summary.skipped_records}"
2277
+ f"skipped_files={summary.skipped_files} skipped_records={summary.skipped_records} "
2278
+ f"scan_truncated={str(summary.scan_truncated).lower()} "
2279
+ f"unscanned_files_lower_bound={summary.unscanned_files_lower_bound}"
2280
+ )
2281
+ print(
2282
+ f"scan_limits=max_file_bytes:{limits.max_file_bytes} "
2283
+ f"max_line_bytes:{limits.max_line_bytes} max_files:{limits.max_files}"
2252
2284
  )
2253
- print(f"scan_limits=max_file_bytes:{limits.max_file_bytes} max_line_bytes:{limits.max_line_bytes}")
2254
2285
  print(f"observed_total_tokens={summary.total_tokens}")
2255
2286
  if summary.cost_usd:
2256
2287
  print(f"observed_cost_usd={summary.cost_usd:.4f}")