npm - @ictechgy/context-guard - Versions diffs - 0.4.11 → 0.4.12 - Mend

@ictechgy/context-guard 0.4.11 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

package/CHANGELOG.md +4 -0
package/README.ko.md +19 -12
package/README.md +11 -11
package/package.json +1 -1
package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
package/plugins/context-guard/bin/context-guard +42 -46
package/plugins/context-guard/bin/context-guard-audit +3 -3
package/plugins/context-guard/bin/context-guard-bench +136 -16
package/plugins/context-guard/bin/context-guard-cache-score +29 -2
package/plugins/context-guard/bin/context-guard-compress +89 -27
package/plugins/context-guard/bin/context-guard-filter +88 -18
package/plugins/context-guard/bin/context-guard-pack +28 -2
package/plugins/context-guard/bin/context-guard-read-symbol +27 -0
package/plugins/context-guard/bin/context-guard-sanitize-output +169 -6
package/plugins/context-guard/bin/context-guard-setup +21 -5
package/plugins/context-guard/bin/context-guard-tool-prune +48 -10
package/plugins/context-guard/bin/context-guard-trim-output +109 -52
package/plugins/context-guard/lib/context_guard_command_manifest_loader.py +123 -0
package/plugins/context-guard/lib/context_guard_commands.py +4 -1

package/plugins/context-guard/bin/context-guard-sanitize-output CHANGED Viewed

@@ -49,9 +49,17 @@ PRIVATE_KEY_END_RE = re.compile(
 AUTH_HEADER_RE = re.compile(
     r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Proxy-)?Authorization\s*:\s*).+$"
 )
+# Matches a whole `Cookie:` / `Set-Cookie:` header line, case-insensitively.
+# The `prefix` group captures everything up to the header value, including an
+# optional `<text>:<digits>[:<digits>]:` lead-in (looks like grep-style
+# file:line[:col] output -- confirm) and an optional `+`/`-` marker, so a
+# substitution can keep the prefix and replace only the value.
+COOKIE_HEADER_RE = re.compile(
+    r"(?i)^(?P<prefix>\s*(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:Set-)?Cookie\s*:\s*).+$"
+)
+# Regex fragment for session/CSRF-style key names: `session`, `session_id`,
+# `session-token`, `sessionid`, `sid`, `jsessionid`, `csrf`, `csrf_token`,
+# `xsrf`, `xsrf-token` and the other separator variants the pattern allows.
+SESSION_SECRET_KEY = (
+    r"(?:session(?:[_-]?(?:id|token))?|sessionid|sid|jsessionid|"
+    r"csrf(?:[_-]?token)?|xsrf(?:[_-]?token)?)"
+)
 SECRET_KEY = (
     r"[A-Za-z0-9_.-]*(?:api[_-]?key|apikey|token|secret|password|passwd|pwd|"
     r"private[_-]?key|access[_-]?key|client[_-]?secret)[A-Za-z0-9_.-]*"
+    rf"|{SESSION_SECRET_KEY}"
     r"|AWS_ACCESS_KEY_ID|AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|"
     r"GOOGLE_APPLICATION_CREDENTIALS|AZURE_CLIENT_SECRET"
 )
@@ -61,11 +69,48 @@ INLINE_QUOTED_SECRET_ASSIGNMENT_RE = re.compile(
     rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
     rf"(?P<quote>[\"'])(?P<value>(?:\\.|(?!(?P=quote)).)*)(?P=quote)(?P<tail>[^\s,;}}\]]*)"
 )
+# Regex fragment: an identifier (letters, digits, `_`, `$`) optionally
+# followed by further `.`-separated identifiers, e.g. `os.environ.get`.
+CODE_IDENTIFIER = r"[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)*"
+# Regex fragment for the text between a call's parentheses: any mix of plain
+# characters (no parens, quotes, newline or `;`), single- or double-quoted
+# strings with backslash escapes, and one level of nested `( ... )`.
+CALL_ARGUMENT_CHUNK = r"(?:[^()\"'\n;]+|\"(?:\\.|[^\"\\])*\"|'(?:\\.|[^'\\])*'|\([^()]*\))*"
+# Secret-key assignment whose unquoted value is a call expression
+# `identifier(...)`. `lead` and `prefix` capture what precedes the value.
+INLINE_UNQUOTED_CALL_SECRET_ASSIGNMENT_RE = re.compile(
+    rf"(?i)(?P<lead>^|[\s;{{\[,])"
+    rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
+    rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
+    rf"(?P<value>(?![\"']){CODE_IDENTIFIER}\({CALL_ARGUMENT_CHUNK}\))"
+)
+# Regex fragment: an identifier that contains a secret-like word after at
+# least one leading identifier character, or one of the bare words
+# `session`, `sid`, `csrf`, `xsrf`.
+SECRET_IDENTIFIER_PART = (
+    r"(?:[A-Za-z_$][A-Za-z0-9_$]*(?:api_?key|apikey|token|secret|password|passwd|pwd|"
+    r"private_?key|access_?key|client_?secret|sessionid|session_id|session_token|"
+    r"csrf_token|xsrf_token)[A-Za-z0-9_$]*|session|sid|csrf|xsrf)"
+)
+# SECRET_IDENTIFIER_PART with an optional dotted qualifier in front.
+FALLBACK_SECRET_OPERAND = rf"(?:[A-Za-z_$][A-Za-z0-9_$]*\.)*{SECRET_IDENTIFIER_PART}"
+# Secret-key assignment whose unquoted value contains a fallback operator
+# (`or`, `||`, `??`, `else`, or a ternary `? ... :`) followed by a quoted
+# literal or a secret-looking identifier. The value may not start with a
+# quote or with an existing `[REDACTED]` marker.
+INLINE_UNQUOTED_FALLBACK_SECRET_ASSIGNMENT_RE = re.compile(
+    rf"(?i)(?P<lead>^|[\s;{{\[,])"
+    rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
+    rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
+    rf"(?P<value>(?![\"']|\[REDACTED\])"
+    rf"[^;\n]*?(?:\bor\b|\|\||\?\?|\belse\b|\?[^:\n;]*:)\s*"
+    rf"(?:[\"'](?:\\.|[^\"'\\])*[\"']|{FALLBACK_SECRET_OPERAND})[^;\n]*)"
+)
+# Secret-key assignment whose unquoted value contains a `( ... )`, `{ ... }`
+# or `[ ... ]` group; the group may contain whitespace, which the plain
+# unquoted pattern's value class does not allow.
+INLINE_UNQUOTED_BRACKETED_SECRET_ASSIGNMENT_RE = re.compile(
+    rf"(?i)(?P<lead>^|[\s;{{\[,])"
+    rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
+    rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
+    rf"(?P<value>(?![\"']|\[REDACTED\])"
+    rf"[^\s,;}}\]]*(?:\([^;\n]*?\)|\{{[^;\n]*?\}}|\[[^;\n]*?\])[^\s,;}}\]]*)"
+)
+# Secret-key assignment with a plain unquoted value: a run of characters up
+# to whitespace, `,`, `;`, `}` or `]`. The value may not start with a quote
+# or with an existing `[REDACTED]` marker.
 INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE = re.compile(
     rf"(?i)(?P<lead>^|[\s;{{\[,])"
     rf"(?P<prefix>(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
     rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*)"
-    rf"(?P<value>[^\s,;}}\]]+)"
+    rf"(?P<value>(?![\"']|\[REDACTED\])[^\s,;}}\]]+)"
+)
+# Secret-key assignment whose value does not start with a quote. `value`
+# captures the rest of the line and may be empty.
+UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE = re.compile(
+    rf"(?i)(?:^|[\s;{{\[,])"
+    rf"(?:(?:[^:\n]+):\d+(?::\d+)?:)?\s*(?:[+-]\s*)?(?:export\s+)?"
+    rf"[\"']?(?:{SECRET_KEY})[\"']?\s*[:=]\s*(?P<value>(?![\"']).*)$"
+)
+# Text that ends in a continuation token: a backslash, `||`, `&&`, `??`, one
+# of `+ * / % & | ^ ? ,`, a `:`, or the words `or` / `and` / `else`, optionally
+# followed by whitespace and a trailing `//` or `#` comment.
+CONTINUATION_OPERATOR_RE = re.compile(
+    r"(?i)(?:\\|\|\||&&|\?\?|[+*/%&|^?,]|\?|:|\bor\b|\band\b|\belse\b)\s*(?://.*|#.*)?$"
 )
 URL_LIKE_RE = re.compile(r"\b[A-Za-z][A-Za-z0-9+.-]*://[^\s]+")
 URL_SECRET_PARAM_RE = re.compile(rf"(?i)([?&#;](?:{SECRET_KEY})=)[^\s?&#;]+")
@@ -80,6 +125,43 @@ SAFE_UNQUOTED_VALUES = {
     "undefined",
 }
 IDENTIFIER_CHAIN_RE = re.compile(r"^[A-Za-z_$][A-Za-z0-9_$]*(?:\.[A-Za-z_$][A-Za-z0-9_$]*)+$")
+# Whole value is `os.getenv("NAME")` or `os.environ.get("NAME")` with a single
+# quoted name of 1-80 characters from [A-Za-z0-9_.-] and no second argument.
+SAFE_ENV_LOOKUP_CALL_RE = re.compile(r"^(?:os\.getenv|os\.environ\.get)\(\s*[\"'][A-Za-z0-9_.-]{1,80}[\"']\s*\)$")
+# Whole value is a `re.compile(...)` call with no `;` or newline inside.
+SAFE_RE_COMPILE_CALL_RE = re.compile(r"^re\.compile\([^;\n]*\)$")
+# Whole value is a call whose arguments are zero or more comma-separated
+# identifiers (no literals).
+SAFE_CODE_EXPRESSION_CALL_RE = re.compile(rf"^{CODE_IDENTIFIER}\(\s*(?:{CODE_IDENTIFIER}(?:\s*,\s*{CODE_IDENTIFIER})*)?\s*\)$")
+# Whole value is `<identifier>.get("key")`; the quoted key is captured as `key`.
+GETTER_CALL_RE = re.compile(rf"^{CODE_IDENTIFIER}\.get\(\s*[\"'](?P<key>[A-Za-z0-9_.-]{{1,80}})[\"']\s*\)$")
+# Zero-width position between an uppercase letter and an uppercase letter that
+# starts a capitalized word, e.g. between `I` and `K` in `APIKey`.
+CAMEL_ACRONYM_BOUNDARY_RE = re.compile(r"(?<=[A-Z])(?=[A-Z][a-z])")
+# Zero-width position between a lowercase letter or digit and an uppercase
+# letter, e.g. between `i` and `K` in `apiKey`.
+CAMEL_WORD_BOUNDARY_RE = re.compile(r"(?<=[a-z0-9])(?=[A-Z])")
+# Normalized (lowercase snake_case) key names accepted by is_safe_getter_key().
+SAFE_GETTER_KEY_NAMES = {
+    "access_key",
+    "access_token",
+    "api_key",
+    "apikey",
+    "auth",
+    "authorization",
+    "aws_access_key_id",
+    "aws_secret_access_key",
+    "aws_session_token",
+    "azure_client_secret",
+    "client_id",
+    "client_secret",
+    "cookie",
+    "credential",
+    "credentials",
+    "csrf",
+    "google_application_credentials",
+    "jwt",
+    "password",
+    "passwd",
+    "private_key",
+    "pwd",
+    "refresh_token",
+    "secret",
+    "session",
+    "session_id",
+    "sessionid",
+    "sid",
+    "token",
+}
 INLINE_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = (
     (re.compile(r"(?i)\bBearer\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
     (re.compile(r"(?i)\bBasic\s+[A-Za-z0-9._~+/=-]+"), "[REDACTED]"),
@@ -171,20 +253,33 @@ def cap_line(line: str, max_line_chars: int) -> tuple[str, bool]:
     return body[:keep] + marker + newline, True
+def normalize_getter_key(key: str) -> str:
+    """Return `key` as lowercase snake_case for lookup in SAFE_GETTER_KEY_NAMES.
+    An underscore is inserted at camel-case boundaries (`APIKey` -> `API_Key`,
+    `sessionId` -> `session_Id`), runs of `_`, `.` and `-` collapse to a single
+    `_`, leading/trailing underscores are stripped and the result is lowercased.
+    """
+    # Acronym boundary first, so `APIKey` splits as `API_Key`.
+    key = CAMEL_ACRONYM_BOUNDARY_RE.sub("_", key)
+    key = CAMEL_WORD_BOUNDARY_RE.sub("_", key)
+    # Treat `.` and `-` as separators equivalent to `_`.
+    key = re.sub(r"[_.-]+", "_", key)
+    return re.sub(r"_+", "_", key).strip("_").lower()
+def is_safe_getter_key(key: str) -> bool:
+    """Return True when the normalized form of `key` is in SAFE_GETTER_KEY_NAMES."""
+    return normalize_getter_key(key) in SAFE_GETTER_KEY_NAMES
 def should_redact_unquoted_secret_value(line: str, match: re.Match[str]) -> bool:
+    """Decide whether the unquoted value captured by `match` should be redacted.
+    `match` must expose `value` and `prefix` groups. Returns False when the
+    stripped value is empty, is one of SAFE_UNQUOTED_VALUES (compared in
+    lowercase), is a dotted identifier chain, is an environment lookup or
+    `re.compile(...)` call, or -- only when the prefix ends with whitespace
+    before its `:`/`=` -- is a call taking only identifier arguments or a
+    `.get("key")` call whose key passes is_safe_getter_key(). Returns True
+    otherwise. After this change `line` is no longer read by the body.
+    """
     value = match.group("value").strip()
+    prefix = match.group("prefix")
     if not value:
         return False
     if value.lower() in SAFE_UNQUOTED_VALUES:
         return False
     if IDENTIFIER_CHAIN_RE.match(value):
         return False
-    end = match.end("value")
-    if end < len(line) and line[end] in "([{":
-        # Likely a function call or expression (`api_key = os.getenv(...)`);
-        # preserve it so Claude can still reason about code flow.
+    if SAFE_ENV_LOOKUP_CALL_RE.match(value) or SAFE_RE_COMPILE_CALL_RE.match(value):
         return False
-    if any(ch in value for ch in "()[]{}"):
+    getter_match = GETTER_CALL_RE.match(value)
+    # Only assignments written with whitespace before the `:`/`=` qualify for
+    # the call-expression exemptions below.
+    if re.search(r"\s[:=]\s*$", prefix) and (
+        SAFE_CODE_EXPRESSION_CALL_RE.match(value)
+        or (getter_match is not None and is_safe_getter_key(getter_match.group("key")))
+    ):
         return False
     return True
@@ -218,6 +313,9 @@ def redact_secret_assignments(line: str) -> tuple[str, bool]:
         return f"{match.group('lead')}{match.group('prefix')}[REDACTED]"
     line = INLINE_QUOTED_SECRET_ASSIGNMENT_RE.sub(quoted_repl, line)
+    line = INLINE_UNQUOTED_FALLBACK_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
+    line = INLINE_UNQUOTED_CALL_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
+    line = INLINE_UNQUOTED_BRACKETED_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
     line = INLINE_UNQUOTED_SECRET_ASSIGNMENT_RE.sub(unquoted_repl, line)
     return line, redacted
@@ -257,6 +355,54 @@ def detect_multiline_secret_assignment(line: str) -> str | None:
     return None
+def expression_bracket_delta(text: str) -> int:
+    """Return the net bracket depth change of `text`, ignoring quoted spans.
+    Each `(`, `[` or `{` outside a string counts +1 and each `)`, `}` or `]`
+    counts -1. Single- and double-quoted strings are skipped, honouring
+    backslash escapes; an unterminated quote hides the rest of `text`.
+    """
+    delta = 0
+    # Quote character of the string currently being skipped, if any.
+    quote: str | None = None
+    # True when the previous character inside a string was a backslash.
+    escaped = False
+    for char in text:
+        if quote is not None:
+            if escaped:
+                escaped = False
+            elif char == "\\":
+                escaped = True
+            elif char == quote:
+                quote = None
+            # Characters inside a string never affect the bracket count.
+            continue
+        if char in {"'", '"'}:
+            quote = char
+        elif char in "([{":
+            delta += 1
+        elif char in ")}]":
+            delta -= 1
+    return delta
+def ends_with_continuation_operator(text: str) -> bool:
+    """Return True when right-stripped `text` matches CONTINUATION_OPERATOR_RE."""
+    return bool(CONTINUATION_OPERATOR_RE.search(text.rstrip()))
+def detect_multiline_secret_expression(line: str) -> int | None:
+    """Detect a secret assignment whose unquoted value continues past `line`.
+    Returns None when `line` has no match for
+    UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE or when the value looks complete.
+    Otherwise returns the bracket depth to carry forward: 0 when the value is
+    empty or ends with a continuation operator, or the positive count of
+    brackets left open on this line.
+    """
+    marker = UNQUOTED_MULTILINE_SECRET_ASSIGNMENT_RE.search(line)
+    if marker is None:
+        return None
+    value = marker.group("value").strip()
+    if not value:
+        # Nothing after the `:`/`=`: the value is expected on following lines.
+        return 0
+    delta = expression_bracket_delta(value)
+    if delta > 0:
+        return delta
+    if ends_with_continuation_operator(value):
+        # delta is <= 0 here, so this clamps the carried depth to 0.
+        return max(delta, 0)
+    return None
+def update_multiline_secret_expression_state(line: str, depth: int) -> int | None:
+    """Advance the carried bracket depth across one continuation `line`.
+    Returns None when the brackets are balanced again (depth clamped at 0)
+    and `line` does not end with a continuation operator; otherwise returns
+    the new depth.
+    """
+    next_depth = max(0, depth + expression_bracket_delta(line))
+    if next_depth == 0 and not ends_with_continuation_operator(line):
+        return None
+    return next_depth
 def private_key_state_after_line(line: str) -> bool | None:
     """Return updated private-key state for a line, or None when no marker appears."""
     if PRIVATE_KEY_BEGIN_RE.search(line):
@@ -277,6 +423,7 @@ class LineSanitizer:
         self.show_paths = show_paths
         self.in_private_key_block = False
         self.multiline_secret_quote: str | None = None
+        self.multiline_secret_expression_depth: int | None = None
         self.redactions = 0
     def sanitize(self, raw_line: str) -> tuple[str, bool]:
@@ -309,6 +456,12 @@ class LineSanitizer:
                 self.in_private_key_block = False
             return self._finish(diff_prefix + "[REDACTED PRIVATE KEY BLOCK]\n", redacted)
+        if self.multiline_secret_expression_depth is not None:
+            self.multiline_secret_expression_depth = update_multiline_secret_expression_state(
+                line, self.multiline_secret_expression_depth
+            )
+            return self._finish(diff_prefix + "[REDACTED MULTILINE SECRET]\n", True)
         multiline_quote = detect_multiline_secret_assignment(line)
         if multiline_quote is not None:
             self.multiline_secret_quote = multiline_quote
@@ -323,11 +476,21 @@ class LineSanitizer:
                 self.in_private_key_block = True
             return self._finish(diff_prefix + "[REDACTED PRIVATE KEY BLOCK]\n", redacted)
+        expression_depth = detect_multiline_secret_expression(line)
+        if expression_depth is not None:
+            self.multiline_secret_expression_depth = expression_depth
+            return self._finish(diff_prefix + "[REDACTED MULTILINE SECRET]\n", True)
         new_line, count = AUTH_HEADER_RE.subn(r"\g<prefix>[REDACTED]", line)
         if count:
             redacted = True
             line = new_line
+        new_line, count = COOKIE_HEADER_RE.subn(r"\g<prefix>[REDACTED]", line)
+        if count:
+            redacted = True
+            line = new_line
         line, assignment_redacted = redact_secret_assignments(line)
         if assignment_redacted:
             redacted = True

package/plugins/context-guard/bin/context-guard-setup CHANGED Viewed

@@ -2210,6 +2210,25 @@ def backup_existing(path: Path) -> Path | None:
     return backup
+def rollback_restore_guidance(settings_path: Path, backup_path: Path | None, original_existed: bool) -> str:
+    """Return human-readable restore guidance for a rollback record.
+    Three cases, checked in order: a backup path was recorded; no backup but
+    the target existed before setup; the target did not exist before setup.
+    Each message embeds the relevant path(s) and warns against generic shell
+    copy/delete commands on the target.
+    """
+    if backup_path is not None:
+        return (
+            "Restore only with a no-follow, symlink-safe copy that opens the backup and target parent "
+            "without following links, then atomically replaces the target; do not use generic shell "
+            f"copy/delete commands on this mutable target. Backup: {backup_path}. Target: {settings_path}."
+        )
+    if original_existed:
+        return (
+            "No backup path was recorded; inspect the target with no-follow file operations before any "
+            f"manual recovery. Do not use generic shell copy/delete commands on this mutable target: {settings_path}."
+        )
+    return (
+        "The target did not exist before setup. If cleanup is required, verify the target and every parent "
+        "without following symlinks and remove only the verified regular file; do not use generic shell "
+        f"delete commands on this mutable target: {settings_path}."
+    )
 def write_rollback_record(
     *,
     root: Path,
@@ -2237,11 +2256,8 @@ def write_rollback_record(
         "target_path": str(settings_path),
         "backup_path": str(backup_path) if backup_path else None,
         "original_existed": original_existed,
-        "restore": (
-            f"cp {shlex.quote(str(backup_path))} {shlex.quote(str(settings_path))}"
-            if backup_path
-            else f"rm -f {shlex.quote(str(settings_path))}"
-        ),
+        "restore": rollback_restore_guidance(settings_path, backup_path, original_existed),
+        "restore_requires_no_follow": True,
     }
     atomic_write(rollback_path, json.dumps(record, indent=2, sort_keys=True) + "\n", 0o600)
     return rollback_id, rollback_path

package/plugins/context-guard/bin/context-guard-tool-prune CHANGED Viewed

@@ -87,6 +87,8 @@ class Candidate:
     index: int
     score: float = 0.0
     rank: int = 0
+    schema_bytes: int = 0
+    parameter_terms: frozenset[str] | None = None
 def fail(message: str) -> NoReturn:
@@ -276,7 +278,15 @@ def tool_schema_from_dict(raw: dict[str, Any], *, fallback_name: str | None = No
         schema["description"] = description
     if server and "server" not in schema:
         schema["server"] = server
-    return Candidate(name=name, server=cap_text(server, MAX_LABEL_CHARS) if server else None, description=description, schema=schema, index=index)
+    return Candidate(
+        name=name,
+        server=cap_text(server, MAX_LABEL_CHARS) if server else None,
+        description=description,
+        schema=schema,
+        index=index,
+        schema_bytes=byte_len_json(schema),
+        parameter_terms=frozenset(terms(" ".join(collect_parameter_text(schema)))),
+    )
 def normalize_catalog(raw: Any) -> list[Candidate]:
@@ -362,7 +372,11 @@ def score_candidate(candidate: Candidate, query_terms: set[str]) -> float:
         return 0.0
     name_terms = terms(candidate.name)
     desc_terms = terms(candidate.description)
-    parameter_terms = terms(" ".join(collect_parameter_text(candidate.schema)))
+    parameter_terms = (
+        set(candidate.parameter_terms)
+        if candidate.parameter_terms is not None
+        else terms(" ".join(collect_parameter_text(candidate.schema)))
+    )
     score = 0.0
     score += 4.0 * len(query_terms & name_terms)
     score += 1.5 * len(query_terms & desc_terms)
@@ -379,14 +393,38 @@ def rank_candidates(candidates: list[Candidate], query: str) -> list[Candidate]:
     query_terms = terms(query)
     scored: list[Candidate] = []
     for cand in candidates:
-        scored.append(Candidate(cand.name, cand.server, cand.description, cand.schema, cand.index, score_candidate(cand, query_terms), 0))
+        scored.append(Candidate(
+            cand.name,
+            cand.server,
+            cand.description,
+            cand.schema,
+            cand.index,
+            score_candidate(cand, query_terms),
+            0,
+            schema_bytes=cand.schema_bytes,
+            parameter_terms=cand.parameter_terms,
+        ))
     scored.sort(key=lambda item: (-item.score, item.index))
     ranked: list[Candidate] = []
     for rank, cand in enumerate(scored, start=1):
-        ranked.append(Candidate(cand.name, cand.server, cand.description, cand.schema, cand.index, cand.score, rank))
+        ranked.append(Candidate(
+            cand.name,
+            cand.server,
+            cand.description,
+            cand.schema,
+            cand.index,
+            cand.score,
+            rank,
+            schema_bytes=cand.schema_bytes,
+            parameter_terms=cand.parameter_terms,
+        ))
     return ranked
+def candidate_schema_bytes(cand: Candidate) -> int:
+    """Return the stored `schema_bytes` of `cand` when positive, else recompute it.
+    The fallback covers candidates built without the field, whose default is 0.
+    """
+    return cand.schema_bytes if cand.schema_bytes > 0 else byte_len_json(cand.schema)
 def normalized_link_target(parent: Path, raw_target: str) -> Path:
     target = Path(raw_target)
     if not target.is_absolute():
@@ -707,7 +745,7 @@ def build_payload(receipt_id: str, ranked: list[Candidate], query: str, redactio
                 "description": cand.description,
                 "score": cand.score,
                 "rank": cand.rank,
-                "schema_bytes": byte_len_json(cand.schema),
+                "schema_bytes": candidate_schema_bytes(cand),
                 "schema": cand.schema,
             }
             for cand in ranked
@@ -739,7 +777,7 @@ def retrieval_command(receipt_id: str, *, store_dir: str, tool_name: str | None
 def selected_tool_record(cand: Candidate, receipt_id: str, budget_left: int, *, store_dir: str) -> tuple[dict[str, Any], int]:
-    schema_size = byte_len_json(cand.schema)
+    schema_size = candidate_schema_bytes(cand)
     record: dict[str, Any] = {
         "name": cand.name,
         "server": cand.server,
@@ -765,7 +803,7 @@ def deferred_tool_record(cand: Candidate, receipt_id: str, *, store_dir: str) ->
         "score": cand.score,
         "rank": cand.rank,
         "description": cand.description,
-        "schema_bytes": byte_len_json(cand.schema),
+        "schema_bytes": candidate_schema_bytes(cand),
         "reason": "deferred_after_core_top",
         "retrieval": retrieval_command(receipt_id, store_dir=store_dir, tool_name=cand.name),
     }
@@ -1008,9 +1046,9 @@ def defer_report(args: argparse.Namespace) -> str:
         store_dir=args.store_dir,
         namespace_top=namespace_top,
     )
-    all_schema_bytes = sum(byte_len_json(cand.schema) for cand in ranked)
-    listed_deferred_schema_bytes = sum(byte_len_json(cand.schema) for cand in deferred_candidates)
-    total_deferred_schema_bytes = sum(byte_len_json(cand.schema) for cand in ranked[core_top:])
+    all_schema_bytes = sum(candidate_schema_bytes(cand) for cand in ranked)
+    listed_deferred_schema_bytes = sum(candidate_schema_bytes(cand) for cand in deferred_candidates)
+    total_deferred_schema_bytes = sum(candidate_schema_bytes(cand) for cand in ranked[core_top:])
     tool_stub_report_bytes = byte_len_json(core_tools) + byte_len_json(deferred_tools)
     all_schema_tokens = proxy_tokens(all_schema_bytes)
     inline_core_schema_tokens = proxy_tokens(core_schema_bytes)

package/plugins/context-guard/bin/context-guard-trim-output CHANGED Viewed

@@ -20,6 +20,7 @@ import signal
 import stat
 import subprocess
 import sys
+import tempfile
 import threading
 import time
 import types
@@ -398,23 +399,75 @@ def store_sanitized_artifact_receipt(
     return receipt
-def capture_sanitized_artifact_line(
-    *,
-    capture_enabled: bool,
-    sanitized_line: str,
-    artifact_lines: list[str],
-    capture_bytes: int,
-    capture_overflow: bool,
-    max_bytes: int,
-) -> tuple[int, bool]:
-    if not capture_enabled or capture_overflow:
-        return capture_bytes, capture_overflow
-    source_bytes = len(sanitized_line.encode("utf-8", errors="replace"))
-    if capture_bytes + source_bytes <= max_bytes:
-        artifact_lines.append(sanitized_line)
-        return capture_bytes + source_bytes, False
-    artifact_lines.clear()
-    return capture_bytes, True
+class SanitizedArtifactCapture:
+    """Accumulate sanitized output lines in a temporary file, up to a byte cap.
+    The file is created lazily on the first accepted line. Once a line would
+    push the total past `max_bytes`, `overflow` is set and the file is closed;
+    the first OSError seen is kept in `error`. After either condition add()
+    is a no-op. Usable as a context manager; exiting closes the file.
+    """
+    def __init__(self, *, enabled: bool, max_bytes: int) -> None:
+        """Store the settings; no file is opened until add() accepts a line."""
+        self.enabled = enabled
+        self.max_bytes = max_bytes
+        # Number of encoded bytes written so far.
+        self.bytes = 0
+        self.overflow = False
+        # "<ExceptionClass>: <message>" for the first OSError, else None.
+        self.error: str | None = None
+        self._file: BinaryIO | None = None
+    def _ensure_file(self) -> BinaryIO | None:
+        """Return the backing file, creating it on first use; None on OSError."""
+        if self._file is not None:
+            return self._file
+        try:
+            self._file = tempfile.TemporaryFile("w+b")
+        except OSError as exc:
+            self._record_error(exc)
+            return None
+        return self._file
+    def _record_error(self, exc: OSError) -> None:
+        """Remember `exc` as the capture error unless one is already recorded."""
+        if self.error is None:
+            self.error = f"{exc.__class__.__name__}: {exc}"
+    def add(self, sanitized_line: str) -> None:
+        """Append one sanitized line, encoded as UTF-8 with errors replaced.
+        Does nothing when capture is disabled or has already overflowed or
+        failed. A line that would exceed `max_bytes` sets `overflow` and
+        closes the file instead of being written.
+        """
+        if not self.enabled or self.overflow or self.error:
+            return
+        encoded = sanitized_line.encode("utf-8", errors="replace")
+        source_bytes = len(encoded)
+        if self.bytes + source_bytes > self.max_bytes:
+            self.overflow = True
+            self.close()
+            return
+        target = self._ensure_file()
+        if target is None:
+            return
+        try:
+            target.write(encoded)
+        except OSError as exc:
+            self._record_error(exc)
+            self.close()
+            return
+        # Count the bytes only after the write succeeded.
+        self.bytes += source_bytes
+    def text(self) -> str:
+        """Return everything captured so far as text.
+        Returns "" when no file is open (nothing written yet, or closed) and
+        when reading fails; a read failure is recorded in `error`.
+        """
+        if self._file is None:
+            return ""
+        try:
+            self._file.flush()
+            self._file.seek(0)
+            return self._file.read().decode("utf-8", errors="replace")
+        except OSError as exc:
+            self._record_error(exc)
+            self.close()
+            return ""
+    def close(self) -> None:
+        """Close the backing file if open; safe to call more than once."""
+        target = self._file
+        # Drop the reference first so a failing close is not retried.
+        self._file = None
+        if target is not None:
+            try:
+                target.close()
+            except OSError as exc:
+                self._record_error(exc)
+    def __enter__(self) -> "SanitizedArtifactCapture":
+        """Return self for use in a `with` statement."""
+        return self
+    def __exit__(self, *exc: object) -> None:
+        """Close the backing file; exceptions are not suppressed."""
+        self.close()
 def unique_keep_order(lines: Iterable[str]) -> list[str]:
@@ -1512,11 +1565,10 @@ def main() -> int:
     runner_summary = RunnerFailureSummary(args.runner_summary_items, show_paths=args.show_paths)
     duplicate_tracker = DuplicateLineTracker()
     redacted_lines = 0
-    artifact_lines: list[str] = []
-    artifact_capture_bytes = 0
-    artifact_capture_overflow = False
+    artifact_capture = SanitizedArtifactCapture(enabled=args.artifact_receipt, max_bytes=args.artifact_max_bytes)
     if proc.stdout is None:
+        artifact_capture.close()
         print("trim_command_output.py: subprocess produced no stdout pipe", file=sys.stderr)
         return 1
     command_stream = TimedCommandStream(
@@ -1532,14 +1584,7 @@ def main() -> int:
         visible_source, redacted = line_sanitizer.sanitize(line)  # type: ignore[attr-defined]
         if redacted:
             redacted_lines += 1
-        artifact_capture_bytes, artifact_capture_overflow = capture_sanitized_artifact_line(
-            capture_enabled=args.artifact_receipt,
-            sanitized_line=visible_source,
-            artifact_lines=artifact_lines,
-            capture_bytes=artifact_capture_bytes,
-            capture_overflow=artifact_capture_overflow,
-            max_bytes=args.artifact_max_bytes,
-        )
+        artifact_capture.add(visible_source)
         visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
         any_line_capped = any_line_capped or line_capped
         visible_chars += len(visible_line)
@@ -1562,14 +1607,7 @@ def main() -> int:
         visible_source, redacted = line_sanitizer.sanitize(line)  # type: ignore[attr-defined]
         if redacted:
             redacted_lines += 1
-        artifact_capture_bytes, artifact_capture_overflow = capture_sanitized_artifact_line(
-            capture_enabled=args.artifact_receipt,
-            sanitized_line=visible_source,
-            artifact_lines=artifact_lines,
-            capture_bytes=artifact_capture_bytes,
-            capture_overflow=artifact_capture_overflow,
-            max_bytes=args.artifact_max_bytes,
-        )
+        artifact_capture.add(visible_source)
         visible_line, line_capped = cap_line(visible_source, args.max_line_chars)
         any_line_capped = any_line_capped or line_capped
         visible_chars += len(visible_line)
@@ -1602,32 +1640,49 @@ def main() -> int:
             duplicate_line_groups=duplicate_tracker.as_list(),
         )
         if args.artifact_receipt:
-            if artifact_capture_overflow:
+            if artifact_capture.overflow:
                 payload["artifact_receipt"] = {
                     "stored": False,
                     "error": "sanitized_output_exceeds_artifact_max_bytes",
                     "max_bytes": args.artifact_max_bytes,
                     "exact_reexpand": {"available": False, "reason": "artifact size cap exceeded"},
                 }
+            elif artifact_capture.error:
+                payload["artifact_receipt"] = {
+                    "stored": False,
+                    "error": "artifact_receipt_capture_unavailable",
+                    "reason": artifact_capture.error,
+                    "exact_reexpand": {"available": False, "reason": "artifact receipt capture unavailable"},
+                }
             else:
-                try:
-                    payload["artifact_receipt"] = store_sanitized_artifact_receipt(
-                        sanitized_text="".join(artifact_lines),
-                        command=command,
-                        args=args,
-                        line_sanitizer=line_sanitizer,
-                        redacted_lines=redacted_lines,
-                    )
-                except UnsafeAdjacentModuleError as exc:
-                    print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
-                    return 2
-                except Exception as exc:
+                sanitized_artifact_text = artifact_capture.text()
+                if artifact_capture.error:
                     payload["artifact_receipt"] = {
                         "stored": False,
-                        "error": "artifact_receipt_unavailable",
-                        "reason": f"{exc.__class__.__name__}: {exc}",
-                        "exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
+                        "error": "artifact_receipt_capture_unavailable",
+                        "reason": artifact_capture.error,
+                        "exact_reexpand": {"available": False, "reason": "artifact receipt capture unavailable"},
                     }
+                else:
+                    try:
+                        payload["artifact_receipt"] = store_sanitized_artifact_receipt(
+                            sanitized_text=sanitized_artifact_text,
+                            command=command,
+                            args=args,
+                            line_sanitizer=line_sanitizer,
+                            redacted_lines=redacted_lines,
+                        )
+                    except UnsafeAdjacentModuleError as exc:
+                        artifact_capture.close()
+                        print(f"context-guard-kit: unsafe adjacent helper: {exc}", file=sys.stderr)
+                        return 2
+                    except Exception as exc:
+                        payload["artifact_receipt"] = {
+                            "stored": False,
+                            "error": "artifact_receipt_unavailable",
+                            "reason": f"{exc.__class__.__name__}: {exc}",
+                            "exact_reexpand": {"available": False, "reason": "artifact receipt unavailable"},
+                        }
             artifact_receipt = payload.get("artifact_receipt")
             if isinstance(artifact_receipt, dict) and artifact_receipt.get("stored"):
                 next_queries = payload.setdefault("next_queries", [])
@@ -1642,6 +1697,7 @@ def main() -> int:
             sys.stdout.write(render_digest_json(payload, args.max_chars))
         else:
             sys.stdout.write(render_digest_markdown(payload, args.max_chars))
+        artifact_capture.close()
         return rc
     if total <= args.max_lines and visible_chars <= args.max_chars and not any_line_capped:
@@ -1689,6 +1745,7 @@ def main() -> int:
             output += "[context-guard-kit] final summary was capped by --max-chars.\n"
         sys.stdout.write(output)
+    artifact_capture.close()
     return rc