npm - @ictechgy/context-guard - Versions diffs - 0.4.11 → 0.4.12 - Mend

@ictechgy/context-guard 0.4.11 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/CHANGELOG.md +4 -0
package/README.ko.md +19 -12
package/README.md +11 -11
package/package.json +1 -1
package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
package/plugins/context-guard/bin/context-guard +42 -46
package/plugins/context-guard/bin/context-guard-audit +3 -3
package/plugins/context-guard/bin/context-guard-bench +136 -16
package/plugins/context-guard/bin/context-guard-cache-score +29 -2
package/plugins/context-guard/bin/context-guard-compress +89 -27
package/plugins/context-guard/bin/context-guard-filter +88 -18
package/plugins/context-guard/bin/context-guard-pack +28 -2
package/plugins/context-guard/bin/context-guard-read-symbol +27 -0
package/plugins/context-guard/bin/context-guard-sanitize-output +169 -6
package/plugins/context-guard/bin/context-guard-setup +21 -5
package/plugins/context-guard/bin/context-guard-tool-prune +48 -10
package/plugins/context-guard/bin/context-guard-trim-output +109 -52
package/plugins/context-guard/lib/context_guard_command_manifest_loader.py +123 -0
package/plugins/context-guard/lib/context_guard_commands.py +4 -1

package/plugins/context-guard/bin/context-guard-cache-score CHANGED

@@ -63,6 +63,7 @@ MAX_JSON_PATH_SEGMENT_CHARS = 64
 MAX_JSON_WALK_NODES = 10_000
 MAX_JSON_WALK_DEPTH = 64
 MAX_JSON_SHAPE_WARNINGS = 200
+MAX_JSON_CANONICAL_COMPARE_BYTES = 200_000
 SAFE_JSON_PATH_SEGMENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_-]{0,63}$")
 DYNAMIC_JSON_KEY_RE = re.compile(r"(?i)(request|trace|nonce|random|timestamp|created[_-]?at|updated[_-]?at|date)")
 SENSITIVE_JSON_KEY_RE = re.compile(
@@ -93,6 +94,22 @@ def json_bytes(data: Any, *, indent: int | None = None) -> str:
     return json.dumps(data, ensure_ascii=False, sort_keys=True, separators=(",", ":") if indent is None else None, indent=indent)
+def bounded_canonical_json(data: Any, *, max_bytes: int) -> str | None:
+    encoder = json.JSONEncoder(ensure_ascii=False, sort_keys=True, indent=2)
+    chunks: list[str] = []
+    size = 0
+    for chunk in encoder.iterencode(data):
+        size += byte_len_text(chunk)
+        if size > max_bytes:
+            return None
+        chunks.append(chunk)
+    size += 1
+    if size > max_bytes:
+        return None
+    chunks.append("\n")
+    return "".join(chunks)
 def json_path_child(path: str, key: object) -> str:
     """Return a JSON warning path segment without echoing sensitive/dynamic keys."""
     text = str(key)
@@ -335,8 +352,18 @@ def json_shape_warnings(text: str) -> tuple[str, list[dict[str, Any]]]:
     if not isinstance(data, (dict, list)):
         return "json-scalar", []
     warnings = _walk_json(data)
-    canonical = json_bytes(data, indent=2) + "\n"
-    if canonical != text:
+    input_bytes = byte_len_text(text)
+    canonical = bounded_canonical_json(data, max_bytes=MAX_JSON_CANONICAL_COMPARE_BYTES)
+    if canonical is None:
+        warnings.append({
+            "code": "json_canonical_check_skipped",
+            "path": "$",
+            "severity": "info",
+            "message": "JSON input is parseable but canonical formatting would exceed the comparison byte cap.",
+            "input_bytes": input_bytes,
+            "max_bytes": MAX_JSON_CANONICAL_COMPARE_BYTES,
+        })
+    elif canonical != text:
         warnings.append({
             "code": "json_not_canonical",
             "path": "$",

package/plugins/context-guard/bin/context-guard-compress CHANGED

@@ -20,10 +20,12 @@ import os
 from pathlib import Path
 import re
 import sys
-from typing import Callable
+from typing import Callable, Iterable
 DEFAULT_MAX_BYTES = 10_000_000
 MAX_MAX_BYTES = 100_000_000
+MAX_SEARCH_DEDUPE_KEYS = 50_000
+JSON_PARSE_FAILED = object()
 # 토큰 추정은 보수적 proxy 일 뿐이다(관측값 아님). 평균 ~4 chars/token 휴리스틱을 쓰되
 # 메타데이터에 measurement="estimated" 로 명시해 관측 토큰 수와 혼동되지 않게 한다.
 TOKEN_PROXY_CHARS_PER_TOKEN = 4
@@ -214,20 +216,57 @@ def token_proxy(text: str) -> int:
     return max(1, round(len(text) / TOKEN_PROXY_CHARS_PER_TOKEN))
+LINE_BOUNDARY_CHARS = {"\n", "\r", "\v", "\f", "\x1c", "\x1d", "\x1e", "\x85", "\u2028", "\u2029"}
+def iter_text_lines(text: str) -> Iterable[str]:
+    """Yield lines with str.splitlines() boundaries without building a line list."""
+    start = 0
+    index = 0
+    length = len(text)
+    while index < length:
+        char = text[index]
+        if char == "\r" and index + 1 < length and text[index + 1] == "\n":
+            yield text[start:index]
+            index += 2
+            start = index
+            continue
+        if char in LINE_BOUNDARY_CHARS:
+            yield text[start:index]
+            index += 1
+            start = index
+            continue
+        index += 1
+    if start < length:
+        yield text[start:]
+def sample_text_lines(text: str, limit: int) -> list[str]:
+    sample: list[str] = []
+    for line in iter_text_lines(text):
+        sample.append(line)
+        if len(sample) >= limit:
+            break
+    return sample
 def classify_content(text: str) -> str:
     """Best-effort content classification into one of CONTENT_TYPES.
-    Order matters: JSON and diff have the strongest unambiguous signals and are
-    checked first; search/log/code are sampled over the first lines; prose is the
-    conservative default so unknown text is never over-compressed.
+    Order matters: valid JSON and diff have the strongest unambiguous signals;
+    search/log/code are sampled over the first lines; prose is the conservative
+    default so unknown text is never over-compressed.
     """
     stripped = text.strip()
     if not stripped:
         return "prose"
     if _looks_like_json(stripped):
         return "json"
-    lines = stripped.splitlines()
-    sample = lines[:200]
+    return classify_non_json_content(stripped)
+def classify_non_json_content(stripped: str) -> str:
+    sample = sample_text_lines(stripped, 200)
     if _looks_like_diff(sample):
         return "diff"
     if _looks_like_search(sample):
@@ -355,14 +394,17 @@ def build_readable_compression_metadata(
     }
-def _looks_like_json(stripped: str) -> bool:
-    if stripped[0] not in "{[":
-        return False
+def parse_json_candidate(stripped: str) -> object:
+    if not stripped or stripped[0] not in "{[":
+        return JSON_PARSE_FAILED
     try:
-        json.loads(stripped)
+        return json.loads(stripped)
     except (ValueError, RecursionError):
-        return False
-    return True
+        return JSON_PARSE_FAILED
+def _looks_like_json(stripped: str) -> bool:
+    return parse_json_candidate(stripped) is not JSON_PARSE_FAILED
 def _ratio(matches: int, total: int, threshold: float) -> bool:
@@ -390,15 +432,7 @@ def _looks_like_code(sample: list[str]) -> bool:
     return _ratio(matches, len(sample), 0.25)
-def compress_json(text: str) -> tuple[str, dict[str, object]]:
-    """Re-serialize JSON without insignificant whitespace (data-preserving)."""
-    try:
-        parsed = json.loads(text)
-    except (ValueError, RecursionError):
-        # 파싱 불가 시 무손실을 깨지 않도록 prose 전략으로 안전하게 폴백한다.
-        compressed, detail = compress_prose(text)
-        detail["fallback_from"] = "json"
-        return compressed, detail
+def compress_parsed_json(text: str, parsed: object) -> tuple[str, dict[str, object]]:
     compact = json.dumps(parsed, ensure_ascii=False, separators=(",", ":"))
     if not text.endswith("\n"):
         trailing = ""
@@ -407,6 +441,17 @@ def compress_json(text: str) -> tuple[str, dict[str, object]]:
     return compact + trailing, {"strategy": "json-compact", "lossy": False, "json_parse_ok": True}
+def compress_json(text: str) -> tuple[str, dict[str, object]]:
+    """Re-serialize JSON without insignificant whitespace (data-preserving)."""
+    parsed = parse_json_candidate(text.strip())
+    if parsed is JSON_PARSE_FAILED:
+        # 파싱 불가 시 무손실을 깨지 않도록 prose 전략으로 안전하게 폴백한다.
+        compressed, detail = compress_prose(text)
+        detail["fallback_from"] = "json"
+        return compressed, detail
+    return compress_parsed_json(text, parsed)
 def compress_diff(text: str) -> tuple[str, dict[str, object]]:
     """Keep file headers, hunk headers, and +/- changes; collapse context runs."""
     out: list[str] = []
@@ -464,18 +509,28 @@ def compress_log(text: str) -> tuple[str, dict[str, object]]:
 def compress_search(text: str) -> tuple[str, dict[str, object]]:
-    """Drop exact-duplicate match lines while preserving first-seen order."""
+    """Drop exact-duplicate match lines while preserving first-seen order with bounded keys."""
     out: list[str] = []
     seen: set[str] = set()
     dropped = 0
-    for line in text.splitlines():
+    dedupe_limit_reached = False
+    for line in iter_text_lines(text):
         key = line.rstrip()
         if key in seen:
             dropped += 1
             continue
-        seen.add(key)
+        if len(seen) < MAX_SEARCH_DEDUPE_KEYS:
+            seen.add(key)
+        else:
+            dedupe_limit_reached = True
         out.append(line)
-    return _join_lines(out, text), {"strategy": "search-dedupe", "lossy": dropped > 0, "duplicate_lines_dropped": dropped}
+    return _join_lines(out, text), {
+        "strategy": "search-dedupe",
+        "lossy": dropped > 0,
+        "duplicate_lines_dropped": dropped,
+        "dedupe_key_limit": MAX_SEARCH_DEDUPE_KEYS,
+        "dedupe_key_limit_reached": dedupe_limit_reached,
+    }
 def compress_code(text: str) -> tuple[str, dict[str, object]]:
@@ -689,14 +744,21 @@ def compress_text(
     the compressed body, or the metadata that follows.
     """
     sanitized, redacted_lines = sanitize_text(text, show_paths=show_paths)
+    parsed_json: object = JSON_PARSE_FAILED
     if forced_type is not None:
         content_type, type_source = forced_type, "override"
     else:
-        content_type, type_source = classify_content(sanitized), "detected"
+        stripped = sanitized.strip()
+        parsed_json = parse_json_candidate(stripped)
+        content_type = "json" if parsed_json is not JSON_PARSE_FAILED else classify_non_json_content(stripped)
+        type_source = "detected"
     if compression_mode == "readable" and content_type == "prose":
         compressed, strategy_detail = compress_prose_readable(sanitized)
     else:
-        compressed, strategy_detail = STRATEGIES[content_type](sanitized)
+        if content_type == "json" and parsed_json is not JSON_PARSE_FAILED:
+            compressed, strategy_detail = compress_parsed_json(sanitized, parsed_json)
+        else:
+            compressed, strategy_detail = STRATEGIES[content_type](sanitized)
         if compression_mode == "readable":
             strategy_detail["readable_mode"] = True
             strategy_detail["readable_strategy"] = "sentence-window-preview"

package/plugins/context-guard/bin/context-guard-filter CHANGED

@@ -9,6 +9,8 @@ from __future__ import annotations
 import argparse
 import codecs
+import collections
+import itertools
 from dataclasses import dataclass
 import json
 import os
@@ -455,26 +457,94 @@ def cap_line(line: str, max_chars: int) -> str:
     return line[: max(0, max_chars - len(marker) - len(suffix))] + marker + suffix
-def select_lines(lines: list[str], flt: CompiledFilter, max_line_chars: int) -> list[str]:
-    selected = [cap_line(line, max_line_chars) for line in lines]
-    if flt.include_regex:
-        selected = [line for line in selected if any(pattern.search(line) for pattern in flt.include_regex)]
-    if flt.exclude_regex:
-        selected = [line for line in selected if not any(pattern.search(line) for pattern in flt.exclude_regex)]
+LINE_BOUNDARY_CHARS = {"\n", "\r", "\v", "\f", "\x1c", "\x1d", "\x1e", "\x85", "\u2028", "\u2029"}
+@dataclass
+class LineSelection:
+    lines: list[str]
+    input_lines: int
+    input_complete: bool
+def iter_text_lines_keepends(text: str) -> Iterable[str]:
+    """Yield lines with Python splitlines(keepends=True) boundaries without a list."""
+    start = 0
+    index = 0
+    length = len(text)
+    while index < length:
+        char = text[index]
+        if char == "\r" and index + 1 < length and text[index + 1] == "\n":
+            yield text[start : index + 2]
+            index += 2
+            start = index
+            continue
+        if char in LINE_BOUNDARY_CHARS:
+            yield text[start : index + 1]
+            index += 1
+            start = index
+            continue
+        index += 1
+    if start < length:
+        yield text[start:]
+def line_matches_filter(line: str, flt: CompiledFilter) -> bool:
+    if flt.include_regex and not any(pattern.search(line) for pattern in flt.include_regex):
+        return False
+    if flt.exclude_regex and any(pattern.search(line) for pattern in flt.exclude_regex):
+        return False
+    return True
+def select_lines_with_stats(lines: Iterable[str], flt: CompiledFilter, max_line_chars: int) -> LineSelection:
+    source_count = 0
+    matched_count = 0
+    input_complete = True
     if flt.head_lines is not None or flt.tail_lines is not None:
         head_n = flt.head_lines if flt.head_lines is not None else 0
         tail_n = flt.tail_lines if flt.tail_lines is not None else 0
-        head = selected[:head_n] if head_n else []
-        tail = selected[-tail_n:] if tail_n else []
-        if head and tail:
-            seen_head_count = len(head)
-            tail = tail[max(0, seen_head_count + len(tail) - len(selected)):]
-        selected = head + tail
+        head: list[str] = []
+        tail: collections.deque[str] = collections.deque(maxlen=tail_n)
+        for source_line in lines:
+            source_count += 1
+            line = cap_line(source_line, max_line_chars)
+            if not line_matches_filter(line, flt):
+                continue
+            matched_count += 1
+            if head_n and len(head) < head_n:
+                head.append(line)
+            if tail_n:
+                tail.append(line)
+            elif head_n and len(head) >= head_n:
+                input_complete = False
+                break
+        tail_list = list(tail)
+        if head and tail_list:
+            tail_list = tail_list[max(0, len(head) + len(tail_list) - matched_count):]
+        selected = head + tail_list
+    else:
+        limit = min(flt.max_lines if flt.max_lines is not None else MAX_EMIT_LINES, MAX_EMIT_LINES)
+        selected = []
+        for source_line in lines:
+            source_count += 1
+            line = cap_line(source_line, max_line_chars)
+            if not line_matches_filter(line, flt):
+                continue
+            matched_count += 1
+            selected.append(line)
+            if len(selected) >= limit:
+                input_complete = False
+                break
     if flt.max_lines is not None and len(selected) > flt.max_lines:
         selected = selected[:flt.max_lines]
     if len(selected) > MAX_EMIT_LINES:
         selected = selected[:MAX_EMIT_LINES]
-    return selected
+    return LineSelection(selected, source_count, input_complete)
+def select_lines(lines: Iterable[str], flt: CompiledFilter, max_line_chars: int) -> list[str]:
+    return select_lines_with_stats(lines, flt, max_line_chars).lines
 def validation_payload(valid: bool, errors: list[str], count: int = 0) -> dict[str, Any]:
@@ -720,7 +790,6 @@ def cmd_run(args: argparse.Namespace) -> int:
     filters, errors = load_filters(Path(args.config).expanduser())
     result = run_command(command, timeout_seconds, max_capture)
     rc = result.returncode
-    output = result.stdout_text + result.stderr_text
     protected_nonzero = rc != 0 and is_protected_command(command)
     report: dict[str, Any] = {"tool": TOOL_NAME, "schema_version": SCHEMA_VERSION, "mode": "run", "command_exit_code": rc, "decision": "passthrough", "reason": "unclassified", "protected_nonzero": protected_nonzero}
     if result.timed_out:
@@ -746,18 +815,19 @@ def cmd_run(args: argparse.Namespace) -> int:
             report["filter_id"] = matched.id
         else:
             try:
-                lines = output.splitlines(keepends=True)
-                filtered = select_lines(lines, matched, max_line_chars)
+                source_lines = itertools.chain(iter_text_lines_keepends(result.stdout_text), iter_text_lines_keepends(result.stderr_text))
+                selection = select_lines_with_stats(source_lines, matched, max_line_chars)
+                filtered = selection.lines
             except re.error as exc:
                 report["reason"] = f"filter-error:{compact(str(exc), 80)}"
                 report["filter_id"] = matched.id
             else:
-                if output and not filtered:
+                if (result.stdout_text or result.stderr_text) and not filtered:
                     report["reason"] = "empty-output-fallback"
                     report["filter_id"] = matched.id
                 else:
                     sys.stdout.write("".join(filtered))
-                    report.update({"decision": "filtered", "reason": "matched", "filter_id": matched.id, "input_lines": len(lines), "output_lines": len(filtered)})
+                    report.update({"decision": "filtered", "reason": "matched", "filter_id": matched.id, "input_lines": selection.input_lines, "input_lines_complete": selection.input_complete, "output_lines": len(filtered)})
                     emit_run_report(args, report)
                     return rc
     if not result.passthrough_emitted:

package/plugins/context-guard/bin/context-guard-pack CHANGED

@@ -957,6 +957,29 @@ def metadata_size(data: dict[str, Any]) -> int:
     return len(json.dumps(data, ensure_ascii=False, indent=2, sort_keys=True).encode("utf-8", errors="replace")) + 1
+def receipt_working_copy(data: dict[str, Any]) -> tuple[dict[str, Any], bool]:
+    """Copy receipt metadata without deep-copying or serializing an oversized pack body.
+    The pack body is already an immutable string in normal builds and stdout remains
+    authoritative for it.  When it cannot possibly fit under the receipt cap by
+    itself, omit it before the first receipt-size probe so capping work only touches
+    metadata previews.
+    """
+    receipt: dict[str, Any] = {}
+    pack_omitted = False
+    for key, value in data.items():
+        if key == "pack" and isinstance(value, str):
+            if len(value.encode("utf-8", errors="replace")) > MAX_RECEIPT_BYTES:
+                pack_omitted = True
+                continue
+            receipt[key] = value
+            continue
+        receipt[key] = copy.deepcopy(value)
+    if pack_omitted:
+        receipt["pack_omitted_from_receipt"] = True
+    return receipt, pack_omitted
 def artifact_failure(error: str, *, bytes_count: int = 0, capped: bool = False) -> dict[str, Any]:
     return {
         "stored": False,
@@ -1113,8 +1136,11 @@ def finalize_receipt_size(receipt: dict[str, Any]) -> int:
 def shrink_receipt_for_write(data: dict[str, Any]) -> tuple[dict[str, Any], bool]:
-    receipt = copy.deepcopy(data)
-    capped = False
+    receipt, pack_omitted = receipt_working_copy(data)
+    capped = pack_omitted
+    if pack_omitted:
+        receipt.setdefault("artifact", {})["capped"] = True
+        receipt.setdefault("artifact", {})["cap_bytes"] = MAX_RECEIPT_BYTES
     if metadata_size(receipt) <= MAX_RECEIPT_BYTES:
         return receipt, capped
     capped = True

package/plugins/context-guard/bin/context-guard-read-symbol CHANGED

@@ -11,6 +11,7 @@ import argparse
 import ast
 import errno
 import hashlib
+import importlib.machinery
 import importlib.util
 import json
 import os
@@ -39,8 +40,27 @@ def _load_hook_secret_patterns():
     raise ImportError("hook_secret_patterns.py not found in " + ", ".join(searched))
+def _load_sanitize_output():
+    searched = []
+    for helper_path in (SCRIPT_DIR / "sanitize_output.py", SCRIPT_DIR / "context-guard-sanitize-output"):
+        searched.append(str(helper_path))
+        if not helper_path.is_file():
+            continue
+        loader = importlib.machinery.SourceFileLoader("_claude_token_sanitize_output", str(helper_path))
+        spec = importlib.util.spec_from_loader(loader.name, loader)
+        if spec is None:
+            continue
+        module = importlib.util.module_from_spec(spec)
+        loader.exec_module(module)
+        return module
+    raise ImportError("sanitize_output helper not found in " + ", ".join(searched))
 _hook_secret_patterns = _load_hook_secret_patterns()
+_sanitize_output = _load_sanitize_output()
 hook_label_has_sensitive_evidence = _hook_secret_patterns.hook_label_has_sensitive_evidence
+redact_sensitive_hook_text = _hook_secret_patterns.redact_sensitive_hook_text
+LineSanitizer = _sanitize_output.LineSanitizer
 DEFAULT_CONTEXT_LINES = 3
 DEFAULT_MAX_CHARS = 16_000
@@ -391,6 +411,11 @@ def strip_line_for_brace_count(line: str, in_block_comment: bool = False) -> tup
     return "".join(output), in_block_comment
+def redact_symbol_content(content: str) -> str:
+    sanitizer = LineSanitizer(show_paths=True)
+    return "".join(sanitizer.sanitize(line)[0] for line in content.splitlines(keepends=True))
 def find_symbol_slice(path: Path, symbol: str, context: int, max_chars: int, show_paths: bool) -> SymbolSlice | None:
     text, scan_truncated = read_text_bounded(path)
     lines = text.splitlines(keepends=True)
@@ -409,6 +434,8 @@ def find_symbol_slice(path: Path, symbol: str, context: int, max_chars: int, sho
     start_with_context = max(0, start - max(0, context))
     end_with_context = min(len(lines), end + max(0, context))
     content = "".join(lines[start_with_context:end_with_context])
+    content = redact_symbol_content(content)
+    content = redact_sensitive_hook_text(content, "[REDACTED]")
     capped = False
     if max_chars > 0 and len(content) > max_chars:
         marker = f"\n[context-guard-kit] symbol slice capped: {len(content)} chars total\n"