@ictechgy/context-guard 0.4.9 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +28 -0
- package/README.ko.md +59 -31
- package/README.md +85 -36
- package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
- package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
- package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
- package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
- package/docs/benchmark-workflow-examples.md +3 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
- package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
- package/docs/distribution.md +10 -7
- package/docs/experimental-benchmark-fixtures.md +30 -6
- package/package.json +4 -6
- package/packaging/homebrew/context-guard.rb.template +1 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +20 -14
- package/plugins/context-guard/README.md +26 -17
- package/plugins/context-guard/bin/context-guard +147 -25
- package/plugins/context-guard/bin/context-guard-artifact +884 -79
- package/plugins/context-guard/bin/context-guard-audit +33 -2
- package/plugins/context-guard/bin/context-guard-bench +1542 -31
- package/plugins/context-guard/bin/context-guard-cache-score +665 -0
- package/plugins/context-guard/bin/context-guard-compress +146 -1
- package/plugins/context-guard/bin/context-guard-cost +790 -6
- package/plugins/context-guard/bin/context-guard-experiments +463 -26
- package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
- package/plugins/context-guard/bin/context-guard-filter +163 -7
- package/plugins/context-guard/bin/context-guard-guard-read +3 -0
- package/plugins/context-guard/bin/context-guard-pack +892 -49
- package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
- package/plugins/context-guard/bin/context-guard-setup +165 -31
- package/plugins/context-guard/bin/context-guard-statusline +490 -283
- package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
- package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
- package/plugins/context-guard/bin/context-guard-trim-output +288 -41
- package/plugins/context-guard/brief/README.md +5 -5
- package/plugins/context-guard/lib/context_guard_commands.py +230 -0
- package/plugins/context-guard/skills/setup/SKILL.md +1 -0
- package/context-guard-kit/README.md +0 -91
- package/context-guard-kit/benchmark_runner.py +0 -2401
- package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
- package/context-guard-kit/context_compress.py +0 -695
- package/context-guard-kit/context_escrow.py +0 -935
- package/context-guard-kit/context_filter.py +0 -637
- package/context-guard-kit/context_guard_cli.py +0 -325
- package/context-guard-kit/context_guard_diet.py +0 -1711
- package/context-guard-kit/context_pack.py +0 -2713
- package/context-guard-kit/cost_guard.py +0 -2349
- package/context-guard-kit/experimental_registry.py +0 -4348
- package/context-guard-kit/failed_attempt_nudge.py +0 -567
- package/context-guard-kit/guard_large_read.py +0 -690
- package/context-guard-kit/hook_secret_patterns.py +0 -43
- package/context-guard-kit/read_symbol.py +0 -483
- package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
- package/context-guard-kit/sanitize_output.py +0 -725
- package/context-guard-kit/settings.example.json +0 -67
- package/context-guard-kit/setup_wizard.py +0 -2515
- package/context-guard-kit/statusline.sh +0 -362
- package/context-guard-kit/statusline_merged.sh +0 -157
- package/context-guard-kit/tool_schema_pruner.py +0 -837
- package/context-guard-kit/trim_command_output.py +0 -1449
|
@@ -28,6 +28,9 @@ MAX_MAX_BYTES = 100_000_000
|
|
|
28
28
|
# 메타데이터에 measurement="estimated" 로 명시해 관측 토큰 수와 혼동되지 않게 한다.
|
|
29
29
|
TOKEN_PROXY_CHARS_PER_TOKEN = 4
|
|
30
30
|
CONTENT_TYPES = ("json", "diff", "log", "search", "code", "prose")
|
|
31
|
+
# Compression policies selectable through --mode; "conservative" is the default.
COMPRESSION_MODES = (
    "conservative",
    "readable",
)
# Schema identifier written into the readable-compression metadata block.
READABLE_COMPRESSION_SCHEMA_VERSION = "contextguard.compress-readable.v1"
# Prose with this many sentences or fewer is left as-is by readable mode.
READABLE_SENTENCE_LIMIT = 5
|
|
31
34
|
|
|
32
35
|
# diff 구조 라인(파일 헤더/헝크/변경)을 식별한다. 나머지 context 라인은 접어서 줄인다.
|
|
33
36
|
DIFF_FILE_HEADER_RE = re.compile(r"^(diff --git |index [0-9a-f]|--- |\+\+\+ |rename |similarity |new file|deleted file)")
|
|
@@ -93,6 +96,20 @@ PROTECTED_DENIED_TRANSFORMS = (
|
|
|
93
96
|
"path_rewrite",
|
|
94
97
|
"quoted_literal_rewrite",
|
|
95
98
|
)
|
|
99
|
+
# Protected-zone count keys that, when present with a positive count, are
# reported as blocking signals for readable mode.
READABLE_BLOCKING_PROTECTED_KEYS = (
    "code_fence", "diff", "hash", "path",
    "stack_frame", "numeric_constant", "quoted_string", "json_key",
)
# Case-insensitive, word-bounded phrases that read like instructions aimed at a
# model; each match is counted as a "prompt_like_instruction" blocking signal.
PROMPT_LIKE_INSTRUCTION_RE = re.compile(
    r"(?i)\b(ignore (?:all )?(?:previous|above) instructions|system prompt|developer message|"
    r"you are chatgpt|act as (?:a|an)|do not follow|BEGIN (?:SYSTEM|DEVELOPER)|END (?:SYSTEM|DEVELOPER))\b"
)
|
|
96
113
|
|
|
97
114
|
|
|
98
115
|
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
|
|
@@ -301,6 +318,43 @@ def build_transform_policy(protected_policy: dict[str, object]) -> dict[str, obj
|
|
|
301
318
|
}
|
|
302
319
|
|
|
303
320
|
|
|
321
|
+
def build_readable_compression_metadata(
    *,
    content_type: str,
    strategy_detail: dict[str, object],
    lossy: bool,
) -> dict[str, object]:
    """Build the ``readable_compression`` section of a compress receipt.

    Args:
        content_type: Content type label copied into the result.
        strategy_detail: Strategy detail mapping; the ``readable_*`` keys and
            ``strategy`` are read from it with ``.get``.
        lossy: Whether the applied transform dropped content.

    Returns:
        A mapping carrying the schema version, whether the readable preview
        was applied, the blocking signal counts, source-verification guidance
        and the fixed claim-boundary flags.
    """
    signal_counts = strategy_detail.get("readable_blocking_signals", {})
    if not isinstance(signal_counts, dict):
        # Anything other than a mapping is reported as "no signals".
        signal_counts = {}
    was_applied = bool(strategy_detail.get("readable_applied"))

    # An exact fallback is required as soon as content was dropped or the
    # readable preview replaced the text.
    source_verification: dict[str, object] = {
        "exact_fallback_required": bool(lossy) or was_applied,
        "recommended_command": "context-guard-artifact store --command 'readable-mode exact fallback' --json < sanitized-prose.txt",
        "verify_before_edit_or_claim": True,
    }
    claim_boundary: dict[str, object] = {
        "deterministic_local_only": True,
        "no_network_model_embedding_or_reranker": True,
        "no_generated_semantic_rewrite": True,
        "byte_and_token_counts_are_local_proxies": True,
        "hosted_api_token_or_cost_savings_claim_allowed": False,
    }

    metadata: dict[str, object] = {
        "schema_version": READABLE_COMPRESSION_SCHEMA_VERSION,
        "mode": "readable",
        "preview_only": True,
        "applied": was_applied,
        "content_type": content_type,
        "strategy": strategy_detail.get("strategy"),
        "readable_strategy": strategy_detail.get("readable_strategy", "structural-preview"),
        "omitted_reason": strategy_detail.get("readable_omitted_reason"),
        "blocking_signal_counts": signal_counts,
        "protected_spans_stored": False,
        "source_verification": source_verification,
        "claim_boundary": claim_boundary,
    }
    return metadata
|
|
356
|
+
|
|
357
|
+
|
|
304
358
|
def _looks_like_json(stripped: str) -> bool:
|
|
305
359
|
if stripped[0] not in "{[":
|
|
306
360
|
return False
|
|
@@ -434,6 +488,64 @@ def compress_prose(text: str) -> tuple[str, dict[str, object]]:
|
|
|
434
488
|
return _whitespace_normalize(text, strategy="prose-whitespace", max_consecutive_blank=1)
|
|
435
489
|
|
|
436
490
|
|
|
491
|
+
def readable_blocking_signal_counts(text: str, content_type: str) -> dict[str, int]:
    """Collect the signals that block the readable sentence-window preview.

    Args:
        text: Text to inspect.
        content_type: Content type label; anything other than ``"prose"``
            adds a ``non_prose_content`` signal.

    Returns:
        A mapping of signal name to count. It holds every key from
        ``READABLE_BLOCKING_PROTECTED_KEYS`` whose protected-zone count is
        positive, plus ``prompt_like_instruction`` when the prompt-like regex
        matches. An empty mapping means nothing blocks readable mode.
    """
    zone_counts = protected_zone_counts(text)
    signals: dict[str, int] = {}
    for key in READABLE_BLOCKING_PROTECTED_KEYS:
        # Missing or falsy counts collapse to 0 and are left out.
        hits = int(zone_counts.get(key, 0) or 0)
        if hits > 0:
            signals[key] = hits

    instruction_hits = len(PROMPT_LIKE_INSTRUCTION_RE.findall(text))
    if instruction_hits:
        signals["prompt_like_instruction"] = instruction_hits

    if content_type != "prose":
        signals["non_prose_content"] = 1
    return signals
|
|
504
|
+
|
|
505
|
+
|
|
506
|
+
def split_prose_sentences(text: str) -> list[str]:
    """Split prose into sentences after collapsing all whitespace runs.

    A boundary is any whitespace that directly follows ``.``, ``!`` or ``?``.
    Empty or whitespace-only input yields an empty list.
    """
    words = text.split()
    if not words:
        return []
    flattened = " ".join(words)
    pieces = (piece.strip() for piece in re.split(r"(?<=[.!?])\s+", flattened))
    return [piece for piece in pieces if piece]
|
|
512
|
+
|
|
513
|
+
|
|
514
|
+
def compress_prose_readable(text: str) -> tuple[str, dict[str, object]]:
    """Produce the opt-in readable sentence-window preview for prose.

    The text is first normalized with ``compress_prose``. The normalized text
    is returned unchanged, with ``readable_applied`` set to False and a
    ``readable_omitted_reason``, when any blocking signal is found or when it
    has ``READABLE_SENTENCE_LIMIT`` sentences or fewer. Otherwise the preview
    keeps the first three sentences and the last one, with a marker sentence
    in between stating how many sentences were omitted.

    Returns:
        ``(output_text, detail)``, where ``detail`` extends the
        ``compress_prose`` detail with the ``readable_*`` fields.
    """
    normalized, base_detail = compress_prose(text)
    blocking = readable_blocking_signal_counts(normalized, "prose")
    detail: dict[str, object] = {
        **base_detail,
        "readable_mode": True,
        "readable_strategy": "sentence-window-preview",
        "readable_blocking_signals": blocking,
    }

    # Decide whether the preview must be skipped; sentences are only split
    # when no blocking signal was found.
    sentences: list[str] = []
    skip_reason = "protected_or_prompt_like_signal" if blocking else None
    if skip_reason is None:
        sentences = split_prose_sentences(normalized)
        if len(sentences) <= READABLE_SENTENCE_LIMIT:
            skip_reason = "short_prose"
    if skip_reason is not None:
        detail["readable_applied"] = False
        detail["readable_omitted_reason"] = skip_reason
        return normalized, detail

    head, tail = sentences[:3], sentences[-1:]
    included_count = len(head) + len(tail)
    omitted_count = len(sentences) - included_count
    marker = f"[context-guard-readable] {omitted_count} sentence(s) omitted; retrieve exact source before relying on omitted detail."
    preview = " ".join([*head, marker, *tail])
    if text.endswith("\n"):
        # Keep the input's trailing newline on the preview.
        preview += "\n"

    detail["strategy"] = "prose-readable-window"
    detail["lossy"] = True
    detail["readable_applied"] = True
    detail["sentences_original"] = len(sentences)
    detail["sentences_included"] = included_count
    detail["sentences_omitted"] = omitted_count
    return preview, detail
|
|
547
|
+
|
|
548
|
+
|
|
437
549
|
def _whitespace_normalize(text: str, *, strategy: str, max_consecutive_blank: int) -> tuple[str, dict[str, object]]:
|
|
438
550
|
out: list[str] = []
|
|
439
551
|
blank_run = 0
|
|
@@ -482,6 +594,7 @@ def build_metadata(
|
|
|
482
594
|
input_bytes: int,
|
|
483
595
|
max_bytes: int,
|
|
484
596
|
protected_policy_enabled: bool = False,
|
|
597
|
+
compression_mode: str = "conservative",
|
|
485
598
|
) -> dict[str, object]:
|
|
486
599
|
"""Assemble the compress receipt: observed byte/line counts plus an estimated token proxy.
|
|
487
600
|
|
|
@@ -550,6 +663,12 @@ def build_metadata(
|
|
|
550
663
|
"Protected lossy structural transform: store the full sanitized text with "
|
|
551
664
|
"`context-guard-artifact store` and retrieve exact slices before relying on omitted content."
|
|
552
665
|
)
|
|
666
|
+
if compression_mode == "readable":
|
|
667
|
+
metadata["readable_compression"] = build_readable_compression_metadata(
|
|
668
|
+
content_type=content_type,
|
|
669
|
+
strategy_detail=strategy_detail,
|
|
670
|
+
lossy=lossy,
|
|
671
|
+
)
|
|
553
672
|
return metadata
|
|
554
673
|
|
|
555
674
|
|
|
@@ -562,6 +681,7 @@ def compress_text(
|
|
|
562
681
|
input_bytes: int,
|
|
563
682
|
max_bytes: int,
|
|
564
683
|
protected_policy_enabled: bool = False,
|
|
684
|
+
compression_mode: str = "conservative",
|
|
565
685
|
) -> tuple[str, dict[str, object]]:
|
|
566
686
|
"""Sanitize first, then classify and compress, then build the receipt.
|
|
567
687
|
|
|
@@ -573,11 +693,24 @@ def compress_text(
|
|
|
573
693
|
content_type, type_source = forced_type, "override"
|
|
574
694
|
else:
|
|
575
695
|
content_type, type_source = classify_content(sanitized), "detected"
|
|
576
|
-
|
|
696
|
+
if compression_mode == "readable" and content_type == "prose":
|
|
697
|
+
compressed, strategy_detail = compress_prose_readable(sanitized)
|
|
698
|
+
else:
|
|
699
|
+
compressed, strategy_detail = STRATEGIES[content_type](sanitized)
|
|
700
|
+
if compression_mode == "readable":
|
|
701
|
+
strategy_detail["readable_mode"] = True
|
|
702
|
+
strategy_detail["readable_strategy"] = "sentence-window-preview"
|
|
703
|
+
strategy_detail["readable_applied"] = False
|
|
704
|
+
strategy_detail["readable_omitted_reason"] = "non_prose_content"
|
|
705
|
+
strategy_detail["readable_blocking_signals"] = {"non_prose_content": 1}
|
|
577
706
|
# 보수성 보장: 어떤 전략도 입력보다 큰 결과를 내보내지 않는다. 작은 입력에서
|
|
578
707
|
# 접기 마커가 원본보다 길어지는 경우 살균된 원본을 그대로 유지한다.
|
|
579
708
|
if byte_length(compressed) >= byte_length(sanitized):
|
|
580
709
|
compressed = sanitized
|
|
710
|
+
if compression_mode == "readable" and strategy_detail.get("readable_applied"):
|
|
711
|
+
strategy_detail["lossy"] = False
|
|
712
|
+
strategy_detail["readable_applied"] = False
|
|
713
|
+
strategy_detail["readable_omitted_reason"] = "not_smaller_than_input"
|
|
581
714
|
strategy_detail["reduced"] = False
|
|
582
715
|
else:
|
|
583
716
|
strategy_detail["reduced"] = True
|
|
@@ -592,6 +725,7 @@ def compress_text(
|
|
|
592
725
|
input_bytes=input_bytes,
|
|
593
726
|
max_bytes=max_bytes,
|
|
594
727
|
protected_policy_enabled=protected_policy_enabled,
|
|
728
|
+
compression_mode=compression_mode,
|
|
595
729
|
)
|
|
596
730
|
return compressed, metadata
|
|
597
731
|
|
|
@@ -623,6 +757,10 @@ def render_text_receipt(metadata: dict[str, object]) -> str:
|
|
|
623
757
|
def run_compress(args: argparse.Namespace) -> int:
|
|
624
758
|
"""Read stdin, compress, then emit JSON or (compressed text + stderr receipt)."""
|
|
625
759
|
max_bytes = bounded_int(args.max_bytes, DEFAULT_MAX_BYTES, 1, MAX_MAX_BYTES)
|
|
760
|
+
compression_mode = args.mode
|
|
761
|
+
if compression_mode not in COMPRESSION_MODES:
|
|
762
|
+
print(f"context-guard-compress: unknown --mode: {compression_mode}", file=sys.stderr)
|
|
763
|
+
return 2
|
|
626
764
|
raw_text, input_truncated, input_bytes = read_bounded_stdin(max_bytes)
|
|
627
765
|
forced_type = args.type
|
|
628
766
|
if forced_type is not None and forced_type not in STRATEGIES:
|
|
@@ -636,6 +774,7 @@ def run_compress(args: argparse.Namespace) -> int:
|
|
|
636
774
|
input_bytes=input_bytes,
|
|
637
775
|
max_bytes=max_bytes,
|
|
638
776
|
protected_policy_enabled=bool(args.protected_policy),
|
|
777
|
+
compression_mode=compression_mode,
|
|
639
778
|
)
|
|
640
779
|
if args.json:
|
|
641
780
|
payload = {"metadata": metadata, "content": compressed}
|
|
@@ -659,6 +798,12 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
659
798
|
default=None,
|
|
660
799
|
help="force a content type instead of auto-detecting (json/diff/log/search/code/prose)",
|
|
661
800
|
)
|
|
801
|
+
parser.add_argument(
|
|
802
|
+
"--mode",
|
|
803
|
+
choices=COMPRESSION_MODES,
|
|
804
|
+
default="conservative",
|
|
805
|
+
help="compression policy: conservative keeps existing deterministic strategies; readable adds opt-in readable preview/source-verification metadata",
|
|
806
|
+
)
|
|
662
807
|
parser.add_argument("--json", action="store_true", help="emit JSON with metadata and compressed content")
|
|
663
808
|
parser.add_argument(
|
|
664
809
|
"--protected-policy",
|