@ictechgy/context-guard 0.4.9 → 0.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. package/CHANGELOG.md +28 -0
  2. package/README.ko.md +59 -31
  3. package/README.md +85 -36
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  8. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  9. package/docs/benchmark-workflow-examples.md +3 -0
  10. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
  11. package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
  12. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
  13. package/docs/distribution.md +10 -7
  14. package/docs/experimental-benchmark-fixtures.md +30 -6
  15. package/package.json +4 -6
  16. package/packaging/homebrew/context-guard.rb.template +1 -1
  17. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  18. package/plugins/context-guard/README.ko.md +20 -14
  19. package/plugins/context-guard/README.md +26 -17
  20. package/plugins/context-guard/bin/context-guard +147 -25
  21. package/plugins/context-guard/bin/context-guard-artifact +884 -79
  22. package/plugins/context-guard/bin/context-guard-audit +33 -2
  23. package/plugins/context-guard/bin/context-guard-bench +1542 -31
  24. package/plugins/context-guard/bin/context-guard-cache-score +665 -0
  25. package/plugins/context-guard/bin/context-guard-compress +146 -1
  26. package/plugins/context-guard/bin/context-guard-cost +790 -6
  27. package/plugins/context-guard/bin/context-guard-experiments +463 -26
  28. package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
  29. package/plugins/context-guard/bin/context-guard-filter +163 -7
  30. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  31. package/plugins/context-guard/bin/context-guard-pack +892 -49
  32. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  33. package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
  34. package/plugins/context-guard/bin/context-guard-setup +165 -31
  35. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  36. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  37. package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
  38. package/plugins/context-guard/bin/context-guard-trim-output +288 -41
  39. package/plugins/context-guard/brief/README.md +5 -5
  40. package/plugins/context-guard/lib/context_guard_commands.py +230 -0
  41. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  42. package/context-guard-kit/README.md +0 -91
  43. package/context-guard-kit/benchmark_runner.py +0 -2401
  44. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  45. package/context-guard-kit/context_compress.py +0 -695
  46. package/context-guard-kit/context_escrow.py +0 -935
  47. package/context-guard-kit/context_filter.py +0 -637
  48. package/context-guard-kit/context_guard_cli.py +0 -325
  49. package/context-guard-kit/context_guard_diet.py +0 -1711
  50. package/context-guard-kit/context_pack.py +0 -2713
  51. package/context-guard-kit/cost_guard.py +0 -2349
  52. package/context-guard-kit/experimental_registry.py +0 -4348
  53. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  54. package/context-guard-kit/guard_large_read.py +0 -690
  55. package/context-guard-kit/hook_secret_patterns.py +0 -43
  56. package/context-guard-kit/read_symbol.py +0 -483
  57. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  58. package/context-guard-kit/sanitize_output.py +0 -725
  59. package/context-guard-kit/settings.example.json +0 -67
  60. package/context-guard-kit/setup_wizard.py +0 -2515
  61. package/context-guard-kit/statusline.sh +0 -362
  62. package/context-guard-kit/statusline_merged.sh +0 -157
  63. package/context-guard-kit/tool_schema_pruner.py +0 -837
  64. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -28,6 +28,9 @@ MAX_MAX_BYTES = 100_000_000
28
28
  # 메타데이터에 measurement="estimated" 로 명시해 관측 토큰 수와 혼동되지 않게 한다.
29
29
  TOKEN_PROXY_CHARS_PER_TOKEN = 4
30
30
  CONTENT_TYPES = ("json", "diff", "log", "search", "code", "prose")
31
+ COMPRESSION_MODES = ("conservative", "readable")
32
+ READABLE_COMPRESSION_SCHEMA_VERSION = "contextguard.compress-readable.v1"
33
+ READABLE_SENTENCE_LIMIT = 5
31
34
 
32
35
  # diff 구조 라인(파일 헤더/헝크/변경)을 식별한다. 나머지 context 라인은 접어서 줄인다.
33
36
  DIFF_FILE_HEADER_RE = re.compile(r"^(diff --git |index [0-9a-f]|--- |\+\+\+ |rename |similarity |new file|deleted file)")
@@ -93,6 +96,20 @@ PROTECTED_DENIED_TRANSFORMS = (
93
96
  "path_rewrite",
94
97
  "quoted_literal_rewrite",
95
98
  )
99
+ READABLE_BLOCKING_PROTECTED_KEYS = (
100
+ "code_fence",
101
+ "diff",
102
+ "hash",
103
+ "path",
104
+ "stack_frame",
105
+ "numeric_constant",
106
+ "quoted_string",
107
+ "json_key",
108
+ )
109
+ PROMPT_LIKE_INSTRUCTION_RE = re.compile(
110
+ r"(?i)\b(ignore (?:all )?(?:previous|above) instructions|system prompt|developer message|"
111
+ r"you are chatgpt|act as (?:a|an)|do not follow|BEGIN (?:SYSTEM|DEVELOPER)|END (?:SYSTEM|DEVELOPER))\b"
112
+ )
96
113
 
97
114
 
98
115
  def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
@@ -301,6 +318,43 @@ def build_transform_policy(protected_policy: dict[str, object]) -> dict[str, obj
301
318
  }
302
319
 
303
320
 
321
+ def build_readable_compression_metadata(
322
+ *,
323
+ content_type: str,
324
+ strategy_detail: dict[str, object],
325
+ lossy: bool,
326
+ ) -> dict[str, object]:
327
+ blocking = strategy_detail.get("readable_blocking_signals", {})
328
+ if not isinstance(blocking, dict):
329
+ blocking = {}
330
+ applied = bool(strategy_detail.get("readable_applied"))
331
+ exact_fallback_required = bool(lossy or applied)
332
+ return {
333
+ "schema_version": READABLE_COMPRESSION_SCHEMA_VERSION,
334
+ "mode": "readable",
335
+ "preview_only": True,
336
+ "applied": applied,
337
+ "content_type": content_type,
338
+ "strategy": strategy_detail.get("strategy"),
339
+ "readable_strategy": strategy_detail.get("readable_strategy", "structural-preview"),
340
+ "omitted_reason": strategy_detail.get("readable_omitted_reason"),
341
+ "blocking_signal_counts": blocking,
342
+ "protected_spans_stored": False,
343
+ "source_verification": {
344
+ "exact_fallback_required": exact_fallback_required,
345
+ "recommended_command": "context-guard-artifact store --command 'readable-mode exact fallback' --json < sanitized-prose.txt",
346
+ "verify_before_edit_or_claim": True,
347
+ },
348
+ "claim_boundary": {
349
+ "deterministic_local_only": True,
350
+ "no_network_model_embedding_or_reranker": True,
351
+ "no_generated_semantic_rewrite": True,
352
+ "byte_and_token_counts_are_local_proxies": True,
353
+ "hosted_api_token_or_cost_savings_claim_allowed": False,
354
+ },
355
+ }
356
+
357
+
304
358
  def _looks_like_json(stripped: str) -> bool:
305
359
  if stripped[0] not in "{[":
306
360
  return False
@@ -434,6 +488,64 @@ def compress_prose(text: str) -> tuple[str, dict[str, object]]:
434
488
  return _whitespace_normalize(text, strategy="prose-whitespace", max_consecutive_blank=1)
435
489
 
436
490
 
491
+ def readable_blocking_signal_counts(text: str, content_type: str) -> dict[str, int]:
492
+ counts = protected_zone_counts(text)
493
+ blocking = {
494
+ key: int(counts.get(key, 0) or 0)
495
+ for key in READABLE_BLOCKING_PROTECTED_KEYS
496
+ if int(counts.get(key, 0) or 0) > 0
497
+ }
498
+ prompt_like = len(PROMPT_LIKE_INSTRUCTION_RE.findall(text))
499
+ if prompt_like:
500
+ blocking["prompt_like_instruction"] = prompt_like
501
+ if content_type != "prose":
502
+ blocking["non_prose_content"] = 1
503
+ return blocking
504
+
505
+
506
+ def split_prose_sentences(text: str) -> list[str]:
507
+ compact = " ".join(text.split())
508
+ if not compact:
509
+ return []
510
+ sentences = re.split(r"(?<=[.!?])\s+", compact)
511
+ return [sentence.strip() for sentence in sentences if sentence.strip()]
512
+
513
+
514
+ def compress_prose_readable(text: str) -> tuple[str, dict[str, object]]:
515
+ """Readable opt-in sentence window for sanitized unprotected prose only."""
516
+ normalized, base_detail = compress_prose(text)
517
+ blocking = readable_blocking_signal_counts(normalized, "prose")
518
+ detail = dict(base_detail)
519
+ detail.update({
520
+ "readable_mode": True,
521
+ "readable_strategy": "sentence-window-preview",
522
+ "readable_blocking_signals": blocking,
523
+ })
524
+ if blocking:
525
+ detail["readable_applied"] = False
526
+ detail["readable_omitted_reason"] = "protected_or_prompt_like_signal"
527
+ return normalized, detail
528
+ sentences = split_prose_sentences(normalized)
529
+ if len(sentences) <= READABLE_SENTENCE_LIMIT:
530
+ detail["readable_applied"] = False
531
+ detail["readable_omitted_reason"] = "short_prose"
532
+ return normalized, detail
533
+ included_sentences = sentences[:3] + sentences[-1:]
534
+ kept = sentences[:3] + [f"[context-guard-readable] {len(sentences) - len(included_sentences)} sentence(s) omitted; retrieve exact source before relying on omitted detail."] + sentences[-1:]
535
+ preview = " ".join(kept)
536
+ if text.endswith("\n"):
537
+ preview += "\n"
538
+ detail.update({
539
+ "strategy": "prose-readable-window",
540
+ "lossy": True,
541
+ "readable_applied": True,
542
+ "sentences_original": len(sentences),
543
+ "sentences_included": len(included_sentences),
544
+ "sentences_omitted": len(sentences) - len(included_sentences),
545
+ })
546
+ return preview, detail
547
+
548
+
437
549
  def _whitespace_normalize(text: str, *, strategy: str, max_consecutive_blank: int) -> tuple[str, dict[str, object]]:
438
550
  out: list[str] = []
439
551
  blank_run = 0
@@ -482,6 +594,7 @@ def build_metadata(
482
594
  input_bytes: int,
483
595
  max_bytes: int,
484
596
  protected_policy_enabled: bool = False,
597
+ compression_mode: str = "conservative",
485
598
  ) -> dict[str, object]:
486
599
  """Assemble the compress receipt: observed byte/line counts plus an estimated token proxy.
487
600
 
@@ -550,6 +663,12 @@ def build_metadata(
550
663
  "Protected lossy structural transform: store the full sanitized text with "
551
664
  "`context-guard-artifact store` and retrieve exact slices before relying on omitted content."
552
665
  )
666
+ if compression_mode == "readable":
667
+ metadata["readable_compression"] = build_readable_compression_metadata(
668
+ content_type=content_type,
669
+ strategy_detail=strategy_detail,
670
+ lossy=lossy,
671
+ )
553
672
  return metadata
554
673
 
555
674
 
@@ -562,6 +681,7 @@ def compress_text(
562
681
  input_bytes: int,
563
682
  max_bytes: int,
564
683
  protected_policy_enabled: bool = False,
684
+ compression_mode: str = "conservative",
565
685
  ) -> tuple[str, dict[str, object]]:
566
686
  """Sanitize first, then classify and compress, then build the receipt.
567
687
 
@@ -573,11 +693,24 @@ def compress_text(
573
693
  content_type, type_source = forced_type, "override"
574
694
  else:
575
695
  content_type, type_source = classify_content(sanitized), "detected"
576
- compressed, strategy_detail = STRATEGIES[content_type](sanitized)
696
+ if compression_mode == "readable" and content_type == "prose":
697
+ compressed, strategy_detail = compress_prose_readable(sanitized)
698
+ else:
699
+ compressed, strategy_detail = STRATEGIES[content_type](sanitized)
700
+ if compression_mode == "readable":
701
+ strategy_detail["readable_mode"] = True
702
+ strategy_detail["readable_strategy"] = "sentence-window-preview"
703
+ strategy_detail["readable_applied"] = False
704
+ strategy_detail["readable_omitted_reason"] = "non_prose_content"
705
+ strategy_detail["readable_blocking_signals"] = {"non_prose_content": 1}
577
706
  # 보수성 보장: 어떤 전략도 입력보다 큰 결과를 내보내지 않는다. 작은 입력에서
578
707
  # 접기 마커가 원본보다 길어지는 경우 살균된 원본을 그대로 유지한다.
579
708
  if byte_length(compressed) >= byte_length(sanitized):
580
709
  compressed = sanitized
710
+ if compression_mode == "readable" and strategy_detail.get("readable_applied"):
711
+ strategy_detail["lossy"] = False
712
+ strategy_detail["readable_applied"] = False
713
+ strategy_detail["readable_omitted_reason"] = "not_smaller_than_input"
581
714
  strategy_detail["reduced"] = False
582
715
  else:
583
716
  strategy_detail["reduced"] = True
@@ -592,6 +725,7 @@ def compress_text(
592
725
  input_bytes=input_bytes,
593
726
  max_bytes=max_bytes,
594
727
  protected_policy_enabled=protected_policy_enabled,
728
+ compression_mode=compression_mode,
595
729
  )
596
730
  return compressed, metadata
597
731
 
@@ -623,6 +757,10 @@ def render_text_receipt(metadata: dict[str, object]) -> str:
623
757
  def run_compress(args: argparse.Namespace) -> int:
624
758
  """Read stdin, compress, then emit JSON or (compressed text + stderr receipt)."""
625
759
  max_bytes = bounded_int(args.max_bytes, DEFAULT_MAX_BYTES, 1, MAX_MAX_BYTES)
760
+ compression_mode = args.mode
761
+ if compression_mode not in COMPRESSION_MODES:
762
+ print(f"context-guard-compress: unknown --mode: {compression_mode}", file=sys.stderr)
763
+ return 2
626
764
  raw_text, input_truncated, input_bytes = read_bounded_stdin(max_bytes)
627
765
  forced_type = args.type
628
766
  if forced_type is not None and forced_type not in STRATEGIES:
@@ -636,6 +774,7 @@ def run_compress(args: argparse.Namespace) -> int:
636
774
  input_bytes=input_bytes,
637
775
  max_bytes=max_bytes,
638
776
  protected_policy_enabled=bool(args.protected_policy),
777
+ compression_mode=compression_mode,
639
778
  )
640
779
  if args.json:
641
780
  payload = {"metadata": metadata, "content": compressed}
@@ -659,6 +798,12 @@ def build_parser() -> argparse.ArgumentParser:
659
798
  default=None,
660
799
  help="force a content type instead of auto-detecting (json/diff/log/search/code/prose)",
661
800
  )
801
+ parser.add_argument(
802
+ "--mode",
803
+ choices=COMPRESSION_MODES,
804
+ default="conservative",
805
+ help="compression policy: conservative keeps existing deterministic strategies; readable adds opt-in readable preview/source-verification metadata",
806
+ )
662
807
  parser.add_argument("--json", action="store_true", help="emit JSON with metadata and compressed content")
663
808
  parser.add_argument(
664
809
  "--protected-policy",