@ictechgy/context-guard 0.4.10 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (32):
  1. package/CHANGELOG.md +17 -1
  2. package/README.ko.md +46 -28
  3. package/README.md +42 -33
  4. package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
  5. package/docs/benchmark-workflow-examples.md +3 -0
  6. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
  7. package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
  8. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
  9. package/docs/experimental-benchmark-fixtures.md +24 -7
  10. package/package.json +2 -1
  11. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  12. package/plugins/context-guard/README.ko.md +14 -11
  13. package/plugins/context-guard/README.md +15 -14
  14. package/plugins/context-guard/bin/context-guard +48 -17
  15. package/plugins/context-guard/bin/context-guard-artifact +342 -33
  16. package/plugins/context-guard/bin/context-guard-audit +36 -5
  17. package/plugins/context-guard/bin/context-guard-bench +1675 -44
  18. package/plugins/context-guard/bin/context-guard-cache-score +347 -35
  19. package/plugins/context-guard/bin/context-guard-compress +89 -27
  20. package/plugins/context-guard/bin/context-guard-cost +7 -2
  21. package/plugins/context-guard/bin/context-guard-experiments +364 -8
  22. package/plugins/context-guard/bin/context-guard-failed-nudge +6 -2
  23. package/plugins/context-guard/bin/context-guard-filter +88 -18
  24. package/plugins/context-guard/bin/context-guard-pack +329 -19
  25. package/plugins/context-guard/bin/context-guard-read-symbol +27 -0
  26. package/plugins/context-guard/bin/context-guard-sanitize-output +245 -18
  27. package/plugins/context-guard/bin/context-guard-setup +21 -5
  28. package/plugins/context-guard/bin/context-guard-tool-prune +287 -62
  29. package/plugins/context-guard/bin/context-guard-trim-output +394 -90
  30. package/plugins/context-guard/brief/README.md +5 -5
  31. package/plugins/context-guard/lib/context_guard_command_manifest_loader.py +123 -0
  32. package/plugins/context-guard/lib/context_guard_commands.py +217 -190
@@ -23,6 +23,10 @@ TOOL_NAME = "context-guard-cache-score"
23
23
  SCHEMA_VERSION = "contextguard.cache-score.v1"
24
24
  DEFAULT_MAX_INPUT_BYTES = 1_000_000
25
25
  TOKEN_PROXY_CHARS_PER_TOKEN = 4
26
+ DEFAULT_EXPECTED_REUSES = 1
27
+ MAX_EXPECTED_REUSES = 1_000_000
28
+ MAX_CACHE_MULTIPLIER = 1_000_000.0
29
+ SAVINGS_EPSILON = 1e-12
26
30
  PROVIDER_MINIMUM_CACHEABLE_TOKENS = {
27
31
  # Provider and model minimums move over time. These defaults are advisory
28
32
  # and can be overridden with --minimum-cacheable-tokens.
@@ -56,6 +60,10 @@ ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
56
60
  "var": Path("/private/var"),
57
61
  }
58
62
  MAX_JSON_PATH_SEGMENT_CHARS = 64
63
+ MAX_JSON_WALK_NODES = 10_000
64
+ MAX_JSON_WALK_DEPTH = 64
65
+ MAX_JSON_SHAPE_WARNINGS = 200
66
+ MAX_JSON_CANONICAL_COMPARE_BYTES = 200_000
59
67
  SAFE_JSON_PATH_SEGMENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_-]{0,63}$")
60
68
  DYNAMIC_JSON_KEY_RE = re.compile(r"(?i)(request|trace|nonce|random|timestamp|created[_-]?at|updated[_-]?at|date)")
61
69
  SENSITIVE_JSON_KEY_RE = re.compile(
@@ -86,6 +94,22 @@ def json_bytes(data: Any, *, indent: int | None = None) -> str:
86
94
  return json.dumps(data, ensure_ascii=False, sort_keys=True, separators=(",", ":") if indent is None else None, indent=indent)
87
95
 
88
96
 
97
def bounded_canonical_json(data: Any, *, max_bytes: int) -> str | None:
    """Render ``data`` as sorted, 2-space-indented JSON plus a trailing newline.

    The encoder output is consumed piece by piece so the work stops as soon as
    the running size (as measured by ``byte_len_text``) would exceed
    ``max_bytes``.

    Returns:
        The canonical text, or ``None`` when the text including its trailing
        newline would be larger than ``max_bytes``.
    """
    pieces: list[str] = []
    used = 0
    canonical_encoder = json.JSONEncoder(ensure_ascii=False, sort_keys=True, indent=2)
    for piece in canonical_encoder.iterencode(data):
        used += byte_len_text(piece)
        if used > max_bytes:
            return None
        pieces.append(piece)
    # The trailing newline is counted as one unit against the cap.
    if used + 1 > max_bytes:
        return None
    return "".join(pieces) + "\n"
111
+
112
+
89
113
  def json_path_child(path: str, key: object) -> str:
90
114
  """Return a JSON warning path segment without echoing sensitive/dynamic keys."""
91
115
  text = str(key)
@@ -110,6 +134,30 @@ def bounded_int(value: object, *, default: int, minimum: int, maximum: int, name
110
134
  return number
111
135
 
112
136
 
137
def bounded_float(
    value: object,
    *,
    minimum: float,
    maximum: float,
    name: str,
) -> float | None:
    """Convert an optional option value to a float within ``[minimum, maximum]``.

    ``None`` passes through unchanged. Booleans, values ``float()`` cannot
    convert, non-finite numbers, and out-of-range numbers are reported through
    ``fail`` using ``name`` in the message.

    Returns:
        The converted float, or ``None`` when ``value`` is ``None``.
    """
    if value is None:
        return None
    not_a_number = f"{name} must be a finite number"
    # bool is checked explicitly because float(True) would otherwise yield 1.0.
    if isinstance(value, bool):
        fail(not_a_number)
    try:
        parsed = float(value)
    except (TypeError, ValueError, OverflowError):
        fail(not_a_number)
    if not math.isfinite(parsed):
        fail(f"{name} must be finite")
    if parsed < minimum:
        fail(f"{name} must be >= {minimum:g}")
    if parsed > maximum:
        fail(f"{name} must be <= {maximum:g}")
    return parsed
159
+
160
+
113
161
  def normalized_link_target(parent: Path, raw_target: str) -> Path:
114
162
  target = Path(raw_target)
115
163
  if not target.is_absolute():
@@ -197,39 +245,102 @@ def first_dynamic_marker(text: str) -> tuple[int | None, str | None]:
197
245
  return best_offset, best_name
198
246
 
199
247
 
200
- def _walk_json(value: Any, path: str = "$") -> list[dict[str, Any]]:
248
+ def _walk_json(
249
+ value: Any,
250
+ path: str = "$",
251
+ *,
252
+ max_nodes: int = MAX_JSON_WALK_NODES,
253
+ max_depth: int = MAX_JSON_WALK_DEPTH,
254
+ max_warnings: int = MAX_JSON_SHAPE_WARNINGS,
255
+ ) -> list[dict[str, Any]]:
201
256
  warnings: list[dict[str, Any]] = []
202
- if isinstance(value, dict):
203
- keys = [str(key) for key in value]
204
- if keys != sorted(keys):
205
- warnings.append({
206
- "code": "json_object_key_order_not_sorted",
207
- "path": path,
208
- "severity": "info",
209
- "message": "Object keys are not in deterministic sorted order; keep generated JSON stable across runs.",
210
- })
211
- for key, item in value.items():
212
- child_path = json_path_child(path, key)
213
- if DYNAMIC_JSON_KEY_RE.search(str(key)):
214
- warnings.append({
215
- "code": "dynamic_json_key",
216
- "path": child_path,
217
- "severity": "warn",
218
- "message": "Dynamic-looking JSON key appears in the prompt/request; place dynamic values after the reusable prefix.",
219
- })
220
- warnings.extend(_walk_json(item, child_path))
221
- elif isinstance(value, list):
222
- if path.endswith(".tools") and all(isinstance(item, dict) and "name" in item for item in value):
223
- names = [str(item.get("name")) for item in value]
224
- if names != sorted(names):
225
- warnings.append({
226
- "code": "tool_order_not_sorted",
227
- "path": path,
257
+ capped_nodes = False
258
+ capped_depth = False
259
+ capped_warnings = False
260
+
261
+ def add_warning(item: dict[str, Any]) -> None:
262
+ nonlocal capped_warnings
263
+ if len(warnings) < max_warnings:
264
+ warnings.append(item)
265
+ else:
266
+ capped_warnings = True
267
+
268
+ stack: list[tuple[Any, str, int]] = [(value, path, 0)]
269
+ visited = 0
270
+ while stack:
271
+ if visited >= max_nodes:
272
+ capped_nodes = True
273
+ break
274
+ current, current_path, depth = stack.pop()
275
+ visited += 1
276
+ if depth >= max_depth and isinstance(current, (dict, list)) and current:
277
+ capped_depth = True
278
+ continue
279
+ if isinstance(current, dict):
280
+ previous_key: str | None = None
281
+ keys_sorted = True
282
+ remaining_child_slots = max(0, max_nodes - visited - len(stack))
283
+ child_items: list[tuple[Any, str, int]] = []
284
+ for key, item in current.items():
285
+ text_key = str(key)
286
+ if previous_key is not None and text_key < previous_key:
287
+ keys_sorted = False
288
+ previous_key = text_key
289
+ child_path = json_path_child(current_path, key)
290
+ if DYNAMIC_JSON_KEY_RE.search(text_key):
291
+ add_warning({
292
+ "code": "dynamic_json_key",
293
+ "path": child_path,
294
+ "severity": "warn",
295
+ "message": "Dynamic-looking JSON key appears in the prompt/request; place dynamic values after the reusable prefix.",
296
+ })
297
+ if len(child_items) >= remaining_child_slots:
298
+ capped_nodes = True
299
+ break
300
+ child_items.append((item, child_path, depth + 1))
301
+ if not keys_sorted:
302
+ add_warning({
303
+ "code": "json_object_key_order_not_sorted",
304
+ "path": current_path,
228
305
  "severity": "info",
229
- "message": "Tool definitions are not sorted by name; deterministic ordering improves prefix reuse.",
306
+ "message": "Object keys are not in deterministic sorted order; keep generated JSON stable across runs.",
230
307
  })
231
- for index, item in enumerate(value):
232
- warnings.extend(_walk_json(item, f"{path}[{index}]"))
308
+ stack.extend(reversed(child_items))
309
+ elif isinstance(current, list):
310
+ if current_path.endswith(".tools") and all(isinstance(item, dict) and "name" in item for item in current):
311
+ names = [str(item.get("name")) for item in current]
312
+ if names != sorted(names):
313
+ add_warning({
314
+ "code": "tool_order_not_sorted",
315
+ "path": current_path,
316
+ "severity": "info",
317
+ "message": "Tool definitions are not sorted by name; deterministic ordering improves prefix reuse.",
318
+ })
319
+ remaining_child_slots = max(0, max_nodes - visited - len(stack))
320
+ child_items = []
321
+ for index, item in enumerate(current):
322
+ if len(child_items) >= remaining_child_slots:
323
+ capped_nodes = True
324
+ break
325
+ child_items.append((item, f"{current_path}[{index}]", depth + 1))
326
+ stack.extend(reversed(child_items))
327
+ if capped_nodes or capped_depth or capped_warnings:
328
+ cap_warning = {
329
+ "code": "json_walk_truncated",
330
+ "path": "$",
331
+ "severity": "warn",
332
+ "message": "JSON shape analysis was capped by node, depth, or warning limits; rerun on a narrower prompt fixture for complete linting.",
333
+ "nodes_visited": visited,
334
+ "max_nodes": max_nodes,
335
+ "max_depth": max_depth,
336
+ "max_warnings": max_warnings,
337
+ }
338
+ if max_warnings <= 0:
339
+ return warnings
340
+ if len(warnings) < max_warnings:
341
+ warnings.append(cap_warning)
342
+ elif warnings:
343
+ warnings[-1] = cap_warning
233
344
  return warnings
234
345
 
235
346
 
@@ -241,8 +352,18 @@ def json_shape_warnings(text: str) -> tuple[str, list[dict[str, Any]]]:
241
352
  if not isinstance(data, (dict, list)):
242
353
  return "json-scalar", []
243
354
  warnings = _walk_json(data)
244
- canonical = json_bytes(data, indent=2) + "\n"
245
- if canonical != text:
355
+ input_bytes = byte_len_text(text)
356
+ canonical = bounded_canonical_json(data, max_bytes=MAX_JSON_CANONICAL_COMPARE_BYTES)
357
+ if canonical is None:
358
+ warnings.append({
359
+ "code": "json_canonical_check_skipped",
360
+ "path": "$",
361
+ "severity": "info",
362
+ "message": "JSON input is parseable but canonical formatting would exceed the comparison byte cap.",
363
+ "input_bytes": input_bytes,
364
+ "max_bytes": MAX_JSON_CANONICAL_COMPARE_BYTES,
365
+ })
366
+ elif canonical != text:
246
367
  warnings.append({
247
368
  "code": "json_not_canonical",
248
369
  "path": "$",
@@ -252,7 +373,141 @@ def json_shape_warnings(text: str) -> tuple[str, list[dict[str, Any]]]:
252
373
  return "json", warnings
253
374
 
254
375
 
255
- def score_prompt(text: str, *, provider: str, minimum_cacheable_tokens: int) -> dict[str, Any]:
376
def read_premium_relative_savings(reuses: int, *, write_multiplier: float, read_multiplier: float) -> float:
    """Return relative savings of caching versus not caching for ``reuses`` reads.

    Both multipliers are relative to an uncached cost of 1.0, so the result is
    the write-side saving plus ``reuses`` times the per-read saving; either
    term is negative when its multiplier exceeds 1.0.
    """
    write_saving = 1.0 - write_multiplier
    per_read_saving = 1.0 - read_multiplier
    return write_saving + (reuses * per_read_saving)
378
+
379
+
380
def max_profitable_read_premium_reuses(*, write_multiplier: float, read_multiplier: float) -> int:
    """Return the largest reuse count with strictly positive relative savings.

    The closed-form estimate ``floor((1 - write) / (read - 1))`` is only a
    starting point; it is then nudged down and up against
    ``SAVINGS_EPSILON`` so float rounding in the division cannot leave the
    answer off by one.

    The division by ``read_multiplier - 1.0`` means ``read_multiplier`` must
    not equal 1.0.
    """

    def savings_at(reuse_count: int) -> float:
        return read_premium_relative_savings(
            reuse_count,
            write_multiplier=write_multiplier,
            read_multiplier=read_multiplier,
        )

    estimate = int(math.floor((1.0 - write_multiplier) / (read_multiplier - 1.0)))
    reuses = estimate if estimate > 0 else 0
    # Step down while the estimate itself is not (meaningfully) profitable.
    while reuses > 0 and savings_at(reuses) <= SAVINGS_EPSILON:
        reuses -= 1
    # Step up while one more reuse would still be profitable.
    while savings_at(reuses + 1) > SAVINGS_EPSILON:
        reuses += 1
    return reuses
396
+
397
+
398
def build_amortization_report(
    *,
    eligible: bool,
    prefix_tokens: int,
    expected_reuses: int,
    cache_write_multiplier: float | None,
    cache_read_multiplier: float | None,
) -> dict[str, Any]:
    """Return advisory cache amortization math using user-supplied multipliers.

    ``expected_reuses`` means future cache reads after the initial cache write.
    Multipliers are relative to uncached prefix input cost = 1.0. Provider
    pricing/cache policies change, so ContextGuard intentionally does not ship
    provider-specific multiplier defaults.

    Args:
        eligible: Whether the prefix met the minimum cacheable size; when
            false the report is ``not_cacheable`` and no costs are computed.
        prefix_tokens: Estimated cacheable prefix tokens, echoed in the report.
        expected_reuses: Number of expected cache reads after the write.
        cache_write_multiplier: Relative cost of the cache write, or ``None``.
        cache_read_multiplier: Relative cost of each cache read, or ``None``.

    Returns:
        A dict with the inputs, the derived relative costs/savings (``None``
        when not computed), ``break_even_reuses`` (read discount case),
        ``max_profitable_reuses`` (read premium with write discount case),
        a ``status``/``risk`` pair, and the advisory claim boundary.
    """
    supplied = cache_write_multiplier is not None and cache_read_multiplier is not None
    break_even_reuses: int | None = None
    max_profitable_reuses: int | None = None
    expected_uncached_relative_cost: float | None = None
    expected_cached_relative_cost: float | None = None
    expected_relative_savings: float | None = None
    status = "multipliers_not_supplied"
    risk = "unknown"

    if not eligible:
        status = "not_cacheable"
        risk = "high"
    elif not supplied:
        status = "multipliers_not_supplied"
        risk = "unknown"
    else:
        expected_uncached_relative_cost = 1.0 + expected_reuses
        expected_cached_relative_cost = cache_write_multiplier + (expected_reuses * cache_read_multiplier)
        expected_relative_savings = expected_uncached_relative_cost - expected_cached_relative_cost
        if cache_read_multiplier < 1.0:
            # Reads are discounted: the write premium (if any) is paid back
            # after a finite number of reads.
            if cache_write_multiplier <= 1.0:
                break_even_reuses = 0
            else:
                raw_break_even = (cache_write_multiplier - 1.0) / (1.0 - cache_read_multiplier)
                nearest = round(raw_break_even)
                # Float rounding can turn an exact integer quotient into e.g.
                # 3.000000000000001 (write=1.3, read=0.9); a bare ceil() would
                # then report 4 even though 3 reads already break even. Snap
                # to the nearest integer when the quotient is within rounding
                # noise of it, otherwise round up as usual.
                if math.isclose(raw_break_even, nearest, rel_tol=1e-12):
                    break_even_reuses = int(nearest)
                else:
                    break_even_reuses = int(math.ceil(raw_break_even))
            if expected_reuses >= break_even_reuses:
                status = "already_break_even_on_write" if break_even_reuses == 0 else "amortizes_with_expected_reuses"
                risk = "low"
            elif expected_reuses > 0:
                status = "not_enough_expected_reuses"
                risk = "medium"
            else:
                status = "not_enough_expected_reuses"
                risk = "high"
        elif cache_read_multiplier == 1.0 and cache_write_multiplier <= 1.0:
            # Reads cost the same as uncached input and the write is no more
            # expensive, so caching never loses.
            break_even_reuses = 0
            status = "already_break_even_on_write"
            risk = "low"
        elif cache_read_multiplier > 1.0:
            # Reads carry a premium: only a discounted write can make a
            # limited number of reuses profitable.
            if cache_write_multiplier < 1.0:
                max_profitable_reuses = max_profitable_read_premium_reuses(
                    write_multiplier=cache_write_multiplier,
                    read_multiplier=cache_read_multiplier,
                )
            if expected_relative_savings < -SAVINGS_EPSILON:
                status = "no_read_discount"
                risk = "high"
            elif expected_reuses == 0:
                if expected_relative_savings > SAVINGS_EPSILON:
                    status = "write_discount_only_no_expected_reads"
                    risk = "low"
                else:
                    status = "break_even_only_no_expected_reads"
                    risk = "medium"
            elif abs(expected_relative_savings) <= SAVINGS_EPSILON:
                status = "break_even_only_with_limited_reuses"
                risk = "medium"
            else:
                status = "positive_only_with_limited_reuses"
                risk = "medium"
        else:
            # read multiplier == 1.0 with a write premium: nothing pays it back.
            status = "no_read_discount"
            risk = "high"

    return {
        "expected_reuses": expected_reuses,
        "expected_reuses_semantics": "future_cache_reads_after_initial_write",
        "cacheable_prefix_tokens": prefix_tokens,
        "break_even_reuses": break_even_reuses,
        "max_profitable_reuses": max_profitable_reuses,
        "status": status,
        "risk": risk,
        "cache_write_multiplier": cache_write_multiplier,
        "cache_read_multiplier": cache_read_multiplier,
        "expected_uncached_relative_cost": expected_uncached_relative_cost,
        "expected_cached_relative_cost": expected_cached_relative_cost,
        "expected_relative_savings": expected_relative_savings,
        "multiplier_baseline": "uncached_prefix_input_cost_equals_1.0",
        "user_supplied_multipliers": supplied,
        "formula": "expected_cached=write_multiplier + expected_reuses*read_multiplier; expected_uncached=1 + expected_reuses; break_even=ceil((write_multiplier - 1.0)/(1.0-read_multiplier)) only when read_multiplier<1; max_profitable_reuses is the largest integer reuse count with expected_uncached-expected_cached > 0, only when read_multiplier>1 and write_multiplier<1",
        "claim_boundary": {
            "advisory_only": True,
            "provider_pricing_defaults_included": False,
            "provider_measured_cache_hit": False,
            "hosted_api_token_or_cost_savings_claim_allowed": False,
            "requires_user_supplied_or_provider_documented_multipliers": True,
        },
    }
500
+
501
+
502
+ def score_prompt(
503
+ text: str,
504
+ *,
505
+ provider: str,
506
+ minimum_cacheable_tokens: int,
507
+ expected_reuses: int = DEFAULT_EXPECTED_REUSES,
508
+ cache_write_multiplier: float | None = None,
509
+ cache_read_multiplier: float | None = None,
510
+ ) -> dict[str, Any]:
256
511
  prompt_kind, shape_warnings = json_shape_warnings(text)
257
512
  dynamic_offset, dynamic_marker = first_dynamic_marker(text)
258
513
  prefix_text = text if dynamic_offset is None else text[:dynamic_offset]
@@ -282,13 +537,14 @@ def score_prompt(text: str, *, provider: str, minimum_cacheable_tokens: int) ->
282
537
  "message": "Anthropic caching usually requires cache_control around the reusable prefix.",
283
538
  })
284
539
 
540
+ eligible = prefix_estimated >= minimum_cacheable_tokens
285
541
  return {
286
542
  "tool": TOOL_NAME,
287
543
  "schema_version": SCHEMA_VERSION,
288
544
  "provider": provider,
289
545
  "prompt_kind": prompt_kind,
290
546
  "minimum_cacheable_tokens": minimum_cacheable_tokens,
291
- "eligible": prefix_estimated >= minimum_cacheable_tokens,
547
+ "eligible": eligible,
292
548
  "estimated_tokens": estimated,
293
549
  "cacheable_prefix_tokens": prefix_estimated,
294
550
  "token_estimate": {
@@ -305,6 +561,13 @@ def score_prompt(text: str, *, provider: str, minimum_cacheable_tokens: int) ->
305
561
  "static_prefix_ratio": round(static_ratio, 6),
306
562
  "warnings": warnings,
307
563
  "provider_caveat": PROVIDER_CAVEATS[provider],
564
+ "amortization": build_amortization_report(
565
+ eligible=eligible,
566
+ prefix_tokens=prefix_estimated,
567
+ expected_reuses=expected_reuses,
568
+ cache_write_multiplier=cache_write_multiplier,
569
+ cache_read_multiplier=cache_read_multiplier,
570
+ ),
308
571
  "raw_prompt_stored": False,
309
572
  "claim_boundary": {
310
573
  "advisory_only": True,
@@ -320,11 +583,16 @@ def render_text(report: dict[str, Any]) -> str:
320
583
  status = "eligible" if report.get("eligible") else "not eligible"
321
584
  warnings = report.get("warnings") if isinstance(report.get("warnings"), list) else []
322
585
  warning_codes = ", ".join(str(item.get("code")) for item in warnings if isinstance(item, dict)) or "none"
586
+ amortization = report.get("amortization") if isinstance(report.get("amortization"), dict) else {}
323
587
  return (
324
588
  f"{TOOL_NAME}: {status} for {report['provider']} "
325
589
  f"(static_prefix≈{report['cacheable_prefix_tokens']} char/4 tokens, "
326
590
  f"minimum={report['minimum_cacheable_tokens']})\n"
327
591
  f"warnings: {warning_codes}\n"
592
+ f"amortization: {amortization.get('status', 'unknown')} "
593
+ f"(risk={amortization.get('risk', 'unknown')}, "
594
+ f"break_even_reuses={amortization.get('break_even_reuses')}, "
595
+ f"max_profitable_reuses={amortization.get('max_profitable_reuses')})\n"
328
596
  "claim boundary: advisory static lint only; not a measured provider cache hit or cost saving.\n"
329
597
  )
330
598
 
@@ -344,6 +612,24 @@ def build_parser() -> argparse.ArgumentParser:
344
612
  help="override provider threshold for model/platform-specific cache minimums",
345
613
  )
346
614
  parser.add_argument("--max-input-bytes", default=DEFAULT_MAX_INPUT_BYTES, help=f"maximum input bytes (default: {DEFAULT_MAX_INPUT_BYTES})")
615
+ parser.add_argument(
616
+ "--expected-reuses",
617
+ default=DEFAULT_EXPECTED_REUSES,
618
+ help=(
619
+ "future cache reads expected after the initial write; advisory only "
620
+ f"(default: {DEFAULT_EXPECTED_REUSES})"
621
+ ),
622
+ )
623
+ parser.add_argument(
624
+ "--cache-write-multiplier",
625
+ default=None,
626
+ help="optional user-supplied cache write multiplier relative to uncached prefix input cost=1.0",
627
+ )
628
+ parser.add_argument(
629
+ "--cache-read-multiplier",
630
+ default=None,
631
+ help="optional user-supplied cache read multiplier relative to uncached prefix input cost=1.0",
632
+ )
347
633
  parser.add_argument("--json", action="store_true", help="emit stable JSON")
348
634
  return parser
349
635
 
@@ -362,8 +648,34 @@ def main(argv: list[str] | None = None) -> int:
362
648
  maximum=10_000_000,
363
649
  name="--minimum-cacheable-tokens",
364
650
  )
651
+ expected_reuses = bounded_int(
652
+ args.expected_reuses,
653
+ default=DEFAULT_EXPECTED_REUSES,
654
+ minimum=0,
655
+ maximum=MAX_EXPECTED_REUSES,
656
+ name="--expected-reuses",
657
+ )
658
+ cache_write_multiplier = bounded_float(
659
+ args.cache_write_multiplier,
660
+ minimum=0.0,
661
+ maximum=MAX_CACHE_MULTIPLIER,
662
+ name="--cache-write-multiplier",
663
+ )
664
+ cache_read_multiplier = bounded_float(
665
+ args.cache_read_multiplier,
666
+ minimum=0.0,
667
+ maximum=MAX_CACHE_MULTIPLIER,
668
+ name="--cache-read-multiplier",
669
+ )
365
670
  text = read_limited_path(Path(args.input), max_input_bytes) if args.input else read_limited_stdin(max_input_bytes)
366
- report = score_prompt(text, provider=provider, minimum_cacheable_tokens=minimum)
671
+ report = score_prompt(
672
+ text,
673
+ provider=provider,
674
+ minimum_cacheable_tokens=minimum,
675
+ expected_reuses=expected_reuses,
676
+ cache_write_multiplier=cache_write_multiplier,
677
+ cache_read_multiplier=cache_read_multiplier,
678
+ )
367
679
  if args.json:
368
680
  sys.stdout.write(json_bytes(report, indent=2) + "\n")
369
681
  else:
@@ -20,10 +20,12 @@ import os
20
20
  from pathlib import Path
21
21
  import re
22
22
  import sys
23
- from typing import Callable
23
+ from typing import Callable, Iterable
24
24
 
25
25
  DEFAULT_MAX_BYTES = 10_000_000
26
26
  MAX_MAX_BYTES = 100_000_000
27
+ MAX_SEARCH_DEDUPE_KEYS = 50_000
28
+ JSON_PARSE_FAILED = object()
27
29
  # 토큰 추정은 보수적 proxy 일 뿐이다(관측값 아님). 평균 ~4 chars/token 휴리스틱을 쓰되
28
30
  # 메타데이터에 measurement="estimated" 로 명시해 관측 토큰 수와 혼동되지 않게 한다.
29
31
  TOKEN_PROXY_CHARS_PER_TOKEN = 4
@@ -214,20 +216,57 @@ def token_proxy(text: str) -> int:
214
216
  return max(1, round(len(text) / TOKEN_PROXY_CHARS_PER_TOKEN))
215
217
 
216
218
 
219
LINE_BOUNDARY_CHARS = {"\n", "\r", "\v", "\f", "\x1c", "\x1d", "\x1e", "\x85", "\u2028", "\u2029"}
# Same boundaries as LINE_BOUNDARY_CHARS, with "\r\n" listed first so a CRLF
# pair is consumed as a single line break.
_LINE_BOUNDARY_RE = re.compile(r"\r\n|[\n\r\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029]")


def iter_text_lines(text: str) -> Iterable[str]:
    """Yield lines with str.splitlines() boundaries without building a line list.

    Line terminators are not included in the yielded strings, and a trailing
    terminator does not produce a final empty line. Boundary scanning is done
    by a compiled regex rather than a per-character Python loop, so large
    inputs are walked by the regex engine while lines are still produced
    lazily.
    """
    start = 0
    for boundary in _LINE_BOUNDARY_RE.finditer(text):
        yield text[start:boundary.start()]
        start = boundary.end()
    # Emit the unterminated tail, if any.
    if start < len(text):
        yield text[start:]
242
+
243
+
244
def sample_text_lines(text: str, limit: int) -> list[str]:
    """Return at most ``limit`` leading lines of ``text``.

    Lines come from ``iter_text_lines`` and iteration stops as soon as the
    sample is full, so the rest of ``text`` is not scanned. A non-positive
    ``limit`` yields an empty sample.
    """
    # Without this guard the loop would append one line before checking the
    # limit, returning a line even when none were requested.
    if limit <= 0:
        return []
    sample: list[str] = []
    for line in iter_text_lines(text):
        sample.append(line)
        if len(sample) >= limit:
            break
    return sample
251
+
252
+
217
253
  def classify_content(text: str) -> str:
218
254
  """Best-effort content classification into one of CONTENT_TYPES.
219
255
 
220
- Order matters: JSON and diff have the strongest unambiguous signals and are
221
- checked first; search/log/code are sampled over the first lines; prose is the
222
- conservative default so unknown text is never over-compressed.
256
+ Order matters: valid JSON and diff have the strongest unambiguous signals;
257
+ search/log/code are sampled over the first lines; prose is the conservative
258
+ default so unknown text is never over-compressed.
223
259
  """
224
260
  stripped = text.strip()
225
261
  if not stripped:
226
262
  return "prose"
227
263
  if _looks_like_json(stripped):
228
264
  return "json"
229
- lines = stripped.splitlines()
230
- sample = lines[:200]
265
+ return classify_non_json_content(stripped)
266
+
267
+
268
+ def classify_non_json_content(stripped: str) -> str:
269
+ sample = sample_text_lines(stripped, 200)
231
270
  if _looks_like_diff(sample):
232
271
  return "diff"
233
272
  if _looks_like_search(sample):
@@ -355,14 +394,17 @@ def build_readable_compression_metadata(
355
394
  }
356
395
 
357
396
 
358
- def _looks_like_json(stripped: str) -> bool:
359
- if stripped[0] not in "{[":
360
- return False
397
+ def parse_json_candidate(stripped: str) -> object:
398
+ if not stripped or stripped[0] not in "{[":
399
+ return JSON_PARSE_FAILED
361
400
  try:
362
- json.loads(stripped)
401
+ return json.loads(stripped)
363
402
  except (ValueError, RecursionError):
364
- return False
365
- return True
403
+ return JSON_PARSE_FAILED
404
+
405
+
406
def _looks_like_json(stripped: str) -> bool:
    """Return True when ``parse_json_candidate`` accepts ``stripped``."""
    candidate = parse_json_candidate(stripped)
    return candidate is not JSON_PARSE_FAILED
366
408
 
367
409
 
368
410
  def _ratio(matches: int, total: int, threshold: float) -> bool:
@@ -390,15 +432,7 @@ def _looks_like_code(sample: list[str]) -> bool:
390
432
  return _ratio(matches, len(sample), 0.25)
391
433
 
392
434
 
393
- def compress_json(text: str) -> tuple[str, dict[str, object]]:
394
- """Re-serialize JSON without insignificant whitespace (data-preserving)."""
395
- try:
396
- parsed = json.loads(text)
397
- except (ValueError, RecursionError):
398
- # 파싱 불가 시 무손실을 깨지 않도록 prose 전략으로 안전하게 폴백한다.
399
- compressed, detail = compress_prose(text)
400
- detail["fallback_from"] = "json"
401
- return compressed, detail
435
+ def compress_parsed_json(text: str, parsed: object) -> tuple[str, dict[str, object]]:
402
436
  compact = json.dumps(parsed, ensure_ascii=False, separators=(",", ":"))
403
437
  if not text.endswith("\n"):
404
438
  trailing = ""
@@ -407,6 +441,17 @@ def compress_json(text: str) -> tuple[str, dict[str, object]]:
407
441
  return compact + trailing, {"strategy": "json-compact", "lossy": False, "json_parse_ok": True}
408
442
 
409
443
 
444
def compress_json(text: str) -> tuple[str, dict[str, object]]:
    """Re-serialize JSON without insignificant whitespace (data-preserving).

    The stripped text is parsed via ``parse_json_candidate``. On success the
    parsed value is handed to ``compress_parsed_json``; on failure the text
    goes through ``compress_prose`` and the returned detail dict is tagged
    with ``fallback_from="json"``.
    """
    candidate = parse_json_candidate(text.strip())
    if candidate is not JSON_PARSE_FAILED:
        return compress_parsed_json(text, candidate)
    # When parsing fails, fall back safely to the prose strategy so that
    # losslessness is not broken.
    fallback_body, fallback_detail = compress_prose(text)
    fallback_detail["fallback_from"] = "json"
    return fallback_body, fallback_detail
453
+
454
+
410
455
  def compress_diff(text: str) -> tuple[str, dict[str, object]]:
411
456
  """Keep file headers, hunk headers, and +/- changes; collapse context runs."""
412
457
  out: list[str] = []
@@ -464,18 +509,28 @@ def compress_log(text: str) -> tuple[str, dict[str, object]]:
464
509
 
465
510
 
466
511
  def compress_search(text: str) -> tuple[str, dict[str, object]]:
467
- """Drop exact-duplicate match lines while preserving first-seen order."""
512
+ """Drop exact-duplicate match lines while preserving first-seen order with bounded keys."""
468
513
  out: list[str] = []
469
514
  seen: set[str] = set()
470
515
  dropped = 0
471
- for line in text.splitlines():
516
+ dedupe_limit_reached = False
517
+ for line in iter_text_lines(text):
472
518
  key = line.rstrip()
473
519
  if key in seen:
474
520
  dropped += 1
475
521
  continue
476
- seen.add(key)
522
+ if len(seen) < MAX_SEARCH_DEDUPE_KEYS:
523
+ seen.add(key)
524
+ else:
525
+ dedupe_limit_reached = True
477
526
  out.append(line)
478
- return _join_lines(out, text), {"strategy": "search-dedupe", "lossy": dropped > 0, "duplicate_lines_dropped": dropped}
527
+ return _join_lines(out, text), {
528
+ "strategy": "search-dedupe",
529
+ "lossy": dropped > 0,
530
+ "duplicate_lines_dropped": dropped,
531
+ "dedupe_key_limit": MAX_SEARCH_DEDUPE_KEYS,
532
+ "dedupe_key_limit_reached": dedupe_limit_reached,
533
+ }
479
534
 
480
535
 
481
536
  def compress_code(text: str) -> tuple[str, dict[str, object]]:
@@ -689,14 +744,21 @@ def compress_text(
689
744
  the compressed body, or the metadata that follows.
690
745
  """
691
746
  sanitized, redacted_lines = sanitize_text(text, show_paths=show_paths)
747
+ parsed_json: object = JSON_PARSE_FAILED
692
748
  if forced_type is not None:
693
749
  content_type, type_source = forced_type, "override"
694
750
  else:
695
- content_type, type_source = classify_content(sanitized), "detected"
751
+ stripped = sanitized.strip()
752
+ parsed_json = parse_json_candidate(stripped)
753
+ content_type = "json" if parsed_json is not JSON_PARSE_FAILED else classify_non_json_content(stripped)
754
+ type_source = "detected"
696
755
  if compression_mode == "readable" and content_type == "prose":
697
756
  compressed, strategy_detail = compress_prose_readable(sanitized)
698
757
  else:
699
- compressed, strategy_detail = STRATEGIES[content_type](sanitized)
758
+ if content_type == "json" and parsed_json is not JSON_PARSE_FAILED:
759
+ compressed, strategy_detail = compress_parsed_json(sanitized, parsed_json)
760
+ else:
761
+ compressed, strategy_detail = STRATEGIES[content_type](sanitized)
700
762
  if compression_mode == "readable":
701
763
  strategy_detail["readable_mode"] = True
702
764
  strategy_detail["readable_strategy"] = "sentence-window-preview"