@ictechgy/context-guard 0.4.10 → 0.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +17 -1
- package/README.ko.md +46 -28
- package/README.md +42 -33
- package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
- package/docs/benchmark-workflow-examples.md +3 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
- package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
- package/docs/experimental-benchmark-fixtures.md +24 -7
- package/package.json +2 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +14 -11
- package/plugins/context-guard/README.md +15 -14
- package/plugins/context-guard/bin/context-guard +48 -17
- package/plugins/context-guard/bin/context-guard-artifact +342 -33
- package/plugins/context-guard/bin/context-guard-audit +36 -5
- package/plugins/context-guard/bin/context-guard-bench +1675 -44
- package/plugins/context-guard/bin/context-guard-cache-score +347 -35
- package/plugins/context-guard/bin/context-guard-compress +89 -27
- package/plugins/context-guard/bin/context-guard-cost +7 -2
- package/plugins/context-guard/bin/context-guard-experiments +364 -8
- package/plugins/context-guard/bin/context-guard-failed-nudge +6 -2
- package/plugins/context-guard/bin/context-guard-filter +88 -18
- package/plugins/context-guard/bin/context-guard-pack +329 -19
- package/plugins/context-guard/bin/context-guard-read-symbol +27 -0
- package/plugins/context-guard/bin/context-guard-sanitize-output +245 -18
- package/plugins/context-guard/bin/context-guard-setup +21 -5
- package/plugins/context-guard/bin/context-guard-tool-prune +287 -62
- package/plugins/context-guard/bin/context-guard-trim-output +394 -90
- package/plugins/context-guard/brief/README.md +5 -5
- package/plugins/context-guard/lib/context_guard_command_manifest_loader.py +123 -0
- package/plugins/context-guard/lib/context_guard_commands.py +217 -190
|
@@ -23,6 +23,10 @@ TOOL_NAME = "context-guard-cache-score"
|
|
|
23
23
|
SCHEMA_VERSION = "contextguard.cache-score.v1"
|
|
24
24
|
DEFAULT_MAX_INPUT_BYTES = 1_000_000
|
|
25
25
|
TOKEN_PROXY_CHARS_PER_TOKEN = 4
|
|
26
|
+
DEFAULT_EXPECTED_REUSES = 1
|
|
27
|
+
MAX_EXPECTED_REUSES = 1_000_000
|
|
28
|
+
MAX_CACHE_MULTIPLIER = 1_000_000.0
|
|
29
|
+
SAVINGS_EPSILON = 1e-12
|
|
26
30
|
PROVIDER_MINIMUM_CACHEABLE_TOKENS = {
|
|
27
31
|
# Provider and model minimums move over time. These defaults are advisory
|
|
28
32
|
# and can be overridden with --minimum-cacheable-tokens.
|
|
@@ -56,6 +60,10 @@ ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
|
|
|
56
60
|
"var": Path("/private/var"),
|
|
57
61
|
}
|
|
58
62
|
MAX_JSON_PATH_SEGMENT_CHARS = 64
|
|
63
|
+
MAX_JSON_WALK_NODES = 10_000
|
|
64
|
+
MAX_JSON_WALK_DEPTH = 64
|
|
65
|
+
MAX_JSON_SHAPE_WARNINGS = 200
|
|
66
|
+
MAX_JSON_CANONICAL_COMPARE_BYTES = 200_000
|
|
59
67
|
SAFE_JSON_PATH_SEGMENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_-]{0,63}$")
|
|
60
68
|
DYNAMIC_JSON_KEY_RE = re.compile(r"(?i)(request|trace|nonce|random|timestamp|created[_-]?at|updated[_-]?at|date)")
|
|
61
69
|
SENSITIVE_JSON_KEY_RE = re.compile(
|
|
@@ -86,6 +94,22 @@ def json_bytes(data: Any, *, indent: int | None = None) -> str:
|
|
|
86
94
|
return json.dumps(data, ensure_ascii=False, sort_keys=True, separators=(",", ":") if indent is None else None, indent=indent)
|
|
87
95
|
|
|
88
96
|
|
|
97
|
+
def bounded_canonical_json(data: Any, *, max_bytes: int) -> str | None:
    """Render ``data`` in canonical form, giving up once it outgrows ``max_bytes``.

    The canonical form is sorted keys, two-space indentation, non-ASCII
    characters left unescaped, and exactly one trailing newline. The encoder is
    consumed chunk by chunk so an oversized document is abandoned as soon as the
    running size (as reported by ``byte_len_text``) passes the cap.

    Returns:
        The canonical text, or ``None`` when the text plus its trailing newline
        would be larger than ``max_bytes``.
    """
    canonical_encoder = json.JSONEncoder(ensure_ascii=False, sort_keys=True, indent=2)
    pieces: list[str] = []
    used = 0
    for piece in canonical_encoder.iterencode(data):
        used += byte_len_text(piece)
        if used > max_bytes:
            return None
        pieces.append(piece)
    # The trailing newline counts against the budget as one more byte.
    used += 1
    if used > max_bytes:
        return None
    pieces.append("\n")
    return "".join(pieces)
|
|
111
|
+
|
|
112
|
+
|
|
89
113
|
def json_path_child(path: str, key: object) -> str:
|
|
90
114
|
"""Return a JSON warning path segment without echoing sensitive/dynamic keys."""
|
|
91
115
|
text = str(key)
|
|
@@ -110,6 +134,30 @@ def bounded_int(value: object, *, default: int, minimum: int, maximum: int, name
|
|
|
110
134
|
return number
|
|
111
135
|
|
|
112
136
|
|
|
137
|
+
def bounded_float(
|
|
138
|
+
value: object,
|
|
139
|
+
*,
|
|
140
|
+
minimum: float,
|
|
141
|
+
maximum: float,
|
|
142
|
+
name: str,
|
|
143
|
+
) -> float | None:
|
|
144
|
+
if value is None:
|
|
145
|
+
return None
|
|
146
|
+
if isinstance(value, bool):
|
|
147
|
+
fail(f"{name} must be a finite number")
|
|
148
|
+
try:
|
|
149
|
+
number = float(value)
|
|
150
|
+
except (TypeError, ValueError, OverflowError):
|
|
151
|
+
fail(f"{name} must be a finite number")
|
|
152
|
+
if not math.isfinite(number):
|
|
153
|
+
fail(f"{name} must be finite")
|
|
154
|
+
if number < minimum:
|
|
155
|
+
fail(f"{name} must be >= {minimum:g}")
|
|
156
|
+
if number > maximum:
|
|
157
|
+
fail(f"{name} must be <= {maximum:g}")
|
|
158
|
+
return number
|
|
159
|
+
|
|
160
|
+
|
|
113
161
|
def normalized_link_target(parent: Path, raw_target: str) -> Path:
|
|
114
162
|
target = Path(raw_target)
|
|
115
163
|
if not target.is_absolute():
|
|
@@ -197,39 +245,102 @@ def first_dynamic_marker(text: str) -> tuple[int | None, str | None]:
|
|
|
197
245
|
return best_offset, best_name
|
|
198
246
|
|
|
199
247
|
|
|
200
|
-
def _walk_json(
|
|
248
|
+
def _walk_json(
    value: Any,
    path: str = "$",
    *,
    max_nodes: int = MAX_JSON_WALK_NODES,
    max_depth: int = MAX_JSON_WALK_DEPTH,
    max_warnings: int = MAX_JSON_SHAPE_WARNINGS,
) -> list[dict[str, Any]]:
    """Collect shape warnings from parsed JSON with an explicit-stack traversal.

    Warnings produced:
      * ``dynamic_json_key`` for keys matching ``DYNAMIC_JSON_KEY_RE``;
      * ``json_object_key_order_not_sorted`` when an object's keys (as text)
        are not in ascending order;
      * ``tool_order_not_sorted`` when a list whose path ends in ``.tools``
        holds only dicts with a ``name`` key and those names are unsorted;
      * ``json_walk_truncated`` when the node, depth, or warning cap was hit.

    Args:
        value: Parsed JSON value to inspect.
        path: Warning path assigned to ``value``.
        max_nodes: Upper bound on nodes popped from the stack; children that
            would not fit in the remaining budget are not queued.
        max_depth: Non-empty containers at this depth or deeper are skipped.
        max_warnings: Upper bound on the length of the returned list.

    Returns:
        The warnings in discovery order. When a cap was hit, the truncation
        notice is appended if there is room, otherwise it replaces the last
        warning; with ``max_warnings <= 0`` nothing is reported at all.
    """
    findings: list[dict[str, Any]] = []
    hit_node_cap = False
    hit_depth_cap = False
    hit_warning_cap = False

    def record(entry: dict[str, Any]) -> None:
        nonlocal hit_warning_cap
        if len(findings) >= max_warnings:
            hit_warning_cap = True
            return
        findings.append(entry)

    pending: list[tuple[Any, str, int]] = [(value, path, 0)]
    processed = 0
    while pending:
        if processed >= max_nodes:
            hit_node_cap = True
            break
        node, node_path, level = pending.pop()
        processed += 1
        if not isinstance(node, (dict, list)):
            # Scalars carry no shape information.
            continue
        if node and level >= max_depth:
            hit_depth_cap = True
            continue

        # Children may only occupy the budget not already used by visited or queued nodes.
        budget = max(0, max_nodes - processed - len(pending))
        children: list[tuple[Any, str, int]] = []
        if isinstance(node, dict):
            last_key: str | None = None
            in_order = True
            for key, child in node.items():
                key_text = str(key)
                if last_key is not None and key_text < last_key:
                    in_order = False
                last_key = key_text
                child_path = json_path_child(node_path, key)
                if DYNAMIC_JSON_KEY_RE.search(key_text):
                    record({
                        "code": "dynamic_json_key",
                        "path": child_path,
                        "severity": "warn",
                        "message": "Dynamic-looking JSON key appears in the prompt/request; place dynamic values after the reusable prefix.",
                    })
                if len(children) >= budget:
                    hit_node_cap = True
                    break
                children.append((child, child_path, level + 1))
            if not in_order:
                record({
                    "code": "json_object_key_order_not_sorted",
                    "path": node_path,
                    "severity": "info",
                    "message": "Object keys are not in deterministic sorted order; keep generated JSON stable across runs.",
                })
        else:
            if node_path.endswith(".tools") and all(isinstance(tool, dict) and "name" in tool for tool in node):
                tool_names = [str(tool.get("name")) for tool in node]
                if tool_names != sorted(tool_names):
                    record({
                        "code": "tool_order_not_sorted",
                        "path": node_path,
                        "severity": "info",
                        "message": "Tool definitions are not sorted by name; deterministic ordering improves prefix reuse.",
                    })
            for index, child in enumerate(node):
                if len(children) >= budget:
                    hit_node_cap = True
                    break
                children.append((child, f"{node_path}[{index}]", level + 1))
        # Reversed so the first child is popped (and therefore visited) first.
        pending.extend(reversed(children))

    if not (hit_node_cap or hit_depth_cap or hit_warning_cap):
        return findings
    if max_warnings <= 0:
        return findings
    cap_notice = {
        "code": "json_walk_truncated",
        "path": "$",
        "severity": "warn",
        "message": "JSON shape analysis was capped by node, depth, or warning limits; rerun on a narrower prompt fixture for complete linting.",
        "nodes_visited": processed,
        "max_nodes": max_nodes,
        "max_depth": max_depth,
        "max_warnings": max_warnings,
    }
    if len(findings) < max_warnings:
        findings.append(cap_notice)
    elif findings:
        findings[-1] = cap_notice
    return findings
|
|
234
345
|
|
|
235
346
|
|
|
@@ -241,8 +352,18 @@ def json_shape_warnings(text: str) -> tuple[str, list[dict[str, Any]]]:
|
|
|
241
352
|
if not isinstance(data, (dict, list)):
|
|
242
353
|
return "json-scalar", []
|
|
243
354
|
warnings = _walk_json(data)
|
|
244
|
-
|
|
245
|
-
|
|
355
|
+
input_bytes = byte_len_text(text)
|
|
356
|
+
canonical = bounded_canonical_json(data, max_bytes=MAX_JSON_CANONICAL_COMPARE_BYTES)
|
|
357
|
+
if canonical is None:
|
|
358
|
+
warnings.append({
|
|
359
|
+
"code": "json_canonical_check_skipped",
|
|
360
|
+
"path": "$",
|
|
361
|
+
"severity": "info",
|
|
362
|
+
"message": "JSON input is parseable but canonical formatting would exceed the comparison byte cap.",
|
|
363
|
+
"input_bytes": input_bytes,
|
|
364
|
+
"max_bytes": MAX_JSON_CANONICAL_COMPARE_BYTES,
|
|
365
|
+
})
|
|
366
|
+
elif canonical != text:
|
|
246
367
|
warnings.append({
|
|
247
368
|
"code": "json_not_canonical",
|
|
248
369
|
"path": "$",
|
|
@@ -252,7 +373,141 @@ def json_shape_warnings(text: str) -> tuple[str, list[dict[str, Any]]]:
|
|
|
252
373
|
return "json", warnings
|
|
253
374
|
|
|
254
375
|
|
|
255
|
-
def
|
|
376
|
+
def read_premium_relative_savings(reuses: int, *, write_multiplier: float, read_multiplier: float) -> float:
    """Return the relative savings of caching for ``reuses`` reads.

    Algebraically this is ``(1 + reuses) - (write_multiplier + reuses * read_multiplier)``:
    the saving on the write plus the per-read saving times the number of reads.
    A positive result means the cached path is cheaper.
    """
    write_saving = 1.0 - write_multiplier
    per_read_saving = 1.0 - read_multiplier
    return write_saving + (reuses * per_read_saving)
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
def max_profitable_read_premium_reuses(*, write_multiplier: float, read_multiplier: float) -> int:
    """Return the largest reuse count with strictly positive relative savings.

    This only makes sense when cache reads cost more than uncached input
    (``read_multiplier > 1``): every read then erodes whatever the discounted
    write saved, so there is a last reuse count that still pays off.

    Args:
        write_multiplier: Cache write cost relative to uncached cost 1.0.
        read_multiplier: Cache read cost relative to uncached cost 1.0.

    Returns:
        The largest reuse count whose savings exceed ``SAVINGS_EPSILON``, or 0
        when no positive count qualifies (for example ``write_multiplier >= 1``).

    Raises:
        ValueError: If ``read_multiplier`` is not greater than 1.0. Without this
            guard a value of exactly 1.0 divides by zero, and a read discount
            combined with a write discount makes the upward search loop forever.
    """
    # Written as `not (x > 1.0)` so that NaN is rejected as well.
    if not read_multiplier > 1.0:
        raise ValueError("read_multiplier must be > 1.0 to bound profitable reuses")
    # Closed-form estimate; the two loops below absorb float rounding on either side.
    candidate = max(0, int(math.floor((1.0 - write_multiplier) / (read_multiplier - 1.0))))
    while candidate > 0 and read_premium_relative_savings(
        candidate,
        write_multiplier=write_multiplier,
        read_multiplier=read_multiplier,
    ) <= SAVINGS_EPSILON:
        candidate -= 1
    while read_premium_relative_savings(
        candidate + 1,
        write_multiplier=write_multiplier,
        read_multiplier=read_multiplier,
    ) > SAVINGS_EPSILON:
        candidate += 1
    return candidate
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def build_amortization_report(
|
|
399
|
+
*,
|
|
400
|
+
eligible: bool,
|
|
401
|
+
prefix_tokens: int,
|
|
402
|
+
expected_reuses: int,
|
|
403
|
+
cache_write_multiplier: float | None,
|
|
404
|
+
cache_read_multiplier: float | None,
|
|
405
|
+
) -> dict[str, Any]:
|
|
406
|
+
"""Return advisory cache amortization math using user-supplied multipliers.
|
|
407
|
+
|
|
408
|
+
``expected_reuses`` means future cache reads after the initial cache write.
|
|
409
|
+
Multipliers are relative to uncached prefix input cost = 1.0. Provider
|
|
410
|
+
pricing/cache policies change, so ContextGuard intentionally does not ship
|
|
411
|
+
provider-specific multiplier defaults.
|
|
412
|
+
"""
|
|
413
|
+
supplied = cache_write_multiplier is not None and cache_read_multiplier is not None
|
|
414
|
+
break_even_reuses: int | None = None
|
|
415
|
+
max_profitable_reuses: int | None = None
|
|
416
|
+
expected_uncached_relative_cost: float | None = None
|
|
417
|
+
expected_cached_relative_cost: float | None = None
|
|
418
|
+
expected_relative_savings: float | None = None
|
|
419
|
+
status = "multipliers_not_supplied"
|
|
420
|
+
risk = "unknown"
|
|
421
|
+
|
|
422
|
+
if not eligible:
|
|
423
|
+
status = "not_cacheable"
|
|
424
|
+
risk = "high"
|
|
425
|
+
elif not supplied:
|
|
426
|
+
status = "multipliers_not_supplied"
|
|
427
|
+
risk = "unknown"
|
|
428
|
+
else:
|
|
429
|
+
expected_uncached_relative_cost = 1.0 + expected_reuses
|
|
430
|
+
expected_cached_relative_cost = cache_write_multiplier + (expected_reuses * cache_read_multiplier)
|
|
431
|
+
expected_relative_savings = expected_uncached_relative_cost - expected_cached_relative_cost
|
|
432
|
+
if cache_read_multiplier < 1.0:
|
|
433
|
+
if cache_write_multiplier <= 1.0:
|
|
434
|
+
break_even_reuses = 0
|
|
435
|
+
else:
|
|
436
|
+
break_even_reuses = int(math.ceil((cache_write_multiplier - 1.0) / (1.0 - cache_read_multiplier)))
|
|
437
|
+
if expected_reuses >= break_even_reuses:
|
|
438
|
+
status = "already_break_even_on_write" if break_even_reuses == 0 else "amortizes_with_expected_reuses"
|
|
439
|
+
risk = "low"
|
|
440
|
+
elif expected_reuses > 0:
|
|
441
|
+
status = "not_enough_expected_reuses"
|
|
442
|
+
risk = "medium"
|
|
443
|
+
else:
|
|
444
|
+
status = "not_enough_expected_reuses"
|
|
445
|
+
risk = "high"
|
|
446
|
+
elif cache_read_multiplier == 1.0 and cache_write_multiplier <= 1.0:
|
|
447
|
+
break_even_reuses = 0
|
|
448
|
+
status = "already_break_even_on_write"
|
|
449
|
+
risk = "low"
|
|
450
|
+
elif cache_read_multiplier > 1.0:
|
|
451
|
+
if cache_write_multiplier < 1.0:
|
|
452
|
+
max_profitable_reuses = max_profitable_read_premium_reuses(
|
|
453
|
+
write_multiplier=cache_write_multiplier,
|
|
454
|
+
read_multiplier=cache_read_multiplier,
|
|
455
|
+
)
|
|
456
|
+
if expected_relative_savings < -SAVINGS_EPSILON:
|
|
457
|
+
status = "no_read_discount"
|
|
458
|
+
risk = "high"
|
|
459
|
+
elif expected_reuses == 0:
|
|
460
|
+
if expected_relative_savings > SAVINGS_EPSILON:
|
|
461
|
+
status = "write_discount_only_no_expected_reads"
|
|
462
|
+
risk = "low"
|
|
463
|
+
else:
|
|
464
|
+
status = "break_even_only_no_expected_reads"
|
|
465
|
+
risk = "medium"
|
|
466
|
+
elif abs(expected_relative_savings) <= SAVINGS_EPSILON:
|
|
467
|
+
status = "break_even_only_with_limited_reuses"
|
|
468
|
+
risk = "medium"
|
|
469
|
+
else:
|
|
470
|
+
status = "positive_only_with_limited_reuses"
|
|
471
|
+
risk = "medium"
|
|
472
|
+
else:
|
|
473
|
+
status = "no_read_discount"
|
|
474
|
+
risk = "high"
|
|
475
|
+
|
|
476
|
+
return {
|
|
477
|
+
"expected_reuses": expected_reuses,
|
|
478
|
+
"expected_reuses_semantics": "future_cache_reads_after_initial_write",
|
|
479
|
+
"cacheable_prefix_tokens": prefix_tokens,
|
|
480
|
+
"break_even_reuses": break_even_reuses,
|
|
481
|
+
"max_profitable_reuses": max_profitable_reuses,
|
|
482
|
+
"status": status,
|
|
483
|
+
"risk": risk,
|
|
484
|
+
"cache_write_multiplier": cache_write_multiplier,
|
|
485
|
+
"cache_read_multiplier": cache_read_multiplier,
|
|
486
|
+
"expected_uncached_relative_cost": expected_uncached_relative_cost,
|
|
487
|
+
"expected_cached_relative_cost": expected_cached_relative_cost,
|
|
488
|
+
"expected_relative_savings": expected_relative_savings,
|
|
489
|
+
"multiplier_baseline": "uncached_prefix_input_cost_equals_1.0",
|
|
490
|
+
"user_supplied_multipliers": supplied,
|
|
491
|
+
"formula": "expected_cached=write_multiplier + expected_reuses*read_multiplier; expected_uncached=1 + expected_reuses; break_even=ceil((write_multiplier - 1.0)/(1.0-read_multiplier)) only when read_multiplier<1; max_profitable_reuses is the largest integer reuse count with expected_uncached-expected_cached > 0, only when read_multiplier>1 and write_multiplier<1",
|
|
492
|
+
"claim_boundary": {
|
|
493
|
+
"advisory_only": True,
|
|
494
|
+
"provider_pricing_defaults_included": False,
|
|
495
|
+
"provider_measured_cache_hit": False,
|
|
496
|
+
"hosted_api_token_or_cost_savings_claim_allowed": False,
|
|
497
|
+
"requires_user_supplied_or_provider_documented_multipliers": True,
|
|
498
|
+
},
|
|
499
|
+
}
|
|
500
|
+
|
|
501
|
+
|
|
502
|
+
def score_prompt(
|
|
503
|
+
text: str,
|
|
504
|
+
*,
|
|
505
|
+
provider: str,
|
|
506
|
+
minimum_cacheable_tokens: int,
|
|
507
|
+
expected_reuses: int = DEFAULT_EXPECTED_REUSES,
|
|
508
|
+
cache_write_multiplier: float | None = None,
|
|
509
|
+
cache_read_multiplier: float | None = None,
|
|
510
|
+
) -> dict[str, Any]:
|
|
256
511
|
prompt_kind, shape_warnings = json_shape_warnings(text)
|
|
257
512
|
dynamic_offset, dynamic_marker = first_dynamic_marker(text)
|
|
258
513
|
prefix_text = text if dynamic_offset is None else text[:dynamic_offset]
|
|
@@ -282,13 +537,14 @@ def score_prompt(text: str, *, provider: str, minimum_cacheable_tokens: int) ->
|
|
|
282
537
|
"message": "Anthropic caching usually requires cache_control around the reusable prefix.",
|
|
283
538
|
})
|
|
284
539
|
|
|
540
|
+
eligible = prefix_estimated >= minimum_cacheable_tokens
|
|
285
541
|
return {
|
|
286
542
|
"tool": TOOL_NAME,
|
|
287
543
|
"schema_version": SCHEMA_VERSION,
|
|
288
544
|
"provider": provider,
|
|
289
545
|
"prompt_kind": prompt_kind,
|
|
290
546
|
"minimum_cacheable_tokens": minimum_cacheable_tokens,
|
|
291
|
-
"eligible":
|
|
547
|
+
"eligible": eligible,
|
|
292
548
|
"estimated_tokens": estimated,
|
|
293
549
|
"cacheable_prefix_tokens": prefix_estimated,
|
|
294
550
|
"token_estimate": {
|
|
@@ -305,6 +561,13 @@ def score_prompt(text: str, *, provider: str, minimum_cacheable_tokens: int) ->
|
|
|
305
561
|
"static_prefix_ratio": round(static_ratio, 6),
|
|
306
562
|
"warnings": warnings,
|
|
307
563
|
"provider_caveat": PROVIDER_CAVEATS[provider],
|
|
564
|
+
"amortization": build_amortization_report(
|
|
565
|
+
eligible=eligible,
|
|
566
|
+
prefix_tokens=prefix_estimated,
|
|
567
|
+
expected_reuses=expected_reuses,
|
|
568
|
+
cache_write_multiplier=cache_write_multiplier,
|
|
569
|
+
cache_read_multiplier=cache_read_multiplier,
|
|
570
|
+
),
|
|
308
571
|
"raw_prompt_stored": False,
|
|
309
572
|
"claim_boundary": {
|
|
310
573
|
"advisory_only": True,
|
|
@@ -320,11 +583,16 @@ def render_text(report: dict[str, Any]) -> str:
|
|
|
320
583
|
status = "eligible" if report.get("eligible") else "not eligible"
|
|
321
584
|
warnings = report.get("warnings") if isinstance(report.get("warnings"), list) else []
|
|
322
585
|
warning_codes = ", ".join(str(item.get("code")) for item in warnings if isinstance(item, dict)) or "none"
|
|
586
|
+
amortization = report.get("amortization") if isinstance(report.get("amortization"), dict) else {}
|
|
323
587
|
return (
|
|
324
588
|
f"{TOOL_NAME}: {status} for {report['provider']} "
|
|
325
589
|
f"(static_prefix≈{report['cacheable_prefix_tokens']} char/4 tokens, "
|
|
326
590
|
f"minimum={report['minimum_cacheable_tokens']})\n"
|
|
327
591
|
f"warnings: {warning_codes}\n"
|
|
592
|
+
f"amortization: {amortization.get('status', 'unknown')} "
|
|
593
|
+
f"(risk={amortization.get('risk', 'unknown')}, "
|
|
594
|
+
f"break_even_reuses={amortization.get('break_even_reuses')}, "
|
|
595
|
+
f"max_profitable_reuses={amortization.get('max_profitable_reuses')})\n"
|
|
328
596
|
"claim boundary: advisory static lint only; not a measured provider cache hit or cost saving.\n"
|
|
329
597
|
)
|
|
330
598
|
|
|
@@ -344,6 +612,24 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
344
612
|
help="override provider threshold for model/platform-specific cache minimums",
|
|
345
613
|
)
|
|
346
614
|
parser.add_argument("--max-input-bytes", default=DEFAULT_MAX_INPUT_BYTES, help=f"maximum input bytes (default: {DEFAULT_MAX_INPUT_BYTES})")
|
|
615
|
+
parser.add_argument(
|
|
616
|
+
"--expected-reuses",
|
|
617
|
+
default=DEFAULT_EXPECTED_REUSES,
|
|
618
|
+
help=(
|
|
619
|
+
"future cache reads expected after the initial write; advisory only "
|
|
620
|
+
f"(default: {DEFAULT_EXPECTED_REUSES})"
|
|
621
|
+
),
|
|
622
|
+
)
|
|
623
|
+
parser.add_argument(
|
|
624
|
+
"--cache-write-multiplier",
|
|
625
|
+
default=None,
|
|
626
|
+
help="optional user-supplied cache write multiplier relative to uncached prefix input cost=1.0",
|
|
627
|
+
)
|
|
628
|
+
parser.add_argument(
|
|
629
|
+
"--cache-read-multiplier",
|
|
630
|
+
default=None,
|
|
631
|
+
help="optional user-supplied cache read multiplier relative to uncached prefix input cost=1.0",
|
|
632
|
+
)
|
|
347
633
|
parser.add_argument("--json", action="store_true", help="emit stable JSON")
|
|
348
634
|
return parser
|
|
349
635
|
|
|
@@ -362,8 +648,34 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
362
648
|
maximum=10_000_000,
|
|
363
649
|
name="--minimum-cacheable-tokens",
|
|
364
650
|
)
|
|
651
|
+
expected_reuses = bounded_int(
|
|
652
|
+
args.expected_reuses,
|
|
653
|
+
default=DEFAULT_EXPECTED_REUSES,
|
|
654
|
+
minimum=0,
|
|
655
|
+
maximum=MAX_EXPECTED_REUSES,
|
|
656
|
+
name="--expected-reuses",
|
|
657
|
+
)
|
|
658
|
+
cache_write_multiplier = bounded_float(
|
|
659
|
+
args.cache_write_multiplier,
|
|
660
|
+
minimum=0.0,
|
|
661
|
+
maximum=MAX_CACHE_MULTIPLIER,
|
|
662
|
+
name="--cache-write-multiplier",
|
|
663
|
+
)
|
|
664
|
+
cache_read_multiplier = bounded_float(
|
|
665
|
+
args.cache_read_multiplier,
|
|
666
|
+
minimum=0.0,
|
|
667
|
+
maximum=MAX_CACHE_MULTIPLIER,
|
|
668
|
+
name="--cache-read-multiplier",
|
|
669
|
+
)
|
|
365
670
|
text = read_limited_path(Path(args.input), max_input_bytes) if args.input else read_limited_stdin(max_input_bytes)
|
|
366
|
-
report = score_prompt(
|
|
671
|
+
report = score_prompt(
|
|
672
|
+
text,
|
|
673
|
+
provider=provider,
|
|
674
|
+
minimum_cacheable_tokens=minimum,
|
|
675
|
+
expected_reuses=expected_reuses,
|
|
676
|
+
cache_write_multiplier=cache_write_multiplier,
|
|
677
|
+
cache_read_multiplier=cache_read_multiplier,
|
|
678
|
+
)
|
|
367
679
|
if args.json:
|
|
368
680
|
sys.stdout.write(json_bytes(report, indent=2) + "\n")
|
|
369
681
|
else:
|
|
@@ -20,10 +20,12 @@ import os
|
|
|
20
20
|
from pathlib import Path
|
|
21
21
|
import re
|
|
22
22
|
import sys
|
|
23
|
-
from typing import Callable
|
|
23
|
+
from typing import Callable, Iterable
|
|
24
24
|
|
|
25
25
|
DEFAULT_MAX_BYTES = 10_000_000
|
|
26
26
|
MAX_MAX_BYTES = 100_000_000
|
|
27
|
+
MAX_SEARCH_DEDUPE_KEYS = 50_000
|
|
28
|
+
JSON_PARSE_FAILED = object()
|
|
27
29
|
# 토큰 추정은 보수적 proxy 일 뿐이다(관측값 아님). 평균 ~4 chars/token 휴리스틱을 쓰되
|
|
28
30
|
# 메타데이터에 measurement="estimated" 로 명시해 관측 토큰 수와 혼동되지 않게 한다.
|
|
29
31
|
TOKEN_PROXY_CHARS_PER_TOKEN = 4
|
|
@@ -214,20 +216,57 @@ def token_proxy(text: str) -> int:
|
|
|
214
216
|
return max(1, round(len(text) / TOKEN_PROXY_CHARS_PER_TOKEN))
|
|
215
217
|
|
|
216
218
|
|
|
219
|
+
LINE_BOUNDARY_CHARS = {"\n", "\r", "\v", "\f", "\x1c", "\x1d", "\x1e", "\x85", "\u2028", "\u2029"}
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def iter_text_lines(text: str) -> Iterable[str]:
    """Yield lines with str.splitlines() boundaries without building a line list.

    Line terminators are not included in the yielded lines, ``\\r\\n`` counts as
    one boundary, and a trailing terminator does not produce a final empty line.
    Boundaries are found with a single regex scan instead of inspecting every
    character in a Python-level loop.
    """
    # Same boundary set as str.splitlines(); "\r\n" is listed first so it is
    # consumed as one terminator rather than two.
    boundary = re.compile(r"\r\n|[\n\r\x0b\x0c\x1c\x1d\x1e\x85\u2028\u2029]")
    start = 0
    for match in boundary.finditer(text):
        yield text[start:match.start()]
        start = match.end()
    # Emit the remainder only when the text does not end on a boundary.
    if start < len(text):
        yield text[start:]
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def sample_text_lines(text: str, limit: int) -> list[str]:
    """Return at most ``limit`` leading lines of ``text``.

    Lines come from ``iter_text_lines``, so scanning stops as soon as enough
    lines were collected. A ``limit`` of zero or less yields an empty list
    (previously one line was still returned because the line was appended
    before the limit was checked).
    """
    sample: list[str] = []
    if limit <= 0:
        return sample
    for line in iter_text_lines(text):
        sample.append(line)
        if len(sample) >= limit:
            break
    return sample
|
|
251
|
+
|
|
252
|
+
|
|
217
253
|
def classify_content(text: str) -> str:
    """Classify ``text`` on a best-effort basis into one of CONTENT_TYPES.

    Blank input is reported as prose. Otherwise valid JSON is checked first,
    and everything else is delegated to ``classify_non_json_content``, which
    handles the remaining content types.
    """
    body = text.strip()
    if body == "":
        return "prose"
    return "json" if _looks_like_json(body) else classify_non_json_content(body)
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def classify_non_json_content(stripped: str) -> str:
|
|
269
|
+
sample = sample_text_lines(stripped, 200)
|
|
231
270
|
if _looks_like_diff(sample):
|
|
232
271
|
return "diff"
|
|
233
272
|
if _looks_like_search(sample):
|
|
@@ -355,14 +394,17 @@ def build_readable_compression_metadata(
|
|
|
355
394
|
}
|
|
356
395
|
|
|
357
396
|
|
|
358
|
-
def
|
|
359
|
-
if stripped[0] not in "{[":
|
|
360
|
-
return
|
|
397
|
+
def parse_json_candidate(stripped: str) -> object:
    """Parse ``stripped`` as a JSON object or array.

    Only text whose first character is ``{`` or ``[`` is attempted.

    Returns:
        The parsed value, or the ``JSON_PARSE_FAILED`` sentinel when the text is
        empty, starts with any other character, or cannot be parsed (including
        input nested deeply enough to raise ``RecursionError``).
    """
    if not stripped.startswith(("{", "[")):
        return JSON_PARSE_FAILED
    try:
        parsed = json.loads(stripped)
    except (ValueError, RecursionError):
        return JSON_PARSE_FAILED
    return parsed
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def _looks_like_json(stripped: str) -> bool:
    """Return True when ``parse_json_candidate`` accepts ``stripped``."""
    candidate = parse_json_candidate(stripped)
    return candidate is not JSON_PARSE_FAILED
|
|
366
408
|
|
|
367
409
|
|
|
368
410
|
def _ratio(matches: int, total: int, threshold: float) -> bool:
|
|
@@ -390,15 +432,7 @@ def _looks_like_code(sample: list[str]) -> bool:
|
|
|
390
432
|
return _ratio(matches, len(sample), 0.25)
|
|
391
433
|
|
|
392
434
|
|
|
393
|
-
def
|
|
394
|
-
"""Re-serialize JSON without insignificant whitespace (data-preserving)."""
|
|
395
|
-
try:
|
|
396
|
-
parsed = json.loads(text)
|
|
397
|
-
except (ValueError, RecursionError):
|
|
398
|
-
# 파싱 불가 시 무손실을 깨지 않도록 prose 전략으로 안전하게 폴백한다.
|
|
399
|
-
compressed, detail = compress_prose(text)
|
|
400
|
-
detail["fallback_from"] = "json"
|
|
401
|
-
return compressed, detail
|
|
435
|
+
def compress_parsed_json(text: str, parsed: object) -> tuple[str, dict[str, object]]:
|
|
402
436
|
compact = json.dumps(parsed, ensure_ascii=False, separators=(",", ":"))
|
|
403
437
|
if not text.endswith("\n"):
|
|
404
438
|
trailing = ""
|
|
@@ -407,6 +441,17 @@ def compress_json(text: str) -> tuple[str, dict[str, object]]:
|
|
|
407
441
|
return compact + trailing, {"strategy": "json-compact", "lossy": False, "json_parse_ok": True}
|
|
408
442
|
|
|
409
443
|
|
|
444
|
+
def compress_json(text: str) -> tuple[str, dict[str, object]]:
    """Re-serialize JSON without insignificant whitespace (data-preserving).

    Text that ``parse_json_candidate`` rejects is handed to ``compress_prose``
    instead, and the returned detail dict records ``fallback_from = "json"``.
    """
    candidate = parse_json_candidate(text.strip())
    if candidate is not JSON_PARSE_FAILED:
        return compress_parsed_json(text, candidate)
    # When parsing fails, fall back safely to the prose strategy so the
    # lossless guarantee is not broken.
    fallback_text, fallback_detail = compress_prose(text)
    fallback_detail["fallback_from"] = "json"
    return fallback_text, fallback_detail
|
|
453
|
+
|
|
454
|
+
|
|
410
455
|
def compress_diff(text: str) -> tuple[str, dict[str, object]]:
|
|
411
456
|
"""Keep file headers, hunk headers, and +/- changes; collapse context runs."""
|
|
412
457
|
out: list[str] = []
|
|
@@ -464,18 +509,28 @@ def compress_log(text: str) -> tuple[str, dict[str, object]]:
|
|
|
464
509
|
|
|
465
510
|
|
|
466
511
|
def compress_search(text: str) -> tuple[str, dict[str, object]]:
    """Drop repeated match lines, keeping the first occurrence of each.

    Two lines count as duplicates when they are equal after stripping trailing
    whitespace. At most ``MAX_SEARCH_DEDUPE_KEYS`` distinct keys are remembered;
    once that many are stored, new lines are still emitted but no longer
    tracked, so later repeats of those lines are kept.

    Returns:
        The joined output (via ``_join_lines``) and a detail dict with the
        strategy name, whether anything was dropped, the drop count, and the
        key-limit bookkeeping.
    """
    kept: list[str] = []
    known_keys: set[str] = set()
    duplicates = 0
    key_cap_hit = False
    for raw_line in iter_text_lines(text):
        dedupe_key = raw_line.rstrip()
        if dedupe_key in known_keys:
            duplicates += 1
            continue
        if len(known_keys) >= MAX_SEARCH_DEDUPE_KEYS:
            key_cap_hit = True
        else:
            known_keys.add(dedupe_key)
        kept.append(raw_line)
    body = _join_lines(kept, text)
    detail: dict[str, object] = {
        "strategy": "search-dedupe",
        "lossy": duplicates > 0,
        "duplicate_lines_dropped": duplicates,
        "dedupe_key_limit": MAX_SEARCH_DEDUPE_KEYS,
        "dedupe_key_limit_reached": key_cap_hit,
    }
    return body, detail
|
|
479
534
|
|
|
480
535
|
|
|
481
536
|
def compress_code(text: str) -> tuple[str, dict[str, object]]:
|
|
@@ -689,14 +744,21 @@ def compress_text(
|
|
|
689
744
|
the compressed body, or the metadata that follows.
|
|
690
745
|
"""
|
|
691
746
|
sanitized, redacted_lines = sanitize_text(text, show_paths=show_paths)
|
|
747
|
+
parsed_json: object = JSON_PARSE_FAILED
|
|
692
748
|
if forced_type is not None:
|
|
693
749
|
content_type, type_source = forced_type, "override"
|
|
694
750
|
else:
|
|
695
|
-
|
|
751
|
+
stripped = sanitized.strip()
|
|
752
|
+
parsed_json = parse_json_candidate(stripped)
|
|
753
|
+
content_type = "json" if parsed_json is not JSON_PARSE_FAILED else classify_non_json_content(stripped)
|
|
754
|
+
type_source = "detected"
|
|
696
755
|
if compression_mode == "readable" and content_type == "prose":
|
|
697
756
|
compressed, strategy_detail = compress_prose_readable(sanitized)
|
|
698
757
|
else:
|
|
699
|
-
|
|
758
|
+
if content_type == "json" and parsed_json is not JSON_PARSE_FAILED:
|
|
759
|
+
compressed, strategy_detail = compress_parsed_json(sanitized, parsed_json)
|
|
760
|
+
else:
|
|
761
|
+
compressed, strategy_detail = STRATEGIES[content_type](sanitized)
|
|
700
762
|
if compression_mode == "readable":
|
|
701
763
|
strategy_detail["readable_mode"] = True
|
|
702
764
|
strategy_detail["readable_strategy"] = "sentence-window-preview"
|