@ictechgy/context-guard 0.4.10 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -1
- package/README.ko.md +32 -21
- package/README.md +38 -29
- package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
- package/docs/benchmark-workflow-examples.md +3 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
- package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
- package/docs/experimental-benchmark-fixtures.md +24 -7
- package/package.json +2 -1
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +14 -11
- package/plugins/context-guard/README.md +15 -14
- package/plugins/context-guard/bin/context-guard +46 -11
- package/plugins/context-guard/bin/context-guard-artifact +342 -33
- package/plugins/context-guard/bin/context-guard-audit +33 -2
- package/plugins/context-guard/bin/context-guard-bench +1542 -31
- package/plugins/context-guard/bin/context-guard-cache-score +318 -33
- package/plugins/context-guard/bin/context-guard-cost +7 -2
- package/plugins/context-guard/bin/context-guard-experiments +364 -8
- package/plugins/context-guard/bin/context-guard-failed-nudge +6 -2
- package/plugins/context-guard/bin/context-guard-pack +301 -17
- package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
- package/plugins/context-guard/bin/context-guard-tool-prune +241 -54
- package/plugins/context-guard/bin/context-guard-trim-output +288 -41
- package/plugins/context-guard/brief/README.md +5 -5
- package/plugins/context-guard/lib/context_guard_commands.py +214 -190
|
@@ -23,6 +23,10 @@ TOOL_NAME = "context-guard-cache-score"
|
|
|
23
23
|
SCHEMA_VERSION = "contextguard.cache-score.v1"
|
|
24
24
|
DEFAULT_MAX_INPUT_BYTES = 1_000_000
|
|
25
25
|
TOKEN_PROXY_CHARS_PER_TOKEN = 4
|
|
26
|
+
DEFAULT_EXPECTED_REUSES = 1
|
|
27
|
+
MAX_EXPECTED_REUSES = 1_000_000
|
|
28
|
+
MAX_CACHE_MULTIPLIER = 1_000_000.0
|
|
29
|
+
SAVINGS_EPSILON = 1e-12
|
|
26
30
|
PROVIDER_MINIMUM_CACHEABLE_TOKENS = {
|
|
27
31
|
# Provider and model minimums move over time. These defaults are advisory
|
|
28
32
|
# and can be overridden with --minimum-cacheable-tokens.
|
|
@@ -56,6 +60,9 @@ ALLOWED_FIRST_ABSOLUTE_SYMLINKS = {
|
|
|
56
60
|
"var": Path("/private/var"),
|
|
57
61
|
}
|
|
58
62
|
MAX_JSON_PATH_SEGMENT_CHARS = 64
|
|
63
|
+
MAX_JSON_WALK_NODES = 10_000
|
|
64
|
+
MAX_JSON_WALK_DEPTH = 64
|
|
65
|
+
MAX_JSON_SHAPE_WARNINGS = 200
|
|
59
66
|
SAFE_JSON_PATH_SEGMENT_RE = re.compile(r"^[A-Za-z_][A-Za-z0-9_-]{0,63}$")
|
|
60
67
|
DYNAMIC_JSON_KEY_RE = re.compile(r"(?i)(request|trace|nonce|random|timestamp|created[_-]?at|updated[_-]?at|date)")
|
|
61
68
|
SENSITIVE_JSON_KEY_RE = re.compile(
|
|
@@ -110,6 +117,30 @@ def bounded_int(value: object, *, default: int, minimum: int, maximum: int, name
|
|
|
110
117
|
return number
|
|
111
118
|
|
|
112
119
|
|
|
120
|
+
def bounded_float(
|
|
121
|
+
value: object,
|
|
122
|
+
*,
|
|
123
|
+
minimum: float,
|
|
124
|
+
maximum: float,
|
|
125
|
+
name: str,
|
|
126
|
+
) -> float | None:
|
|
127
|
+
if value is None:
|
|
128
|
+
return None
|
|
129
|
+
if isinstance(value, bool):
|
|
130
|
+
fail(f"{name} must be a finite number")
|
|
131
|
+
try:
|
|
132
|
+
number = float(value)
|
|
133
|
+
except (TypeError, ValueError, OverflowError):
|
|
134
|
+
fail(f"{name} must be a finite number")
|
|
135
|
+
if not math.isfinite(number):
|
|
136
|
+
fail(f"{name} must be finite")
|
|
137
|
+
if number < minimum:
|
|
138
|
+
fail(f"{name} must be >= {minimum:g}")
|
|
139
|
+
if number > maximum:
|
|
140
|
+
fail(f"{name} must be <= {maximum:g}")
|
|
141
|
+
return number
|
|
142
|
+
|
|
143
|
+
|
|
113
144
|
def normalized_link_target(parent: Path, raw_target: str) -> Path:
|
|
114
145
|
target = Path(raw_target)
|
|
115
146
|
if not target.is_absolute():
|
|
@@ -197,39 +228,102 @@ def first_dynamic_marker(text: str) -> tuple[int | None, str | None]:
|
|
|
197
228
|
return best_offset, best_name
|
|
198
229
|
|
|
199
230
|
|
|
200
|
-
def _walk_json(
|
|
231
|
+
def _walk_json(
    value: Any,
    path: str = "$",
    *,
    max_nodes: int = MAX_JSON_WALK_NODES,
    max_depth: int = MAX_JSON_WALK_DEPTH,
    max_warnings: int = MAX_JSON_SHAPE_WARNINGS,
) -> list[dict[str, Any]]:
    """Collect shape warnings for a parsed JSON value under fixed budgets.

    The value is traversed iteratively with an explicit stack, in document
    order. Three kinds of finding are reported:

    * ``dynamic_json_key`` - an object key matching ``DYNAMIC_JSON_KEY_RE``;
    * ``json_object_key_order_not_sorted`` - object keys not in sorted order;
    * ``tool_order_not_sorted`` - a list at a path ending in ``.tools`` whose
      entries are all objects with a ``name`` but are not sorted by it.

    Args:
        value: Parsed JSON value to inspect.
        path: Path label used for the root node.
        max_nodes: Budget for visited plus queued nodes.
        max_depth: Non-empty containers at this depth or deeper are skipped.
        max_warnings: Maximum number of entries in the returned list.

    Returns:
        The warning dicts. If any budget was hit and ``max_warnings`` is
        positive, a ``json_walk_truncated`` entry is appended, or replaces the
        last entry when the list is already full.
    """
    warnings: list[dict[str, Any]] = []
    # One flag per budget; any of them triggers the trailing truncation entry.
    hit_node_cap = False
    hit_depth_cap = False
    hit_warning_cap = False

    def emit(code: str, where: str, severity: str, message: str) -> None:
        nonlocal hit_warning_cap
        if len(warnings) >= max_warnings:
            hit_warning_cap = True
            return
        warnings.append({
            "code": code,
            "path": where,
            "severity": severity,
            "message": message,
        })

    pending: list[tuple[Any, str, int]] = [(value, path, 0)]
    visited = 0
    while pending:
        if visited >= max_nodes:
            hit_node_cap = True
            break
        node, node_path, depth = pending.pop()
        visited += 1

        is_container = isinstance(node, (dict, list))
        if is_container and node and depth >= max_depth:
            hit_depth_cap = True
            continue
        if not is_container:
            continue

        # Children may only fill what is left after visited and queued nodes.
        budget = max_nodes - visited - len(pending)
        if budget < 0:
            budget = 0

        children: list[tuple[Any, str, int]] = []
        if isinstance(node, dict):
            in_order = True
            last_key: str | None = None
            for key, child in node.items():
                key_text = str(key)
                if last_key is not None and key_text < last_key:
                    in_order = False
                last_key = key_text
                child_path = json_path_child(node_path, key)
                if DYNAMIC_JSON_KEY_RE.search(key_text):
                    emit(
                        "dynamic_json_key",
                        child_path,
                        "warn",
                        "Dynamic-looking JSON key appears in the prompt/request; place dynamic values after the reusable prefix.",
                    )
                # The key itself is linted above even when its value no longer fits the budget.
                if len(children) >= budget:
                    hit_node_cap = True
                    break
                children.append((child, child_path, depth + 1))
            if not in_order:
                emit(
                    "json_object_key_order_not_sorted",
                    node_path,
                    "info",
                    "Object keys are not in deterministic sorted order; keep generated JSON stable across runs.",
                )
        else:
            if node_path.endswith(".tools") and all(isinstance(tool, dict) and "name" in tool for tool in node):
                tool_names = [str(tool.get("name")) for tool in node]
                if tool_names != sorted(tool_names):
                    emit(
                        "tool_order_not_sorted",
                        node_path,
                        "info",
                        "Tool definitions are not sorted by name; deterministic ordering improves prefix reuse.",
                    )
            for index, child in enumerate(node[:budget]):
                children.append((child, f"{node_path}[{index}]", depth + 1))
            if len(node) > budget:
                hit_node_cap = True

        # Push in reverse so the first child is popped (visited) first.
        pending.extend(children[::-1])

    truncated = hit_node_cap or hit_depth_cap or hit_warning_cap
    if not truncated or max_warnings <= 0:
        return warnings

    cap_warning = {
        "code": "json_walk_truncated",
        "path": "$",
        "severity": "warn",
        "message": "JSON shape analysis was capped by node, depth, or warning limits; rerun on a narrower prompt fixture for complete linting.",
        "nodes_visited": visited,
        "max_nodes": max_nodes,
        "max_depth": max_depth,
        "max_warnings": max_warnings,
    }
    if len(warnings) < max_warnings:
        warnings.append(cap_warning)
    elif warnings:
        # The list is full: sacrifice the last finding so truncation stays visible.
        warnings[-1] = cap_warning
    return warnings
|
|
234
328
|
|
|
235
329
|
|
|
@@ -252,7 +346,141 @@ def json_shape_warnings(text: str) -> tuple[str, list[dict[str, Any]]]:
|
|
|
252
346
|
return "json", warnings
|
|
253
347
|
|
|
254
348
|
|
|
255
|
-
def
|
|
349
|
+
def read_premium_relative_savings(reuses: int, *, write_multiplier: float, read_multiplier: float) -> float:
    """Return cached-vs-uncached savings for one write plus ``reuses`` reads.

    Multipliers are relative to an uncached prefix cost of 1.0, so each term
    is the saving (positive) or premium (negative) of that operation.
    """
    write_term = 1.0 - write_multiplier
    read_term = reuses * (1.0 - read_multiplier)
    return write_term + read_term
|
|
351
|
+
|
|
352
|
+
|
|
353
|
+
def max_profitable_read_premium_reuses(*, write_multiplier: float, read_multiplier: float) -> int:
    """Return the largest reuse count with strictly positive relative savings.

    This only has a finite answer when cache reads cost more than uncached
    input (``read_multiplier > 1.0``): every extra read then erodes whatever
    the write discount saved. "Strictly positive" means the value returned by
    ``read_premium_relative_savings`` exceeds ``SAVINGS_EPSILON``.

    Args:
        write_multiplier: Cache write cost relative to uncached cost 1.0.
        read_multiplier: Cache read cost relative to uncached cost 1.0.

    Returns:
        The largest such reuse count, or 0 when no positive count qualifies.

    Raises:
        ValueError: If ``read_multiplier`` is not greater than 1.0 (including
            NaN). Without this guard the estimate below divides by zero at
            exactly 1.0, and the upward search never terminates below 1.0
            when the write is discounted.
    """
    if not read_multiplier > 1.0:
        raise ValueError("read_multiplier must be > 1.0 to bound profitable reuses")

    # Closed-form estimate; the two loops below correct float rounding drift.
    candidate = max(0, int(math.floor((1.0 - write_multiplier) / (read_multiplier - 1.0))))
    while candidate > 0 and read_premium_relative_savings(
        candidate,
        write_multiplier=write_multiplier,
        read_multiplier=read_multiplier,
    ) <= SAVINGS_EPSILON:
        candidate -= 1
    while read_premium_relative_savings(
        candidate + 1,
        write_multiplier=write_multiplier,
        read_multiplier=read_multiplier,
    ) > SAVINGS_EPSILON:
        candidate += 1
    return candidate
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def build_amortization_report(
|
|
372
|
+
*,
|
|
373
|
+
eligible: bool,
|
|
374
|
+
prefix_tokens: int,
|
|
375
|
+
expected_reuses: int,
|
|
376
|
+
cache_write_multiplier: float | None,
|
|
377
|
+
cache_read_multiplier: float | None,
|
|
378
|
+
) -> dict[str, Any]:
|
|
379
|
+
"""Return advisory cache amortization math using user-supplied multipliers.
|
|
380
|
+
|
|
381
|
+
``expected_reuses`` means future cache reads after the initial cache write.
|
|
382
|
+
Multipliers are relative to uncached prefix input cost = 1.0. Provider
|
|
383
|
+
pricing/cache policies change, so ContextGuard intentionally does not ship
|
|
384
|
+
provider-specific multiplier defaults.
|
|
385
|
+
"""
|
|
386
|
+
supplied = cache_write_multiplier is not None and cache_read_multiplier is not None
|
|
387
|
+
break_even_reuses: int | None = None
|
|
388
|
+
max_profitable_reuses: int | None = None
|
|
389
|
+
expected_uncached_relative_cost: float | None = None
|
|
390
|
+
expected_cached_relative_cost: float | None = None
|
|
391
|
+
expected_relative_savings: float | None = None
|
|
392
|
+
status = "multipliers_not_supplied"
|
|
393
|
+
risk = "unknown"
|
|
394
|
+
|
|
395
|
+
if not eligible:
|
|
396
|
+
status = "not_cacheable"
|
|
397
|
+
risk = "high"
|
|
398
|
+
elif not supplied:
|
|
399
|
+
status = "multipliers_not_supplied"
|
|
400
|
+
risk = "unknown"
|
|
401
|
+
else:
|
|
402
|
+
expected_uncached_relative_cost = 1.0 + expected_reuses
|
|
403
|
+
expected_cached_relative_cost = cache_write_multiplier + (expected_reuses * cache_read_multiplier)
|
|
404
|
+
expected_relative_savings = expected_uncached_relative_cost - expected_cached_relative_cost
|
|
405
|
+
if cache_read_multiplier < 1.0:
|
|
406
|
+
if cache_write_multiplier <= 1.0:
|
|
407
|
+
break_even_reuses = 0
|
|
408
|
+
else:
|
|
409
|
+
break_even_reuses = int(math.ceil((cache_write_multiplier - 1.0) / (1.0 - cache_read_multiplier)))
|
|
410
|
+
if expected_reuses >= break_even_reuses:
|
|
411
|
+
status = "already_break_even_on_write" if break_even_reuses == 0 else "amortizes_with_expected_reuses"
|
|
412
|
+
risk = "low"
|
|
413
|
+
elif expected_reuses > 0:
|
|
414
|
+
status = "not_enough_expected_reuses"
|
|
415
|
+
risk = "medium"
|
|
416
|
+
else:
|
|
417
|
+
status = "not_enough_expected_reuses"
|
|
418
|
+
risk = "high"
|
|
419
|
+
elif cache_read_multiplier == 1.0 and cache_write_multiplier <= 1.0:
|
|
420
|
+
break_even_reuses = 0
|
|
421
|
+
status = "already_break_even_on_write"
|
|
422
|
+
risk = "low"
|
|
423
|
+
elif cache_read_multiplier > 1.0:
|
|
424
|
+
if cache_write_multiplier < 1.0:
|
|
425
|
+
max_profitable_reuses = max_profitable_read_premium_reuses(
|
|
426
|
+
write_multiplier=cache_write_multiplier,
|
|
427
|
+
read_multiplier=cache_read_multiplier,
|
|
428
|
+
)
|
|
429
|
+
if expected_relative_savings < -SAVINGS_EPSILON:
|
|
430
|
+
status = "no_read_discount"
|
|
431
|
+
risk = "high"
|
|
432
|
+
elif expected_reuses == 0:
|
|
433
|
+
if expected_relative_savings > SAVINGS_EPSILON:
|
|
434
|
+
status = "write_discount_only_no_expected_reads"
|
|
435
|
+
risk = "low"
|
|
436
|
+
else:
|
|
437
|
+
status = "break_even_only_no_expected_reads"
|
|
438
|
+
risk = "medium"
|
|
439
|
+
elif abs(expected_relative_savings) <= SAVINGS_EPSILON:
|
|
440
|
+
status = "break_even_only_with_limited_reuses"
|
|
441
|
+
risk = "medium"
|
|
442
|
+
else:
|
|
443
|
+
status = "positive_only_with_limited_reuses"
|
|
444
|
+
risk = "medium"
|
|
445
|
+
else:
|
|
446
|
+
status = "no_read_discount"
|
|
447
|
+
risk = "high"
|
|
448
|
+
|
|
449
|
+
return {
|
|
450
|
+
"expected_reuses": expected_reuses,
|
|
451
|
+
"expected_reuses_semantics": "future_cache_reads_after_initial_write",
|
|
452
|
+
"cacheable_prefix_tokens": prefix_tokens,
|
|
453
|
+
"break_even_reuses": break_even_reuses,
|
|
454
|
+
"max_profitable_reuses": max_profitable_reuses,
|
|
455
|
+
"status": status,
|
|
456
|
+
"risk": risk,
|
|
457
|
+
"cache_write_multiplier": cache_write_multiplier,
|
|
458
|
+
"cache_read_multiplier": cache_read_multiplier,
|
|
459
|
+
"expected_uncached_relative_cost": expected_uncached_relative_cost,
|
|
460
|
+
"expected_cached_relative_cost": expected_cached_relative_cost,
|
|
461
|
+
"expected_relative_savings": expected_relative_savings,
|
|
462
|
+
"multiplier_baseline": "uncached_prefix_input_cost_equals_1.0",
|
|
463
|
+
"user_supplied_multipliers": supplied,
|
|
464
|
+
"formula": "expected_cached=write_multiplier + expected_reuses*read_multiplier; expected_uncached=1 + expected_reuses; break_even=ceil((write_multiplier - 1.0)/(1.0-read_multiplier)) only when read_multiplier<1; max_profitable_reuses is the largest integer reuse count with expected_uncached-expected_cached > 0, only when read_multiplier>1 and write_multiplier<1",
|
|
465
|
+
"claim_boundary": {
|
|
466
|
+
"advisory_only": True,
|
|
467
|
+
"provider_pricing_defaults_included": False,
|
|
468
|
+
"provider_measured_cache_hit": False,
|
|
469
|
+
"hosted_api_token_or_cost_savings_claim_allowed": False,
|
|
470
|
+
"requires_user_supplied_or_provider_documented_multipliers": True,
|
|
471
|
+
},
|
|
472
|
+
}
|
|
473
|
+
|
|
474
|
+
|
|
475
|
+
def score_prompt(
|
|
476
|
+
text: str,
|
|
477
|
+
*,
|
|
478
|
+
provider: str,
|
|
479
|
+
minimum_cacheable_tokens: int,
|
|
480
|
+
expected_reuses: int = DEFAULT_EXPECTED_REUSES,
|
|
481
|
+
cache_write_multiplier: float | None = None,
|
|
482
|
+
cache_read_multiplier: float | None = None,
|
|
483
|
+
) -> dict[str, Any]:
|
|
256
484
|
prompt_kind, shape_warnings = json_shape_warnings(text)
|
|
257
485
|
dynamic_offset, dynamic_marker = first_dynamic_marker(text)
|
|
258
486
|
prefix_text = text if dynamic_offset is None else text[:dynamic_offset]
|
|
@@ -282,13 +510,14 @@ def score_prompt(text: str, *, provider: str, minimum_cacheable_tokens: int) ->
|
|
|
282
510
|
"message": "Anthropic caching usually requires cache_control around the reusable prefix.",
|
|
283
511
|
})
|
|
284
512
|
|
|
513
|
+
eligible = prefix_estimated >= minimum_cacheable_tokens
|
|
285
514
|
return {
|
|
286
515
|
"tool": TOOL_NAME,
|
|
287
516
|
"schema_version": SCHEMA_VERSION,
|
|
288
517
|
"provider": provider,
|
|
289
518
|
"prompt_kind": prompt_kind,
|
|
290
519
|
"minimum_cacheable_tokens": minimum_cacheable_tokens,
|
|
291
|
-
"eligible":
|
|
520
|
+
"eligible": eligible,
|
|
292
521
|
"estimated_tokens": estimated,
|
|
293
522
|
"cacheable_prefix_tokens": prefix_estimated,
|
|
294
523
|
"token_estimate": {
|
|
@@ -305,6 +534,13 @@ def score_prompt(text: str, *, provider: str, minimum_cacheable_tokens: int) ->
|
|
|
305
534
|
"static_prefix_ratio": round(static_ratio, 6),
|
|
306
535
|
"warnings": warnings,
|
|
307
536
|
"provider_caveat": PROVIDER_CAVEATS[provider],
|
|
537
|
+
"amortization": build_amortization_report(
|
|
538
|
+
eligible=eligible,
|
|
539
|
+
prefix_tokens=prefix_estimated,
|
|
540
|
+
expected_reuses=expected_reuses,
|
|
541
|
+
cache_write_multiplier=cache_write_multiplier,
|
|
542
|
+
cache_read_multiplier=cache_read_multiplier,
|
|
543
|
+
),
|
|
308
544
|
"raw_prompt_stored": False,
|
|
309
545
|
"claim_boundary": {
|
|
310
546
|
"advisory_only": True,
|
|
@@ -320,11 +556,16 @@ def render_text(report: dict[str, Any]) -> str:
|
|
|
320
556
|
status = "eligible" if report.get("eligible") else "not eligible"
|
|
321
557
|
warnings = report.get("warnings") if isinstance(report.get("warnings"), list) else []
|
|
322
558
|
warning_codes = ", ".join(str(item.get("code")) for item in warnings if isinstance(item, dict)) or "none"
|
|
559
|
+
amortization = report.get("amortization") if isinstance(report.get("amortization"), dict) else {}
|
|
323
560
|
return (
|
|
324
561
|
f"{TOOL_NAME}: {status} for {report['provider']} "
|
|
325
562
|
f"(static_prefix≈{report['cacheable_prefix_tokens']} char/4 tokens, "
|
|
326
563
|
f"minimum={report['minimum_cacheable_tokens']})\n"
|
|
327
564
|
f"warnings: {warning_codes}\n"
|
|
565
|
+
f"amortization: {amortization.get('status', 'unknown')} "
|
|
566
|
+
f"(risk={amortization.get('risk', 'unknown')}, "
|
|
567
|
+
f"break_even_reuses={amortization.get('break_even_reuses')}, "
|
|
568
|
+
f"max_profitable_reuses={amortization.get('max_profitable_reuses')})\n"
|
|
328
569
|
"claim boundary: advisory static lint only; not a measured provider cache hit or cost saving.\n"
|
|
329
570
|
)
|
|
330
571
|
|
|
@@ -344,6 +585,24 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
344
585
|
help="override provider threshold for model/platform-specific cache minimums",
|
|
345
586
|
)
|
|
346
587
|
parser.add_argument("--max-input-bytes", default=DEFAULT_MAX_INPUT_BYTES, help=f"maximum input bytes (default: {DEFAULT_MAX_INPUT_BYTES})")
|
|
588
|
+
parser.add_argument(
|
|
589
|
+
"--expected-reuses",
|
|
590
|
+
default=DEFAULT_EXPECTED_REUSES,
|
|
591
|
+
help=(
|
|
592
|
+
"future cache reads expected after the initial write; advisory only "
|
|
593
|
+
f"(default: {DEFAULT_EXPECTED_REUSES})"
|
|
594
|
+
),
|
|
595
|
+
)
|
|
596
|
+
parser.add_argument(
|
|
597
|
+
"--cache-write-multiplier",
|
|
598
|
+
default=None,
|
|
599
|
+
help="optional user-supplied cache write multiplier relative to uncached prefix input cost=1.0",
|
|
600
|
+
)
|
|
601
|
+
parser.add_argument(
|
|
602
|
+
"--cache-read-multiplier",
|
|
603
|
+
default=None,
|
|
604
|
+
help="optional user-supplied cache read multiplier relative to uncached prefix input cost=1.0",
|
|
605
|
+
)
|
|
347
606
|
parser.add_argument("--json", action="store_true", help="emit stable JSON")
|
|
348
607
|
return parser
|
|
349
608
|
|
|
@@ -362,8 +621,34 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
362
621
|
maximum=10_000_000,
|
|
363
622
|
name="--minimum-cacheable-tokens",
|
|
364
623
|
)
|
|
624
|
+
expected_reuses = bounded_int(
|
|
625
|
+
args.expected_reuses,
|
|
626
|
+
default=DEFAULT_EXPECTED_REUSES,
|
|
627
|
+
minimum=0,
|
|
628
|
+
maximum=MAX_EXPECTED_REUSES,
|
|
629
|
+
name="--expected-reuses",
|
|
630
|
+
)
|
|
631
|
+
cache_write_multiplier = bounded_float(
|
|
632
|
+
args.cache_write_multiplier,
|
|
633
|
+
minimum=0.0,
|
|
634
|
+
maximum=MAX_CACHE_MULTIPLIER,
|
|
635
|
+
name="--cache-write-multiplier",
|
|
636
|
+
)
|
|
637
|
+
cache_read_multiplier = bounded_float(
|
|
638
|
+
args.cache_read_multiplier,
|
|
639
|
+
minimum=0.0,
|
|
640
|
+
maximum=MAX_CACHE_MULTIPLIER,
|
|
641
|
+
name="--cache-read-multiplier",
|
|
642
|
+
)
|
|
365
643
|
text = read_limited_path(Path(args.input), max_input_bytes) if args.input else read_limited_stdin(max_input_bytes)
|
|
366
|
-
report = score_prompt(
|
|
644
|
+
report = score_prompt(
|
|
645
|
+
text,
|
|
646
|
+
provider=provider,
|
|
647
|
+
minimum_cacheable_tokens=minimum,
|
|
648
|
+
expected_reuses=expected_reuses,
|
|
649
|
+
cache_write_multiplier=cache_write_multiplier,
|
|
650
|
+
cache_read_multiplier=cache_read_multiplier,
|
|
651
|
+
)
|
|
367
652
|
if args.json:
|
|
368
653
|
sys.stdout.write(json_bytes(report, indent=2) + "\n")
|
|
369
654
|
else:
|
|
@@ -2855,12 +2855,17 @@ def compile_command(args: argparse.Namespace) -> int:
|
|
|
2855
2855
|
|
|
2856
2856
|
recommended = sorted(sections, key=lambda sec: (bool(sec["volatile"]), 0 if sec["ttl"] == "1h" else 1, -int(sec["bytes"] or 0), str(sec["id"])))
|
|
2857
2857
|
findings: list[dict[str, Any]] = []
|
|
2858
|
+
suffix_has_one_hour_ttl = [False] * (len(sections) + 1)
|
|
2859
|
+
suffix_has_stable_section = [False] * (len(sections) + 1)
|
|
2860
|
+
for index in range(len(sections) - 1, -1, -1):
|
|
2861
|
+
suffix_has_one_hour_ttl[index] = suffix_has_one_hour_ttl[index + 1] or sections[index]["ttl"] == "1h"
|
|
2862
|
+
suffix_has_stable_section[index] = suffix_has_stable_section[index + 1] or not bool(sections[index]["volatile"])
|
|
2858
2863
|
for i, sec in enumerate(sections):
|
|
2859
|
-
if sec["ttl"] == "5m" and
|
|
2864
|
+
if sec["ttl"] == "5m" and suffix_has_one_hour_ttl[i + 1]:
|
|
2860
2865
|
findings.append({"severity": "warn", "code": "ttl_order_violation", "section_id": sec["id"], "message": "place 1h cacheable stable sections before 5m sections"})
|
|
2861
2866
|
break
|
|
2862
2867
|
for i, sec in enumerate(sections):
|
|
2863
|
-
if sec["volatile"] and
|
|
2868
|
+
if sec["volatile"] and suffix_has_stable_section[i + 1]:
|
|
2864
2869
|
findings.append({"severity": "warn", "code": "volatile_prefix_before_stable_context", "section_id": sec["id"], "message": "move volatile context toward the tail so stable prefixes can be reused"})
|
|
2865
2870
|
break
|
|
2866
2871
|
if len(sections) > 4:
|