@ictechgy/context-guard 0.4.0 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +13 -0
- package/README.ko.md +61 -32
- package/README.md +90 -22
- package/context-guard-kit/README.md +39 -26
- package/context-guard-kit/benchmark_runner.py +273 -8
- package/context-guard-kit/claude_transcript_cost_audit.py +325 -12
- package/context-guard-kit/context_compress.py +153 -1
- package/context-guard-kit/context_filter.py +446 -0
- package/context-guard-kit/context_guard_cli.py +3 -0
- package/context-guard-kit/context_guard_diet.py +677 -2
- package/context-guard-kit/context_pack.py +1694 -2
- package/context-guard-kit/cost_guard.py +1870 -0
- package/context-guard-kit/setup_wizard.py +820 -29
- package/context-guard-kit/trim_command_output.py +396 -45
- package/docs/benchmark-fixtures/learned-compression.tasks.example.json +24 -0
- package/docs/benchmark-fixtures/learned-compression.variants.example.json +10 -0
- package/docs/benchmark-fixtures/visual-ocr.tasks.example.json +24 -0
- package/docs/benchmark-fixtures/visual-ocr.variants.example.json +10 -0
- package/docs/benchmark-workflow-examples.md +40 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +169 -0
- package/docs/benchmark-workflows/measured-token-workflow.example.json +170 -0
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +170 -0
- package/docs/cache-diagnostics-schema.md +75 -0
- package/docs/cache-diagnostics.example.json +116 -0
- package/docs/cache-diagnostics.schema.json +460 -0
- package/docs/distribution.md +4 -2
- package/docs/experimental-benchmark-fixtures.md +36 -0
- package/package.json +11 -2
- package/packaging/homebrew/context-guard.rb.template +3 -2
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +21 -13
- package/plugins/context-guard/README.md +24 -10
- package/plugins/context-guard/bin/context-guard +3 -0
- package/plugins/context-guard/bin/context-guard-audit +325 -12
- package/plugins/context-guard/bin/context-guard-bench +273 -8
- package/plugins/context-guard/bin/context-guard-compress +153 -1
- package/plugins/context-guard/bin/context-guard-cost +1870 -0
- package/plugins/context-guard/bin/context-guard-diet +677 -2
- package/plugins/context-guard/bin/context-guard-filter +446 -0
- package/plugins/context-guard/bin/context-guard-pack +1694 -2
- package/plugins/context-guard/bin/context-guard-setup +820 -29
- package/plugins/context-guard/bin/context-guard-trim-output +396 -45
- package/plugins/context-guard/brief/README.md +10 -3
- package/plugins/context-guard/skills/optimize/SKILL.md +5 -2
- package/plugins/context-guard/skills/setup/SKILL.md +3 -1
|
@@ -45,8 +45,10 @@ TOKEN_TYPE_ALIASES = {
|
|
|
45
45
|
COST_KEYS = ("total_cost_usd", "cost_usd", "costUSD")
|
|
46
46
|
MODEL_KEYS = ("model", "model_id", "modelId")
|
|
47
47
|
QUERY_SOURCE_KEYS = ("query_source", "querySource")
|
|
48
|
-
|
|
48
|
+
TIMESTAMP_KEYS = ("timestamp", "created_at", "createdAt", "time", "ts")
|
|
49
|
+
FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.2"
|
|
49
50
|
FEASIBILITY_PRODUCER = "context-guard-audit"
|
|
51
|
+
CACHE_DIAGNOSTICS_SCHEMA_VERSION = "contextguard.cache-diagnostics.v1"
|
|
50
52
|
MAX_ERROR_EXAMPLES = 20
|
|
51
53
|
JSON_PARSE_RECURSION_LIMIT = 10_000
|
|
52
54
|
READ_CHUNK_BYTES = 64 * 1024
|
|
@@ -177,8 +179,11 @@ class UsageSummary:
|
|
|
177
179
|
by_tool: Counter[str] = field(default_factory=Counter)
|
|
178
180
|
token_field_presence: Counter[str] = field(default_factory=Counter)
|
|
179
181
|
cost_field_count: int = 0
|
|
182
|
+
cache_record_timestamps: list[_dt.datetime] = field(default_factory=list)
|
|
183
|
+
positive_cache_record_timestamps: list[_dt.datetime] = field(default_factory=list)
|
|
180
184
|
prompt_cache_audit: PromptCacheAudit = field(default_factory=PromptCacheAudit)
|
|
181
185
|
cache_friendliness_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
|
|
186
|
+
cache_diagnostics_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
|
|
182
187
|
|
|
183
188
|
@property
|
|
184
189
|
def total_tokens(self) -> int:
|
|
@@ -295,6 +300,48 @@ def finite_nonnegative_number(value: Any, *, clamp_negative: bool) -> int | floa
|
|
|
295
300
|
return None
|
|
296
301
|
|
|
297
302
|
|
|
303
|
+
def parse_timestamp_value(value: Any) -> _dt.datetime | None:
|
|
304
|
+
if isinstance(value, str):
|
|
305
|
+
text = value.strip()
|
|
306
|
+
if not text:
|
|
307
|
+
return None
|
|
308
|
+
try:
|
|
309
|
+
if text.endswith("Z"):
|
|
310
|
+
text = text[:-1] + "+00:00"
|
|
311
|
+
parsed = _dt.datetime.fromisoformat(text)
|
|
312
|
+
except ValueError:
|
|
313
|
+
return None
|
|
314
|
+
if parsed.tzinfo is None:
|
|
315
|
+
parsed = parsed.replace(tzinfo=_dt.timezone.utc)
|
|
316
|
+
return parsed.astimezone(_dt.timezone.utc)
|
|
317
|
+
metric = finite_nonnegative_number(value, clamp_negative=False)
|
|
318
|
+
if metric is None:
|
|
319
|
+
return None
|
|
320
|
+
seconds = float(metric) / 1000.0 if float(metric) > 10_000_000_000 else float(metric)
|
|
321
|
+
try:
|
|
322
|
+
return _dt.datetime.fromtimestamp(seconds, tz=_dt.timezone.utc)
|
|
323
|
+
except (OverflowError, OSError, ValueError):
|
|
324
|
+
return None
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def record_timestamp(root: Any) -> _dt.datetime | None:
|
|
328
|
+
candidates: list[Any] = []
|
|
329
|
+
if isinstance(root, dict):
|
|
330
|
+
for key in TIMESTAMP_KEYS:
|
|
331
|
+
if key in root:
|
|
332
|
+
candidates.append(root.get(key))
|
|
333
|
+
message = root.get("message")
|
|
334
|
+
if isinstance(message, dict):
|
|
335
|
+
for key in TIMESTAMP_KEYS:
|
|
336
|
+
if key in message:
|
|
337
|
+
candidates.append(message.get(key))
|
|
338
|
+
for candidate in candidates:
|
|
339
|
+
parsed = parse_timestamp_value(candidate)
|
|
340
|
+
if parsed is not None:
|
|
341
|
+
return parsed
|
|
342
|
+
return None
|
|
343
|
+
|
|
344
|
+
|
|
298
345
|
def normalize_token_bucket(raw: str) -> str:
|
|
299
346
|
return TOKEN_TYPE_ALIASES.get(raw, raw)
|
|
300
347
|
|
|
@@ -667,11 +714,15 @@ def add_usage(
|
|
|
667
714
|
) -> RecordUsage:
|
|
668
715
|
root_model = None
|
|
669
716
|
root_query_source = None
|
|
717
|
+
parsed_timestamp = None
|
|
670
718
|
if isinstance(root, dict):
|
|
671
719
|
root_model = first_string(root, MODEL_KEYS)
|
|
672
720
|
root_query_source = first_string(root, QUERY_SOURCE_KEYS)
|
|
721
|
+
parsed_timestamp = record_timestamp(root)
|
|
673
722
|
|
|
674
723
|
record = RecordUsage()
|
|
724
|
+
cache_telemetry_present = False
|
|
725
|
+
positive_cache_telemetry_present = False
|
|
675
726
|
summary.prompt_cache_audit.observe(root)
|
|
676
727
|
for d in walk(root):
|
|
677
728
|
local_tokens: Counter[str] = Counter()
|
|
@@ -695,6 +746,10 @@ def add_usage(
|
|
|
695
746
|
|
|
696
747
|
for bucket in present_buckets:
|
|
697
748
|
summary.token_field_presence[bucket] += 1
|
|
749
|
+
if "cache_read" in present_buckets or "cache_creation" in present_buckets:
|
|
750
|
+
cache_telemetry_present = True
|
|
751
|
+
if local_tokens.get("cache_read", 0) > 0 or local_tokens.get("cache_creation", 0) > 0:
|
|
752
|
+
positive_cache_telemetry_present = True
|
|
698
753
|
|
|
699
754
|
if local_tokens:
|
|
700
755
|
summary.tokens.update(local_tokens)
|
|
@@ -713,6 +768,10 @@ def add_usage(
|
|
|
713
768
|
record.cost_usd += cost
|
|
714
769
|
summary.cost_field_count += 1
|
|
715
770
|
break
|
|
771
|
+
if parsed_timestamp is not None and cache_telemetry_present:
|
|
772
|
+
summary.cache_record_timestamps.append(parsed_timestamp)
|
|
773
|
+
if parsed_timestamp is not None and positive_cache_telemetry_present:
|
|
774
|
+
summary.positive_cache_record_timestamps.append(parsed_timestamp)
|
|
716
775
|
commands, tools = collect_record_hints(root, show_commands=show_commands)
|
|
717
776
|
record.commands = commands
|
|
718
777
|
record.tools = tools
|
|
@@ -980,6 +1039,7 @@ def segment_position_stats(samples: list[PromptSegmentSample], attr: str, window
|
|
|
980
1039
|
"stability": stability,
|
|
981
1040
|
"volatile_share": 1.0 - stability,
|
|
982
1041
|
"unique_hashes": len(counts),
|
|
1042
|
+
"sample_count": len(values),
|
|
983
1043
|
})
|
|
984
1044
|
return stats
|
|
985
1045
|
|
|
@@ -1143,6 +1203,201 @@ def cache_friendliness_for_summary(summary: UsageSummary) -> dict[str, Any]:
|
|
|
1143
1203
|
return summary.cache_friendliness_cache
|
|
1144
1204
|
|
|
1145
1205
|
|
|
1206
|
+
def _cache_diagnostic_confidence(*, skipped: bool, samples: bool, has_cache: bool) -> str:
    """Pick the overall confidence label for a cache-diagnostics payload.

    A skipped (incomplete) scan always reports ``"partial"``. Otherwise any
    prompt samples or cache fields give ``"hypothesis"``, and having neither
    gives ``"unavailable"``.
    """
    if skipped:
        label = "partial"
    elif samples or has_cache:
        label = "hypothesis"
    else:
        label = "unavailable"
    return label
|
|
1212
|
+
|
|
1213
|
+
|
|
1214
|
+
def build_ttl_diagnostics(summary: UsageSummary, *, has_cache_any: bool, skipped: bool) -> dict[str, Any]:
    """Describe the time span covered by timestamped cache telemetry.

    Reads ``summary.cache_record_timestamps`` (count only) and
    ``summary.positive_cache_record_timestamps`` (sorted to measure the span).
    With fewer than two positive timestamps the payload is "unavailable" and
    carries no span or candidate. Otherwise the span between the earliest and
    latest positive timestamp, truncated to whole seconds and floored at zero,
    is bucketed into ``within-5m``, ``between-5m-and-1h`` or ``beyond-1h``.

    Args:
        summary: Accumulated usage summary holding the collected timestamps.
        has_cache_any: Whether any cache field was observed; selects between
            the hypothesis and unavailable status/evidence when a span exists.
        skipped: Whether the scan was incomplete; downgrades confidence to
            ``"partial"``.

    Returns:
        A dict with status, evidence, confidence, record counts, span,
        candidate bucket, reason, interval basis and caveats, in that order.
    """
    all_record_count = len(summary.cache_record_timestamps)
    positive_times = sorted(summary.positive_cache_record_timestamps)
    positive_count = len(positive_times)

    if positive_count < 2:
        status = "unavailable"
        evidence = EVIDENCE_UNAVAILABLE
        confidence = "partial" if skipped else "unavailable"
        span_seconds = None
        candidate = None
        reason = (
            "Fewer than two positive timestamped cache telemetry records were observed, so TTL reuse intervals cannot be inferred."
        )
    else:
        elapsed = (positive_times[-1] - positive_times[0]).total_seconds()
        span_seconds = max(0, int(elapsed))
        if span_seconds <= 5 * 60:
            candidate = "within-5m"
        elif span_seconds <= 60 * 60:
            candidate = "between-5m-and-1h"
        else:
            candidate = "beyond-1h"
        if has_cache_any:
            status = "hypothesis"
            evidence = EVIDENCE_INFERRED
        else:
            status = "unavailable"
            evidence = EVIDENCE_UNAVAILABLE
        confidence = "partial" if skipped else "hypothesis"
        reason = (
            "Positive timestamped cache telemetry records bound the local cache-observation span, but exact provider cache TTL reuse remains a hypothesis."
        )

    return {
        "status": status,
        "evidence": evidence,
        "confidence": confidence,
        "timestamped_cache_record_count": all_record_count,
        "positive_timestamped_cache_record_count": positive_count,
        "timestamped_cache_record_span_seconds": span_seconds,
        "candidate": candidate,
        "reason": reason,
        "interval_basis": "positive_timestamped_cache_records",
        "caveats": [
            "Timestamped cache telemetry records do not prove exact provider cache-prefix identity or provider cache TTL state.",
            "5-minute versus 1-hour TTL guidance is a local hypothesis unless corroborated with provider telemetry and repeated stable prefixes.",
        ],
    }
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
def build_cache_diagnostics(summary: UsageSummary) -> dict[str, Any]:
    """Assemble the cache-diagnostics payload for a usage summary.

    The result is memoised on ``summary.cache_diagnostics_cache``; later calls
    return the stored dict. The payload combines cache field availability,
    observed cache read/creation token totals, stable-prefix candidates
    (stability of at least 0.66), dynamic prefix breakers (volatile share of at
    least 0.34), cache-miss hypotheses, TTL diagnostics and headroom
    diagnostics, plus fixed caveats.

    Args:
        summary: Accumulated usage summary for the scanned transcripts.

    Returns:
        The diagnostics dict, which is also stored on the summary.
    """
    cached = summary.cache_diagnostics_cache
    if cached is not None:
        return cached

    cache_availability = build_metric_availability(summary)["cache"]
    cache_friendliness = cache_friendliness_for_summary(summary)

    # Any skipped input means every conclusion below rests on a partial scan.
    skipped = bool(
        summary.skipped_files
        or summary.skipped_records
        or summary.parse_errors
        or cache_friendliness.get("skipped_evidence")
    )

    field_presence = summary.token_field_presence
    has_cache_read = field_presence.get("cache_read", 0) > 0
    has_cache_creation = field_presence.get("cache_creation", 0) > 0
    has_cache_any = has_cache_read or has_cache_creation
    cache_read = summary.tokens.get("cache_read", 0)
    cache_creation = summary.tokens.get("cache_creation", 0)

    samples = summary.prompt_cache_audit.samples
    if samples:
        prefix_stats = segment_position_stats(samples, "prefix_hashes", PROMPT_AUDIT_PREFIX_SEGMENTS)
    else:
        prefix_stats = []
    confidence = _cache_diagnostic_confidence(skipped=skipped, samples=bool(samples), has_cache=has_cache_any)

    # Per-position entries inherit the confidence of the friendliness audit.
    if cache_friendliness.get("confidence") == "partial":
        segment_confidence = "partial"
    else:
        segment_confidence = "hypothesis"

    most_stable_first = sorted(prefix_stats, key=lambda item: (-item["stability"], item["position"]))
    stable_prefix_candidates: list[dict[str, Any]] = [
        {
            "position": entry["position"],
            "stability": round(float(entry["stability"]), 4),
            "volatile_share": round(float(entry["volatile_share"]), 4),
            "unique_hashes": entry["unique_hashes"],
            "sample_count": entry["sample_count"],
            "evidence": EVIDENCE_INFERRED,
            "confidence": segment_confidence,
            "action": "Keep stable instructions, policies, and reusable context before run-specific evidence.",
        }
        for entry in most_stable_first[:PROMPT_AUDIT_PREFIX_SEGMENTS]
        if not entry["stability"] < 0.66
    ]

    # Reuse the trigger reported by the first volatile-content finding, if any.
    breaker_trigger = "prefix_position"
    volatile_finding = next(
        (
            item
            for item in cache_friendliness.get("findings", [])
            if isinstance(item, dict) and item.get("id") == "volatile-content-near-prefix"
        ),
        None,
    )
    if volatile_finding is not None:
        finding_evidence = volatile_finding.get("evidence")
        if not isinstance(finding_evidence, dict):
            finding_evidence = {}
        breaker_trigger = str(finding_evidence.get("trigger") or breaker_trigger)

    most_volatile_first = sorted(prefix_stats, key=lambda item: (-item["volatile_share"], item["position"]))
    dynamic_prefix_breakers: list[dict[str, Any]] = [
        {
            "position": entry["position"],
            "trigger": breaker_trigger,
            "volatile_share": round(float(entry["volatile_share"]), 4),
            "stability": round(float(entry["stability"]), 4),
            "unique_hashes": entry["unique_hashes"],
            "sample_count": entry["sample_count"],
            "evidence": EVIDENCE_INFERRED,
            "confidence": segment_confidence,
            "heuristic": True,
            "action": "Move diffs, logs, timestamps, and command output after stable reusable prompt prefixes.",
        }
        for entry in most_volatile_first
        if not entry["volatile_share"] < 0.34
    ][:PROMPT_AUDIT_MAX_FINDINGS]

    hypotheses: list[dict[str, Any]] = []

    def add_hypothesis(hypothesis_id: str, evidence_label: Any, level: Any, reason: str, action: Any) -> None:
        hypotheses.append({
            "id": hypothesis_id,
            "evidence": evidence_label,
            "confidence": level,
            "reason": reason,
            "action": action,
        })

    if not has_cache_any:
        add_hypothesis(
            "cache-fields-missing",
            EVIDENCE_UNAVAILABLE,
            "partial" if skipped else "unavailable",
            "No cache_read/cache_creation transcript fields were observed.",
            "Hide cache-read UI or label cache telemetry as missing for this scan.",
        )
    if has_cache_creation and cache_creation > 0 and (not has_cache_read or cache_read == 0):
        add_hypothesis(
            "cache-cold-or-prefix-changed",
            EVIDENCE_INFERRED,
            "hypothesis",
            "Cache creation tokens were observed without corresponding cache read tokens.",
            "Check whether stable instructions changed or whether the session was cache-cold.",
        )
    if has_cache_creation and cache_creation >= 10_000 and cache_read > 0 and summary.cache_amortization < 0.5:
        add_hypothesis(
            "cache-read-low-vs-write",
            EVIDENCE_INFERRED,
            "hypothesis",
            "Cache reads are small relative to observed cache writes.",
            "Keep reusable prompt prefixes stable across turns before changing large context blocks.",
        )
    if dynamic_prefix_breakers:
        top_breaker = dynamic_prefix_breakers[0]
        add_hypothesis(
            "volatile-prefix-breakers",
            EVIDENCE_INFERRED,
            top_breaker["confidence"],
            "Redacted prompt segment hashes show volatile content near the prefix window.",
            top_breaker["action"],
        )
    if skipped:
        add_hypothesis(
            "partial-transcript-scan",
            EVIDENCE_INFERRED,
            "partial",
            "Some transcript files, records, or prompt structures were skipped/capped.",
            "Rerun against narrower transcript paths or higher safe scan limits before making decisions.",
        )

    ttl = build_ttl_diagnostics(summary, has_cache_any=has_cache_any, skipped=skipped)
    headroom_diagnostics = {
        **build_headroom_availability(summary),
        "historical_total_tokens_are_not_headroom": True,
        "required_observation": "live_statusline_snapshot",
    }

    has_signal = bool(has_cache_any or samples)
    if has_signal:
        if skipped or cache_friendliness.get("status") == "partial":
            status = "partial"
        else:
            status = "available"
    elif skipped:
        status = "partial"
    else:
        status = "missing"

    diagnostics = {
        "schema_version": CACHE_DIAGNOSTICS_SCHEMA_VERSION,
        "status": status,
        "confidence": confidence,
        "evidence": EVIDENCE_INFERRED if has_signal else EVIDENCE_UNAVAILABLE,
        "heuristic": True,
        "observations": {
            "cache_fields": cache_availability,
            "cache_read_tokens": cache_read,
            "cache_creation_tokens": cache_creation,
        },
        "derived_ratios": cache_availability["derived"],
        "stable_prefix_candidates": stable_prefix_candidates,
        "dynamic_prefix_breakers": dynamic_prefix_breakers,
        "cache_miss_hypotheses": hypotheses[:PROMPT_AUDIT_MAX_FINDINGS],
        "ttl_diagnostics": ttl,
        "headroom_diagnostics": headroom_diagnostics,
        "caveats": [
            "Cache diagnostics are local transcript heuristics and do not prove exact provider cache-prefix state.",
            "Provider cache read/write fields are diagnostic telemetry and do not prove ContextGuard-caused token reduction.",
            "Stable-prefix and breaker positions come from bounded redacted segment hashes, not raw prompt text.",
        ],
    }
    summary.cache_diagnostics_cache = diagnostics
    return diagnostics
|
|
1395
|
+
|
|
1396
|
+
|
|
1397
|
+
def cache_diagnostics_for_summary(summary: UsageSummary) -> dict[str, Any]:
    """Return the cache-diagnostics payload for ``summary``.

    Delegates to ``build_cache_diagnostics``, which stores its result on the
    summary and returns the stored dict on later calls.
    """
    diagnostics = build_cache_diagnostics(summary)
    return diagnostics
|
|
1399
|
+
|
|
1400
|
+
|
|
1146
1401
|
def build_metric_caveats(summary: UsageSummary) -> list[str]:
|
|
1147
1402
|
caveats = [
|
|
1148
1403
|
"Values are observed from local Claude Code transcript JSON/JSONL fields and are not official billing records.",
|
|
@@ -1177,6 +1432,7 @@ def feasibility_json(
|
|
|
1177
1432
|
stable_tokens = stable_token_counter(summary.tokens)
|
|
1178
1433
|
stable_total_tokens = sum(stable_tokens.values())
|
|
1179
1434
|
cache_friendliness = cache_friendliness_for_summary(summary)
|
|
1435
|
+
cache_diagnostics = cache_diagnostics_for_summary(summary)
|
|
1180
1436
|
return {
|
|
1181
1437
|
"schema_version": FEASIBILITY_SCHEMA_VERSION,
|
|
1182
1438
|
"producer": FEASIBILITY_PRODUCER,
|
|
@@ -1195,6 +1451,7 @@ def feasibility_json(
|
|
|
1195
1451
|
"context_availability",
|
|
1196
1452
|
"headroom_availability",
|
|
1197
1453
|
"cache_friendliness",
|
|
1454
|
+
"cache_diagnostics",
|
|
1198
1455
|
"totals",
|
|
1199
1456
|
],
|
|
1200
1457
|
"diagnostic_fields": ["summary"],
|
|
@@ -1222,6 +1479,7 @@ def feasibility_json(
|
|
|
1222
1479
|
"context_availability": availability["context"],
|
|
1223
1480
|
"headroom_availability": availability["headroom"],
|
|
1224
1481
|
"cache_friendliness": cache_friendliness,
|
|
1482
|
+
"cache_diagnostics": cache_diagnostics,
|
|
1225
1483
|
"totals": {
|
|
1226
1484
|
"total_tokens": stable_total_tokens,
|
|
1227
1485
|
"tokens": stable_tokens,
|
|
@@ -1272,6 +1530,7 @@ def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any
|
|
|
1272
1530
|
output_ratio = output_tokens / total
|
|
1273
1531
|
input_ratio = input_tokens / total
|
|
1274
1532
|
cache_friendliness = cache_friendliness_for_summary(summary)
|
|
1533
|
+
cache_diagnostics = cache_diagnostics_for_summary(summary)
|
|
1275
1534
|
for finding in cache_friendliness.get("findings", []):
|
|
1276
1535
|
if isinstance(finding, dict) and finding.get("id") == "volatile-content-near-prefix":
|
|
1277
1536
|
evidence = dict(finding.get("evidence") or {})
|
|
@@ -1331,25 +1590,57 @@ def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any
|
|
|
1331
1590
|
},
|
|
1332
1591
|
))
|
|
1333
1592
|
if cache_creation >= 50_000 and 1.0 <= summary.cache_amortization < 5.0:
|
|
1593
|
+
ttl = cache_diagnostics.get("ttl_diagnostics") or {}
|
|
1594
|
+
ttl_status = str(ttl.get("status") or "unavailable")
|
|
1595
|
+
ttl_confidence = str(ttl.get("confidence") or "unavailable")
|
|
1596
|
+
ttl_candidate = ttl.get("candidate")
|
|
1597
|
+
ttl_span = ttl.get("timestamped_cache_record_span_seconds")
|
|
1598
|
+
if ttl_status == "hypothesis" and ttl_candidate in {"between-5m-and-1h", "beyond-1h"}:
|
|
1599
|
+
ttl_reason = (
|
|
1600
|
+
f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
|
|
1601
|
+
f"{cache_creation} write tokens; timestamped cache telemetry spans {ttl_span} seconds "
|
|
1602
|
+
f"({ttl_candidate})."
|
|
1603
|
+
)
|
|
1604
|
+
ttl_action = (
|
|
1605
|
+
"Evaluate a longer provider prompt-cache TTL only after confirming the same stable prefix "
|
|
1606
|
+
"pattern in representative sessions and rechecking current provider TTL/pricing documentation."
|
|
1607
|
+
)
|
|
1608
|
+
elif ttl_status == "hypothesis":
|
|
1609
|
+
ttl_reason = (
|
|
1610
|
+
f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
|
|
1611
|
+
f"{cache_creation} write tokens, but timestamped cache telemetry currently points to {ttl_candidate}."
|
|
1612
|
+
)
|
|
1613
|
+
ttl_action = (
|
|
1614
|
+
"Keep collecting timestamped cache read/write evidence; do not enable a longer TTL solely from this scan."
|
|
1615
|
+
)
|
|
1616
|
+
else:
|
|
1617
|
+
ttl_reason = (
|
|
1618
|
+
f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
|
|
1619
|
+
f"{cache_creation} write tokens, but TTL diagnostics are {ttl_status} because this scan lacks "
|
|
1620
|
+
"at least two timestamped cache telemetry records."
|
|
1621
|
+
)
|
|
1622
|
+
ttl_action = (
|
|
1623
|
+
"Collect or inspect timestamped cache read/write evidence before evaluating a longer provider "
|
|
1624
|
+
"prompt-cache TTL; historical token totals alone are not TTL evidence."
|
|
1625
|
+
)
|
|
1334
1626
|
recs.append(recommendation(
|
|
1335
1627
|
"evaluate-1h-ttl-cache",
|
|
1336
|
-
"Cache writes are large;
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
f"{cache_creation} write tokens; absolute write cost is high and reuse is moderate. "
|
|
1340
|
-
"This metric does not inspect timestamps, so confirm reuse spans >5min in a sample "
|
|
1341
|
-
"session before enabling 1h TTL."
|
|
1342
|
-
),
|
|
1343
|
-
(
|
|
1344
|
-
"If sessions reuse the same prefix beyond the 5-minute default TTL, evaluate the 1h prompt cache "
|
|
1345
|
-
"beta (write 2x, read 0.1x). It pays off when reuse spans the gap between two 5-min cache writes."
|
|
1346
|
-
),
|
|
1628
|
+
"Cache writes are large; validate TTL evidence before longer TTL",
|
|
1629
|
+
ttl_reason,
|
|
1630
|
+
ttl_action,
|
|
1347
1631
|
"P2",
|
|
1348
1632
|
{
|
|
1349
1633
|
"cache_creation": cache_creation,
|
|
1350
1634
|
"cache_read": cache_read,
|
|
1351
1635
|
"cache_amortization": round(summary.cache_amortization, 4),
|
|
1352
1636
|
"cache_hit_rate": round(summary.cache_hit_rate, 4),
|
|
1637
|
+
"ttl_status": ttl_status,
|
|
1638
|
+
"ttl_evidence": ttl.get("evidence") or EVIDENCE_UNAVAILABLE,
|
|
1639
|
+
"ttl_confidence": ttl_confidence,
|
|
1640
|
+
"ttl_candidate": ttl_candidate,
|
|
1641
|
+
"timestamped_cache_record_count": ttl.get("timestamped_cache_record_count"),
|
|
1642
|
+
"positive_timestamped_cache_record_count": ttl.get("positive_timestamped_cache_record_count"),
|
|
1643
|
+
"timestamped_cache_record_span_seconds": ttl_span,
|
|
1353
1644
|
"heuristic": True,
|
|
1354
1645
|
},
|
|
1355
1646
|
))
|
|
@@ -1462,6 +1753,7 @@ def summary_json(
|
|
|
1462
1753
|
"top_commands": counter_json(summary.by_command, top),
|
|
1463
1754
|
"top_tools": counter_json(summary.by_tool, top),
|
|
1464
1755
|
"cache_friendliness": cache_friendliness_for_summary(summary),
|
|
1756
|
+
"cache_diagnostics": cache_diagnostics_for_summary(summary),
|
|
1465
1757
|
}
|
|
1466
1758
|
if include_recommendations:
|
|
1467
1759
|
data["recommendations"] = build_recommendations(summary, top)
|
|
@@ -1574,6 +1866,27 @@ def main() -> int:
|
|
|
1574
1866
|
if isinstance(finding, dict):
|
|
1575
1867
|
print(f" finding [{finding.get('severity')}] {finding.get('id')}: {finding.get('title')}")
|
|
1576
1868
|
|
|
1869
|
+
cache_diagnostics = cache_diagnostics_for_summary(summary)
|
|
1870
|
+
print("\nCache diagnostics")
|
|
1871
|
+
print(f" status {cache_diagnostics.get('status')}")
|
|
1872
|
+
print(f" confidence {cache_diagnostics.get('confidence')}")
|
|
1873
|
+
hypotheses = cache_diagnostics.get("cache_miss_hypotheses") or []
|
|
1874
|
+
if hypotheses:
|
|
1875
|
+
first = hypotheses[0]
|
|
1876
|
+
print(f" top_hypothesis {first.get('id')} ({first.get('confidence')})")
|
|
1877
|
+
stable_candidates = cache_diagnostics.get("stable_prefix_candidates") or []
|
|
1878
|
+
if stable_candidates:
|
|
1879
|
+
first = stable_candidates[0]
|
|
1880
|
+
print(f" stable_prefix_candidate position={first.get('position')} stability={first.get('stability')}")
|
|
1881
|
+
breakers = cache_diagnostics.get("dynamic_prefix_breakers") or []
|
|
1882
|
+
if breakers:
|
|
1883
|
+
first = breakers[0]
|
|
1884
|
+
print(f" dynamic_prefix_breaker position={first.get('position')} volatile_share={first.get('volatile_share')}")
|
|
1885
|
+
ttl = cache_diagnostics.get("ttl_diagnostics") or {}
|
|
1886
|
+
print(f" ttl_status {ttl.get('status')} ({ttl.get('confidence')})")
|
|
1887
|
+
headroom = cache_diagnostics.get("headroom_diagnostics") or {}
|
|
1888
|
+
print(f" headroom_status {headroom.get('status')} ({headroom.get('evidence')})")
|
|
1889
|
+
|
|
1577
1890
|
model_totals = Counter({model: sum(tokens.values()) for model, tokens in summary.by_model.items()})
|
|
1578
1891
|
print_counter("By model", model_totals, args.top)
|
|
1579
1892
|
|