@ictechgy/context-guard 0.4.1 → 0.4.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +9 -0
- package/README.ko.md +61 -32
- package/README.md +90 -22
- package/context-guard-kit/README.md +39 -26
- package/context-guard-kit/benchmark_runner.py +273 -8
- package/context-guard-kit/claude_transcript_cost_audit.py +325 -12
- package/context-guard-kit/context_compress.py +153 -1
- package/context-guard-kit/context_filter.py +446 -0
- package/context-guard-kit/context_guard_cli.py +3 -0
- package/context-guard-kit/context_guard_diet.py +677 -2
- package/context-guard-kit/context_pack.py +1694 -2
- package/context-guard-kit/cost_guard.py +1870 -0
- package/context-guard-kit/setup_wizard.py +820 -29
- package/context-guard-kit/trim_command_output.py +396 -45
- package/docs/benchmark-fixtures/learned-compression.tasks.example.json +24 -0
- package/docs/benchmark-fixtures/learned-compression.variants.example.json +10 -0
- package/docs/benchmark-fixtures/visual-ocr.tasks.example.json +24 -0
- package/docs/benchmark-fixtures/visual-ocr.variants.example.json +10 -0
- package/docs/benchmark-workflow-examples.md +40 -0
- package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +169 -0
- package/docs/benchmark-workflows/measured-token-workflow.example.json +170 -0
- package/docs/benchmark-workflows/provider-cache-telemetry.example.json +170 -0
- package/docs/cache-diagnostics-schema.md +75 -0
- package/docs/cache-diagnostics.example.json +116 -0
- package/docs/cache-diagnostics.schema.json +460 -0
- package/docs/distribution.md +4 -2
- package/docs/experimental-benchmark-fixtures.md +36 -0
- package/package.json +11 -2
- package/packaging/homebrew/context-guard.rb.template +3 -2
- package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
- package/plugins/context-guard/README.ko.md +21 -13
- package/plugins/context-guard/README.md +24 -10
- package/plugins/context-guard/bin/context-guard +3 -0
- package/plugins/context-guard/bin/context-guard-audit +325 -12
- package/plugins/context-guard/bin/context-guard-bench +273 -8
- package/plugins/context-guard/bin/context-guard-compress +153 -1
- package/plugins/context-guard/bin/context-guard-cost +1870 -0
- package/plugins/context-guard/bin/context-guard-diet +677 -2
- package/plugins/context-guard/bin/context-guard-filter +446 -0
- package/plugins/context-guard/bin/context-guard-pack +1694 -2
- package/plugins/context-guard/bin/context-guard-setup +820 -29
- package/plugins/context-guard/bin/context-guard-trim-output +396 -45
- package/plugins/context-guard/brief/README.md +10 -3
- package/plugins/context-guard/skills/optimize/SKILL.md +5 -2
- package/plugins/context-guard/skills/setup/SKILL.md +3 -1
|
@@ -45,8 +45,10 @@ TOKEN_TYPE_ALIASES = {
|
|
|
45
45
|
COST_KEYS = ("total_cost_usd", "cost_usd", "costUSD")
|
|
46
46
|
MODEL_KEYS = ("model", "model_id", "modelId")
|
|
47
47
|
QUERY_SOURCE_KEYS = ("query_source", "querySource")
|
|
48
|
-
|
|
48
|
+
TIMESTAMP_KEYS = ("timestamp", "created_at", "createdAt", "time", "ts")
|
|
49
|
+
FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.2"
|
|
49
50
|
FEASIBILITY_PRODUCER = "context-guard-audit"
|
|
51
|
+
CACHE_DIAGNOSTICS_SCHEMA_VERSION = "contextguard.cache-diagnostics.v1"
|
|
50
52
|
MAX_ERROR_EXAMPLES = 20
|
|
51
53
|
JSON_PARSE_RECURSION_LIMIT = 10_000
|
|
52
54
|
READ_CHUNK_BYTES = 64 * 1024
|
|
@@ -177,8 +179,11 @@ class UsageSummary:
|
|
|
177
179
|
by_tool: Counter[str] = field(default_factory=Counter)
|
|
178
180
|
token_field_presence: Counter[str] = field(default_factory=Counter)
|
|
179
181
|
cost_field_count: int = 0
|
|
182
|
+
cache_record_timestamps: list[_dt.datetime] = field(default_factory=list)
|
|
183
|
+
positive_cache_record_timestamps: list[_dt.datetime] = field(default_factory=list)
|
|
180
184
|
prompt_cache_audit: PromptCacheAudit = field(default_factory=PromptCacheAudit)
|
|
181
185
|
cache_friendliness_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
|
|
186
|
+
cache_diagnostics_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
|
|
182
187
|
|
|
183
188
|
@property
|
|
184
189
|
def total_tokens(self) -> int:
|
|
@@ -295,6 +300,48 @@ def finite_nonnegative_number(value: Any, *, clamp_negative: bool) -> int | floa
|
|
|
295
300
|
return None
|
|
296
301
|
|
|
297
302
|
|
|
303
|
+
def parse_timestamp_value(value: Any) -> _dt.datetime | None:
    """Parse a transcript timestamp into a timezone-aware UTC datetime.

    Strings are stripped and parsed with ``datetime.fromisoformat``; a trailing
    ``Z`` is rewritten to ``+00:00`` and naive results are taken to be UTC.
    Any non-string value is passed through ``finite_nonnegative_number`` and
    read as a Unix epoch: values above 10_000_000_000 are treated as
    milliseconds, everything else as seconds.

    Returns:
        The parsed moment converted to UTC, or ``None`` when the value is
        blank, unparseable, rejected by ``finite_nonnegative_number``, or
        outside the range ``datetime`` can represent.
    """
    if isinstance(value, str):
        text = value.strip()
        if not text:
            return None
        if text.endswith("Z"):
            # Older ``fromisoformat`` implementations reject the "Z" suffix.
            text = text[:-1] + "+00:00"
        try:
            parsed = _dt.datetime.fromisoformat(text)
            if parsed.tzinfo is None:
                parsed = parsed.replace(tzinfo=_dt.timezone.utc)
            # The UTC conversion is inside the ``try`` because shifting a value
            # next to datetime.min/max by its offset raises OverflowError.
            return parsed.astimezone(_dt.timezone.utc)
        except (ValueError, OverflowError):
            return None

    metric = finite_nonnegative_number(value, clamp_negative=False)
    if metric is None:
        return None
    epoch = float(metric)
    # Heuristic unit detection: anything this large is taken as milliseconds.
    seconds = epoch / 1000.0 if epoch > 10_000_000_000 else epoch
    try:
        return _dt.datetime.fromtimestamp(seconds, tz=_dt.timezone.utc)
    except (OverflowError, OSError, ValueError):
        return None
|
|
325
|
+
|
|
326
|
+
|
|
327
|
+
def record_timestamp(root: Any) -> _dt.datetime | None:
    """Return the first parseable timestamp found on a transcript record.

    The record itself is checked first, then its ``"message"`` value when that
    is a dict. Within each mapping the keys are tried in ``TIMESTAMP_KEYS``
    order. Returns ``None`` when ``root`` is not a dict or no present key
    holds a value that ``parse_timestamp_value`` accepts.
    """
    if not isinstance(root, dict):
        return None

    containers = [root]
    nested = root.get("message")
    if isinstance(nested, dict):
        containers.append(nested)

    for container in containers:
        for name in TIMESTAMP_KEYS:
            if name not in container:
                continue
            moment = parse_timestamp_value(container[name])
            if moment is not None:
                return moment
    return None
|
|
343
|
+
|
|
344
|
+
|
|
298
345
|
def normalize_token_bucket(raw: str) -> str:
|
|
299
346
|
return TOKEN_TYPE_ALIASES.get(raw, raw)
|
|
300
347
|
|
|
@@ -667,11 +714,15 @@ def add_usage(
|
|
|
667
714
|
) -> RecordUsage:
|
|
668
715
|
root_model = None
|
|
669
716
|
root_query_source = None
|
|
717
|
+
parsed_timestamp = None
|
|
670
718
|
if isinstance(root, dict):
|
|
671
719
|
root_model = first_string(root, MODEL_KEYS)
|
|
672
720
|
root_query_source = first_string(root, QUERY_SOURCE_KEYS)
|
|
721
|
+
parsed_timestamp = record_timestamp(root)
|
|
673
722
|
|
|
674
723
|
record = RecordUsage()
|
|
724
|
+
cache_telemetry_present = False
|
|
725
|
+
positive_cache_telemetry_present = False
|
|
675
726
|
summary.prompt_cache_audit.observe(root)
|
|
676
727
|
for d in walk(root):
|
|
677
728
|
local_tokens: Counter[str] = Counter()
|
|
@@ -695,6 +746,10 @@ def add_usage(
|
|
|
695
746
|
|
|
696
747
|
for bucket in present_buckets:
|
|
697
748
|
summary.token_field_presence[bucket] += 1
|
|
749
|
+
if "cache_read" in present_buckets or "cache_creation" in present_buckets:
|
|
750
|
+
cache_telemetry_present = True
|
|
751
|
+
if local_tokens.get("cache_read", 0) > 0 or local_tokens.get("cache_creation", 0) > 0:
|
|
752
|
+
positive_cache_telemetry_present = True
|
|
698
753
|
|
|
699
754
|
if local_tokens:
|
|
700
755
|
summary.tokens.update(local_tokens)
|
|
@@ -713,6 +768,10 @@ def add_usage(
|
|
|
713
768
|
record.cost_usd += cost
|
|
714
769
|
summary.cost_field_count += 1
|
|
715
770
|
break
|
|
771
|
+
if parsed_timestamp is not None and cache_telemetry_present:
|
|
772
|
+
summary.cache_record_timestamps.append(parsed_timestamp)
|
|
773
|
+
if parsed_timestamp is not None and positive_cache_telemetry_present:
|
|
774
|
+
summary.positive_cache_record_timestamps.append(parsed_timestamp)
|
|
716
775
|
commands, tools = collect_record_hints(root, show_commands=show_commands)
|
|
717
776
|
record.commands = commands
|
|
718
777
|
record.tools = tools
|
|
@@ -980,6 +1039,7 @@ def segment_position_stats(samples: list[PromptSegmentSample], attr: str, window
|
|
|
980
1039
|
"stability": stability,
|
|
981
1040
|
"volatile_share": 1.0 - stability,
|
|
982
1041
|
"unique_hashes": len(counts),
|
|
1042
|
+
"sample_count": len(values),
|
|
983
1043
|
})
|
|
984
1044
|
return stats
|
|
985
1045
|
|
|
@@ -1143,6 +1203,201 @@ def cache_friendliness_for_summary(summary: UsageSummary) -> dict[str, Any]:
|
|
|
1143
1203
|
return summary.cache_friendliness_cache
|
|
1144
1204
|
|
|
1145
1205
|
|
|
1206
|
+
def _cache_diagnostic_confidence(*, skipped: bool, samples: bool, has_cache: bool) -> str:
|
|
1207
|
+
if skipped:
|
|
1208
|
+
return "partial"
|
|
1209
|
+
if samples or has_cache:
|
|
1210
|
+
return "hypothesis"
|
|
1211
|
+
return "unavailable"
|
|
1212
|
+
|
|
1213
|
+
|
|
1214
|
+
def build_ttl_diagnostics(summary: UsageSummary, *, has_cache_any: bool, skipped: bool) -> dict[str, Any]:
    """Describe what the timestamped cache records suggest about TTL reuse.

    The span is measured between the earliest and latest entries of
    ``summary.positive_cache_record_timestamps`` and bucketed as
    ``within-5m``, ``between-5m-and-1h`` or ``beyond-1h``. With fewer than two
    such timestamps the report is ``unavailable`` and carries no span or
    candidate.

    Args:
        summary: Scan summary providing both timestamp lists.
        has_cache_any: Whether any cache token field was observed; decides the
            status and evidence labels once a span can be computed.
        skipped: Whether parts of the scan were skipped; downgrades the
            confidence label to ``"partial"``.

    Returns:
        A dict with status, evidence, confidence, both record counts, the
        span in whole seconds, the candidate bucket, a reason and caveats.
    """
    all_record_count = len(summary.cache_record_timestamps)
    positive_times = sorted(summary.positive_cache_record_timestamps)

    if len(positive_times) < 2:
        status = "unavailable"
        evidence = EVIDENCE_UNAVAILABLE
        confidence = "partial" if skipped else "unavailable"
        span_seconds = None
        bucket = None
        reason = (
            "Fewer than two positive timestamped cache telemetry records were observed, so TTL reuse intervals cannot be inferred."
        )
    else:
        elapsed = positive_times[-1] - positive_times[0]
        span_seconds = max(0, int(elapsed.total_seconds()))
        if span_seconds <= 5 * 60:
            bucket = "within-5m"
        elif span_seconds <= 60 * 60:
            bucket = "between-5m-and-1h"
        else:
            bucket = "beyond-1h"
        if has_cache_any:
            status = "hypothesis"
            evidence = EVIDENCE_INFERRED
        else:
            status = "unavailable"
            evidence = EVIDENCE_UNAVAILABLE
        confidence = "partial" if skipped else "hypothesis"
        reason = (
            "Positive timestamped cache telemetry records bound the local cache-observation span, but exact provider cache TTL reuse remains a hypothesis."
        )

    return {
        "status": status,
        "evidence": evidence,
        "confidence": confidence,
        "timestamped_cache_record_count": all_record_count,
        "positive_timestamped_cache_record_count": len(positive_times),
        "timestamped_cache_record_span_seconds": span_seconds,
        "candidate": bucket,
        "reason": reason,
        "interval_basis": "positive_timestamped_cache_records",
        "caveats": [
            "Timestamped cache telemetry records do not prove exact provider cache-prefix identity or provider cache TTL state.",
            "5-minute versus 1-hour TTL guidance is a local hypothesis unless corroborated with provider telemetry and repeated stable prefixes.",
        ],
    }
|
|
1252
|
+
|
|
1253
|
+
|
|
1254
|
+
def build_cache_diagnostics(summary: UsageSummary) -> dict[str, Any]:
    """Assemble the cache diagnostics report for a scan summary.

    The report combines cache token observations, prefix-position stability
    statistics from the prompt cache audit samples, a list of cache-miss
    hypotheses, and the TTL and headroom sub-reports. The result is stored on
    ``summary.cache_diagnostics_cache`` and returned as-is on later calls.

    Args:
        summary: Scan summary to describe.

    Returns:
        The diagnostics dict, tagged with ``CACHE_DIAGNOSTICS_SCHEMA_VERSION``.
    """
    memoized = summary.cache_diagnostics_cache
    if memoized is not None:
        return memoized

    availability = build_metric_availability(summary)
    cache_availability = availability["cache"]
    cache_friendliness = cache_friendliness_for_summary(summary)
    skipped = bool(
        summary.skipped_files
        or summary.skipped_records
        or summary.parse_errors
        or cache_friendliness.get("skipped_evidence")
    )

    field_presence = summary.token_field_presence
    has_cache_read = field_presence.get("cache_read", 0) > 0
    has_cache_creation = field_presence.get("cache_creation", 0) > 0
    has_cache_any = has_cache_read or has_cache_creation
    cache_read = summary.tokens.get("cache_read", 0)
    cache_creation = summary.tokens.get("cache_creation", 0)

    samples = summary.prompt_cache_audit.samples
    prefix_stats = segment_position_stats(samples, "prefix_hashes", PROMPT_AUDIT_PREFIX_SEGMENTS) if samples else []
    confidence = _cache_diagnostic_confidence(skipped=skipped, samples=bool(samples), has_cache=has_cache_any)

    # Every per-position entry shares the same label, derived from cache friendliness.
    position_confidence = "partial" if cache_friendliness.get("confidence") == "partial" else "hypothesis"

    most_stable_first = sorted(prefix_stats, key=lambda item: (-item["stability"], item["position"]))
    stable_prefix_candidates: list[dict[str, Any]] = [
        {
            "position": entry["position"],
            "stability": round(float(entry["stability"]), 4),
            "volatile_share": round(float(entry["volatile_share"]), 4),
            "unique_hashes": entry["unique_hashes"],
            "sample_count": entry["sample_count"],
            "evidence": EVIDENCE_INFERRED,
            "confidence": position_confidence,
            "action": "Keep stable instructions, policies, and reusable context before run-specific evidence.",
        }
        for entry in most_stable_first[:PROMPT_AUDIT_PREFIX_SEGMENTS]
        if not entry["stability"] < 0.66
    ]

    # The first matching cache-friendliness finding may name a more specific trigger.
    breaker_trigger = "prefix_position"
    volatile_finding = next(
        (
            finding
            for finding in cache_friendliness.get("findings", [])
            if isinstance(finding, dict) and finding.get("id") == "volatile-content-near-prefix"
        ),
        None,
    )
    if volatile_finding is not None:
        finding_evidence = volatile_finding.get("evidence")
        if isinstance(finding_evidence, dict):
            breaker_trigger = str(finding_evidence.get("trigger") or breaker_trigger)

    most_volatile_first = sorted(prefix_stats, key=lambda item: (-item["volatile_share"], item["position"]))
    dynamic_prefix_breakers: list[dict[str, Any]] = [
        {
            "position": entry["position"],
            "trigger": breaker_trigger,
            "volatile_share": round(float(entry["volatile_share"]), 4),
            "stability": round(float(entry["stability"]), 4),
            "unique_hashes": entry["unique_hashes"],
            "sample_count": entry["sample_count"],
            "evidence": EVIDENCE_INFERRED,
            "confidence": position_confidence,
            "heuristic": True,
            "action": "Move diffs, logs, timestamps, and command output after stable reusable prompt prefixes.",
        }
        for entry in most_volatile_first
        if not entry["volatile_share"] < 0.34
    ][:PROMPT_AUDIT_MAX_FINDINGS]

    hypotheses: list[dict[str, Any]] = []
    if not has_cache_any:
        hypotheses.append({
            "id": "cache-fields-missing",
            "evidence": EVIDENCE_UNAVAILABLE,
            "confidence": "partial" if skipped else "unavailable",
            "reason": "No cache_read/cache_creation transcript fields were observed.",
            "action": "Hide cache-read UI or label cache telemetry as missing for this scan.",
        })
    wrote_cache = has_cache_creation and cache_creation > 0
    if wrote_cache and not (has_cache_read and cache_read != 0):
        hypotheses.append({
            "id": "cache-cold-or-prefix-changed",
            "evidence": EVIDENCE_INFERRED,
            "confidence": "hypothesis",
            "reason": "Cache creation tokens were observed without corresponding cache read tokens.",
            "action": "Check whether stable instructions changed or whether the session was cache-cold.",
        })
    if has_cache_creation and cache_creation >= 10_000 and cache_read > 0 and summary.cache_amortization < 0.5:
        hypotheses.append({
            "id": "cache-read-low-vs-write",
            "evidence": EVIDENCE_INFERRED,
            "confidence": "hypothesis",
            "reason": "Cache reads are small relative to observed cache writes.",
            "action": "Keep reusable prompt prefixes stable across turns before changing large context blocks.",
        })
    if dynamic_prefix_breakers:
        worst_breaker = dynamic_prefix_breakers[0]
        hypotheses.append({
            "id": "volatile-prefix-breakers",
            "evidence": EVIDENCE_INFERRED,
            "confidence": worst_breaker["confidence"],
            "reason": "Redacted prompt segment hashes show volatile content near the prefix window.",
            "action": worst_breaker["action"],
        })
    if skipped:
        hypotheses.append({
            "id": "partial-transcript-scan",
            "evidence": EVIDENCE_INFERRED,
            "confidence": "partial",
            "reason": "Some transcript files, records, or prompt structures were skipped/capped.",
            "action": "Rerun against narrower transcript paths or higher safe scan limits before making decisions.",
        })

    ttl = build_ttl_diagnostics(summary, has_cache_any=has_cache_any, skipped=skipped)
    headroom = build_headroom_availability(summary)
    headroom_diagnostics = {
        **headroom,
        "historical_total_tokens_are_not_headroom": True,
        "required_observation": "live_statusline_snapshot",
    }

    anything_observed = bool(has_cache_any or samples)
    if anything_observed:
        degraded = skipped or cache_friendliness.get("status") == "partial"
        status = "partial" if degraded else "available"
    elif skipped:
        status = "partial"
    else:
        status = "missing"

    diagnostics = {
        "schema_version": CACHE_DIAGNOSTICS_SCHEMA_VERSION,
        "status": status,
        "confidence": confidence,
        "evidence": EVIDENCE_INFERRED if anything_observed else EVIDENCE_UNAVAILABLE,
        "heuristic": True,
        "observations": {
            "cache_fields": cache_availability,
            "cache_read_tokens": cache_read,
            "cache_creation_tokens": cache_creation,
        },
        "derived_ratios": cache_availability["derived"],
        "stable_prefix_candidates": stable_prefix_candidates,
        "dynamic_prefix_breakers": dynamic_prefix_breakers,
        "cache_miss_hypotheses": hypotheses[:PROMPT_AUDIT_MAX_FINDINGS],
        "ttl_diagnostics": ttl,
        "headroom_diagnostics": headroom_diagnostics,
        "caveats": [
            "Cache diagnostics are local transcript heuristics and do not prove exact provider cache-prefix state.",
            "Provider cache read/write fields are diagnostic telemetry and do not prove ContextGuard-caused token reduction.",
            "Stable-prefix and breaker positions come from bounded redacted segment hashes, not raw prompt text.",
        ],
    }
    summary.cache_diagnostics_cache = diagnostics
    return diagnostics
|
|
1395
|
+
|
|
1396
|
+
|
|
1397
|
+
def cache_diagnostics_for_summary(summary: UsageSummary) -> dict[str, Any]:
    """Return the cache diagnostics report for ``summary``.

    Delegates to ``build_cache_diagnostics`` and returns its result unchanged.
    """
    diagnostics = build_cache_diagnostics(summary)
    return diagnostics
|
|
1399
|
+
|
|
1400
|
+
|
|
1146
1401
|
def build_metric_caveats(summary: UsageSummary) -> list[str]:
|
|
1147
1402
|
caveats = [
|
|
1148
1403
|
"Values are observed from local Claude Code transcript JSON/JSONL fields and are not official billing records.",
|
|
@@ -1177,6 +1432,7 @@ def feasibility_json(
|
|
|
1177
1432
|
stable_tokens = stable_token_counter(summary.tokens)
|
|
1178
1433
|
stable_total_tokens = sum(stable_tokens.values())
|
|
1179
1434
|
cache_friendliness = cache_friendliness_for_summary(summary)
|
|
1435
|
+
cache_diagnostics = cache_diagnostics_for_summary(summary)
|
|
1180
1436
|
return {
|
|
1181
1437
|
"schema_version": FEASIBILITY_SCHEMA_VERSION,
|
|
1182
1438
|
"producer": FEASIBILITY_PRODUCER,
|
|
@@ -1195,6 +1451,7 @@ def feasibility_json(
|
|
|
1195
1451
|
"context_availability",
|
|
1196
1452
|
"headroom_availability",
|
|
1197
1453
|
"cache_friendliness",
|
|
1454
|
+
"cache_diagnostics",
|
|
1198
1455
|
"totals",
|
|
1199
1456
|
],
|
|
1200
1457
|
"diagnostic_fields": ["summary"],
|
|
@@ -1222,6 +1479,7 @@ def feasibility_json(
|
|
|
1222
1479
|
"context_availability": availability["context"],
|
|
1223
1480
|
"headroom_availability": availability["headroom"],
|
|
1224
1481
|
"cache_friendliness": cache_friendliness,
|
|
1482
|
+
"cache_diagnostics": cache_diagnostics,
|
|
1225
1483
|
"totals": {
|
|
1226
1484
|
"total_tokens": stable_total_tokens,
|
|
1227
1485
|
"tokens": stable_tokens,
|
|
@@ -1272,6 +1530,7 @@ def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any
|
|
|
1272
1530
|
output_ratio = output_tokens / total
|
|
1273
1531
|
input_ratio = input_tokens / total
|
|
1274
1532
|
cache_friendliness = cache_friendliness_for_summary(summary)
|
|
1533
|
+
cache_diagnostics = cache_diagnostics_for_summary(summary)
|
|
1275
1534
|
for finding in cache_friendliness.get("findings", []):
|
|
1276
1535
|
if isinstance(finding, dict) and finding.get("id") == "volatile-content-near-prefix":
|
|
1277
1536
|
evidence = dict(finding.get("evidence") or {})
|
|
@@ -1331,25 +1590,57 @@ def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any
|
|
|
1331
1590
|
},
|
|
1332
1591
|
))
|
|
1333
1592
|
if cache_creation >= 50_000 and 1.0 <= summary.cache_amortization < 5.0:
|
|
1593
|
+
ttl = cache_diagnostics.get("ttl_diagnostics") or {}
|
|
1594
|
+
ttl_status = str(ttl.get("status") or "unavailable")
|
|
1595
|
+
ttl_confidence = str(ttl.get("confidence") or "unavailable")
|
|
1596
|
+
ttl_candidate = ttl.get("candidate")
|
|
1597
|
+
ttl_span = ttl.get("timestamped_cache_record_span_seconds")
|
|
1598
|
+
if ttl_status == "hypothesis" and ttl_candidate in {"between-5m-and-1h", "beyond-1h"}:
|
|
1599
|
+
ttl_reason = (
|
|
1600
|
+
f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
|
|
1601
|
+
f"{cache_creation} write tokens; timestamped cache telemetry spans {ttl_span} seconds "
|
|
1602
|
+
f"({ttl_candidate})."
|
|
1603
|
+
)
|
|
1604
|
+
ttl_action = (
|
|
1605
|
+
"Evaluate a longer provider prompt-cache TTL only after confirming the same stable prefix "
|
|
1606
|
+
"pattern in representative sessions and rechecking current provider TTL/pricing documentation."
|
|
1607
|
+
)
|
|
1608
|
+
elif ttl_status == "hypothesis":
|
|
1609
|
+
ttl_reason = (
|
|
1610
|
+
f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
|
|
1611
|
+
f"{cache_creation} write tokens, but timestamped cache telemetry currently points to {ttl_candidate}."
|
|
1612
|
+
)
|
|
1613
|
+
ttl_action = (
|
|
1614
|
+
"Keep collecting timestamped cache read/write evidence; do not enable a longer TTL solely from this scan."
|
|
1615
|
+
)
|
|
1616
|
+
else:
|
|
1617
|
+
ttl_reason = (
|
|
1618
|
+
f"Heuristic only — cache amortization {summary.cache_amortization:.2f}x with "
|
|
1619
|
+
f"{cache_creation} write tokens, but TTL diagnostics are {ttl_status} because this scan lacks "
|
|
1620
|
+
"at least two timestamped cache telemetry records."
|
|
1621
|
+
)
|
|
1622
|
+
ttl_action = (
|
|
1623
|
+
"Collect or inspect timestamped cache read/write evidence before evaluating a longer provider "
|
|
1624
|
+
"prompt-cache TTL; historical token totals alone are not TTL evidence."
|
|
1625
|
+
)
|
|
1334
1626
|
recs.append(recommendation(
|
|
1335
1627
|
"evaluate-1h-ttl-cache",
|
|
1336
|
-
"Cache writes are large;
|
|
1337
|
-
|
|
1338
|
-
|
|
1339
|
-
f"{cache_creation} write tokens; absolute write cost is high and reuse is moderate. "
|
|
1340
|
-
"This metric does not inspect timestamps, so confirm reuse spans >5min in a sample "
|
|
1341
|
-
"session before enabling 1h TTL."
|
|
1342
|
-
),
|
|
1343
|
-
(
|
|
1344
|
-
"If sessions reuse the same prefix beyond the 5-minute default TTL, evaluate the 1h prompt cache "
|
|
1345
|
-
"beta (write 2x, read 0.1x). It pays off when reuse spans the gap between two 5-min cache writes."
|
|
1346
|
-
),
|
|
1628
|
+
"Cache writes are large; validate TTL evidence before longer TTL",
|
|
1629
|
+
ttl_reason,
|
|
1630
|
+
ttl_action,
|
|
1347
1631
|
"P2",
|
|
1348
1632
|
{
|
|
1349
1633
|
"cache_creation": cache_creation,
|
|
1350
1634
|
"cache_read": cache_read,
|
|
1351
1635
|
"cache_amortization": round(summary.cache_amortization, 4),
|
|
1352
1636
|
"cache_hit_rate": round(summary.cache_hit_rate, 4),
|
|
1637
|
+
"ttl_status": ttl_status,
|
|
1638
|
+
"ttl_evidence": ttl.get("evidence") or EVIDENCE_UNAVAILABLE,
|
|
1639
|
+
"ttl_confidence": ttl_confidence,
|
|
1640
|
+
"ttl_candidate": ttl_candidate,
|
|
1641
|
+
"timestamped_cache_record_count": ttl.get("timestamped_cache_record_count"),
|
|
1642
|
+
"positive_timestamped_cache_record_count": ttl.get("positive_timestamped_cache_record_count"),
|
|
1643
|
+
"timestamped_cache_record_span_seconds": ttl_span,
|
|
1353
1644
|
"heuristic": True,
|
|
1354
1645
|
},
|
|
1355
1646
|
))
|
|
@@ -1462,6 +1753,7 @@ def summary_json(
|
|
|
1462
1753
|
"top_commands": counter_json(summary.by_command, top),
|
|
1463
1754
|
"top_tools": counter_json(summary.by_tool, top),
|
|
1464
1755
|
"cache_friendliness": cache_friendliness_for_summary(summary),
|
|
1756
|
+
"cache_diagnostics": cache_diagnostics_for_summary(summary),
|
|
1465
1757
|
}
|
|
1466
1758
|
if include_recommendations:
|
|
1467
1759
|
data["recommendations"] = build_recommendations(summary, top)
|
|
@@ -1574,6 +1866,27 @@ def main() -> int:
|
|
|
1574
1866
|
if isinstance(finding, dict):
|
|
1575
1867
|
print(f" finding [{finding.get('severity')}] {finding.get('id')}: {finding.get('title')}")
|
|
1576
1868
|
|
|
1869
|
+
cache_diagnostics = cache_diagnostics_for_summary(summary)
|
|
1870
|
+
print("\nCache diagnostics")
|
|
1871
|
+
print(f" status {cache_diagnostics.get('status')}")
|
|
1872
|
+
print(f" confidence {cache_diagnostics.get('confidence')}")
|
|
1873
|
+
hypotheses = cache_diagnostics.get("cache_miss_hypotheses") or []
|
|
1874
|
+
if hypotheses:
|
|
1875
|
+
first = hypotheses[0]
|
|
1876
|
+
print(f" top_hypothesis {first.get('id')} ({first.get('confidence')})")
|
|
1877
|
+
stable_candidates = cache_diagnostics.get("stable_prefix_candidates") or []
|
|
1878
|
+
if stable_candidates:
|
|
1879
|
+
first = stable_candidates[0]
|
|
1880
|
+
print(f" stable_prefix_candidate position={first.get('position')} stability={first.get('stability')}")
|
|
1881
|
+
breakers = cache_diagnostics.get("dynamic_prefix_breakers") or []
|
|
1882
|
+
if breakers:
|
|
1883
|
+
first = breakers[0]
|
|
1884
|
+
print(f" dynamic_prefix_breaker position={first.get('position')} volatile_share={first.get('volatile_share')}")
|
|
1885
|
+
ttl = cache_diagnostics.get("ttl_diagnostics") or {}
|
|
1886
|
+
print(f" ttl_status {ttl.get('status')} ({ttl.get('confidence')})")
|
|
1887
|
+
headroom = cache_diagnostics.get("headroom_diagnostics") or {}
|
|
1888
|
+
print(f" headroom_status {headroom.get('status')} ({headroom.get('evidence')})")
|
|
1889
|
+
|
|
1577
1890
|
model_totals = Counter({model: sum(tokens.values()) for model, tokens in summary.by_model.items()})
|
|
1578
1891
|
print_counter("By model", model_totals, args.top)
|
|
1579
1892
|
|
|
@@ -44,6 +44,55 @@ CODE_SIGNAL_RE = re.compile(
|
|
|
44
44
|
r"(^\s*(def |class |function |func |import |from \S+ import |public |private |const |let |var |#include|package )"
|
|
45
45
|
r"|[{};]\s*$|=>|::)"
|
|
46
46
|
)
|
|
47
|
+
CODE_FENCE_RE = re.compile(r"(?m)^\s*```")
|
|
48
|
+
JSON_KEY_RE = re.compile(r'"(?:[^"\\]|\\.)*"\s*:')
|
|
49
|
+
QUOTED_STRING_RE = re.compile(r"""(?x)
|
|
50
|
+
"(?:[^"\\]|\\.)*" |
|
|
51
|
+
'(?:[^'\\]|\\.)*'
|
|
52
|
+
""")
|
|
53
|
+
HASH_RE = re.compile(r"\b(?:[0-9a-fA-F]{32,}|sha256:[0-9a-fA-F]{32,})\b")
|
|
54
|
+
PATH_RE = re.compile(
|
|
55
|
+
r"(?x)(?:"
|
|
56
|
+
r"(?<![\w.-])/(?:[A-Za-z0-9._@%+=:-]+/)*[A-Za-z0-9._@%+=:-]+"
|
|
57
|
+
r"|"
|
|
58
|
+
r"\b[A-Za-z]:\\(?:[^\\\s:\"'<>|]+\\)*[^\\\s:\"'<>|]+"
|
|
59
|
+
r"|"
|
|
60
|
+
r"\b[A-Za-z0-9._-]+\#path:[0-9a-f]{12}\b"
|
|
61
|
+
r")"
|
|
62
|
+
)
|
|
63
|
+
STACK_FRAME_RE = re.compile(
|
|
64
|
+
r"(?m)^\s*(?:File\s+\"[^\"]+\",\s+line\s+\d+,\s+in\s+\S+|at\s+\S+.*\([^)]*:\d+(?::\d+)?\))"
|
|
65
|
+
)
|
|
66
|
+
IDENTIFIER_RE = re.compile(r"\b[A-Za-z_][A-Za-z0-9_]*(?:[A-Z][A-Za-z0-9_]*)?\b")
|
|
67
|
+
NUMERIC_CONSTANT_RE = re.compile(r"(?<![\w.])[-+]?(?:0x[0-9A-Fa-f]+|\d+(?:\.\d+)?)(?![\w.])")
|
|
68
|
+
PROTECTED_ZONE_KEYS = (
|
|
69
|
+
"code_fence",
|
|
70
|
+
"diff",
|
|
71
|
+
"identifier",
|
|
72
|
+
"numeric_constant",
|
|
73
|
+
"hash",
|
|
74
|
+
"path",
|
|
75
|
+
"stack_frame",
|
|
76
|
+
"quoted_string",
|
|
77
|
+
"json_key",
|
|
78
|
+
)
|
|
79
|
+
PROTECTED_ALLOWED_TRANSFORMS = (
|
|
80
|
+
"exact_dedupe",
|
|
81
|
+
"structural_window",
|
|
82
|
+
"line_truncate",
|
|
83
|
+
"whitespace_normalize",
|
|
84
|
+
"json_compact",
|
|
85
|
+
"artifact_retrieval",
|
|
86
|
+
)
|
|
87
|
+
PROTECTED_DENIED_TRANSFORMS = (
|
|
88
|
+
"semantic_compress",
|
|
89
|
+
"paraphrase",
|
|
90
|
+
"identifier_rewrite",
|
|
91
|
+
"numeric_rewrite",
|
|
92
|
+
"hash_rewrite",
|
|
93
|
+
"path_rewrite",
|
|
94
|
+
"quoted_literal_rewrite",
|
|
95
|
+
)
|
|
47
96
|
|
|
48
97
|
|
|
49
98
|
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
|
|
@@ -173,6 +222,85 @@ def classify_content(text: str) -> str:
|
|
|
173
222
|
return "prose"
|
|
174
223
|
|
|
175
224
|
|
|
225
|
+
def protected_zone_counts(text: str) -> dict[str, int]:
    """Conservatively count semantic-sensitive zones without storing raw spans.

    The counts intentionally over-approximate. They are policy signals for later
    transform gates, not a parser. Metadata must never include the matched path,
    identifier, hash, or string contents because receipts are safe to share.

    Args:
        text: Content to scan.

    Returns:
        A mapping from zone key to match count, in ``PROTECTED_ZONE_KEYS``
        order, containing only the zones whose count is greater than zero.
    """
    fence_markers = len(CODE_FENCE_RE.findall(text))
    # A diff line is a file header, a hunk header, or a line starting with a
    # single "+" or "-". ``startswith`` with a tuple is used because a
    # substring test against "+-" also matches the empty string, which would
    # count every blank line as a diff line.
    diff_lines = sum(
        1
        for line in text.splitlines()
        if DIFF_FILE_HEADER_RE.match(line)
        or DIFF_HUNK_RE.match(line)
        or (line.startswith(("+", "-")) and not line.startswith(("+++", "---")))
    )
    counts = {
        # Fences come in open/close pairs; an unmatched marker still counts as one zone.
        "code_fence": (fence_markers + 1) // 2,
        "diff": diff_lines,
        "identifier": len(IDENTIFIER_RE.findall(text)),
        "numeric_constant": len(NUMERIC_CONSTANT_RE.findall(text)),
        "hash": len(HASH_RE.findall(text)),
        "path": len(PATH_RE.findall(text)),
        "stack_frame": len(STACK_FRAME_RE.findall(text)),
        "quoted_string": len(QUOTED_STRING_RE.findall(text)),
        "json_key": len(JSON_KEY_RE.findall(text)),
    }
    return {key: counts[key] for key in PROTECTED_ZONE_KEYS if counts.get(key, 0) > 0}
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def build_protected_policy(
    *,
    text: str,
    content_type: str,
    strategy_detail: dict[str, object],
    lossy: bool,
) -> dict[str, object]:
    """Assemble the opt-in protected-zone policy section of a receipt.

    The policy describes which transforms are eligible and whether exact
    retrieval of the original is expected. It makes no statement about
    provider-cache stability; cache ordering belongs to
    `context-guard-cost compile`.

    Args:
        text: Text scanned for protected zones via ``protected_zone_counts``.
        content_type: Content type label echoed into the policy.
        strategy_detail: Mapping whose ``"strategy"`` entry names the applied
            strategy; a missing or falsy entry is reported as ``"unknown"``.
        lossy: Whether the applied transform dropped content.

    Returns:
        A policy dict holding only counts and flags, never matched text.
    """
    zones = protected_zone_counts(text)
    has_zones = bool(zones)
    # Exact retrieval is only demanded when protected content may have been lost.
    needs_retrieval = has_zones and bool(lossy)

    strategy_name = strategy_detail.get("strategy")
    if not strategy_name:
        strategy_name = "unknown"

    if needs_retrieval:
        scope = "sanitized_full_input"
    else:
        scope = "compressed_output"

    strategy_section = {
        "name": str(strategy_name),
        "structural_only": True,
    }
    # Key order is kept stable because it is visible in the emitted receipt.
    return {
        "enabled": True,
        "detected": has_zones,
        "content_type": content_type,
        "zone_counts": zones,
        "semantic_compress": False,
        "allowed_transforms": list(PROTECTED_ALLOWED_TRANSFORMS),
        "denied_transforms": list(PROTECTED_DENIED_TRANSFORMS),
        "retrieval_required": needs_retrieval,
        "retrieval_scope": scope,
        "raw_spans_stored": False,
        "policy_note": "Protected zones permit structural transforms only; no semantic/paraphrase rewrites.",
        "strategy": strategy_section,
    }
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def build_transform_policy(protected_policy: dict[str, object]) -> dict[str, object]:
    """Condense a protected-zone policy into a transform-eligibility summary.

    Only flags and transform names are emitted; no raw protected content is
    embedded.

    Args:
        protected_policy: Policy dict; its ``"detected"`` and
            ``"retrieval_required"`` entries are read, and missing entries are
            treated as false.

    Returns:
        Summary dict with the mode, the allowed/denied transform lists, and
        whether exact retrieval is required.
    """
    if protected_policy.get("detected"):
        mode = "protected"
    else:
        mode = "structural_default"
    needs_exact = bool(protected_policy.get("retrieval_required"))

    # Filled in a fixed order so the emitted receipt keeps a stable key order.
    summary: dict[str, object] = {}
    summary["mode"] = mode
    summary["semantic_transforms_allowed"] = False
    summary["semantic_compress"] = False
    summary["allowed"] = list(PROTECTED_ALLOWED_TRANSFORMS)
    summary["denied"] = list(PROTECTED_DENIED_TRANSFORMS)
    summary["exact_retrieval_required"] = needs_exact
    summary["raw_spans_stored"] = False
    return summary
|
|
302
|
+
|
|
303
|
+
|
|
176
304
|
def _looks_like_json(stripped: str) -> bool:
|
|
177
305
|
if stripped[0] not in "{[":
|
|
178
306
|
return False
|
|
@@ -353,6 +481,7 @@ def build_metadata(
|
|
|
353
481
|
input_truncated: bool,
|
|
354
482
|
input_bytes: int,
|
|
355
483
|
max_bytes: int,
|
|
484
|
+
protected_policy_enabled: bool = False,
|
|
356
485
|
) -> dict[str, object]:
|
|
357
486
|
"""Assemble the compress receipt: observed byte/line counts plus an estimated token proxy.
|
|
358
487
|
|
|
@@ -370,7 +499,7 @@ def build_metadata(
|
|
|
370
499
|
if lossy
|
|
371
500
|
else "Data-preserving: compact form is semantically equivalent to the sanitized input."
|
|
372
501
|
)
|
|
373
|
-
|
|
502
|
+
metadata: dict[str, object] = {
|
|
374
503
|
"tool": "context-guard-kit.context_compress",
|
|
375
504
|
"metadata_version": 1,
|
|
376
505
|
"content_type": content_type,
|
|
@@ -407,6 +536,21 @@ def build_metadata(
|
|
|
407
536
|
},
|
|
408
537
|
"retrieval_hint": retrieval_hint,
|
|
409
538
|
}
|
|
539
|
+
if protected_policy_enabled:
|
|
540
|
+
protected_policy = build_protected_policy(
|
|
541
|
+
text=original_text,
|
|
542
|
+
content_type=content_type,
|
|
543
|
+
strategy_detail=strategy_detail,
|
|
544
|
+
lossy=lossy,
|
|
545
|
+
)
|
|
546
|
+
metadata["protected_zone_policy"] = protected_policy
|
|
547
|
+
metadata["transform_policy"] = build_transform_policy(protected_policy)
|
|
548
|
+
if protected_policy.get("retrieval_required"):
|
|
549
|
+
metadata["retrieval_hint"] = (
|
|
550
|
+
"Protected lossy structural transform: store the full sanitized text with "
|
|
551
|
+
"`context-guard-artifact store` and retrieve exact slices before relying on omitted content."
|
|
552
|
+
)
|
|
553
|
+
return metadata
|
|
410
554
|
|
|
411
555
|
|
|
412
556
|
def compress_text(
|
|
@@ -417,6 +561,7 @@ def compress_text(
|
|
|
417
561
|
input_truncated: bool,
|
|
418
562
|
input_bytes: int,
|
|
419
563
|
max_bytes: int,
|
|
564
|
+
protected_policy_enabled: bool = False,
|
|
420
565
|
) -> tuple[str, dict[str, object]]:
|
|
421
566
|
"""Sanitize first, then classify and compress, then build the receipt.
|
|
422
567
|
|
|
@@ -446,6 +591,7 @@ def compress_text(
|
|
|
446
591
|
input_truncated=input_truncated,
|
|
447
592
|
input_bytes=input_bytes,
|
|
448
593
|
max_bytes=max_bytes,
|
|
594
|
+
protected_policy_enabled=protected_policy_enabled,
|
|
449
595
|
)
|
|
450
596
|
return compressed, metadata
|
|
451
597
|
|
|
@@ -489,6 +635,7 @@ def run_compress(args: argparse.Namespace) -> int:
|
|
|
489
635
|
input_truncated=input_truncated,
|
|
490
636
|
input_bytes=input_bytes,
|
|
491
637
|
max_bytes=max_bytes,
|
|
638
|
+
protected_policy_enabled=bool(args.protected_policy),
|
|
492
639
|
)
|
|
493
640
|
if args.json:
|
|
494
641
|
payload = {"metadata": metadata, "content": compressed}
|
|
@@ -513,6 +660,11 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
513
660
|
help="force a content type instead of auto-detecting (json/diff/log/search/code/prose)",
|
|
514
661
|
)
|
|
515
662
|
parser.add_argument("--json", action="store_true", help="emit JSON with metadata and compressed content")
|
|
663
|
+
parser.add_argument(
|
|
664
|
+
"--protected-policy",
|
|
665
|
+
action="store_true",
|
|
666
|
+
help="add opt-in protected-zone transform policy metadata to --json/--metadata-only receipts; default content is unchanged",
|
|
667
|
+
)
|
|
516
668
|
parser.add_argument(
|
|
517
669
|
"--metadata-only",
|
|
518
670
|
action="store_true",
|