@ictechgy/context-guard 0.4.9 → 0.4.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. package/CHANGELOG.md +28 -0
  2. package/README.ko.md +59 -31
  3. package/README.md +85 -36
  4. package/docs/benchmark-fixtures/token-savings-12task-baseline.prompt.example.md +7 -0
  5. package/docs/benchmark-fixtures/token-savings-12task-contextguard.prompt.example.md +7 -0
  6. package/docs/benchmark-fixtures/token-savings-12task.evidence.example.jsonl +24 -0
  7. package/docs/benchmark-fixtures/token-savings-12task.tasks.example.json +182 -0
  8. package/docs/benchmark-fixtures/token-savings-12task.variants.example.json +10 -0
  9. package/docs/benchmark-workflow-examples.md +3 -0
  10. package/docs/benchmark-workflows/context-pack-byte-proxy.example.json +278 -137
  11. package/docs/benchmark-workflows/measured-token-workflow.example.json +279 -138
  12. package/docs/benchmark-workflows/provider-cache-telemetry.example.json +279 -138
  13. package/docs/distribution.md +10 -7
  14. package/docs/experimental-benchmark-fixtures.md +30 -6
  15. package/package.json +4 -6
  16. package/packaging/homebrew/context-guard.rb.template +1 -1
  17. package/plugins/context-guard/.claude-plugin/plugin.json +1 -1
  18. package/plugins/context-guard/README.ko.md +20 -14
  19. package/plugins/context-guard/README.md +26 -17
  20. package/plugins/context-guard/bin/context-guard +147 -25
  21. package/plugins/context-guard/bin/context-guard-artifact +884 -79
  22. package/plugins/context-guard/bin/context-guard-audit +33 -2
  23. package/plugins/context-guard/bin/context-guard-bench +1542 -31
  24. package/plugins/context-guard/bin/context-guard-cache-score +665 -0
  25. package/plugins/context-guard/bin/context-guard-compress +146 -1
  26. package/plugins/context-guard/bin/context-guard-cost +790 -6
  27. package/plugins/context-guard/bin/context-guard-experiments +463 -26
  28. package/plugins/context-guard/bin/context-guard-failed-nudge +9 -2
  29. package/plugins/context-guard/bin/context-guard-filter +163 -7
  30. package/plugins/context-guard/bin/context-guard-guard-read +3 -0
  31. package/plugins/context-guard/bin/context-guard-pack +892 -49
  32. package/plugins/context-guard/bin/context-guard-rewrite-bash +3 -0
  33. package/plugins/context-guard/bin/context-guard-sanitize-output +76 -12
  34. package/plugins/context-guard/bin/context-guard-setup +165 -31
  35. package/plugins/context-guard/bin/context-guard-statusline +490 -283
  36. package/plugins/context-guard/bin/context-guard-statusline-merged +5 -0
  37. package/plugins/context-guard/bin/context-guard-tool-prune +480 -53
  38. package/plugins/context-guard/bin/context-guard-trim-output +288 -41
  39. package/plugins/context-guard/brief/README.md +5 -5
  40. package/plugins/context-guard/lib/context_guard_commands.py +230 -0
  41. package/plugins/context-guard/skills/setup/SKILL.md +1 -0
  42. package/context-guard-kit/README.md +0 -91
  43. package/context-guard-kit/benchmark_runner.py +0 -2401
  44. package/context-guard-kit/claude_transcript_cost_audit.py +0 -2346
  45. package/context-guard-kit/context_compress.py +0 -695
  46. package/context-guard-kit/context_escrow.py +0 -935
  47. package/context-guard-kit/context_filter.py +0 -637
  48. package/context-guard-kit/context_guard_cli.py +0 -325
  49. package/context-guard-kit/context_guard_diet.py +0 -1711
  50. package/context-guard-kit/context_pack.py +0 -2713
  51. package/context-guard-kit/cost_guard.py +0 -2349
  52. package/context-guard-kit/experimental_registry.py +0 -4348
  53. package/context-guard-kit/failed_attempt_nudge.py +0 -567
  54. package/context-guard-kit/guard_large_read.py +0 -690
  55. package/context-guard-kit/hook_secret_patterns.py +0 -43
  56. package/context-guard-kit/read_symbol.py +0 -483
  57. package/context-guard-kit/rewrite_bash_for_token_budget.py +0 -501
  58. package/context-guard-kit/sanitize_output.py +0 -725
  59. package/context-guard-kit/settings.example.json +0 -67
  60. package/context-guard-kit/setup_wizard.py +0 -2515
  61. package/context-guard-kit/statusline.sh +0 -362
  62. package/context-guard-kit/statusline_merged.sh +0 -157
  63. package/context-guard-kit/tool_schema_pruner.py +0 -837
  64. package/context-guard-kit/trim_command_output.py +0 -1449
@@ -1,2346 +0,0 @@
1
- #!/usr/bin/env python3
2
- """Best-effort Claude Code transcript usage auditor.
3
-
4
- Claude Code transcript schemas may change. This script scans JSONL objects for
5
- common token/cost fields rather than relying on one exact schema. It reports
6
- parse/read skips so totals are not mistaken for billing-authoritative data.
7
- """
8
- from __future__ import annotations
9
-
10
- import argparse
11
- import datetime as _dt
12
- import errno
13
- import hashlib
14
- import json
15
- import math
16
- import os
17
- import re
18
- import shlex
19
- import stat
20
- import sys
21
- from collections import Counter, defaultdict
22
- from dataclasses import dataclass, field
23
- from pathlib import Path
24
- from typing import Any, BinaryIO, Iterable
25
-
26
# Canonical token buckets paired with the raw transcript keys that feed them.
# Within one bucket the keys are tried in the listed order.
TOKEN_KEY_GROUPS: tuple[tuple[str, tuple[str, ...]], ...] = (
    ("input", ("input_tokens",)),
    ("output", ("output_tokens",)),
    ("cache_creation", ("cache_creation_input_tokens", "cacheCreation")),
    ("cache_read", ("cache_read_input_tokens", "cacheRead")),
)
# Bucket names only; used to filter counters down to known buckets.
KNOWN_TOKEN_BUCKETS = {bucket for bucket, _ in TOKEN_KEY_GROUPS}
# Maps metric "type" attribute spellings onto the canonical bucket names.
TOKEN_TYPE_ALIASES = {
    "input": "input",
    "input_tokens": "input",
    "output": "output",
    "output_tokens": "output",
    "cacheRead": "cache_read",
    "cache_read": "cache_read",
    "cache_read_input_tokens": "cache_read",
    "cacheCreation": "cache_creation",
    "cache_creation": "cache_creation",
    "cache_creation_input_tokens": "cache_creation",
}
# Candidate key spellings probed on each transcript dict, in order.
COST_KEYS = ("total_cost_usd", "cost_usd", "costUSD")
MODEL_KEYS = ("model", "model_id", "modelId")
QUERY_SOURCE_KEYS = ("query_source", "querySource")
TIMESTAMP_KEYS = ("timestamp", "created_at", "createdAt", "time", "ts")
# Schema/producer identifiers; not referenced in the visible part of the file.
FEASIBILITY_SCHEMA_VERSION = "contextguard.metric-feasibility.v1.3"
MAC_VISIBILITY_SCHEMA_VERSION = "contextguard.mac-visibility.v1"
FEASIBILITY_PRODUCER = "context-guard-audit"
CACHE_DIAGNOSTICS_SCHEMA_VERSION = "contextguard.cache-diagnostics.v1"
CACHE_LAYOUT_ADVICE_SCHEMA_VERSION = "contextguard.cache-layout-advice.v1"
# Upper bound on the number of parse-error messages kept in a summary.
MAX_ERROR_EXAMPLES = 20
# Recursion limit the JSON line parser raises the interpreter to, if lower.
JSON_PARSE_RECURSION_LIMIT = 10_000
# Size of each binary read when streaming transcript lines.
READ_CHUNK_BYTES = 64 * 1024
# Default per-file / per-line scan limits and the largest values accepted.
DEFAULT_MAX_FILE_BYTES = 50 * 1024 * 1024
DEFAULT_MAX_LINE_BYTES = 2 * 1024 * 1024
MAX_FILE_BYTES_LIMIT = 2 * 1024 * 1024 * 1024
MAX_LINE_BYTES_LIMIT = 128 * 1024 * 1024
# Case-insensitive pattern for credential-looking substrings (token prefixes,
# Authorization header values, URL userinfo, password-style CLI flags and
# key=value pairs). Matches are replaced with "[REDACTED]" before labelling.
SECRET_VALUE_RE = re.compile(
    r"(?i)(gh[pousr]_[A-Za-z0-9_]{8,}|github_pat_[A-Za-z0-9_]{20,}|"
    r"xox[abprs]-[A-Za-z0-9-]{8,}|(?:AKIA|ASIA)[0-9A-Z]{8,}|"
    r"AIza[0-9A-Za-z_\-]{8,}|Bearer\s+[A-Za-z0-9._~+/=-]+|"
    r"Basic\s+[A-Za-z0-9._~+/=-]+|"
    r"sk-ant-[A-Za-z0-9_-]{12,}|sk-[A-Za-z0-9_-]{12,}|glpat-[A-Za-z0-9_-]{12,}|"
    r"npm_[A-Za-z0-9]{20,}|eyJ[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+\.[A-Za-z0-9_-]+|"
    r"[a-z][a-z0-9+.-]*://[^/\s:@]+:[^/\s@]+@|"
    r"(?:--password|-p)\s+\S+|(?:-u|--user)\s+\S+:\S+|"
    r"(api[_-]?key|token|secret|password)=\S+)"
)
# Replacement for a whole path component that matches SECRET_VALUE_RE.
REDACTED_PATH_COMPONENT = "[REDACTED-PATH-COMPONENT]"
# Keys inspected on every dict when collecting command / tool hints.
COMMAND_KEYS = ("command", "cmd")
TOOL_NAME_KEYS = ("tool_name", "toolName", "tool")
# Bounds for the prompt cache-friendliness sampling.
PROMPT_AUDIT_MAX_RECORDS = 200  # max prompt samples retained per summary
PROMPT_AUDIT_MAX_TEXT_BYTES = 32 * 1024  # per-record byte budget for prompt text
PROMPT_AUDIT_MAX_SEGMENTS_PER_RECORD = 32
PROMPT_AUDIT_PREFIX_SEGMENTS = 3  # leading segments hashed per sample
PROMPT_AUDIT_TAIL_SEGMENTS = 3  # trailing segments hashed per sample
# The next five values are not used in the visible part of the file.
PROMPT_AUDIT_MIN_RECORDS = 3
PROMPT_PREFIX_VOLATILE_THRESHOLD = 0.66
PROMPT_PREFIX_TAIL_CHURN_DELTA = 0.34
PROMPT_AUDIT_MAX_FINDINGS = 5
PROMPT_SEGMENT_HASH_CHARS = 16  # hex characters kept from each segment hash
PROMPT_AUDIT_MAX_TEXT_VALUES = 64  # max text values collected per record
PROMPT_AUDIT_MAX_ROOT_NODES = 4096  # node budget for the record-level walk
PROMPT_AUDIT_MAX_CONTENT_NODES = 2048  # node budget for one content walk
PROMPT_AUDIT_MAX_DEPTH = 64  # nesting depth beyond which traversal is capped
# Role values (lower-cased) treated as user prompts.
USER_PROMPT_ROLES = {"user", "human"}
# Content block "type" values whose "text" field is collected.
TEXT_BLOCK_TYPES = {"text", "input_text"}
91
-
92
-
93
def push_bounded(
    stack: list[tuple[Any, int]],
    items: Iterable[Any],
    depth: int,
    *,
    visited: int,
    max_nodes: int,
) -> bool:
    """Append ``(item, depth)`` pairs to *stack* within the remaining node budget.

    The budget is ``max_nodes`` minus the nodes already visited and the entries
    already waiting on the stack. Returns True when the budget was exhausted
    (before or during the push) and False when every item fitted.
    """
    remaining = max(0, max_nodes - visited - len(stack))
    if remaining <= 0:
        return True
    for index, child in enumerate(items):
        if index >= remaining:
            # At least one child had to be dropped.
            return True
        stack.append((child, depth))
    return False
114
-
115
-
116
@dataclass(frozen=True)
class PromptSegmentSample:
    """Immutable per-record prompt sample; stores segment hashes, not raw text."""

    # Hashes of the leading prompt segments of the record.
    prefix_hashes: tuple[str, ...]
    # Hashes of the trailing prompt segments of the record.
    tail_hashes: tuple[str, ...]
    # Number of segments extracted from the record.
    segment_count: int
    # Bytes of sanitized prompt text that were sampled.
    bytes_sampled: int
    # Number of secret-pattern replacements made while sanitizing.
    redactions: int
123
-
124
-
125
@dataclass
class RecordUsage:
    """Usage extracted from a single transcript record."""

    # Token totals keyed by bucket name.
    tokens: Counter[str] = field(default_factory=Counter)
    # Sum of the cost fields found in the record.
    cost_usd: float = 0.0
    # Command labels seen in the record.
    commands: set[str] = field(default_factory=set)
    # Tool-name labels seen in the record.
    tools: set[str] = field(default_factory=set)
131
-
132
-
133
@dataclass
class PromptCacheAudit:
    """Bounded collection of hashed prompt samples across transcript records."""

    sampled_records: int = 0
    analyzed_prompt_records: int = 0
    capped_records: int = 0
    prompt_collection_capped_records: int = 0
    total_segments: int = 0
    total_bytes_sampled: int = 0
    redacted_segments: int = 0
    samples: list[PromptSegmentSample] = field(default_factory=list)

    def observe(self, root: Any) -> None:
        """Sample one record's prompt segments and fold them into the counters.

        Every call counts as a sampled record. Records without prompt segments
        are ignored; once ``PROMPT_AUDIT_MAX_RECORDS`` samples are stored,
        further records are only counted as capped.
        """
        self.sampled_records += 1
        segments, sampled_bytes, redaction_count, was_capped = prompt_segments_for_record(root)
        if was_capped:
            self.prompt_collection_capped_records += 1
        if not segments:
            return
        if len(self.samples) >= PROMPT_AUDIT_MAX_RECORDS:
            self.capped_records += 1
            return

        segment_total = len(segments)
        self.analyzed_prompt_records += 1
        self.total_segments += segment_total
        self.total_bytes_sampled += sampled_bytes
        self.redacted_segments += redaction_count

        # Only hashes of the first and last few segments are retained.
        head = segments[:PROMPT_AUDIT_PREFIX_SEGMENTS]
        tail = segments[-PROMPT_AUDIT_TAIL_SEGMENTS:]
        sample = PromptSegmentSample(
            prefix_hashes=tuple(stable_hash(part, PROMPT_SEGMENT_HASH_CHARS) for part in head),
            tail_hashes=tuple(stable_hash(part, PROMPT_SEGMENT_HASH_CHARS) for part in tail),
            segment_count=segment_total,
            bytes_sampled=sampled_bytes,
            redactions=redaction_count,
        )
        self.samples.append(sample)
165
-
166
-
167
@dataclass
class UsageSummary:
    """Running totals accumulated while scanning transcript records."""

    files: int = 0
    records: int = 0
    skipped_files: int = 0
    skipped_records: int = 0
    parse_errors: list[str] = field(default_factory=list)
    tokens: Counter[str] = field(default_factory=Counter)
    cost_usd: float = 0.0
    by_model: dict[str, Counter[str]] = field(default_factory=lambda: defaultdict(Counter))
    by_query_source: dict[str, Counter[str]] = field(default_factory=lambda: defaultdict(Counter))
    by_file: Counter[str] = field(default_factory=Counter)
    cost_by_file: Counter[str] = field(default_factory=Counter)
    by_command: Counter[str] = field(default_factory=Counter)
    by_tool: Counter[str] = field(default_factory=Counter)
    token_field_presence: Counter[str] = field(default_factory=Counter)
    cost_field_count: int = 0
    cache_record_timestamps: list[_dt.datetime] = field(default_factory=list)
    positive_cache_record_timestamps: list[_dt.datetime] = field(default_factory=list)
    prompt_cache_audit: PromptCacheAudit = field(default_factory=PromptCacheAudit)
    cache_friendliness_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
    cache_diagnostics_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)
    cache_layout_advice_cache: dict[str, Any] | None = field(default=None, init=False, repr=False)

    @property
    def total_tokens(self) -> int:
        """Sum of every token bucket counted so far."""
        total = 0
        for amount in self.tokens.values():
            total += amount
        return total

    @property
    def cache_hit_rate(self) -> float:
        """Input-side share of cache reads.

        Computed as cache_read / (input + cache_read + cache_creation).
        Because cache_creation is part of the denominator, a session that has
        just created a new prefix can show a low ratio. Read this as an
        input-cost savings indicator rather than a classic hit rate (hits over
        the cacheable pool). Returns 0.0 when the denominator is 0.
        """
        read = self.tokens.get("cache_read", 0)
        created = self.tokens.get("cache_creation", 0)
        fresh = self.tokens.get("input", 0)
        denominator = read + created + fresh
        if denominator > 0:
            return read / denominator
        return 0.0

    @property
    def cache_amortization(self) -> float:
        """cache_read / cache_creation: approximate average reuse multiple in tokens.

        The ratio is undefined when cache_creation == 0, so 0.0 is returned and
        ``cache_amortization_defined`` is exposed alongside to signal that. If
        the same prefix is reused N times without changing length the token
        ratio is roughly N; when the prefix length varies, this is a
        token-ratio approximation, not an exact call count.
        """
        created = self.tokens.get("cache_creation", 0)
        read = self.tokens.get("cache_read", 0)
        if created > 0:
            return read / created
        return 0.0

    @property
    def cache_amortization_defined(self) -> bool:
        """Whether ``cache_amortization`` is meaningful (cache_creation > 0)."""
        created = self.tokens.get("cache_creation", 0)
        return created > 0

    def note_error(self, message: str) -> None:
        """Keep *message* as an example unless the example cap is already reached."""
        if len(self.parse_errors) >= MAX_ERROR_EXAMPLES:
            return
        self.parse_errors.append(message)
230
-
231
-
232
def iter_jsonl_files(paths: Iterable[str]) -> Iterable[Path]:
    """Yield transcript candidates (``.jsonl`` / ``.json``) for each input path.

    A file argument is yielded if it has one of the two suffixes; a directory
    argument is searched recursively. Arguments that are neither are skipped.

    Symlink candidates are yielded as-is without being deduplicated. Other
    candidates are resolved, dropped if they fall outside the scanned scope,
    and deduplicated by resolved path across all arguments.

    Arguments or candidates that cannot be resolved (for example a symlink
    loop) are skipped instead of aborting the whole scan.
    """
    seen: set[Path] = set()
    for raw in paths:
        try:
            path = Path(raw).expanduser()
            is_dir = path.is_dir()
            if not is_dir and not (path.is_file() and path.suffix in {".jsonl", ".json"}):
                continue
            # Resolve only after the type check so unusable arguments are
            # skipped the same way as missing ones.
            root = path.resolve()
        except (OSError, RuntimeError):
            continue
        # Containment base is fixed per argument; compute it once.
        scope = root if is_dir else root.parent
        candidates: Iterable[Path]
        if is_dir:
            candidates = (
                candidate
                for pattern in ("*.jsonl", "*.json")
                for candidate in path.rglob(pattern)
            )
        else:
            candidates = [path]
        for candidate in candidates:
            if candidate.is_symlink():
                # The scanner opens candidates with O_NOFOLLOW and will skip
                # this path. Do not let a rejected link reserve its target's
                # dedupe key and suppress a later real transcript in scope.
                yield candidate
                continue
            try:
                resolved = candidate.resolve()
                resolved.relative_to(scope)
            except (OSError, RuntimeError, ValueError):
                # Unresolvable, or resolves outside the scanned scope.
                continue
            if resolved in seen:
                continue
            seen.add(resolved)
            yield candidate
264
-
265
-
266
def walk(obj: Any) -> Iterable[dict[str, Any]]:
    """Yield every dict reachable from *obj* through dict values and lists.

    Traversal is iterative (explicit LIFO stack), so nesting depth does not
    consume interpreter recursion. Non-container values are ignored.
    """
    pending = [obj]
    while pending:
        node = pending.pop()
        if isinstance(node, list):
            pending.extend(node)
            continue
        if isinstance(node, dict):
            yield node
            pending += node.values()
275
-
276
-
277
def first_string(obj: dict[str, Any], keys: Iterable[str]) -> str | None:
    """Return the first string found under *keys* in *obj*, or None.

    For each key in order: a string value is returned directly. A dict value is
    probed for a non-empty string under ``"id"`` and then ``"name"``. Each
    nested key is checked on its own, so a non-string ``id`` (for example a
    numeric one) no longer hides a usable ``name``.
    """
    for key in keys:
        val = obj.get(key)
        if isinstance(val, str):
            return val
        if isinstance(val, dict):
            for nested_key in ("id", "name"):
                nested = val.get(nested_key)
                if isinstance(nested, str) and nested:
                    return nested
    return None
287
-
288
-
289
MAX_METRIC_VALUE = 10**18


def finite_nonnegative_number(value: Any, *, clamp_negative: bool) -> int | float | None:
    """Validate a metric value and clamp it into ``[0, MAX_METRIC_VALUE]``.

    Returns None for booleans, non-numeric values, and non-finite floats.
    Negative numbers become zero when *clamp_negative* is true and are
    rejected (None) otherwise. The int/float type of the input is preserved.
    """
    # bool is an int subclass, so it has to be rejected before the numeric test.
    if isinstance(value, bool) or not isinstance(value, (int, float)):
        return None
    is_integer = isinstance(value, int)
    if not is_integer and not math.isfinite(value):
        return None
    if value < 0:
        if not clamp_negative:
            return None
        return 0 if is_integer else 0.0
    ceiling = MAX_METRIC_VALUE if is_integer else float(MAX_METRIC_VALUE)
    return min(value, ceiling)
304
-
305
-
306
def parse_timestamp_value(value: Any) -> _dt.datetime | None:
    """Convert an ISO-8601 string or epoch number into an aware UTC datetime.

    Strings are parsed with ``datetime.fromisoformat`` (a trailing ``Z`` is
    accepted); naive results are taken to be UTC. Numbers are treated as epoch
    seconds, or as milliseconds when larger than 10_000_000_000. Anything that
    cannot be interpreted yields None.
    """
    utc = _dt.timezone.utc
    if not isinstance(value, str):
        metric = finite_nonnegative_number(value, clamp_negative=False)
        if metric is None:
            return None
        epoch = float(metric)
        if epoch > 10_000_000_000:
            # Too large for seconds; interpret as milliseconds.
            epoch = epoch / 1000.0
        try:
            return _dt.datetime.fromtimestamp(epoch, tz=utc)
        except (OverflowError, OSError, ValueError):
            return None

    text = value.strip()
    if not text:
        return None
    if text.endswith("Z"):
        text = text[:-1] + "+00:00"
    try:
        parsed = _dt.datetime.fromisoformat(text)
    except ValueError:
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=utc)
    return parsed.astimezone(utc)
328
-
329
-
330
def record_timestamp(root: Any) -> _dt.datetime | None:
    """Return the first parseable timestamp on a record, or None.

    Keys from ``TIMESTAMP_KEYS`` are tried on the record itself first and then
    on its ``"message"`` dict, if there is one. Non-dict records have no
    timestamp.
    """
    if not isinstance(root, dict):
        return None
    scopes: list[dict[str, Any]] = [root]
    message = root.get("message")
    if isinstance(message, dict):
        scopes.append(message)
    for scope in scopes:
        for key in TIMESTAMP_KEYS:
            if key not in scope:
                continue
            parsed = parse_timestamp_value(scope.get(key))
            if parsed is not None:
                return parsed
    return None
346
-
347
-
348
def normalize_token_bucket(raw: str) -> str:
    """Map a token-type spelling to its canonical bucket; unknown names pass through."""
    try:
        return TOKEN_TYPE_ALIASES[raw]
    except KeyError:
        return raw
350
-
351
-
352
def stable_token_counter(tokens: Counter[str]) -> dict[str, int]:
    """Return the non-zero known buckets of *tokens* as a dict in sorted key order."""
    ordered: dict[str, int] = {}
    for bucket in sorted(KNOWN_TOKEN_BUCKETS):
        if tokens.get(bucket, 0) != 0:
            ordered[bucket] = tokens[bucket]
    return ordered
354
-
355
-
356
def stable_token_presence(presence: Counter[str]) -> dict[str, int]:
    """Return the positive known-bucket presence counts in sorted key order."""
    ordered: dict[str, int] = {}
    for bucket in sorted(KNOWN_TOKEN_BUCKETS):
        if presence.get(bucket, 0) > 0:
            ordered[bucket] = presence[bucket]
    return ordered
358
-
359
-
360
def add_token_groups(local_tokens: Counter[str], d: dict[str, Any]) -> set[str]:
    """Add token counts found on *d* into *local_tokens*; return the buckets seen.

    For each bucket only the first key spelling holding a usable number is
    counted; negative values are clamped to zero.
    """
    seen: set[str] = set()
    for bucket, aliases in TOKEN_KEY_GROUPS:
        for alias in aliases:
            amount = finite_nonnegative_number(d.get(alias), clamp_negative=True)
            if amount is None:
                continue
            local_tokens[bucket] += int(amount)
            seen.add(bucket)
            # One spelling per bucket is enough; avoid double counting.
            break
    return seen
371
-
372
-
373
def sanitize_label(value: str, limit: int = 120) -> str:
    """Collapse whitespace, redact secret-looking substrings, and cap the length.

    Labels longer than *limit* are cut and suffixed with ``" ...[truncated]"``
    (15 characters, which the cut position accounts for).
    """
    collapsed = " ".join(value.split())
    redacted = SECRET_VALUE_RE.sub("[REDACTED]", collapsed)
    if len(redacted) <= limit:
        return redacted
    return redacted[: limit - 15].rstrip() + " ...[truncated]"
379
-
380
-
381
def stable_hash(value: str, length: int = 12) -> str:
    """Return the first *length* hex characters of the SHA-256 of *value*.

    The text is encoded as UTF-8 with unencodable characters replaced.
    """
    digest = hashlib.sha256()
    digest.update(value.encode("utf-8", errors="replace"))
    return digest.hexdigest()[:length]
383
-
384
-
385
def truncate_utf8(text: str, max_bytes: int) -> tuple[str, bool]:
    """Limit *text* to *max_bytes* of UTF-8; return ``(text, was_truncated)``.

    A multi-byte character split by the cut is dropped rather than left
    half-encoded.
    """
    encoded = text.encode("utf-8", errors="replace")
    if len(encoded) > max_bytes:
        return encoded[:max_bytes].decode("utf-8", errors="ignore"), True
    return text, False
390
-
391
-
392
def collect_content_text(value: Any, out: list[str]) -> bool:
    """Collect allowlisted text blocks into *out* without recursive descent.

    Strings are appended when non-blank. Lists are expanded in order. Dicts
    contribute their ``"text"`` (directly when their ``"type"`` is one of
    ``TEXT_BLOCK_TYPES``) and their ``"content"`` value.

    Returns True when collection hit a bounded traversal cap (node budget,
    depth, or the maximum number of text values). Deep or very broad transcript
    shapes should downgrade cache-friendliness evidence instead of crashing the
    whole audit.
    """
    capped = False
    visited = 0
    stack: list[tuple[Any, int]] = [(value, 0)]
    while stack and len(out) < PROMPT_AUDIT_MAX_TEXT_VALUES:
        current, depth = stack.pop()
        visited += 1
        if visited > PROMPT_AUDIT_MAX_CONTENT_NODES or depth > PROMPT_AUDIT_MAX_DEPTH:
            capped = True
            break
        if isinstance(current, str):
            if current.strip():
                out.append(current)
            continue
        if isinstance(current, list):
            if depth >= PROMPT_AUDIT_MAX_DEPTH:
                capped = True
                continue
            # Push reversed so that popping restores the original list order.
            capped = push_bounded(
                stack,
                reversed(current),
                depth + 1,
                visited=visited,
                max_nodes=PROMPT_AUDIT_MAX_CONTENT_NODES,
            ) or capped
            continue
        if not isinstance(current, dict):
            continue
        block_type = current.get("type")
        # "type" comes from untrusted JSON and may be a list or dict; testing an
        # unhashable value for set membership would raise TypeError, so only
        # strings are compared against the allowlist.
        if (
            isinstance(block_type, str)
            and block_type in TEXT_BLOCK_TYPES
            and isinstance(current.get("text"), str)
        ):
            stack.append((current.get("text"), depth + 1))
            continue
        if depth >= PROMPT_AUDIT_MAX_DEPTH:
            capped = True
            continue
        if "content" in current:
            capped = push_bounded(
                stack,
                (current.get("content"),),
                depth + 1,
                visited=visited,
                max_nodes=PROMPT_AUDIT_MAX_CONTENT_NODES,
            ) or capped
        if isinstance(current.get("text"), str):
            capped = push_bounded(
                stack,
                (current.get("text"),),
                depth + 1,
                visited=visited,
                max_nodes=PROMPT_AUDIT_MAX_CONTENT_NODES,
            ) or capped
    # Leftover work or a full output list both mean something was not collected.
    if stack or len(out) >= PROMPT_AUDIT_MAX_TEXT_VALUES:
        capped = True
    return capped
452
-
453
-
454
def extract_prompt_texts(root: Any) -> tuple[list[str], bool]:
    """Best-effort prompt text extraction from allowlisted user/prompt shapes.

    Walks *root* iteratively. A dict whose ``"role"`` is one of
    ``USER_PROMPT_ROLES`` contributes its ``"content"``, ``"text"`` and
    ``"prompt"`` values and is not descended into further. Any other dict
    contributes a non-blank string ``"prompt"`` and has its values walked.

    Returns ``(texts, capped)`` where *capped* is True when a node, depth, or
    text-count bound cut the collection short.
    """
    texts: list[str] = []
    capped = False
    visited = 0
    stack: list[tuple[Any, int]] = [(root, 0)]
    while stack and len(texts) < PROMPT_AUDIT_MAX_TEXT_VALUES:
        current, depth = stack.pop()
        visited += 1
        if visited > PROMPT_AUDIT_MAX_ROOT_NODES or depth > PROMPT_AUDIT_MAX_DEPTH:
            capped = True
            break
        if isinstance(current, dict):
            role = current.get("role")
            # Non-string roles are treated as "no role".
            role_text = str(role).lower() if isinstance(role, str) else ""
            if role_text in USER_PROMPT_ROLES:
                if "content" in current:
                    capped = collect_content_text(current.get("content"), texts) or capped
                if isinstance(current.get("text"), str):
                    capped = collect_content_text(current.get("text"), texts) or capped
                if isinstance(current.get("prompt"), str):
                    capped = collect_content_text(current.get("prompt"), texts) or capped
                # Role-scoped content was handled above; do not re-walk it and
                # risk duplicating text blocks.
                continue
            prompt = current.get("prompt")
            if isinstance(prompt, str) and prompt.strip():
                texts.append(prompt)
            if depth >= PROMPT_AUDIT_MAX_DEPTH:
                capped = True
                continue
            capped = push_bounded(
                stack,
                current.values(),
                depth + 1,
                visited=visited,
                max_nodes=PROMPT_AUDIT_MAX_ROOT_NODES,
            ) or capped
        elif isinstance(current, list):
            if depth >= PROMPT_AUDIT_MAX_DEPTH:
                capped = True
                continue
            # Reversed push keeps list elements in their original order on pop.
            capped = push_bounded(
                stack,
                reversed(current),
                depth + 1,
                visited=visited,
                max_nodes=PROMPT_AUDIT_MAX_ROOT_NODES,
            ) or capped
    # Unprocessed nodes or a full text list mean the extraction was truncated.
    if stack or len(texts) >= PROMPT_AUDIT_MAX_TEXT_VALUES:
        capped = True
    return texts, capped
506
-
507
-
508
def prompt_segments_for_record(root: Any) -> tuple[list[str], int, int, bool]:
    """Split a record's prompt text into sanitized, whitespace-compacted line segments.

    Returns ``(segments, bytes_sampled, redactions, collection_capped)``.
    Sampling stops once ``PROMPT_AUDIT_MAX_TEXT_BYTES`` of sanitized text or
    ``PROMPT_AUDIT_MAX_SEGMENTS_PER_RECORD`` segments have been gathered; each
    segment is limited to 512 UTF-8 bytes.
    """
    texts, collection_capped = extract_prompt_texts(root)
    segments: list[str] = []
    bytes_sampled = 0
    redactions = 0
    if not texts:
        return segments, bytes_sampled, redactions, collection_capped

    remaining = PROMPT_AUDIT_MAX_TEXT_BYTES
    for text in texts:
        if remaining <= 0 or len(segments) >= PROMPT_AUDIT_MAX_SEGMENTS_PER_RECORD:
            break
        clipped = truncate_utf8(text, remaining)[0]
        sanitized, hits = SECRET_VALUE_RE.subn("[REDACTED]", clipped)
        redactions += hits
        bytes_sampled += len(sanitized.encode("utf-8", errors="replace"))
        remaining = max(0, PROMPT_AUDIT_MAX_TEXT_BYTES - bytes_sampled)
        for raw_line in sanitized.splitlines():
            compact = " ".join(raw_line.split())
            if compact:
                segments.append(truncate_utf8(compact, 512)[0])
                if len(segments) >= PROMPT_AUDIT_MAX_SEGMENTS_PER_RECORD:
                    break
        # Fallback: keep the whole text as one segment if line splitting
        # produced nothing.
        if not segments and sanitized.strip():
            fallback = truncate_utf8(" ".join(sanitized.split()), 512)[0]
            if fallback:
                segments.append(fallback)
    return segments, bytes_sampled, redactions, collection_capped
537
-
538
-
539
def safe_resolve(path: Path) -> Path:
    """Resolve *path*, falling back to ``path.absolute()`` if resolution fails."""
    try:
        resolved = path.resolve()
    except (OSError, RuntimeError):
        resolved = path.absolute()
    return resolved
544
-
545
-
546
def path_component_contains_secret(component: str) -> bool:
    """Tell whether a path component matches the secret pattern.

    Empty components and the ``.`` / ``..`` entries never count as secrets.
    """
    if not component or component in (".", ".."):
        return False
    return SECRET_VALUE_RE.search(component) is not None
548
-
549
-
550
def sanitize_path_component(component: str) -> str:
    """Return *component* unchanged, or the redaction marker if it looks secret.

    Empty components and ``.`` / ``..`` are always passed through, because the
    secret check itself never flags them.
    """
    if path_component_contains_secret(component):
        return REDACTED_PATH_COMPONENT
    return component
556
-
557
-
558
def sanitize_path_text(path: str) -> str:
    """Redact secret-looking components of *path*; the result uses ``/`` separators."""
    components = path.replace(os.sep, "/").split("/")
    return "/".join(map(sanitize_path_component, components))
560
-
561
-
562
def display_path_hash(path: Path) -> str:
    """Return a short hash of the resolved, secret-sanitized form of *path*."""
    resolved_text = str(safe_resolve(path))
    sanitized_text = sanitize_path_text(resolved_text)
    return stable_hash(sanitized_text)
564
-
565
-
566
def path_label(path: Path, show_paths: bool = False) -> str:
    """Build the label used for *path* in reports.

    With *show_paths* the sanitized path text is returned. Otherwise the label
    is the sanitized file name (``"transcript"`` when the path has no name)
    followed by ``#path:`` and a hash of the path.
    """
    if show_paths:
        return sanitize_path_text(str(path))
    base_name = path.name or "transcript"
    name = sanitize_label(sanitize_path_component(base_name), 80)
    suffix = display_path_hash(path)
    return f"{name}#path:{suffix}"
571
-
572
-
573
def command_label(command: str, show_commands: bool = False) -> str:
    """Build the label used for a shell command in reports.

    With *show_commands* the sanitized command is returned. Otherwise the label
    is a coarse category followed by ``#cmd:`` and a hash of the sanitized
    command. The category is the executable, ``python -m <module>``, or
    ``<npm|pnpm|yarn|bun> run <script>``.
    """
    sanitized = sanitize_label(command)
    if show_commands:
        return sanitized
    try:
        argv = shlex.split(sanitized)
    except ValueError:
        # Unbalanced quotes and similar: fall back to plain whitespace splitting.
        argv = sanitized.split()

    head = argv[0] if argv else None
    if head is None:
        category = "command"
    elif head in ("python", "python3") and len(argv) >= 3 and argv[1] == "-m":
        category = " ".join(argv[:3])
    elif (
        head in ("npm", "pnpm", "yarn", "bun")
        and len(argv) >= 2
        and argv[1] in ("run", "run-script")
    ):
        # Slicing to three covers both "npm run" and "npm run <script>".
        category = " ".join(argv[:3])
    else:
        category = head
    return f"{category}#cmd:{stable_hash(sanitized)}"
590
-
591
-
592
def bounded_int(value: object, default: int, minimum: int, maximum: int) -> int:
    """Coerce *value* to int and clamp it into ``[minimum, maximum]``.

    Returns *default* (unclamped) when the value cannot be converted.
    """
    try:
        number = int(value)
    except (TypeError, ValueError, OverflowError):
        return default
    # Lower bound first, then upper bound, so the upper bound wins if the two
    # bounds are inverted.
    raised = minimum if number < minimum else number
    return maximum if raised > maximum else raised
598
-
599
-
600
def require_scan_limit(parser: argparse.ArgumentParser, option: str, value: int, maximum: int) -> int:
    """Return *value* if it lies in ``[1, maximum]``; otherwise report via ``parser.error``."""
    in_range = 1 <= value <= maximum
    if not in_range:
        parser.error(f"{option} must be between 1 and {maximum}")
    return value
604
-
605
-
606
def os_error_summary(exc: OSError) -> str:
    """Summarize an OSError as ``Class: errno=N: message``.

    Built from ``errno`` and ``strerror`` only, so raw filenames carried by
    ``str(exc)`` are not embedded.
    """
    pieces = [type(exc).__name__]
    if exc.errno is not None:
        pieces.append(f"errno={exc.errno}")
    detail = sanitize_label(str(exc.strerror or ""), 160)
    if detail:
        pieces.append(detail)
    return ": ".join(pieces)
615
-
616
-
617
@dataclass(frozen=True)
class ScanLimits:
    """Immutable size limits applied while scanning transcripts."""

    # Per-file byte limit; defaults to DEFAULT_MAX_FILE_BYTES.
    max_file_bytes: int = DEFAULT_MAX_FILE_BYTES
    # Per-line (record) byte limit; defaults to DEFAULT_MAX_LINE_BYTES.
    max_line_bytes: int = DEFAULT_MAX_LINE_BYTES
621
-
622
-
623
def open_regular_no_symlink(file: Path):
    """Open a transcript candidate only if it is still a regular non-symlink file.

    The path is inspected with ``lstat`` before opening, opened read-only with
    ``O_CLOEXEC``, ``O_NOFOLLOW`` and ``O_NONBLOCK`` where the platform offers
    them, and then compared (``samestat``) with both the pre-open ``lstat``
    and a fresh one, so a file swapped during the open is rejected.

    Returns a binary file object that owns the descriptor.

    Raises:
        OSError: ``ELOOP`` if the path is a symlink or changed while opening,
            ``EINVAL`` if it is not a regular file, or whatever ``lstat`` /
            ``os.open`` raise.
    """
    before = file.lstat()
    if stat.S_ISLNK(before.st_mode):
        raise OSError(errno.ELOOP, "transcript file must not be a symlink", str(file))
    if not stat.S_ISREG(before.st_mode):
        raise OSError(errno.EINVAL, "transcript file must be a regular file", str(file))
    flags = os.O_RDONLY
    for optional_flag in ("O_CLOEXEC", "O_NOFOLLOW", "O_NONBLOCK"):
        # Flags missing on this platform contribute 0.
        flags |= getattr(os, optional_flag, 0)
    fd = os.open(file, flags)
    try:
        opened = os.fstat(fd)
        after = file.lstat()
        if (
            not stat.S_ISREG(opened.st_mode)
            or not os.path.samestat(before, opened)
            or not os.path.samestat(after, opened)
        ):
            raise OSError(errno.ELOOP, "transcript file changed while opening", str(file))
        return os.fdopen(fd, "rb")
    except BaseException:
        # BaseException so the descriptor is also released on
        # KeyboardInterrupt / SystemExit, not only on ordinary errors.
        os.close(fd)
        raise
647
-
648
-
649
def iter_bounded_lines(handle: BinaryIO, max_line_bytes: int) -> Iterable[tuple[int, str | None]]:
    """Yield ``(line_number, text)`` pairs while bounding per-line memory.

    The handle is read in ``READ_CHUNK_BYTES`` chunks. A line whose bytes
    (including its newline) exceed *max_line_bytes* is not retained: its bytes
    are consumed up to the next newline and ``None`` is yielded in place of the
    text. Line numbers start at 1. Text is decoded as UTF-8 with replacement
    and keeps its trailing newline.
    """
    current_line = 1
    pending = bytearray()
    too_long = False
    while True:
        chunk = handle.read(READ_CHUNK_BYTES)
        if not chunk:
            break
        cursor = 0
        chunk_len = len(chunk)
        while cursor < chunk_len:
            newline_at = chunk.find(b"\n", cursor)
            has_newline = newline_at != -1
            stop = newline_at + 1 if has_newline else chunk_len
            if not too_long:
                piece = chunk[cursor:stop]
                if len(pending) + len(piece) > max_line_bytes:
                    # Drop what was buffered; the rest of this line is skipped.
                    pending.clear()
                    too_long = True
                else:
                    pending += piece
            if not has_newline:
                # The line continues in the next chunk.
                break
            if too_long:
                yield current_line, None
            else:
                yield current_line, pending.decode("utf-8", errors="replace")
            pending.clear()
            current_line += 1
            too_long = False
            cursor = stop

    # End of input: flush a final line that had no terminating newline.
    if too_long:
        yield current_line, None
    elif pending:
        yield current_line, pending.decode("utf-8", errors="replace")
692
-
693
-
694
def collect_record_hints(root: Any, show_commands: bool = False) -> tuple[set[str], set[str]]:
    """Gather command labels and tool names from every dict inside a record.

    Returns ``(commands, tools)``. Tool names are sanitized and kept only when
    they contain at most four whitespace-separated words.
    """
    commands: set[str] = set()
    tools: set[str] = set()
    for node in walk(root):
        for key in COMMAND_KEYS:
            raw = node.get(key)
            if not isinstance(raw, str) or not raw.strip():
                continue
            commands.add(command_label(raw, show_commands=show_commands))
        for key in TOOL_NAME_KEYS:
            raw = node.get(key)
            if not isinstance(raw, str) or not raw.strip():
                continue
            label = sanitize_label(raw, 80)
            # Longer values are unlikely to be a tool identifier.
            if label and len(label.split()) <= 4:
                tools.add(label)
    return commands, tools
709
-
710
-
711
def add_usage(
    summary: UsageSummary,
    root: Any,
    file: Path | None = None,
    show_paths: bool = False,
    show_commands: bool = False,
) -> RecordUsage:
    """Fold one parsed transcript record into *summary*.

    Every dict inside *root* is inspected for token fields, OpenTelemetry-style
    token metrics, and cost fields. Totals go into *summary* (overall, per
    model, per query source, per file, per command, per tool).

    Returns the ``RecordUsage`` holding this record's own tokens, cost,
    command labels and tool names.
    """
    # Record-level fallbacks for nested dicts that carry no model / source.
    root_model = None
    root_query_source = None
    parsed_timestamp = None
    if isinstance(root, dict):
        root_model = first_string(root, MODEL_KEYS)
        root_query_source = first_string(root, QUERY_SOURCE_KEYS)
        parsed_timestamp = record_timestamp(root)

    record = RecordUsage()
    cache_telemetry_present = False
    positive_cache_telemetry_present = False
    summary.prompt_cache_audit.observe(root)
    for d in walk(root):
        local_tokens: Counter[str] = Counter()
        present_buckets = add_token_groups(local_tokens, d)

        # OpenTelemetry-style records sometimes use {name, value, attributes.type}.
        name = d.get("name") or d.get("metric")
        if name == "claude_code.token.usage":
            # The metric value is taken from the first present of value/sum/count.
            value = d.get("value")
            if value is None:
                value = d.get("sum")
            if value is None:
                value = d.get("count")
            attrs = d.get("attributes") or {}
            token_type = attrs.get("type", "unknown") if isinstance(attrs, dict) else "unknown"
            metric = finite_nonnegative_number(value, clamp_negative=True)
            if metric is not None:
                bucket = normalize_token_bucket(str(token_type))
                local_tokens[bucket] += int(metric)
                present_buckets.add(bucket)

        for bucket in present_buckets:
            summary.token_field_presence[bucket] += 1
        # Presence of a cache field and a positive cache count are tracked separately.
        if "cache_read" in present_buckets or "cache_creation" in present_buckets:
            cache_telemetry_present = True
        if local_tokens.get("cache_read", 0) > 0 or local_tokens.get("cache_creation", 0) > 0:
            positive_cache_telemetry_present = True

        if local_tokens:
            summary.tokens.update(local_tokens)
            record.tokens.update(local_tokens)
            # Nested model / query source win over the record-level values.
            model = sanitize_label(first_string(d, MODEL_KEYS) or root_model or "unknown", 80)
            query_source = sanitize_label(first_string(d, QUERY_SOURCE_KEYS) or root_query_source or "unknown", 80)
            summary.by_model[model].update(local_tokens)
            summary.by_query_source[query_source].update(local_tokens)

        # Only the first usable cost key per dict is counted.
        for key in COST_KEYS:
            val = d.get(key)
            metric = finite_nonnegative_number(val, clamp_negative=False)
            if metric is not None:
                cost = float(metric)
                summary.cost_usd += cost
                record.cost_usd += cost
                summary.cost_field_count += 1
                break
    # At most one timestamp entry per record in each list.
    if parsed_timestamp is not None and cache_telemetry_present:
        summary.cache_record_timestamps.append(parsed_timestamp)
    if parsed_timestamp is not None and positive_cache_telemetry_present:
        summary.positive_cache_record_timestamps.append(parsed_timestamp)
    commands, tools = collect_record_hints(root, show_commands=show_commands)
    record.commands = commands
    record.tools = tools
    record_total = sum(record.tokens.values())
    # Command/tool counts are attributed only when the record had usage and a file.
    if file is not None and (record_total or record.cost_usd):
        file_key = path_label(file, show_paths=show_paths)
        summary.by_file[file_key] += record_total
        summary.cost_by_file[file_key] += record.cost_usd
        for command in commands:
            summary.by_command[command] += 1
        for tool in tools:
            summary.by_tool[tool] += 1
    return record
791
-
792
-
793
def parse_json_line(line: str) -> Any:
    """Decode a single JSONL record, raising the recursion limit first if needed.

    Python 3.11's json decoder can exhaust the interpreter recursion limit on
    deeply nested transcript payloads before the iterative walker ever sees
    them. The process-wide limit is therefore raised (never lowered) to
    ``JSON_PARSE_RECURSION_LIMIT`` so realistic hostile fixtures decode, while
    input that is still too deep surfaces as ``RecursionError`` for the caller
    to record as a skipped parse instead of crashing.

    Raises:
        json.JSONDecodeError: if ``line`` is not valid JSON.
        RecursionError: if nesting exceeds even the raised limit.
    """
    wanted_limit = JSON_PARSE_RECURSION_LIMIT
    if not sys.getrecursionlimit() >= wanted_limit:
        # Side effect: this changes the limit for the whole process.
        sys.setrecursionlimit(wanted_limit)
    decoded = json.loads(line)
    return decoded
801
-
802
-
803
def scan(
    paths: list[str],
    show_paths: bool = False,
    show_commands: bool = False,
    limits: ScanLimits | None = None,
) -> UsageSummary:
    """Scan every JSONL transcript under ``paths`` into one ``UsageSummary``.

    Files larger than ``limits.max_file_bytes`` and lines larger than
    ``limits.max_line_bytes`` are counted as skipped and noted on the summary
    rather than parsed. Lines that fail to decode (invalid JSON or nesting
    beyond the supported depth) are likewise counted as skipped records.
    ``OSError`` raised while handling a file marks that file as skipped and
    the scan moves on to the next one.

    Args:
        paths: Candidate locations handed to ``iter_jsonl_files``.
        show_paths: Forwarded to ``path_label`` and ``add_usage``.
        show_commands: Forwarded to ``add_usage``.
        limits: Size limits; a default ``ScanLimits()`` is used when falsy.

    Returns:
        The populated ``UsageSummary``.
    """
    if not limits:
        limits = ScanLimits()
    summary = UsageSummary()

    for file in iter_jsonl_files(paths):
        summary.files += 1
        try:
            with open_regular_no_symlink(file) as fh:
                byte_size = os.fstat(fh.fileno()).st_size
                if byte_size > limits.max_file_bytes:
                    summary.skipped_files += 1
                    summary.note_error(
                        f"{path_label(file, show_paths=show_paths)}: skipped oversized transcript file "
                        f"({byte_size} bytes > {limits.max_file_bytes})"
                    )
                    continue

                for line_no, raw in iter_bounded_lines(fh, limits.max_line_bytes):
                    # A None line marks a record that exceeded the per-line cap.
                    if raw is None:
                        summary.skipped_records += 1
                        summary.note_error(
                            f"{path_label(file, show_paths=show_paths)}:{line_no}: "
                            f"skipped oversized JSONL record (> {limits.max_line_bytes} bytes)"
                        )
                        continue

                    text = raw.strip()
                    if not text:
                        continue

                    # Both decode failures are reported the same way; only the
                    # detail text differs.
                    parse_problem = None
                    try:
                        payload = parse_json_line(text)
                    except json.JSONDecodeError as exc:
                        parse_problem = exc.msg
                    except RecursionError:
                        parse_problem = "nested JSON exceeds supported depth"
                    if parse_problem is not None:
                        summary.skipped_records += 1
                        summary.note_error(
                            f"{path_label(file, show_paths=show_paths)}:{line_no}: JSON parse error: {parse_problem}"
                        )
                        continue

                    summary.records += 1
                    add_usage(summary, payload, file, show_paths=show_paths, show_commands=show_commands)
        except OSError as exc:
            summary.skipped_files += 1
            summary.note_error(f"{path_label(file, show_paths=show_paths)}: read error: {os_error_summary(exc)}")
            continue

    return summary
851
-
852
-
853
def print_counter(title: str, counter: Counter[str], top: int) -> None:
    """Print ``title`` after a blank line, then the ``top`` most common entries.

    Each entry is one row: the key left-aligned in 24 columns, then the
    integer count right-aligned in 12 columns.
    """
    print()
    print(title)
    for name, count in counter.most_common(top):
        print("  " + format(name, "24s") + " " + format(count, "12d"))
857
-
858
-
859
def counter_json(counter: Counter[str], top: int) -> list[dict[str, Any]]:
    """Return the ``top`` most common entries as ``{"name", "value"}`` dicts."""
    entries: list[dict[str, Any]] = []
    for name, value in counter.most_common(top):
        entries.append({"name": name, "value": value})
    return entries
861
-
862
-
863
def utc_now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    current = _dt.datetime.now(tz=_dt.timezone.utc)
    # Formatting without a fractional field drops microseconds, and the
    # literal "Z" stands in for the "+00:00" UTC offset.
    return current.strftime("%Y-%m-%dT%H:%M:%SZ")
865
-
866
-
867
def availability_status(*, present: bool, skipped: bool = False, partial: bool = False) -> str:
    """Map presence flags to ``"available"``, ``"partial"`` or ``"missing"``.

    When the metric is present, ``partial`` decides between ``"partial"`` and
    ``"available"``. When it is absent, ``skipped`` decides between
    ``"partial"`` and ``"missing"``; ``partial`` is ignored in that case.
    """
    if present:
        return "partial" if partial else "available"
    return "partial" if skipped else "missing"
875
-
876
-
877
# Three-state grade for measurement evidence. It is an auxiliary axis,
# orthogonal to status (available/partial/missing), that exposes to GUIs and
# other consumers *how* a value came to be known.
EVIDENCE_OBSERVED, EVIDENCE_INFERRED, EVIDENCE_UNAVAILABLE = (
    "observed",
    "inferred",
    "unavailable",
)
882
-
883
-
884
def evidence_class(*, observed: bool, inferable: bool = False) -> str:
    """Return the three-state evidence grade: observed, inferred or unavailable.

    - observed: the value was read directly from a transcript field.
    - inferred: the value was derived from observed values by a documented
      formula (an estimate).
    - unavailable: the value cannot be determined from scan data alone.

    ``observed`` takes precedence. A value is graded inferred only when there
    is no direct observation and it is inferable; otherwise it is graded
    unavailable, which keeps the measurement conservative.
    """
    if not observed:
        return EVIDENCE_INFERRED if inferable else EVIDENCE_UNAVAILABLE
    return EVIDENCE_OBSERVED
899
-
900
-
901
def build_headroom_availability(summary: UsageSummary) -> dict[str, Any]:
    """Conservatively classify context-window headroom availability and evidence.

    Transcript JSON carries no live ``context_window`` or remaining-token
    information, so headroom can be neither observed nor inferred from a
    historical scan. The status is therefore always "missing" (matching the
    existing context entry) and the evidence grade is "unavailable". The
    ``observable_via`` field records the contract that a future surface fed a
    live statusline snapshot could upgrade this to observed.

    ``summary`` is accepted for interface symmetry; this function does not
    read it.
    """
    explanation = (
        "Transcript scans do not carry live context-window or remaining-token data, "
        "so context headroom cannot be observed or conservatively inferred from history alone."
    )
    return dict(
        status="missing",
        evidence=EVIDENCE_UNAVAILABLE,
        reason=explanation,
        observable_via="live_statusline_snapshot",
    )
918
-
919
-
920
def scan_integrity(summary: UsageSummary) -> dict[str, Any]:
    """Report whether the scan parsed everything it was given.

    The scan is "complete" only when no files or records were skipped and no
    parse errors were recorded; otherwise it is "partial". The raw counters
    are returned alongside so consumers can show the detail.
    """
    skipped_total = summary.skipped_files + summary.skipped_records
    is_complete = not (skipped_total != 0 or summary.parse_errors)
    if is_complete:
        status = "complete"
        reason = "All candidate transcript files/records were parsed within configured limits."
    else:
        status = "partial"
        reason = "Some transcript files or records were skipped; downstream GUI surfaces should label totals as partial."
    return {
        "status": status,
        "files_scanned": summary.files,
        "records_scanned": summary.records,
        "skipped_files": summary.skipped_files,
        "skipped_records": summary.skipped_records,
        "parse_error_count": len(summary.parse_errors),
        "complete": is_complete,
        "reason": reason,
    }
937
-
938
-
939
def build_metric_availability(summary: UsageSummary) -> dict[str, Any]:
    """Describe which metrics the scan actually observed and how.

    Returns one entry each for tokens, input, output, cache, cost, context
    and headroom. Each entry has a ``status`` (available/partial/missing) and
    an ``evidence`` grade; most also carry presence counts. Present metrics
    are downgraded to "partial" when any file, record or parse was skipped.
    """
    presence = summary.token_field_presence
    token_presence = stable_token_presence(presence)
    has_any_token = bool(token_presence)

    input_count = presence.get("input", 0)
    output_count = presence.get("output", 0)
    cache_read_count = presence.get("cache_read", 0)
    cache_creation_count = presence.get("cache_creation", 0)

    has_input = input_count > 0
    has_output = output_count > 0
    has_cache_read = cache_read_count > 0
    has_cache_creation = cache_creation_count > 0
    has_cache_any = has_cache_read or has_cache_creation
    # Seeing only one of the two cache fields counts as partial cache telemetry.
    cache_partial = has_cache_any and not (has_cache_read and has_cache_creation)
    skipped = bool(summary.skipped_files or summary.skipped_records or summary.parse_errors)
    has_cost = summary.cost_field_count > 0

    tokens_entry = {
        "status": availability_status(
            present=has_any_token,
            skipped=skipped and not has_any_token,
            partial=skipped and has_any_token,
        ),
        "present_fields": token_presence,
        "evidence": evidence_class(observed=has_any_token),
    }
    input_entry = {
        "status": availability_status(present=has_input, partial=skipped and has_input),
        "present_count": input_count,
        "evidence": evidence_class(observed=has_input),
    }
    output_entry = {
        "status": availability_status(present=has_output, partial=skipped and has_output),
        "present_count": output_count,
        "evidence": evidence_class(observed=has_output),
    }

    # Raw cache token counts are observed values, whereas the share/reuse
    # ratios are estimates derived from them, so the ratios are graded
    # separately as inferred.
    derived_ratios = {
        "cache_read_share": {
            "evidence": evidence_class(observed=False, inferable=has_cache_any),
            "value": summary.cache_hit_rate if has_cache_any else None,
        },
        "cache_reuse_ratio": {
            "evidence": evidence_class(observed=False, inferable=summary.cache_amortization_defined),
            "value": summary.cache_amortization if summary.cache_amortization_defined else None,
        },
    }
    cache_entry = {
        "status": availability_status(present=has_cache_any, partial=cache_partial or (skipped and has_cache_any)),
        "present_fields": {
            "cache_read": cache_read_count,
            "cache_creation": cache_creation_count,
        },
        "zero_values_observed": {
            "cache_read": has_cache_read and summary.tokens.get("cache_read", 0) == 0,
            "cache_creation": has_cache_creation and summary.tokens.get("cache_creation", 0) == 0,
        },
        "evidence": evidence_class(observed=has_cache_any),
        "derived": derived_ratios,
    }
    cost_entry = {
        "status": availability_status(present=has_cost, partial=skipped and has_cost),
        "present_count": summary.cost_field_count,
        "observed_cost_usd": summary.cost_usd,
        "evidence": evidence_class(observed=has_cost),
    }
    context_entry = {
        "status": "missing",
        "evidence": EVIDENCE_UNAVAILABLE,
        "reason": (
            "Transcript scans do not include live Claude Code context_window data. "
            "Pass a live statusline snapshot in a future surface to populate context availability."
        ),
    }

    return {
        "tokens": tokens_entry,
        "input": input_entry,
        "output": output_entry,
        "cache": cache_entry,
        "cost": cost_entry,
        "context": context_entry,
        "headroom": build_headroom_availability(summary),
    }
1005
-
1006
-
1007
def segment_stability(samples: list[PromptSegmentSample], attr: str, window: int) -> tuple[float, int, int]:
    """Average per-position hash stability across ``samples``.

    For each position ``0..window-1`` the hashes found at that index of each
    sample's ``attr`` sequence are collected; samples too short for that
    index are ignored. A position's stability is the share of its hashes
    equal to the most common one.

    Returns:
        ``(mean stability, summed distinct-hash count, positions observed)``,
        or ``(0.0, 0, 0)`` when no position had any hash.
    """
    per_position: list[float] = []
    distinct_total = 0
    for index in range(window):
        column = [
            hashes[index]
            for hashes in (getattr(sample, attr) for sample in samples)
            if len(hashes) > index
        ]
        if column:
            tally = Counter(column)
            distinct_total += len(tally)
            per_position.append(max(tally.values()) / len(column))
    if not per_position:
        return 0.0, 0, 0
    # Every observed position contributes exactly one stability value.
    return sum(per_position) / len(per_position), distinct_total, len(per_position)
1026
-
1027
-
1028
def segment_position_stats(samples: list[PromptSegmentSample], attr: str, window: int) -> list[dict[str, Any]]:
    """Return per-position stability statistics for the ``attr`` hash sequences.

    One dict is produced for each position in ``0..window-1`` that at least
    one sample reaches. It holds the position, the share of hashes equal to
    the most common one (``stability``), its complement (``volatile_share``),
    the number of distinct hashes and the number of samples that contributed.
    """
    rows: list[dict[str, Any]] = []
    for index in range(window):
        column = [
            hashes[index]
            for hashes in (getattr(sample, attr) for sample in samples)
            if len(hashes) > index
        ]
        if not column:
            continue
        tally = Counter(column)
        dominant_share = max(tally.values()) / len(column)
        rows.append(
            dict(
                position=index,
                stability=dominant_share,
                volatile_share=1.0 - dominant_share,
                unique_hashes=len(tally),
                sample_count=len(column),
            )
        )
    return rows
1048
-
1049
-
1050
def prompt_window_overlap_counts(samples: list[PromptSegmentSample]) -> tuple[int, int]:
    """Count samples whose prefix and tail windows are disjoint versus shared.

    A sample's prefix and tail windows are independent evidence only when it
    has at least ``PROMPT_AUDIT_PREFIX_SEGMENTS + PROMPT_AUDIT_TAIL_SEGMENTS``
    segments, so the two windows cannot share positions. Shorter prompts are
    still useful, but prefix-versus-tail deltas taken from overlapping
    windows are lower-confidence diagnostics.

    Returns:
        ``(non_overlapping, overlapping)`` sample counts.
    """
    segments_needed = PROMPT_AUDIT_PREFIX_SEGMENTS + PROMPT_AUDIT_TAIL_SEGMENTS
    independent_flags = [sample.segment_count >= segments_needed for sample in samples]
    independent = sum(1 for flag in independent_flags if flag)
    return independent, len(independent_flags) - independent
1066
-
1067
-
1068
def build_cache_friendliness(summary: UsageSummary) -> dict[str, Any]:
    """Build the heuristic prompt-layout cache-friendliness report for a scan.

    The report compares how much the sampled prompt-segment hashes churn in
    the prefix window versus the tail window. When the prefix churns notably
    more than the tail (by window average or at any single prefix position)
    and enough prompt records were analysed, a
    ``volatile-content-near-prefix`` finding is emitted. If no prompt samples
    exist, a reduced report with empty signals is returned instead.

    Evidence is labelled partial when anything was skipped or capped, when
    too few records were analysed, or when every sample's prefix and tail
    windows overlap.
    """
    audit = summary.prompt_cache_audit
    samples = audit.samples
    # Skipped or capped input of any kind makes the evidence incomplete.
    skipped = bool(
        summary.skipped_files
        or summary.skipped_records
        or summary.parse_errors
        or audit.capped_records
        or audit.prompt_collection_capped_records
    )
    segment_window = {
        "prefix_segments": PROMPT_AUDIT_PREFIX_SEGMENTS,
        "tail_segments": PROMPT_AUDIT_TAIL_SEGMENTS,
    }
    reuse_ratio = summary.cache_amortization if summary.cache_amortization_defined else None
    read_share = summary.cache_hit_rate

    if not samples:
        if skipped:
            empty_status, empty_confidence = "partial", "partial"
        else:
            empty_status, empty_confidence = "missing", "unavailable"
        return {
            "status": empty_status,
            "confidence": empty_confidence,
            "evidence": EVIDENCE_UNAVAILABLE,
            "heuristic": True,
            "sampled_records": audit.sampled_records,
            "analyzed_prompt_records": 0,
            "non_overlapping_prompt_records": 0,
            "overlapping_prompt_records": 0,
            "prefix_tail_windows_overlap": False,
            "prompt_collection_capped_records": audit.prompt_collection_capped_records,
            "skipped_evidence": skipped,
            "segment_window": segment_window,
            "signals": {
                "stable_prefix_share": None,
                "volatile_prefix_share": None,
                "volatile_tail_share": None,
                "cache_reuse_ratio": reuse_ratio,
                "cache_read_share": read_share,
            },
            "findings": [],
            "caveats": [
                "No allowlisted user prompt text was found in scanned transcript records; cache layout cannot be inferred.",
                "Deep or broad prompt content structures are bounded and skipped rather than recursively expanded.",
                "Provider cache token fields, when present, remain diagnostic telemetry rather than ContextGuard-caused token reduction.",
            ],
        }

    prefix_stability, prefix_unique, prefix_positions = segment_stability(
        samples, "prefix_hashes", PROMPT_AUDIT_PREFIX_SEGMENTS
    )
    tail_stability, tail_unique, tail_positions = segment_stability(
        samples, "tail_hashes", PROMPT_AUDIT_TAIL_SEGMENTS
    )
    per_position = segment_position_stats(samples, "prefix_hashes", PROMPT_AUDIT_PREFIX_SEGMENTS)
    disjoint_records, overlapping_records = prompt_window_overlap_counts(samples)
    windows_overlap = overlapping_records > 0

    volatile_prefix = 1.0 - prefix_stability
    volatile_tail = 1.0 - tail_stability
    worst_position = max(per_position, key=lambda item: item["volatile_share"], default=None)
    worst_volatile = float(worst_position["volatile_share"]) if worst_position else 0.0
    analyzed = audit.analyzed_prompt_records
    enough_records = analyzed >= PROMPT_AUDIT_MIN_RECORDS

    if skipped or not enough_records or disjoint_records == 0:
        status = "partial"
    else:
        status = "available"
    if status == "partial" or windows_overlap:
        confidence = "partial"
    else:
        confidence = "observed"

    # Churn is flagged when the prefix is volatile in absolute terms AND
    # clearly more volatile than the tail, judged either on the window
    # average or on the single worst prefix position.
    average_prefix_churn = (
        volatile_prefix >= PROMPT_PREFIX_VOLATILE_THRESHOLD
        and (volatile_prefix - volatile_tail) >= PROMPT_PREFIX_TAIL_CHURN_DELTA
    )
    early_prefix_churn = (
        worst_volatile >= PROMPT_PREFIX_VOLATILE_THRESHOLD
        and (worst_volatile - volatile_tail) >= PROMPT_PREFIX_TAIL_CHURN_DELTA
    )

    volatile_prefix_rounded = round(volatile_prefix, 4)
    volatile_tail_rounded = round(volatile_tail, 4)
    worst_volatile_rounded = round(worst_volatile, 4)

    findings: list[dict[str, Any]] = []
    if enough_records and (average_prefix_churn or early_prefix_churn):
        finding_evidence = {
            "records": analyzed,
            "non_overlapping_prompt_records": disjoint_records,
            "overlapping_prompt_records": overlapping_records,
            "prefix_tail_windows_overlap": windows_overlap,
            "confidence": confidence,
            "prefix_positions": prefix_positions,
            "tail_positions": tail_positions,
            "prefix_unique_hashes": prefix_unique,
            "tail_unique_hashes": tail_unique,
            "volatile_prefix_share": volatile_prefix_rounded,
            "volatile_tail_share": volatile_tail_rounded,
            "max_prefix_position_volatile_share": worst_volatile_rounded,
            "max_prefix_position": worst_position["position"] if worst_position else None,
            "trigger": "prefix_window_average" if average_prefix_churn else "early_prefix_position",
            "cache_creation": summary.tokens.get("cache_creation", 0),
            "cache_read": summary.tokens.get("cache_read", 0),
        }
        findings.append({
            "id": "volatile-content-near-prefix",
            "severity": "P1",
            "confidence": confidence,
            "title": "Volatile content appears near prompt prefix",
            "reason": (
                "Observed user prompt segment hashes churn much more near the prefix than in the tail window; "
                "provider cache telemetry is used only as corroborating diagnostic context."
            ),
            "action": "Move generated logs, diffs, file evidence, and run-specific context after stable instructions and reusable policy text.",
            "heuristic": True,
            "evidence": finding_evidence,
        })
    findings = findings[:PROMPT_AUDIT_MAX_FINDINGS]

    return {
        "status": status,
        "confidence": confidence,
        "evidence": EVIDENCE_OBSERVED,
        "heuristic": True,
        "sampled_records": audit.sampled_records,
        "analyzed_prompt_records": analyzed,
        "non_overlapping_prompt_records": disjoint_records,
        "overlapping_prompt_records": overlapping_records,
        "prefix_tail_windows_overlap": windows_overlap,
        "capped_records": audit.capped_records,
        "prompt_collection_capped_records": audit.prompt_collection_capped_records,
        "skipped_evidence": skipped,
        "total_segments": audit.total_segments,
        "total_bytes_sampled": audit.total_bytes_sampled,
        "redacted_segments": audit.redacted_segments,
        "segment_window": segment_window,
        "thresholds": {
            "min_records": PROMPT_AUDIT_MIN_RECORDS,
            "prefix_volatile_threshold": PROMPT_PREFIX_VOLATILE_THRESHOLD,
            "prefix_tail_churn_delta": PROMPT_PREFIX_TAIL_CHURN_DELTA,
        },
        "signals": {
            "stable_prefix_share": round(prefix_stability, 4),
            "volatile_prefix_share": volatile_prefix_rounded,
            "volatile_tail_share": volatile_tail_rounded,
            "max_prefix_position_volatile_share": worst_volatile_rounded,
            "cache_reuse_ratio": reuse_ratio,
            "cache_read_share": read_share,
        },
        "findings": findings,
        "caveats": [
            "Prompt layout findings are heuristic and based on bounded redacted user-message segment hashes, not raw prompt text or exact provider cache-prefix state.",
            "When prefix and tail segment windows overlap in short prompts, cache-friendliness findings are partial-confidence diagnostics.",
            "Deep or broad prompt content structures are bounded and make cache-friendliness evidence partial.",
            "Provider cache read/write fields are diagnostic telemetry and do not prove ContextGuard-caused token reduction.",
            "Unknown transcript prompt schemas are skipped rather than inferred aggressively.",
        ],
    }
1201
-
1202
-
1203
def cache_friendliness_for_summary(summary: UsageSummary) -> dict[str, Any]:
    """Return the cache-friendliness report, building and memoising it on first use.

    The report is stored on ``summary.cache_friendliness_cache`` so repeated
    calls for the same summary reuse one dict.
    """
    if summary.cache_friendliness_cache is not None:
        return summary.cache_friendliness_cache
    summary.cache_friendliness_cache = build_cache_friendliness(summary)
    return summary.cache_friendliness_cache
1207
-
1208
-
1209
def _cache_diagnostic_confidence(*, skipped: bool, samples: bool, has_cache: bool) -> str:
    """Pick the overall confidence label for cache diagnostics.

    Skipped evidence always yields "partial". Otherwise any prompt samples or
    cache fields yield "hypothesis", and having neither yields "unavailable".
    """
    if skipped:
        return "partial"
    return "hypothesis" if (samples or has_cache) else "unavailable"
1215
-
1216
-
1217
def build_ttl_diagnostics(summary: UsageSummary, *, has_cache_any: bool, skipped: bool) -> dict[str, Any]:
    """Describe what the timestamped cache records suggest about cache TTL reuse.

    The span between the earliest and latest record with positive cache
    tokens is bucketed as within 5 minutes, between 5 minutes and 1 hour, or
    beyond 1 hour. With fewer than two such records no span can be computed
    and the result is "unavailable". The outcome is only ever a hypothesis.

    Args:
        summary: Supplies ``cache_record_timestamps`` and
            ``positive_cache_record_timestamps``.
        has_cache_any: Whether any cache token field was seen in the scan.
        skipped: Whether any evidence was skipped; downgrades confidence to
            "partial".
    """
    all_cache_records = len(summary.cache_record_timestamps)
    positive_stamps = sorted(summary.positive_cache_record_timestamps)
    caveats = [
        "Timestamped cache telemetry records do not prove exact provider cache-prefix identity or provider cache TTL state.",
        "5-minute versus 1-hour TTL guidance is a local hypothesis unless corroborated with provider telemetry and repeated stable prefixes.",
    ]

    if len(positive_stamps) < 2:
        status = "unavailable"
        evidence = EVIDENCE_UNAVAILABLE
        confidence = "partial" if skipped else "unavailable"
        span_seconds = None
        candidate = None
        reason = (
            "Fewer than two positive timestamped cache telemetry records were observed, so TTL reuse intervals cannot be inferred."
        )
    else:
        elapsed = (positive_stamps[-1] - positive_stamps[0]).total_seconds()
        span_seconds = max(0, int(elapsed))
        if span_seconds <= 5 * 60:
            candidate = "within-5m"
        elif span_seconds <= 60 * 60:
            candidate = "between-5m-and-1h"
        else:
            candidate = "beyond-1h"
        if has_cache_any:
            status, evidence = "hypothesis", EVIDENCE_INFERRED
        else:
            status, evidence = "unavailable", EVIDENCE_UNAVAILABLE
        confidence = "partial" if skipped else "hypothesis"
        reason = (
            "Positive timestamped cache telemetry records bound the local cache-observation span, but exact provider cache TTL reuse remains a hypothesis."
        )

    return {
        "status": status,
        "evidence": evidence,
        "confidence": confidence,
        "timestamped_cache_record_count": all_cache_records,
        "positive_timestamped_cache_record_count": len(positive_stamps),
        "timestamped_cache_record_span_seconds": span_seconds,
        "candidate": candidate,
        "reason": reason,
        "interval_basis": "positive_timestamped_cache_records",
        "caveats": caveats,
    }
1255
-
1256
-
1257
def build_cache_diagnostics(summary: UsageSummary) -> dict[str, Any]:
    """Assemble the cache diagnostics report and memoise it on ``summary``.

    The report combines observed cache token fields, derived ratios,
    stable-prefix candidates and volatile "breaker" positions taken from the
    prompt-segment hash statistics, a list of cache-miss hypotheses, TTL
    diagnostics and headroom diagnostics. Everything here is a heuristic over
    local transcript data.

    The result is stored in ``summary.cache_diagnostics_cache`` and returned
    directly on later calls.
    """
    if summary.cache_diagnostics_cache is not None:
        return summary.cache_diagnostics_cache

    cache_availability = build_metric_availability(summary)["cache"]
    cache_friendliness = cache_friendliness_for_summary(summary)
    skipped = bool(
        summary.skipped_files
        or summary.skipped_records
        or summary.parse_errors
        or cache_friendliness.get("skipped_evidence")
    )

    presence = summary.token_field_presence
    has_cache_read = presence.get("cache_read", 0) > 0
    has_cache_creation = presence.get("cache_creation", 0) > 0
    has_cache_any = has_cache_read or has_cache_creation
    cache_read = summary.tokens.get("cache_read", 0)
    cache_creation = summary.tokens.get("cache_creation", 0)

    samples = summary.prompt_cache_audit.samples
    if samples:
        prefix_stats = segment_position_stats(samples, "prefix_hashes", PROMPT_AUDIT_PREFIX_SEGMENTS)
    else:
        prefix_stats = []
    confidence = _cache_diagnostic_confidence(skipped=skipped, samples=bool(samples), has_cache=has_cache_any)
    # Per-position entries inherit "partial" from the cache-friendliness report.
    position_confidence = "partial" if cache_friendliness.get("confidence") == "partial" else "hypothesis"

    # Most stable prefix positions first; only positions at or above 0.66 qualify.
    most_stable_first = sorted(prefix_stats, key=lambda item: (-item["stability"], item["position"]))
    stable_prefix_candidates: list[dict[str, Any]] = [
        {
            "position": entry["position"],
            "stability": round(float(entry["stability"]), 4),
            "volatile_share": round(float(entry["volatile_share"]), 4),
            "unique_hashes": entry["unique_hashes"],
            "sample_count": entry["sample_count"],
            "evidence": EVIDENCE_INFERRED,
            "confidence": position_confidence,
            "action": "Keep stable instructions, policies, and reusable context before run-specific evidence.",
        }
        for entry in most_stable_first[:PROMPT_AUDIT_PREFIX_SEGMENTS]
        if not entry["stability"] < 0.66
    ]

    # Reuse the trigger recorded by the first prefix-volatility finding, if any.
    breaker_trigger = "prefix_position"
    prefix_finding = next(
        (
            candidate
            for candidate in cache_friendliness.get("findings", [])
            if isinstance(candidate, dict) and candidate.get("id") == "volatile-content-near-prefix"
        ),
        None,
    )
    if prefix_finding is not None:
        finding_evidence = prefix_finding.get("evidence")
        if not isinstance(finding_evidence, dict):
            finding_evidence = {}
        breaker_trigger = str(finding_evidence.get("trigger") or breaker_trigger)

    # Most volatile prefix positions first; only positions at or above 0.34 qualify.
    most_volatile_first = sorted(prefix_stats, key=lambda item: (-item["volatile_share"], item["position"]))
    dynamic_prefix_breakers: list[dict[str, Any]] = [
        {
            "position": entry["position"],
            "trigger": breaker_trigger,
            "volatile_share": round(float(entry["volatile_share"]), 4),
            "stability": round(float(entry["stability"]), 4),
            "unique_hashes": entry["unique_hashes"],
            "sample_count": entry["sample_count"],
            "evidence": EVIDENCE_INFERRED,
            "confidence": position_confidence,
            "heuristic": True,
            "action": "Move diffs, logs, timestamps, and command output after stable reusable prompt prefixes.",
        }
        for entry in most_volatile_first
        if not entry["volatile_share"] < 0.34
    ][:PROMPT_AUDIT_MAX_FINDINGS]

    hypotheses: list[dict[str, Any]] = []
    if not has_cache_any:
        hypotheses.append({
            "id": "cache-fields-missing",
            "evidence": EVIDENCE_UNAVAILABLE,
            "confidence": "partial" if skipped else "unavailable",
            "reason": "No cache_read/cache_creation transcript fields were observed.",
            "action": "Hide cache-read UI or label cache telemetry as missing for this scan.",
        })
    if has_cache_creation and cache_creation > 0 and (not has_cache_read or cache_read == 0):
        hypotheses.append({
            "id": "cache-cold-or-prefix-changed",
            "evidence": EVIDENCE_INFERRED,
            "confidence": "hypothesis",
            "reason": "Cache creation tokens were observed without corresponding cache read tokens.",
            "action": "Check whether stable instructions changed or whether the session was cache-cold.",
        })
    # The ratio is read last so it is only consulted when both counts are positive.
    if has_cache_creation and cache_creation >= 10_000 and cache_read > 0 and summary.cache_amortization < 0.5:
        hypotheses.append({
            "id": "cache-read-low-vs-write",
            "evidence": EVIDENCE_INFERRED,
            "confidence": "hypothesis",
            "reason": "Cache reads are small relative to observed cache writes.",
            "action": "Keep reusable prompt prefixes stable across turns before changing large context blocks.",
        })
    if dynamic_prefix_breakers:
        top_breaker = dynamic_prefix_breakers[0]
        hypotheses.append({
            "id": "volatile-prefix-breakers",
            "evidence": EVIDENCE_INFERRED,
            "confidence": top_breaker["confidence"],
            "reason": "Redacted prompt segment hashes show volatile content near the prefix window.",
            "action": top_breaker["action"],
        })
    if skipped:
        hypotheses.append({
            "id": "partial-transcript-scan",
            "evidence": EVIDENCE_INFERRED,
            "confidence": "partial",
            "reason": "Some transcript files, records, or prompt structures were skipped/capped.",
            "action": "Rerun against narrower transcript paths or higher safe scan limits before making decisions.",
        })

    ttl = build_ttl_diagnostics(summary, has_cache_any=has_cache_any, skipped=skipped)
    headroom_diagnostics = dict(build_headroom_availability(summary))
    headroom_diagnostics["historical_total_tokens_are_not_headroom"] = True
    headroom_diagnostics["required_observation"] = "live_statusline_snapshot"

    if has_cache_any or samples:
        if skipped or cache_friendliness.get("status") == "partial":
            status = "partial"
        else:
            status = "available"
    elif skipped:
        status = "partial"
    else:
        status = "missing"

    diagnostics = {
        "schema_version": CACHE_DIAGNOSTICS_SCHEMA_VERSION,
        "status": status,
        "confidence": confidence,
        "evidence": EVIDENCE_INFERRED if (has_cache_any or samples) else EVIDENCE_UNAVAILABLE,
        "heuristic": True,
        "observations": {
            "cache_fields": cache_availability,
            "cache_read_tokens": cache_read,
            "cache_creation_tokens": cache_creation,
        },
        "derived_ratios": cache_availability["derived"],
        "stable_prefix_candidates": stable_prefix_candidates,
        "dynamic_prefix_breakers": dynamic_prefix_breakers,
        "cache_miss_hypotheses": hypotheses[:PROMPT_AUDIT_MAX_FINDINGS],
        "ttl_diagnostics": ttl,
        "headroom_diagnostics": headroom_diagnostics,
        "caveats": [
            "Cache diagnostics are local transcript heuristics and do not prove exact provider cache-prefix state.",
            "Provider cache read/write fields are diagnostic telemetry and do not prove ContextGuard-caused token reduction.",
            "Stable-prefix and breaker positions come from bounded redacted segment hashes, not raw prompt text.",
        ],
    }
    summary.cache_diagnostics_cache = diagnostics
    return diagnostics
1398
-
1399
-
1400
def cache_diagnostics_for_summary(summary: UsageSummary) -> dict[str, Any]:
    """Return the cache diagnostics report for ``summary``.

    Thin accessor that delegates to ``build_cache_diagnostics``.
    """
    diagnostics = build_cache_diagnostics(summary)
    return diagnostics
1402
-
1403
-
1404
def _dominant_transcript(summary: UsageSummary) -> dict[str, Any] | None:
    """Describe the transcript file that accounts for the most tokens.

    Returns ``None`` when the summary has no positive token total or no
    per-file counts. Otherwise returns the top file's token count, its share
    of all tokens rounded to 4 places, and a ``dominates`` flag that is true
    when the unrounded share is at least 20% and the count is at least 1,000.
    """
    grand_total = summary.total_tokens
    if grand_total <= 0 or not summary.by_file:
        return None
    top_tokens = summary.by_file.most_common(1)[0][1]
    # grand_total is known to be positive here, so the division is safe.
    share = top_tokens / grand_total
    is_dominant = share >= 0.20 and top_tokens >= 1_000
    return {"tokens": top_tokens, "share": round(share, 4), "dominates": is_dominant}
1414
-
1415
-
1416
def _first_dynamic_breaker(cache_diagnostics: dict[str, Any]) -> dict[str, Any] | None:
    """Return the first ``dynamic_prefix_breakers`` entry if it is a dict.

    Returns ``None`` when the list is missing or empty, or when its first
    element is not a dict.
    """
    candidates = cache_diagnostics.get("dynamic_prefix_breakers")
    if not candidates:
        return None
    head = candidates[0]
    if isinstance(head, dict):
        return head
    return None
1422
-
1423
-
1424
def build_cache_layout_advice(summary: UsageSummary) -> dict[str, Any]:
    """Build the heuristic cache-layout advice payload for ``summary``.

    The result is memoized on ``summary.cache_layout_advice_cache``; when that
    attribute is already set, the stored dict is returned unchanged.

    Args:
        summary: Usage summary whose token counters, token-field presence,
            prompt-cache audit samples and skip/parse counters are read.

    Returns:
        A dict with the schema version, status and confidence, the observed
        issue and its priority, an ``observed_summary`` of the inputs used,
        hypothesized and corroborated causes, next checks, recommended
        experiments and a fixed list of caveats.
    """
    memoized = summary.cache_layout_advice_cache
    if memoized is not None:
        return memoized

    friendliness = cache_friendliness_for_summary(summary)
    diagnostics = cache_diagnostics_for_summary(summary)

    raw_signals = friendliness.get("signals")
    signals = raw_signals if isinstance(raw_signals, dict) else {}
    breaker = _first_dynamic_breaker(diagnostics)
    dominant = _dominant_transcript(summary)
    creation_tokens = summary.tokens.get("cache_creation", 0)
    read_tokens = summary.tokens.get("cache_read", 0)

    observations = diagnostics.get("observations")
    cache_fields = observations.get("cache_fields", {}) if isinstance(observations, dict) else {}
    cache_status = cache_fields.get("status") if isinstance(cache_fields, dict) else None

    if breaker:
        breaker_position = breaker.get("position")
        breaker_volatile_share = breaker.get("volatile_share")
    else:
        # No breaker entry: the share falls back to the aggregate signal.
        breaker_position = None
        breaker_volatile_share = signals.get("max_prefix_position_volatile_share")

    presence = summary.token_field_presence
    has_cache_any = bool(presence.get("cache_read", 0) or presence.get("cache_creation", 0))
    has_prompt_samples = bool(summary.prompt_cache_audit.samples)

    status = "missing"
    confidence = "unavailable"
    if has_cache_any or has_prompt_samples:
        degraded = (
            not has_prompt_samples
            or friendliness.get("status") == "partial"
            or diagnostics.get("status") == "partial"
            or summary.skipped_files
            or summary.skipped_records
            or summary.parse_errors
        )
        if degraded:
            status = "partial"
            confidence = "partial"
        else:
            status = "available"
            confidence = "hypothesis"

    volatile_prefix_breaker = bool(
        breaker
        and creation_tokens > 0
        and (
            breaker_position in {0, 1}
            or (breaker_volatile_share or 0) >= PROMPT_PREFIX_VOLATILE_THRESHOLD
        )
    )
    long_session_dominates = bool(dominant and dominant.get("dominates"))

    observed_issue = "unknown"
    priority = "P2"
    hypothesized_causes: list[dict[str, Any]] = []
    corroborated_causes: list[dict[str, Any]] = []
    next_checks: list[dict[str, Any]] = []
    experiments: list[dict[str, Any]] = []

    # Exactly one observed issue is chosen, in this precedence order.
    if volatile_prefix_breaker:
        observed_issue = "volatile_prefix_breaker"
        if creation_tokens >= 50_000 and breaker_position in {0, 1}:
            priority = "P0"
        else:
            priority = "P1"
        hypothesized_causes.append({
            "id": "prefix-position-churn",
            "confidence": confidence,
            "evidence": EVIDENCE_INFERRED,
            "reason": (
                "A highly volatile redacted prompt segment appears in the early prefix window; "
                "this identifies a layout issue, not a confirmed source."
            ),
            "next_check": "Check whether startup context, generated evidence, or tool/MCP catalog changes are moving before stable policy.",
        })
        if diagnostics.get("stable_prefix_candidates"):
            hypothesized_causes.append({
                "id": "evidence-before-policy",
                "confidence": confidence,
                "evidence": EVIDENCE_INFERRED,
                "reason": (
                    "Stable reusable segments appear elsewhere while the early prefix churns; "
                    "check whether logs, diffs, timestamps, or file evidence precede stable instructions."
                ),
                "next_check": "Keep stable policy/instructions first and move generated run evidence later.",
            })
        next_checks.append({
            "id": "inspect-startup-context-size",
            "confidence": "hypothesis",
            "command_templates": [
                "context-guard-diet scan <repo>",
                "context-guard-diet structural-waste <repo>",
            ],
            "evidence_required_for_corroboration": (
                "Large or duplicate CLAUDE.md/AGENTS.md/GEMINI.md findings from diet output."
            ),
        })
    elif long_session_dominates:
        observed_issue = "long_session_accumulation"
        priority = "P1"
    elif creation_tokens >= 10_000 and read_tokens > 0 and summary.cache_amortization < 0.5:
        observed_issue = "low_cache_reuse"
        priority = "P1"
    elif cache_status == "missing" or not has_cache_any:
        observed_issue = "missing_cache_fields"
        priority = "P2"

    # Experiments are independent of the issue precedence above; "order" is the
    # 1-based position at which each experiment was appended.
    if long_session_dominates:
        experiments.append({
            "id": "split-long-sessions",
            "order": len(experiments) + 1,
            "priority": "P1",
            "effort": "low",
            "action": "Use /clear between unrelated tasks and /compact focus on changed files, failing tests, and remaining TODO during long work.",
            "expected_signal": "Cache creation per comparable task decreases and one transcript no longer dominates observed tokens.",
            "verification": "Re-run context-guard-audit on a comparable window and compare cache_creation, cache_amortization, and top transcript share.",
            "evidence": dominant or {},
        })
    if volatile_prefix_breaker:
        experiments.append({
            "id": "stabilize-cache-prefix",
            "order": len(experiments) + 1,
            "priority": priority,
            "effort": "medium",
            "action": "Keep stable reusable instructions/policy before volatile logs, diffs, timestamps, and generated file evidence.",
            "expected_signal": "Stable prefix share rises and volatile prefix share falls on matched audit windows.",
            "verification": "Re-run context-guard-audit --json --recommend and compare cache_layout_advice plus cache_friendliness signals.",
            "evidence": {
                "dynamic_prefix_breaker_position": breaker_position,
                "dynamic_prefix_breaker_volatile_share": breaker_volatile_share,
            },
        })
        experiments.append({
            "id": "run-context-diet-checks",
            "order": len(experiments) + 1,
            "priority": "P1",
            "effort": "low",
            "action": "Run the generated diet command templates and treat any large/duplicate context-file findings as corroborating evidence before editing instructions.",
            "expected_signal": "Diet output identifies or rules out oversized/duplicated startup context as a contributor.",
            "verification": "Record diet JSON separately; do not convert prefix-position evidence alone into a confirmed startup-context cause.",
            "command_templates": [
                "context-guard-diet scan <repo> --json > diet.json",
                "context-guard-diet structural-waste <repo> --json > structural-waste.json",
            ],
        })
    if (
        creation_tokens >= 50_000
        and summary.cache_amortization_defined
        and 1.0 <= summary.cache_amortization < 5.0
    ):
        if volatile_prefix_breaker:
            ttl_experiment_id = "defer-longer-ttl-until-prefix-stable"
        else:
            ttl_experiment_id = "evaluate-longer-ttl-after-stability-check"
        experiments.append({
            "id": ttl_experiment_id,
            "order": len(experiments) + 1,
            "priority": "P2",
            "effort": "medium",
            "action": "Treat longer TTL as secondary; first corroborate stable prefix reuse and current provider TTL/pricing behavior.",
            "expected_signal": "TTL evaluation happens only after prefix volatility is reduced or ruled out.",
            "verification": "Use timestamped cache telemetry and provider-measured billing/cost evidence; historical token totals alone are insufficient.",
        })

    # With no experiment to suggest, point at how to gather better evidence.
    if not experiments:
        if status == "partial":
            next_checks.append({
                "id": "rerun-narrower-audit",
                "confidence": "partial",
                "command_templates": ["context-guard-audit <transcript-or-project-dir> --json --recommend"],
                "evidence_required_for_corroboration": "Enough uncapped prompt/cache records to classify prefix layout.",
            })
        if observed_issue == "missing_cache_fields":
            next_checks.append({
                "id": "collect-cache-telemetry",
                "confidence": "unavailable",
                "command_templates": ["context-guard-audit ~/.claude/projects --json --recommend"],
                "evidence_required_for_corroboration": "Transcript records with cache_read/cache_creation fields.",
            })

    if summary.cache_amortization_defined:
        amortization_value = round(summary.cache_amortization, 4)
    else:
        amortization_value = None

    advice = {
        "schema_version": CACHE_LAYOUT_ADVICE_SCHEMA_VERSION,
        "status": status,
        "confidence": confidence,
        "heuristic": True,
        "observed_issue": observed_issue,
        "priority": priority,
        "observed_summary": {
            "cache_creation_tokens": creation_tokens,
            "cache_read_tokens": read_tokens,
            "cache_amortization": amortization_value,
            "stable_prefix_share": signals.get("stable_prefix_share"),
            "volatile_prefix_share": signals.get("volatile_prefix_share"),
            "volatile_tail_share": signals.get("volatile_tail_share"),
            "max_prefix_position": breaker_position,
            "max_prefix_position_volatile_share": breaker_volatile_share,
            "dominant_transcript_share": dominant.get("share") if dominant else None,
        },
        "hypothesized_causes": hypothesized_causes,
        "corroborated_causes": corroborated_causes,
        "next_checks": next_checks,
        "recommended_experiments": experiments,
        "caveats": [
            "Cache layout advice is a local transcript heuristic, not billing authority or provider-cache proof.",
            "Observed issues come from cache fields and redacted segment statistics; causes remain hypotheses until corroborated by diet/structural evidence.",
            "Generated command templates use placeholders and must not be treated as observed user commands or paths.",
            "Use matched before/after audits before making token or cost savings claims.",
        ],
    }
    summary.cache_layout_advice_cache = advice
    return advice
1614
-
1615
-
1616
def cache_layout_advice_for_summary(summary: UsageSummary) -> dict[str, Any]:
    """Return the cache-layout advice for ``summary``.

    Delegates directly to ``build_cache_layout_advice``.
    """
    advice = build_cache_layout_advice(summary)
    return advice
1618
-
1619
-
1620
def build_metric_caveats(summary: UsageSummary) -> list[str]:
    """List the caveat sentences that accompany the reported metrics.

    Six fixed caveats are always present. Up to three more are appended, in
    this order, when the summary shows no cost fields, no cache token fields,
    or any skipped files/records.

    Args:
        summary: Usage summary; ``cost_field_count``, ``token_field_presence``,
            ``skipped_files`` and ``skipped_records`` are read.

    Returns:
        A new list of caveat strings.
    """
    notes = [
        "Values are observed from local Claude Code transcript JSON/JSONL fields and are not official billing records.",
        "Claude Code transcript schemas may change; skipped files/records and parse errors reduce confidence.",
        "cache-read share is cache_read / (input + cache_read + cache_creation), not a provider billing hit-rate.",
        "reuse ratio is cache_read / cache_creation when cache_creation is non-zero; it is undefined for cache-cold sessions.",
        "each metric carries an evidence class: observed (read from transcript fields), inferred "
        "(derived via a documented formula), or unavailable (not determinable from a historical scan).",
        "context headroom is unavailable from transcript scans; it requires a live statusline snapshot to be observed.",
    ]

    presence = summary.token_field_presence
    no_cache_fields = not presence.get("cache_read") and not presence.get("cache_creation")
    scan_has_gaps = bool(summary.skipped_files or summary.skipped_records)

    if summary.cost_field_count == 0:
        notes.append("No cost fields were observed; use Claude Console or official billing exports for invoice-grade cost.")
    if no_cache_fields:
        notes.append("No cache fields were observed; hide cache UI or label cache availability as missing.")
    if scan_has_gaps:
        notes.append("Some transcript files or records were skipped, so hotspot rankings may be incomplete.")
    return notes
1637
-
1638
-
1639
- def _mac_card(
1640
- card_id: str,
1641
- title: str,
1642
- status: str,
1643
- binding_paths: list[str],
1644
- *,
1645
- required_observation: str | None = None,
1646
- ) -> dict[str, Any]:
1647
- card: dict[str, Any] = {
1648
- "id": card_id,
1649
- "title": title,
1650
- "status": status,
1651
- "binding_paths": binding_paths,
1652
- }
1653
- if required_observation:
1654
- card["required_observation"] = required_observation
1655
- return card
1656
-
1657
-
1658
def build_mac_visibility_contract(
    *,
    availability: dict[str, Any],
    integrity: dict[str, Any],
    cache_layout_advice: dict[str, Any],
) -> dict[str, Any]:
    """Build the pre-GUI macOS visibility binding contract.

    The contract is a thin index over feasibility fields that are already
    emitted: it reads only the ``status`` values of its three inputs and does
    not recompute metrics, read diagnostic summary data, or infer live
    context/headroom from historical transcript totals.

    Args:
        availability: Per-metric availability mapping; the ``status`` of the
            ``tokens``, ``context``, ``headroom``, ``cache`` and ``cost``
            entries is read, each defaulting to ``"missing"``.
        integrity: Scan-integrity mapping; ``status`` defaults to ``"partial"``.
        cache_layout_advice: Advice mapping; ``status`` defaults to
            ``"missing"``.

    Returns:
        The contract dict: schema version, surface kind, readiness, the
        top-level fields to bind to, primary cards, missing live observations,
        claim boundaries and the redaction flag.
    """

    def _metric_status(metric: str) -> str:
        # A missing or falsy entry is treated like an empty mapping.
        return str((availability.get(metric) or {}).get("status", "missing"))

    token_status = _metric_status("tokens")
    scan_status = str(integrity.get("status", "partial"))
    context_status = _metric_status("context")
    headroom_status = _metric_status("headroom")
    cache_status = _metric_status("cache")
    cost_status = _metric_status("cost")
    advice_status = str(cache_layout_advice.get("status", "missing"))

    if token_status == "available" and scan_status == "complete":
        readiness = {
            "status": "ready",
            "reason": "Transcript token totals are available and the scan completed within configured limits.",
        }
    elif token_status in {"available", "partial"}:
        readiness = {
            "status": "partial",
            "reason": "Some stable fields can be shown, but scan integrity or metric availability is partial.",
        }
    else:
        readiness = {
            "status": "missing",
            "reason": "Token totals are missing from the transcript scan; show setup or unavailable state.",
        }

    context_missing = context_status == "missing"
    headroom_missing = headroom_status == "missing"

    missing_live_observations: list[dict[str, Any]] = []
    if context_missing:
        missing_live_observations.append({
            "id": "live_context_window",
            "required_observation": "live_statusline_snapshot",
            "affects": ["context_availability", "metric_availability.context"],
            "reason": "Historical transcript scans do not include live Claude Code context_window data.",
        })
    if headroom_missing:
        missing_live_observations.append({
            "id": "live_headroom",
            "required_observation": "live_statusline_snapshot",
            "affects": ["headroom_availability", "cache_diagnostics.headroom_diagnostics"],
            "reason": "Historical transcript totals are not remaining-token or live headroom observations.",
        })

    primary_cards = [
        _mac_card(
            "source_freshness",
            "Source freshness",
            "available",
            ["source_kind", "source_freshness.status", "source_freshness.generated_at"],
        ),
        _mac_card(
            "scan_integrity",
            "Scan integrity",
            scan_status,
            [
                "scan_integrity.status",
                "scan_integrity.files_scanned",
                "scan_integrity.records_scanned",
                "scan_integrity.skipped_files",
                "scan_integrity.skipped_records",
            ],
        ),
        _mac_card(
            "token_totals",
            "Token totals",
            token_status,
            [
                "totals.total_tokens",
                "totals.tokens.input",
                "totals.tokens.output",
                "totals.tokens.cache_read",
                "totals.tokens.cache_creation",
            ],
        ),
        _mac_card(
            "cache_reuse",
            "Cache-read share and reuse ratio",
            cache_status,
            ["totals.cache_read_share", "totals.cache_reuse_ratio", "metric_availability.cache"],
        ),
        _mac_card(
            "observed_cost",
            "Observed transcript cost",
            cost_status,
            ["totals.cost_usd_observed", "metric_availability.cost"],
        ),
        _mac_card(
            "context_availability",
            "Context availability",
            context_status,
            ["context_availability", "metric_availability.context"],
            required_observation="live_statusline_snapshot" if context_missing else None,
        ),
        _mac_card(
            "headroom_availability",
            "Headroom availability",
            headroom_status,
            ["headroom_availability", "cache_diagnostics.headroom_diagnostics"],
            required_observation="live_statusline_snapshot" if headroom_missing else None,
        ),
        _mac_card(
            "cache_layout_advice",
            "Cache layout advice",
            advice_status,
            ["cache_layout_advice", "cache_friendliness", "cache_diagnostics.dynamic_prefix_breakers"],
        ),
    ]

    return {
        "schema_version": MAC_VISIBILITY_SCHEMA_VERSION,
        "surface_kind": "local_macos_visibility_contract",
        "readiness": readiness,
        "bind_to_top_level_fields": [
            "source_kind",
            "source_freshness",
            "scan_integrity",
            "metric_availability",
            "metric_caveats",
            "redaction_mode",
            "context_availability",
            "headroom_availability",
            "cache_friendliness",
            "cache_diagnostics",
            "cache_layout_advice",
            "totals",
        ],
        "diagnostic_only_fields": ["summary"],
        "primary_cards": primary_cards,
        "missing_live_observations": missing_live_observations,
        "claim_boundaries": [
            "Local transcript observations are not invoice-grade billing records.",
            "Provider cache fields are telemetry, not ContextGuard-caused token reduction and do not prove provider cache hits.",
            "Historical transcript totals do not infer live context headroom or remaining tokens.",
            "This contract does not guarantee token or cost savings.",
        ],
        "redaction_required": True,
    }
1799
-
1800
-
1801
def feasibility_json(
    summary: UsageSummary,
    top: int = 15,
    include_recommendations: bool = False,
    limits: ScanLimits | None = None,
    *,
    generated_at: str | None = None,
) -> dict[str, Any]:
    """Assemble the feasibility report around the legacy summary payload.

    Args:
        summary: Usage summary to report on.
        top: Forwarded to ``summary_json``.
        include_recommendations: Forwarded to ``summary_json``.
        limits: Forwarded to ``summary_json``.
        generated_at: Timestamp string to stamp the report with; when falsy,
            ``utc_now_iso()`` supplies it.

    Returns:
        A dict with the stable top-level feasibility fields (listed under
        ``consumer_contract.stable_top_level_fields``) plus the ``summary_json``
        payload under ``"summary"``.
    """
    if not generated_at:
        generated_at = utc_now_iso()

    legacy_summary = summary_json(
        summary,
        top,
        include_recommendations=include_recommendations,
        limits=limits,
    )
    availability = build_metric_availability(summary)
    integrity = scan_integrity(summary)
    stable_tokens = stable_token_counter(summary.tokens)
    stable_total_tokens = sum(stable_tokens.values())
    friendliness = cache_friendliness_for_summary(summary)
    diagnostics = cache_diagnostics_for_summary(summary)
    layout_advice = cache_layout_advice_for_summary(summary)
    mac_visibility = build_mac_visibility_contract(
        availability=availability,
        integrity=integrity,
        cache_layout_advice=layout_advice,
    )
    caveats = build_metric_caveats(summary)

    consumer_contract = {
        "stable_top_level_fields": [
            "schema_version",
            "producer",
            "generated_at",
            "source_kind",
            "source_freshness",
            "scan_integrity",
            "metric_availability",
            "metric_caveats",
            "redaction_mode",
            "context_availability",
            "headroom_availability",
            "cache_friendliness",
            "cache_diagnostics",
            "cache_layout_advice",
            "mac_visibility",
            "totals",
        ],
        "diagnostic_fields": ["summary"],
        "summary_contract": (
            "summary is the legacy audit JSON payload for diagnostics and backward compatibility; "
            "new GUI prototypes should bind to stable top-level feasibility fields first."
        ),
    }
    source_freshness = {
        "status": "snapshot_at_scan_time",
        "live": False,
        "generated_at": generated_at,
        "description": "Local transcript files were scanned when this report was generated; this is not a live statusline snapshot.",
    }
    redaction_mode = {
        "paths": "basename_plus_stable_hash_by_default",
        "commands": "command_category_plus_stable_hash_by_default",
        "secret_like_values": "pattern_redacted",
        "raw_path_and_command_flags": ["--show-paths", "--show-commands"],
    }
    # The reuse ratio is reported only when the summary marks it as defined.
    reuse_ratio = summary.cache_amortization if summary.cache_amortization_defined else None
    totals = {
        "total_tokens": stable_total_tokens,
        "tokens": stable_tokens,
        "cost_usd_observed": summary.cost_usd,
        "cache_read_share": summary.cache_hit_rate,
        "cache_reuse_ratio": reuse_ratio,
    }

    return {
        "schema_version": FEASIBILITY_SCHEMA_VERSION,
        "producer": FEASIBILITY_PRODUCER,
        "generated_at": generated_at,
        "consumer_contract": consumer_contract,
        "source_kind": "historical_transcript_scan",
        "source_freshness": source_freshness,
        "scan_integrity": integrity,
        "metric_availability": availability,
        "metric_caveats": caveats,
        "redaction_mode": redaction_mode,
        "context_availability": availability["context"],
        "headroom_availability": availability["headroom"],
        "cache_friendliness": friendliness,
        "cache_diagnostics": diagnostics,
        "cache_layout_advice": layout_advice,
        "mac_visibility": mac_visibility,
        "totals": totals,
        "summary": legacy_summary,
    }
1883
-
1884
-
1885
def recommendation(
    ident: str,
    title: str,
    reason: str,
    action: str,
    priority: str,
    evidence: dict[str, Any],
) -> dict[str, Any]:
    """Package one recommendation as a dict.

    Args:
        ident: Stored under ``"id"``.
        title: Stored under ``"title"``.
        reason: Stored under ``"reason"``.
        action: Stored under ``"action"``.
        priority: Stored under ``"priority"``.
        evidence: Stored under ``"evidence"`` (the same object, not a copy).

    Returns:
        A dict whose keys are, in order: ``id``, ``priority``, ``title``,
        ``reason``, ``action``, ``evidence``.
    """
    return dict(
        id=ident,
        priority=priority,
        title=title,
        reason=reason,
        action=action,
        evidence=evidence,
    )
1901
-
1902
-
1903
def build_recommendations(summary: UsageSummary, top: int) -> list[dict[str, Any]]:
    """Derive the ordered list of recommendations for ``summary``.

    When no tokens were observed, a single ``no-usage-found`` entry is
    returned. Otherwise each rule below is checked independently and appends at
    most one entry, in this order: cache-prefix stabilization, volatile content
    near the prefix, output-heavy usage, input-heavy usage, low cache reuse,
    TTL evaluation, cache-discount separation, test-command summary, dominant
    transcript, dominant model, and scan gaps.

    Args:
        summary: Usage summary to inspect.
        top: How many of the most common commands are examined for a test
            runner marker.

    Returns:
        A list of recommendation dicts as produced by ``recommendation``, some
        with extra ``heuristic``/``confidence`` keys.
    """
    found: list[dict[str, Any]] = []
    total = max(0, summary.total_tokens)
    if total == 0:
        found.append(recommendation(
            "no-usage-found",
            "No token usage found in scanned transcripts",
            "The scanner did not find recognizable Claude Code usage fields.",
            "Verify the transcript path or run again against ~/.claude/projects after more Claude Code activity.",
            "P2",
            {"files_scanned": summary.files, "records": summary.records},
        ))
        return found

    output_tokens = summary.tokens.get("output", 0)
    input_tokens = summary.tokens.get("input", 0)
    cache_creation = summary.tokens.get("cache_creation", 0)
    cache_read = summary.tokens.get("cache_read", 0)
    output_ratio = output_tokens / total
    input_ratio = input_tokens / total
    friendliness = cache_friendliness_for_summary(summary)
    diagnostics = cache_diagnostics_for_summary(summary)
    layout_advice = cache_layout_advice_for_summary(summary)
    amortization = summary.cache_amortization
    hit_rate = summary.cache_hit_rate

    # --- cache-prefix stabilization (from cache layout advice) ---
    if layout_advice.get("observed_issue") == "volatile_prefix_breaker":
        evidence = {
            "observed_issue": layout_advice.get("observed_issue"),
            "priority": layout_advice.get("priority"),
            "confidence": layout_advice.get("confidence"),
            "cache_creation_tokens": cache_creation,
            "cache_read_tokens": cache_read,
        }
        observed_summary = layout_advice.get("observed_summary")
        if isinstance(observed_summary, dict):
            evidence.update({
                key: observed_summary.get(key)
                for key in (
                    "max_prefix_position",
                    "max_prefix_position_volatile_share",
                    "stable_prefix_share",
                    "volatile_prefix_share",
                )
            })
        rec = recommendation(
            "prioritize-cache-prefix-stabilization",
            "Prioritize cache-prefix stabilization before TTL or output trimming",
            (
                "Cache creation remains material and redacted segment statistics show a volatile early prefix; "
                "this is an experiment-prioritization signal, not a confirmed root cause."
            ),
            (
                "If one transcript dominates, split unrelated work into shorter sessions; then check startup/context "
                "size and keep stable policy before volatile logs, diffs, timestamps, and generated evidence."
            ),
            str(layout_advice.get("priority") or "P1"),
            evidence,
        )
        rec["heuristic"] = True
        rec["confidence"] = layout_advice.get("confidence")
        found.append(rec)

    # --- first "volatile-content-near-prefix" finding, if any ---
    for finding in friendliness.get("findings", []):
        if not isinstance(finding, dict) or finding.get("id") != "volatile-content-near-prefix":
            continue
        finding_confidence = finding.get("confidence")
        evidence = dict(finding.get("evidence") or {})
        evidence["heuristic"] = True
        if finding_confidence:
            evidence["confidence"] = finding_confidence
        rec = recommendation(
            "move-volatile-context-after-stable-prefix",
            "Volatile context appears before stable prompt prefix",
            str(finding.get("reason") or "Observed prompt prefix churn is higher than tail churn."),
            str(finding.get("action") or "Move run-specific context after stable instructions."),
            str(finding.get("severity") or "P1"),
            evidence,
        )
        rec["heuristic"] = True
        if finding_confidence:
            rec["confidence"] = finding_confidence
        found.append(rec)
        break

    # --- output / input hotspots ---
    if output_tokens >= 5_000 or output_ratio >= 0.35:
        found.append(recommendation(
            "trim-output-heavy-sessions",
            "Output tokens are a major hotspot",
            f"Output accounts for {output_ratio:.0%} of observed tokens.",
            "Enable/keep Bash output trimming and add runner-aware failure extraction for repeated test/build commands.",
            "P0",
            {"output_tokens": output_tokens, "total_tokens": total},
        ))
    if input_tokens >= 5_000 or input_ratio >= 0.45:
        found.append(recommendation(
            "reduce-large-reads",
            "Input tokens are a major hotspot",
            f"Input accounts for {input_ratio:.0%} of observed tokens.",
            "Prefer diff-first review, symbol-scoped reads, and large-file read guards before sending whole files to Claude.",
            "P0",
            {"input_tokens": input_tokens, "total_tokens": total},
        ))

    # --- low cache reuse ---
    if cache_creation >= 10_000 and cache_read >= 1 and amortization < 0.5:
        found.append(recommendation(
            "improve-prompt-cache-reuse",
            "Prompt cache reuse looks low",
            (
                f"Cache amortization is {amortization:.2f}x "
                f"(cache_read={cache_read}, cache_creation={cache_creation}); each cached prefix is barely re-served."
            ),
            "Keep stable instructions early, move volatile context later, and avoid editing large instruction files during active sessions.",
            "P1",
            {
                "cache_creation": cache_creation,
                "cache_read": cache_read,
                "cache_amortization": round(amortization, 4),
                "cache_hit_rate": round(hit_rate, 4),
            },
        ))

    # --- TTL evaluation, worded according to the available TTL diagnostics ---
    if cache_creation >= 50_000 and 1.0 <= amortization < 5.0:
        ttl = diagnostics.get("ttl_diagnostics") or {}
        ttl_status = str(ttl.get("status") or "unavailable")
        ttl_confidence = str(ttl.get("confidence") or "unavailable")
        ttl_candidate = ttl.get("candidate")
        ttl_span = ttl.get("timestamped_cache_record_span_seconds")
        reason_lead = (
            f"Heuristic only — cache amortization {amortization:.2f}x with "
            f"{cache_creation} write tokens"
        )
        if ttl_status != "hypothesis":
            ttl_reason = (
                f"{reason_lead}, but TTL diagnostics are {ttl_status} because this scan lacks "
                "at least two timestamped cache telemetry records."
            )
            ttl_action = (
                "Collect or inspect timestamped cache read/write evidence before evaluating a longer provider "
                "prompt-cache TTL; historical token totals alone are not TTL evidence."
            )
        elif ttl_candidate in {"between-5m-and-1h", "beyond-1h"}:
            ttl_reason = (
                f"{reason_lead}; timestamped cache telemetry spans {ttl_span} seconds "
                f"({ttl_candidate})."
            )
            ttl_action = (
                "Evaluate a longer provider prompt-cache TTL only after confirming the same stable prefix "
                "pattern in representative sessions and rechecking current provider TTL/pricing documentation."
            )
        else:
            ttl_reason = (
                f"{reason_lead}, but timestamped cache telemetry currently points to {ttl_candidate}."
            )
            ttl_action = (
                "Keep collecting timestamped cache read/write evidence; do not enable a longer TTL solely from this scan."
            )
        found.append(recommendation(
            "evaluate-1h-ttl-cache",
            "Cache writes are large; validate TTL evidence before longer TTL",
            ttl_reason,
            ttl_action,
            "P2",
            {
                "cache_creation": cache_creation,
                "cache_read": cache_read,
                "cache_amortization": round(amortization, 4),
                "cache_hit_rate": round(hit_rate, 4),
                "ttl_status": ttl_status,
                "ttl_evidence": ttl.get("evidence") or EVIDENCE_UNAVAILABLE,
                "ttl_confidence": ttl_confidence,
                "ttl_candidate": ttl_candidate,
                "timestamped_cache_record_count": ttl.get("timestamped_cache_record_count"),
                "positive_timestamped_cache_record_count": ttl.get("positive_timestamped_cache_record_count"),
                "timestamped_cache_record_span_seconds": ttl_span,
                "heuristic": True,
            },
        ))

    # --- visible cache reuse is telemetry, not token reduction ---
    if cache_read >= 10_000 and hit_rate >= 0.5:
        rec = recommendation(
            "separate-cache-discounts-from-token-reduction",
            "Provider cache reuse is visible, but it is not token reduction",
            (
                f"Cache read share is {hit_rate:.0%}; this can reduce provider input cost/latency, "
                "but the prompt content may still be sent logically and should not be counted as ContextGuard token reduction."
            ),
            (
                "Report cache_read/cache_creation separately from bytes avoided by local guards, and keep stable cached "
                "instructions before volatile evidence to preserve provider-cache eligibility."
            ),
            "P2",
            {
                "cache_read": cache_read,
                "cache_creation": cache_creation,
                "cache_hit_rate": round(hit_rate, 4),
                "cache_amortization": round(amortization, 4) if summary.cache_amortization_defined else None,
                "provider_cache_telemetry_only": True,
            },
        )
        rec["heuristic"] = True
        found.append(rec)

    # --- first test-runner command among the most common commands ---
    test_markers = ("pytest", "jest", "vitest", "go test", "cargo test", "npm test", "pnpm test", "yarn test")
    for command, record_count in summary.by_command.most_common(top):
        lowered = command.lower()
        if not any(marker in lowered for marker in test_markers):
            continue
        found.append(recommendation(
            "runner-aware-test-summary",
            "Test command appears in transcript records",
            "A test command category was observed in transcript records; token totals are session-level, not precise per-command billing.",
            "Route this command through runner-aware failure extraction so Claude sees failing test names, file:line, assertion text, and rerun commands only.",
            "P0",
            {"command_hint": command, "record_count": record_count},
        ))
        break

    # --- one transcript file dominating ---
    top_files = summary.by_file.most_common(3)
    if top_files:
        largest_file, largest_tokens = top_files[0]
        if largest_tokens >= max(1_000, total * 0.25):
            found.append(recommendation(
                "inspect-costliest-transcript",
                "One transcript file dominates observed usage",
                "A single transcript file accounts for a large share of observed tokens.",
                "Inspect this session first, then use /clear between unrelated tasks or /compact during long-running work.",
                "P1",
                {"file": largest_file, "tokens": largest_tokens, "share": round(largest_tokens / total, 3)},
            ))

    # --- one model dominating ---
    if summary.by_model:
        per_model = Counter({name: sum(tokens.values()) for name, tokens in summary.by_model.items()})
        model, model_tokens = per_model.most_common(1)[0]
        if model != "unknown" and model_tokens >= max(2_000, total * 0.5):
            found.append(recommendation(
                "route-heavy-work-by-model",
                "One model carries most observed token usage",
                "A single model dominates the observed transcript tokens.",
                "Use lower-cost/auxiliary models for broad search, logs, and first-pass summaries; reserve Claude for final reasoning and edits.",
                "P1",
                {"model": model, "tokens": model_tokens, "share": round(model_tokens / total, 3)},
            ))

    # --- scan gaps ---
    if summary.skipped_files or summary.skipped_records:
        found.append(recommendation(
            "fix-transcript-scan-gaps",
            "Some transcript data was skipped",
            "Skipped records can hide token hotspots and make recommendations less reliable.",
            "Review parse warnings and rerun with a narrower path if malformed or unrelated JSON files are mixed in.",
            "P2",
            {"skipped_files": summary.skipped_files, "skipped_records": summary.skipped_records},
        ))
    return found
2141
-
2142
-
2143
def summary_json(
    summary: UsageSummary,
    top: int = 15,
    include_recommendations: bool = False,
    limits: ScanLimits | None = None,
) -> dict[str, Any]:
    """Serialize ``summary`` into the legacy audit JSON payload.

    Args:
        summary: Usage summary to serialize.
        top: Entry count passed to ``counter_json`` for the file, command and
            tool rankings, and to ``build_recommendations``.
        include_recommendations: When true, a ``"recommendations"`` key is
            added.
        limits: Scan limits to report; a default ``ScanLimits()`` is used when
            falsy.

    Returns:
        The payload dict.
    """
    effective_limits = limits or ScanLimits()
    tokens = summary.tokens

    scan_limits = {
        "max_file_bytes": effective_limits.max_file_bytes,
        "max_line_bytes": effective_limits.max_line_bytes,
    }
    cache_metrics = {
        "cache_hit_rate": round(summary.cache_hit_rate, 4),
        "cache_amortization": round(summary.cache_amortization, 4),
        "cache_amortization_defined": summary.cache_amortization_defined,
        "cache_read_tokens": tokens.get("cache_read", 0),
        "cache_creation_tokens": tokens.get("cache_creation", 0),
        "input_tokens": tokens.get("input", 0),
    }

    payload = {
        "files": summary.files,
        "records": summary.records,
        "skipped_files": summary.skipped_files,
        "skipped_records": summary.skipped_records,
        "parse_errors": summary.parse_errors,
        "scan_limits": scan_limits,
        "total_tokens": summary.total_tokens,
        "tokens": dict(tokens),
        "cache_metrics": cache_metrics,
        "cost_usd_observed": summary.cost_usd,
        "by_model": {model: dict(counts) for model, counts in summary.by_model.items()},
        "by_query_source": {source: dict(counts) for source, counts in summary.by_query_source.items()},
        "top_files": counter_json(summary.by_file, top),
        "top_commands": counter_json(summary.by_command, top),
        "top_tools": counter_json(summary.by_tool, top),
        "cache_friendliness": cache_friendliness_for_summary(summary),
        "cache_diagnostics": cache_diagnostics_for_summary(summary),
        "cache_layout_advice": cache_layout_advice_for_summary(summary),
    }
    if include_recommendations:
        payload["recommendations"] = build_recommendations(summary, top)
    return payload
2183
-
2184
-
2185
def print_recommendations(summary: UsageSummary, top: int) -> None:
    """Print the numbered recommendations for *summary* to stdout.

    Each entry shows its priority and title, followed by the reason and the
    suggested action. When a recommendation carries evidence, it is appended
    as JSON with sorted keys and non-ASCII characters left unescaped.
    """
    print("\nRecommendations")
    numbered = enumerate(build_recommendations(summary, top), start=1)
    for position, item in numbered:
        print(f"{position}. [{item['priority']}] {item['title']}")
        print(f" reason: {item['reason']}")
        print(f" action: {item['action']}")
        evidence = item.get("evidence")
        if not evidence:
            continue
        rendered = json.dumps(evidence, ensure_ascii=False, sort_keys=True)
        print(f" evidence: {rendered}")
2193
-
2194
-
2195
def _build_arg_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the transcript usage audit."""
    parser = argparse.ArgumentParser()
    parser.add_argument("paths", nargs="*", default=[os.path.expanduser("~/.claude/projects")])
    parser.add_argument("--top", type=int, default=15)
    parser.add_argument("--json", action="store_true")
    parser.add_argument(
        "--feasibility-json",
        action="store_true",
        help="emit a GUI-consumable local metric availability report with schema, freshness, caveats, and redaction metadata",
    )
    parser.add_argument("--recommend", action="store_true", help="Print concrete token-saving recommendations")
    parser.add_argument(
        "--show-paths",
        action="store_true",
        help="Show transcript paths instead of basename+hash labels; local debugging only; secret-shaped path components remain redacted",
    )
    parser.add_argument("--show-commands", action="store_true", help="Show redacted command strings instead of command category+hash labels")
    parser.add_argument(
        "--max-file-bytes",
        type=int,
        default=DEFAULT_MAX_FILE_BYTES,
        help="skip transcript files larger than this many bytes (default: 50 MiB)",
    )
    parser.add_argument(
        "--max-line-bytes",
        type=int,
        default=DEFAULT_MAX_LINE_BYTES,
        help="skip individual JSONL records larger than this many bytes (default: 2 MiB)",
    )
    return parser


def _print_json_report(payload: dict[str, Any]) -> None:
    """Print *payload* as indented JSON with sorted keys."""
    print(json.dumps(payload, indent=2, sort_keys=True))


def _print_cache_reuse(summary: UsageSummary) -> None:
    """Print the cache hit rate, amortization, and raw cache token counts."""
    print("\nCache reuse")
    print(f" cache_hit_rate {summary.cache_hit_rate:.2%}")
    if summary.cache_amortization_defined:
        print(f" cache_amortization {summary.cache_amortization:.2f}x")
    else:
        print(" cache_amortization n/a (no cache writes observed)")
    print(f" cache_read_tokens {summary.tokens.get('cache_read', 0):12d}")
    print(f" cache_creation_tokens {summary.tokens.get('cache_creation', 0):12d}")


def _print_cache_friendliness(summary: UsageSummary) -> None:
    """Print the cache-friendliness section unless its status is "missing"."""
    cache_friendliness = cache_friendliness_for_summary(summary)
    if cache_friendliness.get("status") == "missing":
        return
    signals = cache_friendliness.get("signals", {})
    print("\nCache friendliness")
    print(f" status {cache_friendliness.get('status')}")
    print(f" heuristic {str(cache_friendliness.get('heuristic')).lower()}")
    print(f" analyzed_prompt_records {cache_friendliness.get('analyzed_prompt_records', 0):12d}")
    stable_prefix = signals.get("stable_prefix_share")
    volatile_prefix = signals.get("volatile_prefix_share")
    volatile_tail = signals.get("volatile_tail_share")
    # Shares are optional; only the ones that were computed are shown.
    if stable_prefix is not None:
        print(f" stable_prefix_share {stable_prefix:.2%}")
    if volatile_prefix is not None:
        print(f" volatile_prefix_share {volatile_prefix:.2%}")
    if volatile_tail is not None:
        print(f" volatile_tail_share {volatile_tail:.2%}")
    for finding in cache_friendliness.get("findings", []):
        if isinstance(finding, dict):
            print(f" finding [{finding.get('severity')}] {finding.get('id')}: {finding.get('title')}")


def _print_cache_diagnostics(summary: UsageSummary) -> None:
    """Print the cache-diagnostics section, showing only the first entry of each list."""
    cache_diagnostics = cache_diagnostics_for_summary(summary)
    print("\nCache diagnostics")
    print(f" status {cache_diagnostics.get('status')}")
    print(f" confidence {cache_diagnostics.get('confidence')}")
    hypotheses = cache_diagnostics.get("cache_miss_hypotheses") or []
    if hypotheses:
        first = hypotheses[0]
        print(f" top_hypothesis {first.get('id')} ({first.get('confidence')})")
    stable_candidates = cache_diagnostics.get("stable_prefix_candidates") or []
    if stable_candidates:
        first = stable_candidates[0]
        print(f" stable_prefix_candidate position={first.get('position')} stability={first.get('stability')}")
    breakers = cache_diagnostics.get("dynamic_prefix_breakers") or []
    if breakers:
        first = breakers[0]
        print(f" dynamic_prefix_breaker position={first.get('position')} volatile_share={first.get('volatile_share')}")
    ttl = cache_diagnostics.get("ttl_diagnostics") or {}
    print(f" ttl_status {ttl.get('status')} ({ttl.get('confidence')})")
    headroom = cache_diagnostics.get("headroom_diagnostics") or {}
    print(f" headroom_status {headroom.get('status')} ({headroom.get('evidence')})")


def _print_cache_layout_advice(summary: UsageSummary) -> None:
    """Print the cache-layout advice section.

    The section is omitted only when the advice status is "missing" and the
    observed issue is "unknown".
    """
    cache_layout_advice = cache_layout_advice_for_summary(summary)
    if cache_layout_advice.get("status") == "missing" and cache_layout_advice.get("observed_issue") == "unknown":
        return
    print("\nCache layout advice")
    print(f" status {cache_layout_advice.get('status')}")
    print(f" confidence {cache_layout_advice.get('confidence')}")
    print(f" observed_issue {cache_layout_advice.get('observed_issue')}")
    print(f" priority {cache_layout_advice.get('priority')}")
    experiments = cache_layout_advice.get("recommended_experiments") or []
    if experiments:
        first = experiments[0]
        print(f" first_experiment {first.get('id')} ({first.get('priority')})")
        print(f" experiment_action {first.get('action')}")
    checks = cache_layout_advice.get("next_checks") or []
    if checks:
        first = checks[0]
        print(f" next_check {first.get('id')}")
        templates = first.get("command_templates") or []
        if templates:
            print(f" command_template {templates[0]}")


def main() -> int:
    """Run the transcript usage audit command-line interface.

    Parses arguments, validates the scan limits, scans the requested paths,
    and prints one of three reports: the feasibility JSON
    (``--feasibility-json``, checked first), the summary JSON (``--json``),
    or the plain-text report. ``--recommend`` adds recommendations to
    whichever report is produced.

    Returns:
        ``0`` after the selected report has been printed.
    """
    parser = _build_arg_parser()
    args = parser.parse_args()
    limits = ScanLimits(
        max_file_bytes=require_scan_limit(parser, "--max-file-bytes", args.max_file_bytes, MAX_FILE_BYTES_LIMIT),
        max_line_bytes=require_scan_limit(parser, "--max-line-bytes", args.max_line_bytes, MAX_LINE_BYTES_LIMIT),
    )

    summary = scan(args.paths, show_paths=args.show_paths, show_commands=args.show_commands, limits=limits)

    # --feasibility-json takes precedence over --json when both are given.
    if args.feasibility_json:
        _print_json_report(
            feasibility_json(summary, args.top, include_recommendations=args.recommend, limits=limits)
        )
        return 0

    if args.json:
        _print_json_report(
            summary_json(summary, args.top, include_recommendations=args.recommend, limits=limits)
        )
        return 0

    print("Claude Code transcript usage audit")
    print(
        f"files_scanned={summary.files} records={summary.records} "
        f"skipped_files={summary.skipped_files} skipped_records={summary.skipped_records}"
    )
    print(f"scan_limits=max_file_bytes:{limits.max_file_bytes} max_line_bytes:{limits.max_line_bytes}")
    print(f"observed_total_tokens={summary.total_tokens}")
    if summary.cost_usd:
        print(f"observed_cost_usd={summary.cost_usd:.4f}")
    if summary.parse_errors:
        print("\nWarnings")
        for warning in summary.parse_errors:
            print(f" - {warning}")
    print_counter("Token buckets", summary.tokens, args.top)

    _print_cache_reuse(summary)
    _print_cache_friendliness(summary)
    _print_cache_diagnostics(summary)
    _print_cache_layout_advice(summary)

    # Collapse the per-bucket token counts into one total per model / source.
    model_totals = Counter({model: sum(tokens.values()) for model, tokens in summary.by_model.items()})
    print_counter("By model", model_totals, args.top)

    source_totals = Counter({src: sum(tokens.values()) for src, tokens in summary.by_query_source.items()})
    print_counter("By query_source", source_totals, args.top)
    print_counter("Top transcript files", summary.by_file, args.top)
    print_counter("Top command hints observed", summary.by_command, args.top)
    print_counter("Top tools observed", summary.by_tool, args.top)
    if args.recommend:
        print_recommendations(summary, args.top)
    return 0
2343
-
2344
-
2345
# Script entry point: hand main()'s return value to the interpreter as the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)