@cleocode/skills 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. package/dispatch-config.json +404 -0
  2. package/index.d.ts +178 -0
  3. package/index.js +405 -0
  4. package/package.json +14 -0
  5. package/profiles/core.json +7 -0
  6. package/profiles/full.json +10 -0
  7. package/profiles/minimal.json +7 -0
  8. package/profiles/recommended.json +7 -0
  9. package/provider-skills-map.json +97 -0
  10. package/skills/_shared/cleo-style-guide.md +84 -0
  11. package/skills/_shared/manifest-operations.md +810 -0
  12. package/skills/_shared/placeholders.json +433 -0
  13. package/skills/_shared/skill-chaining-patterns.md +237 -0
  14. package/skills/_shared/subagent-protocol-base.md +223 -0
  15. package/skills/_shared/task-system-integration.md +232 -0
  16. package/skills/_shared/testing-framework-config.md +110 -0
  17. package/skills/ct-cleo/SKILL.md +490 -0
  18. package/skills/ct-cleo/references/anti-patterns.md +19 -0
  19. package/skills/ct-cleo/references/loom-lifecycle.md +136 -0
  20. package/skills/ct-cleo/references/orchestrator-constraints.md +55 -0
  21. package/skills/ct-cleo/references/session-protocol.md +162 -0
  22. package/skills/ct-codebase-mapper/SKILL.md +82 -0
  23. package/skills/ct-contribution/SKILL.md +521 -0
  24. package/skills/ct-contribution/templates/contribution-init.json +21 -0
  25. package/skills/ct-dev-workflow/SKILL.md +423 -0
  26. package/skills/ct-docs-lookup/SKILL.md +66 -0
  27. package/skills/ct-docs-review/SKILL.md +175 -0
  28. package/skills/ct-docs-write/SKILL.md +108 -0
  29. package/skills/ct-documentor/SKILL.md +231 -0
  30. package/skills/ct-epic-architect/SKILL.md +305 -0
  31. package/skills/ct-epic-architect/references/bug-epic-example.md +172 -0
  32. package/skills/ct-epic-architect/references/commands.md +201 -0
  33. package/skills/ct-epic-architect/references/feature-epic-example.md +210 -0
  34. package/skills/ct-epic-architect/references/migration-epic-example.md +244 -0
  35. package/skills/ct-epic-architect/references/output-format.md +92 -0
  36. package/skills/ct-epic-architect/references/patterns.md +284 -0
  37. package/skills/ct-epic-architect/references/refactor-epic-example.md +412 -0
  38. package/skills/ct-epic-architect/references/research-epic-example.md +226 -0
  39. package/skills/ct-epic-architect/references/shell-escaping.md +86 -0
  40. package/skills/ct-epic-architect/references/skill-aware-execution.md +195 -0
  41. package/skills/ct-grade/SKILL.md +230 -0
  42. package/skills/ct-grade/agents/analysis-reporter.md +203 -0
  43. package/skills/ct-grade/agents/blind-comparator.md +157 -0
  44. package/skills/ct-grade/agents/scenario-runner.md +134 -0
  45. package/skills/ct-grade/eval-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
  46. package/skills/ct-grade/eval-viewer/generate_grade_review.py +1138 -0
  47. package/skills/ct-grade/eval-viewer/generate_grade_viewer.py +544 -0
  48. package/skills/ct-grade/eval-viewer/generate_review.py +283 -0
  49. package/skills/ct-grade/eval-viewer/grade-review.html +1574 -0
  50. package/skills/ct-grade/eval-viewer/viewer.html +219 -0
  51. package/skills/ct-grade/evals/evals.json +94 -0
  52. package/skills/ct-grade/references/ab-test-methodology.md +150 -0
  53. package/skills/ct-grade/references/domains.md +137 -0
  54. package/skills/ct-grade/references/grade-spec.md +236 -0
  55. package/skills/ct-grade/references/scenario-playbook.md +234 -0
  56. package/skills/ct-grade/references/token-tracking.md +120 -0
  57. package/skills/ct-grade/scripts/__pycache__/audit_analyzer.cpython-314.pyc +0 -0
  58. package/skills/ct-grade/scripts/__pycache__/run_ab_test.cpython-314.pyc +0 -0
  59. package/skills/ct-grade/scripts/__pycache__/run_all.cpython-314.pyc +0 -0
  60. package/skills/ct-grade/scripts/__pycache__/token_tracker.cpython-314.pyc +0 -0
  61. package/skills/ct-grade/scripts/audit_analyzer.py +279 -0
  62. package/skills/ct-grade/scripts/generate_report.py +283 -0
  63. package/skills/ct-grade/scripts/run_ab_test.py +504 -0
  64. package/skills/ct-grade/scripts/run_all.py +287 -0
  65. package/skills/ct-grade/scripts/setup_run.py +183 -0
  66. package/skills/ct-grade/scripts/token_tracker.py +630 -0
  67. package/skills/ct-grade-v2-1/SKILL.md +237 -0
  68. package/skills/ct-grade-v2-1/agents/analysis-reporter.md +203 -0
  69. package/skills/ct-grade-v2-1/agents/blind-comparator.md +157 -0
  70. package/skills/ct-grade-v2-1/agents/scenario-runner.md +179 -0
  71. package/skills/ct-grade-v2-1/evals/evals.json +74 -0
  72. package/skills/ct-grade-v2-1/grade-viewer/__pycache__/build_op_stats.cpython-314.pyc +0 -0
  73. package/skills/ct-grade-v2-1/grade-viewer/__pycache__/generate_grade_review.cpython-314.pyc +0 -0
  74. package/skills/ct-grade-v2-1/grade-viewer/build_op_stats.py +174 -0
  75. package/skills/ct-grade-v2-1/grade-viewer/eval-analysis.json +41 -0
  76. package/skills/ct-grade-v2-1/grade-viewer/eval-report.md +34 -0
  77. package/skills/ct-grade-v2-1/grade-viewer/generate_grade_review.py +1023 -0
  78. package/skills/ct-grade-v2-1/grade-viewer/generate_grade_viewer.py +548 -0
  79. package/skills/ct-grade-v2-1/grade-viewer/grade-review-eval.html +613 -0
  80. package/skills/ct-grade-v2-1/grade-viewer/grade-review.html +1532 -0
  81. package/skills/ct-grade-v2-1/grade-viewer/viewer.html +620 -0
  82. package/skills/ct-grade-v2-1/manifest-entry.json +31 -0
  83. package/skills/ct-grade-v2-1/references/ab-testing.md +233 -0
  84. package/skills/ct-grade-v2-1/references/domains-ssot.md +156 -0
  85. package/skills/ct-grade-v2-1/references/grade-spec-v2.md +167 -0
  86. package/skills/ct-grade-v2-1/references/playbook-v2.md +393 -0
  87. package/skills/ct-grade-v2-1/references/token-tracking.md +202 -0
  88. package/skills/ct-grade-v2-1/scripts/generate_report.py +419 -0
  89. package/skills/ct-grade-v2-1/scripts/run_ab_test.py +493 -0
  90. package/skills/ct-grade-v2-1/scripts/run_scenario.py +396 -0
  91. package/skills/ct-grade-v2-1/scripts/setup_run.py +207 -0
  92. package/skills/ct-grade-v2-1/scripts/token_tracker.py +175 -0
  93. package/skills/ct-memory/SKILL.md +84 -0
  94. package/skills/ct-orchestrator/INSTALL.md +61 -0
  95. package/skills/ct-orchestrator/README.md +69 -0
  96. package/skills/ct-orchestrator/SKILL.md +380 -0
  97. package/skills/ct-orchestrator/manifest-entry.json +19 -0
  98. package/skills/ct-orchestrator/orchestrator-prompt.txt +17 -0
  99. package/skills/ct-orchestrator/references/SUBAGENT-PROTOCOL-BLOCK.md +66 -0
  100. package/skills/ct-orchestrator/references/autonomous-operation.md +167 -0
  101. package/skills/ct-orchestrator/references/lifecycle-gates.md +98 -0
  102. package/skills/ct-orchestrator/references/orchestrator-compliance.md +271 -0
  103. package/skills/ct-orchestrator/references/orchestrator-handoffs.md +85 -0
  104. package/skills/ct-orchestrator/references/orchestrator-patterns.md +164 -0
  105. package/skills/ct-orchestrator/references/orchestrator-recovery.md +113 -0
  106. package/skills/ct-orchestrator/references/orchestrator-spawning.md +271 -0
  107. package/skills/ct-orchestrator/references/orchestrator-tokens.md +180 -0
  108. package/skills/ct-research-agent/SKILL.md +226 -0
  109. package/skills/ct-skill-creator/.cleo/.context-state.json +13 -0
  110. package/skills/ct-skill-creator/.cleo/logs/cleo.2026-03-07.1.log +24 -0
  111. package/skills/ct-skill-creator/.cleo/tasks.db +0 -0
  112. package/skills/ct-skill-creator/SKILL.md +356 -0
  113. package/skills/ct-skill-creator/agents/analyzer.md +276 -0
  114. package/skills/ct-skill-creator/agents/comparator.md +204 -0
  115. package/skills/ct-skill-creator/agents/grader.md +225 -0
  116. package/skills/ct-skill-creator/assets/eval_review.html +146 -0
  117. package/skills/ct-skill-creator/eval-viewer/__pycache__/generate_review.cpython-314.pyc +0 -0
  118. package/skills/ct-skill-creator/eval-viewer/generate_review.py +471 -0
  119. package/skills/ct-skill-creator/eval-viewer/viewer.html +1325 -0
  120. package/skills/ct-skill-creator/manifest-entry.json +17 -0
  121. package/skills/ct-skill-creator/references/dynamic-context.md +228 -0
  122. package/skills/ct-skill-creator/references/frontmatter.md +83 -0
  123. package/skills/ct-skill-creator/references/invocation-control.md +165 -0
  124. package/skills/ct-skill-creator/references/output-patterns.md +86 -0
  125. package/skills/ct-skill-creator/references/provider-deployment.md +175 -0
  126. package/skills/ct-skill-creator/references/schemas.md +430 -0
  127. package/skills/ct-skill-creator/references/workflows.md +28 -0
  128. package/skills/ct-skill-creator/scripts/__init__.py +1 -0
  129. package/skills/ct-skill-creator/scripts/__pycache__/__init__.cpython-314.pyc +0 -0
  130. package/skills/ct-skill-creator/scripts/__pycache__/aggregate_benchmark.cpython-314.pyc +0 -0
  131. package/skills/ct-skill-creator/scripts/__pycache__/generate_report.cpython-314.pyc +0 -0
  132. package/skills/ct-skill-creator/scripts/__pycache__/improve_description.cpython-314.pyc +0 -0
  133. package/skills/ct-skill-creator/scripts/__pycache__/init_skill.cpython-314.pyc +0 -0
  134. package/skills/ct-skill-creator/scripts/__pycache__/quick_validate.cpython-314.pyc +0 -0
  135. package/skills/ct-skill-creator/scripts/__pycache__/run_eval.cpython-314.pyc +0 -0
  136. package/skills/ct-skill-creator/scripts/__pycache__/run_loop.cpython-314.pyc +0 -0
  137. package/skills/ct-skill-creator/scripts/__pycache__/utils.cpython-314.pyc +0 -0
  138. package/skills/ct-skill-creator/scripts/aggregate_benchmark.py +401 -0
  139. package/skills/ct-skill-creator/scripts/generate_report.py +326 -0
  140. package/skills/ct-skill-creator/scripts/improve_description.py +247 -0
  141. package/skills/ct-skill-creator/scripts/init_skill.py +306 -0
  142. package/skills/ct-skill-creator/scripts/package_skill.py +110 -0
  143. package/skills/ct-skill-creator/scripts/quick_validate.py +97 -0
  144. package/skills/ct-skill-creator/scripts/run_eval.py +310 -0
  145. package/skills/ct-skill-creator/scripts/run_loop.py +328 -0
  146. package/skills/ct-skill-creator/scripts/utils.py +47 -0
  147. package/skills/ct-skill-validator/SKILL.md +178 -0
  148. package/skills/ct-skill-validator/agents/ecosystem-checker.md +151 -0
  149. package/skills/ct-skill-validator/assets/valid-skill-example.md +13 -0
  150. package/skills/ct-skill-validator/evals/eval_set.json +14 -0
  151. package/skills/ct-skill-validator/evals/evals.json +52 -0
  152. package/skills/ct-skill-validator/manifest-entry.json +20 -0
  153. package/skills/ct-skill-validator/references/cleo-ecosystem-rules.md +163 -0
  154. package/skills/ct-skill-validator/references/validation-rules.md +168 -0
  155. package/skills/ct-skill-validator/scripts/__init__.py +0 -0
  156. package/skills/ct-skill-validator/scripts/__pycache__/audit_body.cpython-314.pyc +0 -0
  157. package/skills/ct-skill-validator/scripts/__pycache__/check_ecosystem.cpython-314.pyc +0 -0
  158. package/skills/ct-skill-validator/scripts/__pycache__/generate_validation_report.cpython-314.pyc +0 -0
  159. package/skills/ct-skill-validator/scripts/__pycache__/validate.cpython-314.pyc +0 -0
  160. package/skills/ct-skill-validator/scripts/audit_body.py +242 -0
  161. package/skills/ct-skill-validator/scripts/check_ecosystem.py +169 -0
  162. package/skills/ct-skill-validator/scripts/check_manifest.py +172 -0
  163. package/skills/ct-skill-validator/scripts/generate_validation_report.py +442 -0
  164. package/skills/ct-skill-validator/scripts/validate.py +422 -0
  165. package/skills/ct-spec-writer/SKILL.md +189 -0
  166. package/skills/ct-stickynote/README.md +14 -0
  167. package/skills/ct-stickynote/SKILL.md +46 -0
  168. package/skills/ct-task-executor/SKILL.md +296 -0
  169. package/skills/ct-validator/SKILL.md +216 -0
  170. package/skills/manifest.json +469 -0
  171. package/skills.json +281 -0
@@ -0,0 +1,630 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ token_tracker.py — Three-layer token estimation chain for ct-grade v3.
4
+
5
+ Three estimation layers (tried in order, first success wins):
6
+ Layer 1 — OTel (REAL): Read ~/.cleo/metrics/otel/ telemetry
7
+ Layer 2 — chars/4 (ESTIMATED): response_chars / 4 approximation
8
+ Layer 3 — Coarse (COARSE): entry_count × op_type_average
9
+
10
+ Usage:
11
+ python scripts/token_tracker.py --run-dir ./ab-results/run-001
12
+ python scripts/token_tracker.py --grades-file .cleo/metrics/GRADES.jsonl
13
+ python scripts/token_tracker.py --run-dir ./ab-results/run-001 \\
14
+ --grades-file .cleo/metrics/GRADES.jsonl \\
15
+ --project-dir . \\
16
+ --output token-summary.json
17
+ """
18
+
19
+ import argparse
20
+ import json
21
+ import math
22
+ import os
23
+ import sys
24
+ from datetime import datetime, timezone
25
+ from pathlib import Path
26
+
27
+
28
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------

# Confidence label attached to each estimation layer's result.
CONFIDENCE_LEVELS = {
    "otel": "REAL",        # Layer 1: actual OpenTelemetry counts
    "chars": "ESTIMATED",  # Layer 2: response_chars / 4 heuristic
    "coarse": "COARSE",    # Layer 3: per-operation-type averages
}

# Rough per-call token averages for known "<domain>.<operation>" pairs,
# used by the Layer 3 (coarse) estimator. "default" covers unknown ops.
# NOTE(review): values look hand-tuned; source of these numbers not shown here.
OP_TOKEN_AVERAGES = {
    "tasks.find": 750,
    "tasks.list": 3000,
    "tasks.show": 600,
    "tasks.exists": 300,
    "tasks.tree": 800,
    "tasks.plan": 900,
    "session.status": 350,
    "session.list": 400,
    "session.briefing.show": 500,
    "admin.dash": 500,
    "admin.help": 800,
    "admin.health": 300,
    "admin.stats": 600,
    "memory.find": 600,
    "memory.timeline": 500,
    "tools.skill.list": 400,
    "tools.skill.show": 350,
    "default": 400,
}

MCP_OVERHEAD_PER_OP = 200  # approximate MCP framing tokens per operation
60
+
61
+
62
+ # ---------------------------------------------------------------------------
63
+ # Helpers
64
+ # ---------------------------------------------------------------------------
65
+
66
+ def _mean(values):
67
+ return sum(values) / len(values) if values else 0
68
+
69
+
70
def _stddev(values):
    """Sample standard deviation (n-1 denominator); 0 when fewer than two values."""
    n = len(values)
    if n < 2:
        return 0
    center = _mean(values)
    squared_error = sum((v - center) ** 2 for v in values)
    return math.sqrt(squared_error / (n - 1))
75
+
76
+
77
def _stats(values):
    """Summary dict (mean/stddev/min/max/count); None-valued fields when empty."""
    if not values:
        return {"mean": None, "stddev": None, "min": None, "max": None, "count": 0}
    average = _mean(values)
    spread = _stddev(values)
    return {
        "mean": round(average, 1),
        "stddev": round(spread, 1),
        "min": min(values),
        "max": max(values),
        "count": len(values),
    }
87
+
88
+
89
def _op_key(domain, operation):
    """Return the OP_TOKEN_AVERAGES lookup key for a domain+operation pair."""
    candidate = f"{domain}.{operation}"
    if candidate in OP_TOKEN_AVERAGES:
        return candidate
    return "default"
93
+
94
+
95
def _tokens_for_op(domain, operation):
    """Average token cost of one domain.operation call ("default" when unknown)."""
    key = _op_key(domain, operation)
    return OP_TOKEN_AVERAGES[key]
97
+
98
+
99
+ # ---------------------------------------------------------------------------
100
+ # Layer 1 — OTel (REAL)
101
+ # ---------------------------------------------------------------------------
102
+
103
+ def _scan_otel(otel_dir: Path, session_id: str | None = None) -> int | None:
104
+ """
105
+ Scan ~/.cleo/metrics/otel/ for claude_code.token.usage entries.
106
+
107
+ Returns total token count if any relevant entries found, else None.
108
+ """
109
+ if not otel_dir.is_dir():
110
+ return None
111
+
112
+ total = 0
113
+ found = False
114
+ for fpath in otel_dir.iterdir():
115
+ if fpath.suffix not in (".jsonl", ".json"):
116
+ continue
117
+ try:
118
+ with open(fpath, encoding="utf-8") as fh:
119
+ for raw_line in fh:
120
+ raw_line = raw_line.strip()
121
+ if not raw_line:
122
+ continue
123
+ try:
124
+ entry = json.loads(raw_line)
125
+ except json.JSONDecodeError:
126
+ continue
127
+ if entry.get("name") != "claude_code.token.usage":
128
+ continue
129
+ attrs = entry.get("attributes", {})
130
+ if session_id and attrs.get("session_id") not in (None, session_id):
131
+ continue
132
+ value = entry.get("value")
133
+ if isinstance(value, (int, float)):
134
+ total += int(value)
135
+ found = True
136
+ except OSError:
137
+ continue
138
+
139
+ return total if found else None
140
+
141
+
142
def layer1_otel(session_id: str | None = None) -> dict | None:
    """
    Layer 1: real token counts from OTel telemetry under ~/.cleo/metrics/otel.

    Returns a result dict (method/confidence/total_tokens), or None when no
    telemetry is available so the caller falls through to Layer 2.
    """
    telemetry_dir = Path.home() / ".cleo" / "metrics" / "otel"
    total = _scan_otel(telemetry_dir, session_id)
    if total is None:
        return None
    return {
        "method": "otel",
        "confidence": CONFIDENCE_LEVELS["otel"],
        "total_tokens": total,
    }
157
+
158
+
159
+ # ---------------------------------------------------------------------------
160
+ # Layer 2 — chars/4 (ESTIMATED)
161
+ # ---------------------------------------------------------------------------
162
+
163
+ def _collect_response_chars(run_dir: Path) -> int:
164
+ """
165
+ Recursively find response.json files and sum their serialised character lengths.
166
+ """
167
+ total_chars = 0
168
+ for rpath in run_dir.rglob("response.json"):
169
+ try:
170
+ with open(rpath, encoding="utf-8") as fh:
171
+ data = json.load(fh)
172
+ total_chars += len(json.dumps(data))
173
+ except (OSError, json.JSONDecodeError):
174
+ continue
175
+ return total_chars
176
+
177
+
178
+ def _collect_timing_tokens(run_dir: Path) -> int | None:
179
+ """
180
+ Collect total_tokens from timing.json files that already have the field.
181
+ Returns sum if any found, else None.
182
+ """
183
+ total = 0
184
+ found = False
185
+ for tpath in run_dir.rglob("timing.json"):
186
+ try:
187
+ with open(tpath, encoding="utf-8") as fh:
188
+ data = json.load(fh)
189
+ if isinstance(data.get("total_tokens"), (int, float)):
190
+ total += int(data["total_tokens"])
191
+ found = True
192
+ except (OSError, json.JSONDecodeError):
193
+ continue
194
+ return total if found else None
195
+
196
+
197
def layer2_chars(run_dir_str: str | None) -> dict | None:
    """
    Layer 2: chars/4 approximation from a run directory.

    Prefers ``total_tokens`` values already recorded in timing.json files;
    otherwise estimates response.json character counts divided by 4.

    Returns a result dict (method/confidence/total_tokens/source), or None
    when *run_dir_str* is missing, not a directory, or holds no usable data.
    """
    if not run_dir_str:
        return None
    root = Path(run_dir_str)
    if not root.is_dir():
        return None

    # Pre-computed timing tokens take priority over re-estimation.
    precomputed = _collect_timing_tokens(root)
    if precomputed is not None and precomputed > 0:
        return {
            "method": "chars",
            "confidence": CONFIDENCE_LEVELS["chars"],
            "total_tokens": precomputed,
            "source": "timing.json",
        }

    # Fall back to counting serialized response characters.
    char_total = _collect_response_chars(root)
    if char_total == 0:
        return None

    return {
        "method": "chars",
        "confidence": CONFIDENCE_LEVELS["chars"],
        "total_tokens": max(1, char_total // 4),
        "source": "response_chars/4",
        "total_chars": char_total,
    }
235
+
236
+
237
+ # ---------------------------------------------------------------------------
238
+ # Layer 3 — Coarse (COARSE)
239
+ # ---------------------------------------------------------------------------
240
+
241
+ def _parse_audit_ops(project_dir_str: str | None) -> list[dict]:
242
+ """
243
+ Attempt to read operation records from tasks.db audit log or any
244
+ audit-log.jsonl file under project_dir. Returns list of op dicts with
245
+ keys: domain, operation, gateway.
246
+ """
247
+ if not project_dir_str:
248
+ return []
249
+
250
+ project_dir = Path(project_dir_str)
251
+ ops = []
252
+
253
+ # Look for a JSONL audit log
254
+ for candidate in (
255
+ project_dir / ".cleo" / "audit-log.jsonl",
256
+ project_dir / ".cleo" / "audit.jsonl",
257
+ project_dir / "audit-log.jsonl",
258
+ ):
259
+ if candidate.is_file():
260
+ try:
261
+ with open(candidate, encoding="utf-8") as fh:
262
+ for raw in fh:
263
+ raw = raw.strip()
264
+ if not raw:
265
+ continue
266
+ try:
267
+ entry = json.loads(raw)
268
+ except json.JSONDecodeError:
269
+ continue
270
+ domain = entry.get("domain", "")
271
+ operation = entry.get("operation", "")
272
+ gateway = entry.get("gateway", "mcp")
273
+ if domain and operation:
274
+ ops.append(
275
+ {"domain": domain, "operation": operation, "gateway": gateway}
276
+ )
277
+ except OSError:
278
+ pass
279
+ if ops:
280
+ return ops
281
+
282
+ return ops
283
+
284
+
285
def layer3_coarse(
    ops: list[dict] | None = None, entry_count: int = 0
) -> dict:
    """
    Layer 3: coarse estimation from OP_TOKEN_AVERAGES.

    When *ops* is available, each operation contributes its per-type average;
    the breakdown is delegated to the shared _build_per_operation helper so
    this stays consistent with the summary builders (previously duplicated
    inline).  Otherwise assumes *entry_count* operations (floored at 1) of
    the default average type.  Always returns a result dict.
    """
    if ops:
        per_op = _build_per_operation(ops)
        return {
            "method": "coarse",
            "confidence": CONFIDENCE_LEVELS["coarse"],
            # Total is exactly the sum of the per-operation averages.
            "total_tokens": sum(per_op.values()),
            "per_operation": per_op,
        }

    # No ops available — multiply entry_count (at least 1) by the default average.
    return {
        "method": "coarse",
        "confidence": CONFIDENCE_LEVELS["coarse"],
        "total_tokens": max(entry_count, 1) * OP_TOKEN_AVERAGES["default"],
        "per_operation": {},
    }
318
+
319
+
320
+ # ---------------------------------------------------------------------------
321
+ # Three-layer resolution
322
+ # ---------------------------------------------------------------------------
323
+
324
def resolve_tokens(
    run_dir: str | None = None,
    project_dir: str | None = None,
    session_id: str | None = None,
    entry_count: int = 0,
) -> dict:
    """
    Resolve token usage through the layer chain: OTel → chars/4 → coarse.

    The first layer that produces a result wins.  Layer 3 always succeeds,
    so a result dict is guaranteed.
    """
    for attempt in (
        lambda: layer1_otel(session_id),   # Layer 1 — REAL
        lambda: layer2_chars(run_dir),     # Layer 2 — ESTIMATED
    ):
        outcome = attempt()
        if outcome:
            return outcome

    # Layer 3 — COARSE (never fails)
    audit_ops = _parse_audit_ops(project_dir)
    return layer3_coarse(ops=audit_ops if audit_ops else None, entry_count=entry_count)
347
+
348
+
349
+ # ---------------------------------------------------------------------------
350
+ # Per-operation breakdown helpers
351
+ # ---------------------------------------------------------------------------
352
+
353
def _build_per_operation(ops: list[dict]) -> dict[str, int]:
    """Accumulate estimated tokens per "domain.operation" key."""
    default_avg = OP_TOKEN_AVERAGES["default"]
    totals: dict[str, int] = {}
    for op in ops:
        key = f"{op.get('domain', '')}.{op.get('operation', '')}"
        totals[key] = totals.get(key, 0) + OP_TOKEN_AVERAGES.get(key, default_avg)
    return totals
361
+
362
+
363
def _build_by_domain(ops: list[dict]) -> dict[str, dict]:
    """Aggregate estimated tokens and operation count per domain."""
    buckets: dict[str, dict] = {}
    for op in ops:
        domain = op.get("domain", "unknown")
        lookup = f"{domain}.{op.get('operation', '')}"
        bucket = buckets.setdefault(domain, {"estimated_tokens": 0, "ops_count": 0})
        bucket["estimated_tokens"] += OP_TOKEN_AVERAGES.get(
            lookup, OP_TOKEN_AVERAGES["default"]
        )
        bucket["ops_count"] += 1
    return buckets
375
+
376
+
377
def _build_mcp_vs_cli(ops: list[dict]) -> dict[str, dict]:
    """Split estimated tokens and op counts between mcp and cli gateways."""
    # Both standard gateways are always present, even when empty;
    # unknown gateway names get their own bucket on first sight.
    split: dict[str, dict] = {
        "mcp": {"estimated_tokens": 0, "ops_count": 0},
        "cli": {"estimated_tokens": 0, "ops_count": 0},
    }
    for op in ops:
        bucket = split.setdefault(
            op.get("gateway", "mcp"), {"estimated_tokens": 0, "ops_count": 0}
        )
        lookup = f"{op.get('domain', '')}.{op.get('operation', '')}"
        bucket["estimated_tokens"] += OP_TOKEN_AVERAGES.get(
            lookup, OP_TOKEN_AVERAGES["default"]
        )
        bucket["ops_count"] += 1
    return split
392
+
393
+
394
+ # ---------------------------------------------------------------------------
395
+ # token-summary.json builder
396
+ # ---------------------------------------------------------------------------
397
+
398
def build_summary(
    run_dir: str | None,
    project_dir: str | None,
    session_id: str | None = None,
) -> dict:
    """
    Build the full token-summary.json structure.

    Resolves total tokens via the three-layer chain and, when audit ops are
    available, attaches per-domain and mcp-vs-cli breakdowns (always built
    from coarse per-operation averages, regardless of the winning layer).
    """
    ops = _parse_audit_ops(project_dir)
    resolution = resolve_tokens(
        run_dir=run_dir,
        project_dir=project_dir,
        session_id=session_id,
        entry_count=len(ops),
    )

    method = resolution["method"]
    confidence = resolution["confidence"]

    empty_split = {
        "mcp": {"estimated_tokens": 0, "ops_count": 0},
        "cli": {"estimated_tokens": 0, "ops_count": 0},
    }

    # Human-readable note explaining the confidence of the numbers.
    note = f"Confidence: {confidence} ({method}"
    if method == "otel":
        note += "). Real token counts from OpenTelemetry."
    elif method == "chars":
        note += "/4). Enable OTel for REAL token counts."
    else:
        note += " average). Enable OTel for REAL token counts."

    return {
        "run_dir": str(Path(run_dir).resolve()) if run_dir else None,
        "confidence": confidence,
        "method": method,
        "total_tokens": resolution["total_tokens"],
        "by_domain": _build_by_domain(ops) if ops else {},
        "mcp_vs_cli": _build_mcp_vs_cli(ops) if ops else empty_split,
        "score_per_1k_tokens": None,
        "note": note,
    }
440
+
441
+
442
+ # ---------------------------------------------------------------------------
443
+ # _tokenMeta enrichment for GRADES.jsonl
444
+ # ---------------------------------------------------------------------------
445
+
446
def _build_token_meta(
    grade_entry: dict,
    run_dir: str | None,
    project_dir: str | None,
) -> dict:
    """
    Build the _tokenMeta block for a single GRADES.jsonl entry.

    Resolves totals through the three-layer chain.  The per-operation
    breakdown prefers the project-level audit log and falls back to any ops
    embedded in the entry itself (heuristic: "operations" or "audit" lists).
    """
    # Heuristic: the entry may carry its own op list.
    embedded: list[dict] = []
    for field in ("operations", "audit"):
        candidate = grade_entry.get(field)
        if isinstance(candidate, list):
            embedded = candidate
            break

    project_ops = _parse_audit_ops(project_dir)
    ops = project_ops if project_ops else embedded

    resolution = resolve_tokens(
        run_dir=run_dir,
        project_dir=project_dir,
        session_id=grade_entry.get("session_id"),
        entry_count=len(ops) or 1,
    )

    return {
        "method": resolution["method"],
        "confidence": resolution["confidence"],
        "total_tokens": resolution["total_tokens"],
        # Framing overhead assumes at least one operation took place.
        "mcp_token_overhead": MCP_OVERHEAD_PER_OP * max(len(ops), 1),
        "per_operation": _build_per_operation(ops) if ops else {},
    }
486
+
487
+
488
def enrich_grades_file(grades_path: str, run_dir: str | None, project_dir: str | None) -> int:
    """
    Add _tokenMeta to every GRADES.jsonl entry that lacks it, in place.

    Malformed lines are skipped with a warning (and thus dropped from the
    rewritten file); entries that are valid JSON but not objects are written
    back unmodified instead of crashing.  The file is rewritten atomically
    (temp file + rename).  Exits the process when *grades_path* is missing.

    Returns the number of entries enriched.
    """
    path = Path(grades_path)
    if not path.is_file():
        print(f"ERROR: Grades file not found: {grades_path}", file=sys.stderr)
        sys.exit(1)

    entries = []
    with open(path, encoding="utf-8") as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            try:
                entries.append(json.loads(raw))
            except json.JSONDecodeError as exc:
                print(f" WARN: Skipping malformed line: {exc}", file=sys.stderr)

    enriched_count = 0
    for entry in entries:
        # Guard: only JSON objects can carry _tokenMeta; anything else is
        # passed through untouched (previously this would have crashed on
        # item assignment).
        if isinstance(entry, dict) and "_tokenMeta" not in entry:
            entry["_tokenMeta"] = _build_token_meta(entry, run_dir, project_dir)
            enriched_count += 1

    # Rewrite file atomically (write to temp, then rename).
    tmp_path = path.with_suffix(".jsonl.tmp")
    with open(tmp_path, "w", encoding="utf-8") as fh:
        for entry in entries:
            fh.write(json.dumps(entry, separators=(",", ":")) + "\n")
    tmp_path.replace(path)

    return enriched_count
526
+
527
+
528
+ # ---------------------------------------------------------------------------
529
+ # Output formatting
530
+ # ---------------------------------------------------------------------------
531
+
532
+ def _fmt_tokens(value: int, confidence: str) -> str:
533
+ return f"{value:,} tokens ({confidence})"
534
+
535
+
536
def print_summary(summary: dict) -> None:
    """Print a human-readable report of a token summary dict to stdout."""
    confidence = summary["confidence"]

    def _plural(count: int) -> str:
        # Matches the original inline pluralisation: "op" vs "ops".
        return "s" if count != 1 else ""

    print("\nToken Summary")
    print("=" * 52)
    print(f" Total: {_fmt_tokens(summary['total_tokens'], confidence)}")
    print(f" Method: {summary['method']}")

    domains = summary.get("by_domain", {})
    if domains:
        print("\n By Domain:")
        for name in sorted(domains):
            info = domains[name]
            tokens_txt = _fmt_tokens(info["estimated_tokens"], confidence)
            count = info["ops_count"]
            print(f" {name:<20} {tokens_txt} ({count} op{_plural(count)})")

    gateways = summary.get("mcp_vs_cli", {})
    if any(entry["ops_count"] for entry in gateways.values()):
        print("\n MCP vs CLI:")
        for gw in ("mcp", "cli"):
            if gw in gateways:
                tokens_txt = _fmt_tokens(gateways[gw]["estimated_tokens"], confidence)
                count = gateways[gw]["ops_count"]
                print(f" {gw.upper():<6} {tokens_txt} ({count} op{_plural(count)})")

    print(f"\n Note: {summary['note']}")
564
+
565
+
566
+ # ---------------------------------------------------------------------------
567
+ # CLI
568
+ # ---------------------------------------------------------------------------
569
+
570
def main() -> None:
    """CLI entry point: optional GRADES.jsonl enrichment, then token-summary.json."""
    parser = argparse.ArgumentParser(
        description="Three-layer token estimation for ct-grade v3",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    parser.add_argument(
        "--run-dir",
        default=None,
        help="A/B run directory (enables Layer 2 chars/4 estimation)",
    )
    parser.add_argument(
        "--project-dir",
        default=None,
        help="Project root for audit log / tasks.db (enables Layer 3 coarse estimation)",
    )
    parser.add_argument(
        "--grades-file",
        default=None,
        help="GRADES.jsonl path — enrich each entry with _tokenMeta in-place",
    )
    parser.add_argument(
        "--output",
        default=None,
        help="Output path for token-summary.json (default: <run-dir>/token-summary.json or ./token-summary.json)",
    )
    parser.add_argument(
        "--session-id",
        default=None,
        help="Filter OTel data to a specific session ID",
    )
    args = parser.parse_args()

    # Optional first step: enrich GRADES.jsonl entries with _tokenMeta.
    if args.grades_file:
        enriched = enrich_grades_file(args.grades_file, args.run_dir, args.project_dir)
        suffix = "ies" if enriched != 1 else "y"
        print(f"Enriched {enriched} GRADES.jsonl entr{suffix} with _tokenMeta")

    # Always build and write token-summary.json.
    summary = build_summary(
        run_dir=args.run_dir,
        project_dir=args.project_dir,
        session_id=args.session_id,
    )

    # Output destination: explicit --output, else inside the run dir, else CWD.
    if args.output:
        destination = args.output
    elif args.run_dir and os.path.isdir(args.run_dir):
        destination = os.path.join(args.run_dir, "token-summary.json")
    else:
        destination = "token-summary.json"

    with open(destination, "w", encoding="utf-8") as fh:
        json.dump(summary, fh, indent=2)

    print_summary(summary)
    print(f"\nWritten: {destination}")
627
+
628
+
629
# Allow direct execution: python scripts/token_tracker.py [options]
if __name__ == "__main__":
    main()