codex-coach 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codex-coach",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "Local-first Codex usage coach that analyzes logs and suggests workflow improvements.",
5
5
  "author": {
6
6
  "name": "Codex Coach Contributors"
package/README.md CHANGED
@@ -82,6 +82,7 @@ Suggest custom instruction changes
82
82
  - Project capsules: redacted per-project workflow summaries with suggested local instructions.
83
83
  - Prompt rewrites: safe templates for vague prompts without storing full prompt text.
84
84
  - Confidence-scored suggestions: low, medium, or high confidence improvement notes.
85
+ - Token efficiency: cached vs uncached input, output, routing, and context-budget recommendations.
85
86
  - Beginner and expert report modes.
86
87
  - Skill opportunities: repeated workflow patterns that may deserve a reusable Codex skill.
87
88
  - Real-time prompt linting through `codex-coach lint-prompt`.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "codex-coach",
3
- "version": "0.1.3",
3
+ "version": "0.1.5",
4
4
  "description": "Local-first Codex usage coach and plugin",
5
5
  "license": "MIT",
6
6
  "author": "Codex Coach Contributors",
package/pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "codex-coach"
7
- version = "0.1.3"
7
+ version = "0.1.5"
8
8
  description = "Local-first Codex usage coach and plugin"
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
@@ -1,3 +1,3 @@
1
1
  """Local-first Codex usage coach."""
2
2
 
3
- __version__ = "0.1.0"
3
+ __version__ = "0.1.5"
@@ -166,8 +166,11 @@ def _doctor(paths) -> int:
166
166
  print(f"command: {command} {'OK' if command.exists() else 'not installed'}")
167
167
  plugin = paths.home / "plugins" / "codex-coach" / ".codex-plugin" / "plugin.json"
168
168
  print(f"plugin: {plugin} {'OK' if plugin.exists() else 'not installed'}")
169
- skill = paths.home / ".agents" / "skills" / "codex-coach" / "SKILL.md"
169
+ skill = paths.codex_home / "skills" / "codex-coach" / "SKILL.md"
170
+ legacy_skill = paths.home / ".agents" / "skills" / "codex-coach" / "SKILL.md"
170
171
  print(f"skill: {skill} {'OK' if skill.exists() else 'not installed'}")
172
+ if legacy_skill.exists():
173
+ print(f"legacy_skill: {legacy_skill} duplicate")
171
174
  return 0 if paths.codex_home.exists() else 1
172
175
 
173
176
 
@@ -19,7 +19,7 @@ def install_from_source(source_root: Path, paths: CoachPaths, *, schedule: str =
19
19
  _copy_app(source_root, paths.app_dir)
20
20
  _install_command(paths)
21
21
  plugin_path = _install_plugin(source_root, paths.home)
22
- skill_paths = _install_user_skills(source_root, paths.home)
22
+ skill_paths = _install_user_skills(source_root, paths)
23
23
  marketplace = _update_marketplace(paths.home, plugin_path)
24
24
  scheduler = _write_scheduler(paths, schedule=schedule)
25
25
  return {
@@ -90,20 +90,28 @@ def _install_plugin(source_root: Path, home: Path) -> Path:
90
90
  return plugin_root
91
91
 
92
92
 
93
- def _install_user_skills(source_root: Path, home: Path) -> list[Path]:
93
+ def _install_user_skills(source_root: Path, paths: CoachPaths) -> list[Path]:
94
94
  skill_source = source_root / "skills" / PLUGIN_NAME
95
- targets = [
96
- home / ".agents" / "skills" / PLUGIN_NAME,
97
- home / ".codex" / "skills" / PLUGIN_NAME,
98
- ]
99
- installed: list[Path] = []
100
- for target in targets:
101
- if target.exists():
102
- shutil.rmtree(target)
103
- target.parent.mkdir(parents=True, exist_ok=True)
104
- shutil.copytree(skill_source, target)
105
- installed.append(target)
106
- return installed
95
+ target = paths.codex_home / "skills" / PLUGIN_NAME
96
+ if target.exists():
97
+ shutil.rmtree(target)
98
+ target.parent.mkdir(parents=True, exist_ok=True)
99
+ shutil.copytree(skill_source, target)
100
+ _remove_legacy_duplicate_skill(paths.home, target)
101
+ return [target]
102
+
103
+
104
def _remove_legacy_duplicate_skill(home: Path, canonical_target: Path) -> None:
    """Remove the legacy copy of this skill once the canonical copy is installed.

    The legacy location is ``<home>/.agents/skills/<PLUGIN_NAME>``. It is only
    deleted when it is a different directory from ``canonical_target`` and its
    ``SKILL.md`` contains ``name: <PLUGIN_NAME>``; anything else is left alone.

    Args:
        home: Directory under which the legacy ``.agents/skills`` tree lives.
        canonical_target: Skill directory that was just installed.
    """
    legacy_target = home / ".agents" / "skills" / PLUGIN_NAME
    # Same directory after resolution (e.g. via a symlink), or nothing there:
    # nothing to clean up.
    if legacy_target.resolve() == canonical_target.resolve() or not legacy_target.exists():
        return
    skill_file = legacy_target / "SKILL.md"
    try:
        skill_text = skill_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # Missing, unreadable, or not valid UTF-8: ownership cannot be
        # confirmed, so leave the directory in place instead of failing.
        return
    if f"name: {PLUGIN_NAME}" in skill_text:
        shutil.rmtree(legacy_target)
107
115
 
108
116
 
109
117
  def _update_marketplace(home: Path, plugin_path: Path) -> Path:
@@ -58,9 +58,14 @@ class ScanAccumulator:
58
58
  project_efforts: dict[str, Counter] = field(default_factory=lambda: defaultdict(Counter))
59
59
  project_verification_tools: dict[str, Counter] = field(default_factory=lambda: defaultdict(Counter))
60
60
  project_prompt_scores: dict[str, list[int]] = field(default_factory=lambda: defaultdict(list))
61
+ project_token_totals: dict[str, Counter] = field(default_factory=lambda: defaultdict(Counter))
61
62
  prompt_scores: list[dict[str, Any]] = field(default_factory=list)
62
63
  error_counts: Counter = field(default_factory=Counter)
63
64
  verification_tools: Counter = field(default_factory=Counter)
65
+ token_totals: Counter = field(default_factory=Counter)
66
+ max_model_context_window: int = 0
67
+ max_last_input_tokens: int = 0
68
+ max_last_uncached_input_tokens: int = 0
64
69
  compacted_sessions: set[str] = field(default_factory=set)
65
70
  current_file_session: dict[str, str] = field(default_factory=dict)
66
71
 
@@ -90,6 +95,7 @@ class ScanAccumulator:
90
95
  self.project_efforts[cwd],
91
96
  self.project_prompt_scores[cwd],
92
97
  verification_tool_calls,
98
+ self.project_token_totals[cwd],
93
99
  )
94
100
  )
95
101
  projects.sort(key=lambda item: (item["user_messages"], item["tool_calls"], item["turns"]), reverse=True)
@@ -126,6 +132,13 @@ class ScanAccumulator:
126
132
  "tools": dict(self.tool_counts.most_common()),
127
133
  "verification_tools": dict(self.verification_tools.most_common()),
128
134
  "errors": dict(self.error_counts.most_common()),
135
+ "token_efficiency": {
136
+ "status": "observed" if self.token_totals["token_count_events"] else "not_available",
137
+ "usage": _token_summary(self.token_totals, turns=self.totals["turns"]),
138
+ "max_model_context_window": self.max_model_context_window,
139
+ "max_last_input_tokens": self.max_last_input_tokens,
140
+ "max_last_uncached_input_tokens": self.max_last_uncached_input_tokens,
141
+ },
129
142
  "prompt_quality": {
130
143
  "average_score": avg_prompt,
131
144
  "categories": dict(prompt_categories),
@@ -203,7 +216,7 @@ def _scan_file(path: Path, acc: ScanAccumulator, *, since_dt) -> None:
203
216
  _handle_response_item(payload, acc, current_cwd)
204
217
  continue
205
218
  if event_type == "event_msg":
206
- _handle_event_msg(payload, acc)
219
+ _handle_event_msg(payload, acc, current_cwd)
207
220
  continue
208
221
  if event_type == "compacted":
209
222
  acc.totals["compactions"] += 1
@@ -305,7 +318,9 @@ def _handle_response_item(payload: dict[str, Any], acc: ScanAccumulator, cwd: st
305
318
  acc.totals["reasoning_items"] += 1
306
319
 
307
320
 
308
- def _handle_event_msg(payload: dict[str, Any], acc: ScanAccumulator) -> None:
321
+ def _handle_event_msg(payload: dict[str, Any], acc: ScanAccumulator, cwd: str) -> None:
322
+ if payload.get("type") == "token_count":
323
+ _handle_token_count(payload, acc, cwd)
309
324
  message = _content_text(payload.get("message") or payload.get("text") or payload)
310
325
  _count_errors(message, acc)
311
326
 
@@ -317,6 +332,7 @@ def _project_capsule(
317
332
  efforts: Counter,
318
333
  prompt_scores: list[int],
319
334
  verification_tool_calls: int,
335
+ token_totals: Counter,
320
336
  ) -> dict[str, Any]:
321
337
  prompt_average = round(sum(prompt_scores) / len(prompt_scores), 2) if prompt_scores else 0.0
322
338
  workflow = _infer_workflow(tools, counts)
@@ -332,6 +348,7 @@ def _project_capsule(
332
348
  "prompt_quality_average": prompt_average,
333
349
  "top_tools": dict(tools.most_common(5)),
334
350
  "effort_mix": dict(efforts.most_common()),
351
+ "token_usage": _token_summary(token_totals, turns=counts["turns"]),
335
352
  "likely_workflow": workflow,
336
353
  "recommended_instruction": instruction,
337
354
  "skill_candidate": counts["turns"] >= 3 or counts["tool_calls"] >= 10,
@@ -382,6 +399,69 @@ def _content_text(value: Any) -> str:
382
399
  return str(value)
383
400
 
384
401
 
402
def _handle_token_count(payload: dict[str, Any], acc: ScanAccumulator, cwd: str) -> None:
    """Fold one ``token_count`` event into the global and per-project totals.

    Events without a dict ``info`` or a dict ``info["last_token_usage"]`` are
    ignored. Uncached input is derived as input minus cached input, floored
    at zero.
    """
    info = payload.get("info")
    if not isinstance(info, dict):
        return
    last_usage = info.get("last_token_usage")
    if not isinstance(last_usage, dict):
        return

    prompt_in = _int_token(last_usage.get("input_tokens"))
    prompt_cached = _int_token(last_usage.get("cached_input_tokens"))
    completion_out = _int_token(last_usage.get("output_tokens"))
    reasoning_out = _int_token(last_usage.get("reasoning_output_tokens"))
    grand_total = _int_token(last_usage.get("total_tokens"))
    prompt_fresh = max(0, prompt_in - prompt_cached)

    increments = dict(
        token_count_events=1,
        input_tokens=prompt_in,
        cached_input_tokens=prompt_cached,
        uncached_input_tokens=prompt_fresh,
        output_tokens=completion_out,
        reasoning_output_tokens=reasoning_out,
        total_tokens=grand_total,
    )
    # Counter.update adds the values onto the running totals.
    for bucket in (acc.token_totals, acc.project_token_totals[cwd]):
        bucket.update(increments)

    # Track the largest single step seen so far.
    if prompt_in > acc.max_last_input_tokens:
        acc.max_last_input_tokens = prompt_in
    if prompt_fresh > acc.max_last_uncached_input_tokens:
        acc.max_last_uncached_input_tokens = prompt_fresh
    window = _int_token(info.get("model_context_window"))
    if window > acc.max_model_context_window:
        acc.max_model_context_window = window
433
+
434
+
435
+ def _token_summary(tokens: Counter, *, turns: int) -> dict[str, Any]:
436
+ input_tokens = int(tokens["input_tokens"])
437
+ cached_input_tokens = int(tokens["cached_input_tokens"])
438
+ uncached_input_tokens = int(tokens["uncached_input_tokens"])
439
+ output_tokens = int(tokens["output_tokens"])
440
+ total_tokens = int(tokens["total_tokens"])
441
+ turn_count = max(1, int(turns or 0))
442
+ return {
443
+ "events": int(tokens["token_count_events"]),
444
+ "input_tokens": input_tokens,
445
+ "cached_input_tokens": cached_input_tokens,
446
+ "uncached_input_tokens": uncached_input_tokens,
447
+ "output_tokens": output_tokens,
448
+ "reasoning_output_tokens": int(tokens["reasoning_output_tokens"]),
449
+ "total_tokens": total_tokens,
450
+ "cache_ratio": round(cached_input_tokens / input_tokens, 3) if input_tokens else 0.0,
451
+ "uncached_ratio": round(uncached_input_tokens / input_tokens, 3) if input_tokens else 0.0,
452
+ "input_tokens_per_turn": round(input_tokens / turn_count, 1) if input_tokens else 0.0,
453
+ "uncached_input_tokens_per_turn": round(uncached_input_tokens / turn_count, 1) if uncached_input_tokens else 0.0,
454
+ "output_tokens_per_turn": round(output_tokens / turn_count, 1) if output_tokens else 0.0,
455
+ }
456
+
457
+
458
+ def _int_token(value: Any) -> int:
459
+ try:
460
+ return max(0, int(value or 0))
461
+ except (TypeError, ValueError):
462
+ return 0
463
+
464
+
385
465
  def _looks_like_verification(tool_name: str, arguments: str) -> bool:
386
466
  text = f"{tool_name} {arguments}".lower()
387
467
  return any(marker in text for marker in VERIFY_RE)
@@ -41,11 +41,16 @@ def render_markdown_report(
41
41
  f"- Compactions: {totals.get('compactions', 0)}",
42
42
  f"- Prompt quality average: {prompt_quality.get('average_score', 0)}/10",
43
43
  "",
44
+ "Plain English: this is a private local report about how Codex was used, where the sessions got expensive or repetitive, and what small instruction changes may improve the next run.",
45
+ "",
44
46
  "## Top Coaching Notes",
45
47
  "",
46
48
  ]
47
49
  lines.extend(_coaching_notes(suggestions, limit=5 if expert else 3))
50
+ lines.extend(_token_efficiency_lines(facts, expert=expert))
48
51
  lines.extend(["", "## Project Mix", ""])
52
+ lines.append("Plain English: these are the projects where Codex spent the most work in this window. High tool-call counts usually mean implementation, diagnosis, or verification-heavy sessions.")
53
+ lines.append("")
49
54
  projects = facts.get("projects", [])[:8]
50
55
  if projects:
51
56
  lines.append("| Project | Sessions | Turns | User Messages | Tool Calls | Verification |")
@@ -60,6 +65,8 @@ def render_markdown_report(
60
65
  lines.append("No project activity found.")
61
66
 
62
67
  lines.extend(["", "## Project Capsules", ""])
68
+ lines.append("Plain English: a project capsule is a tiny memory card for a repo. Add one to that repo's `AGENTS.md` when Codex keeps needing to rediscover the same workflow.")
69
+ lines.append("")
63
70
  capsules = facts.get("project_capsules", [])[:5]
64
71
  if capsules:
65
72
  for capsule in capsules:
@@ -70,6 +77,8 @@ def render_markdown_report(
70
77
  lines.extend(_instruction_playbook_lines(facts.get("instruction_audit", {}), expert=expert))
71
78
 
72
79
  lines.extend(["", "## Prompt Quality", ""])
80
+ lines.append("Plain English: short prompts are fine when the context is obvious. When Codex guesses wrong, add the target, symptom, and success state.")
81
+ lines.append("")
73
82
  categories = prompt_quality.get("categories", {})
74
83
  if categories:
75
84
  for name in ("excellent", "good", "needs_work"):
@@ -88,8 +97,10 @@ def render_markdown_report(
88
97
  )
89
98
 
90
99
  lines.extend(["", "## Suggested Improvements", ""])
100
+ lines.append("Review these before pasting anything. Use global custom instructions for personal habits that should apply everywhere; use a project `AGENTS.md` for repo-specific commands, stack rules, or verification steps.")
101
+ lines.append("")
91
102
  for suggestion in suggestions:
92
- lines.append(f"- [{suggestion['confidence']}] {suggestion['title']}: {suggestion['body']}")
103
+ lines.extend(_suggestion_lines(suggestion))
93
104
 
94
105
  skill_opportunities = build_skill_opportunities(facts)
95
106
  if skill_opportunities:
@@ -141,6 +152,8 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
141
152
  "title": "Right-size reasoning effort",
142
153
  "confidence": _confidence(ratio, high=0.75, medium=0.6),
143
154
  "body": "High reasoning dominates recent turns. Default simple status, search, and small edit tasks to medium; reserve high/xhigh for ambiguous debugging, architecture, security, or broad refactors.",
155
+ "paste_target": "Global custom instructions",
156
+ "suggested_text": "Use medium effort for routine status checks, targeted searches, formatting, small edits, and deterministic reports. Escalate to high or xhigh only for ambiguous debugging, architecture decisions, security review, broad refactors, or production-risk changes.",
144
157
  }
145
158
  )
146
159
 
@@ -154,6 +167,8 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
154
167
  "title": "Verify before calling work done",
155
168
  "confidence": "high" if ratio < 0.08 and tool_calls >= 20 else "medium",
156
169
  "body": "Verification commands are a small share of tool use. Ask Codex to run the smallest meaningful test, build, lint, browser check, or runtime probe before final status.",
170
+ "paste_target": "Project AGENTS.md",
171
+ "suggested_text": "Before final status, run the smallest meaningful verification for the change: a focused test, build, lint/typecheck, browser check, or runtime probe. If verification cannot run, say exactly why and what risk remains.",
157
172
  }
158
173
  )
159
174
 
@@ -165,6 +180,8 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
165
180
  "title": "Checkpoint long runs",
166
181
  "confidence": "high" if compactions >= 3 else "medium",
167
182
  "body": "Compactions appeared in the window. For long tasks, ask Codex to keep a small task ledger and validate durable files before resuming.",
183
+ "paste_target": "Global custom instructions or project AGENTS.md",
184
+ "suggested_text": "For long tasks, keep a short task ledger with completed, in-progress, and pending steps. After compaction or interruption, verify the current file state and last successful command before continuing.",
168
185
  }
169
186
  )
170
187
 
@@ -180,6 +197,8 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
180
197
  "title": "Tighten ambiguous prompts",
181
198
  "confidence": _confidence(ratio, high=0.15, medium=0.08),
182
199
  "body": "A noticeable share of prompts are too short to identify the target. Include action, file/project, symptom, and success state when context is not obvious.",
200
+ "paste_target": "Global custom instructions",
201
+ "suggested_text": "When my prompt is vague, infer the likely task from the current repo and recent context. If the target or success state is still unclear, ask one concise question before making broad changes.",
183
202
  }
184
203
  )
185
204
 
@@ -191,6 +210,8 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
191
210
  "title": "Use project capsules",
192
211
  "confidence": "high" if len(projects) >= 6 else "medium",
193
212
  "body": "Recent work spans several projects. Keep a short per-project AGENTS or context note so Codex does not rebuild project intent every time.",
213
+ "paste_target": "Each active project's AGENTS.md",
214
+ "suggested_text": "## Project Capsule\n- Purpose: <what this repo is for>\n- Stack: <main frameworks, runtime, package manager>\n- Entry points: <key files or commands>\n- Verify: <smallest reliable test/build/check>\n- Avoid: <repo-specific traps or risky commands>",
194
215
  }
195
216
  )
196
217
 
@@ -201,9 +222,13 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
201
222
  "title": "Turn repeated workflows into skills",
202
223
  "confidence": "medium",
203
224
  "body": "At least one project shows repeated tool patterns. Consider a small user skill with the workflow steps, validation commands, and resume rules.",
225
+ "paste_target": "A Codex skill `SKILL.md` or project AGENTS.md",
226
+ "suggested_text": "Use this workflow when <trigger>. First read <specific files>. Then perform <steps>. Verify with <commands>. If interrupted, resume by checking <durable artifact or command output>.",
204
227
  }
205
228
  )
206
229
 
230
+ suggestions.extend(build_token_suggestions(facts))
231
+
207
232
  instruction_audit = facts.get("instruction_audit", {})
208
233
  instruction_findings = instruction_audit.get("findings", []) if isinstance(instruction_audit, dict) else []
209
234
  if instruction_findings:
@@ -214,6 +239,8 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
214
239
  "title": "Review instruction playbook",
215
240
  "confidence": "high" if high else "medium",
216
241
  "body": "Instruction files have review findings. Check for stale mode locks, project-specific global rules, missing AGENTS.md coverage, or secrets before changing user instructions.",
242
+ "paste_target": "Instruction review checklist",
243
+ "suggested_text": "Keep global instructions limited to durable personal preferences. Move repo-specific stack, commands, UI style, and deployment rules into that repo's AGENTS.md. Never store tokens, passwords, or API keys in instruction files.",
217
244
  }
218
245
  )
219
246
 
@@ -224,11 +251,83 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
224
251
  "title": "Keep the current loop",
225
252
  "confidence": "medium",
226
253
  "body": "No strong coaching warnings stood out. Keep using explicit success states and ask for verification on user-facing or production-sensitive work.",
254
+ "paste_target": "Global custom instructions",
255
+ "suggested_text": "For user-facing or production-sensitive changes, finish with a short verification note that names the command or check that passed and any remaining risk.",
227
256
  }
228
257
  )
229
258
  return suggestions
230
259
 
231
260
 
261
def build_token_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
    """Build token-efficiency coaching suggestions from scan facts.

    Returns an empty list unless ``facts["token_efficiency"]`` is a dict with
    ``status == "observed"`` and a dict ``usage``. Otherwise each threshold
    below may contribute one suggestion, in this order: compact context
    artifacts, cap uncached context, route routine work to cheaper effort,
    request concise outputs.

    Args:
        facts: Scan facts; reads ``token_efficiency``, ``totals`` and ``efforts``.

    Returns:
        Suggestion dicts with ``id``, ``title``, ``confidence``, ``body``,
        ``paste_target`` and ``suggested_text`` keys.
    """
    token_efficiency = facts.get("token_efficiency", {})
    if not isinstance(token_efficiency, dict) or token_efficiency.get("status") != "observed":
        return []

    usage = token_efficiency.get("usage", {})
    if not isinstance(usage, dict):
        return []

    suggestions: list[dict[str, str]] = []
    input_tokens = int(usage.get("input_tokens", 0) or 0)
    uncached_tokens = int(usage.get("uncached_input_tokens", 0) or 0)
    output_tokens = int(usage.get("output_tokens", 0) or 0)
    cache_ratio = float(usage.get("cache_ratio", 0.0) or 0.0)
    uncached_per_turn = float(usage.get("uncached_input_tokens_per_turn", 0.0) or 0.0)
    # Clamp to 1 so the high-effort share below never divides by zero.
    turns = max(1, int((facts.get("totals") or {}).get("turns", 0) or 0))
    efforts = facts.get("efforts", {})
    if not isinstance(efforts, dict):
        # The key may be present with None or another non-mapping value;
        # treat that as "no effort data" instead of failing on .get().
        efforts = {}
    high_effort = sum(int(efforts.get(name, 0) or 0) for name in ("high", "xhigh"))

    if input_tokens >= 100_000 and cache_ratio >= 0.75:
        suggestions.append(
            {
                "id": "use-compact-context-artifacts",
                "title": "Use compact context artifacts",
                "confidence": _confidence(cache_ratio, high=0.85, medium=0.75),
                "body": "Most input is repeated cached context. Keep short project capsules, latest facts, and resume notes so routine coaching can start from compact artifacts instead of full history.",
                "paste_target": "Global custom instructions",
                "suggested_text": "Before re-reading a large repo or long history, first check existing summaries, reports, AGENTS.md, and recent task notes. Use those compact artifacts to choose the smallest next context to inspect.",
            }
        )

    if uncached_tokens >= 50_000 or uncached_per_turn >= 12_000:
        suggestions.append(
            {
                "id": "cap-uncached-context",
                "title": "Cap uncached context",
                "confidence": "high" if uncached_per_turn >= 20_000 else "medium",
                "body": "Uncached input is the expensive part. Ask Codex to read one likely file first, summarize before widening, and prefer targeted searches over broad file dumps.",
                "paste_target": "Global custom instructions",
                "suggested_text": "Before broad exploration, identify the likely bottleneck and inspect the one most relevant file or targeted search result first. Widen only after explaining what is still unknown.",
            }
        )

    if high_effort and high_effort / turns >= 0.25:
        suggestions.append(
            {
                "id": "route-routine-work-to-mini",
                "title": "Route routine work to mini or medium",
                "confidence": "high" if high_effort / turns >= 0.5 else "medium",
                "body": "High effort appears often enough to merit routing. Use mini/medium for scan, report, grep, formatting, and deterministic edits; escalate only for ambiguous debugging, architecture, security, and risky decisions.",
                "paste_target": "Global custom instructions",
                "suggested_text": "Prefer cheaper routine routing: use mini or medium reasoning for scanning, reports, greps, formatting, and deterministic small edits. Escalate only when the task needs judgment, tradeoff analysis, or high-risk debugging.",
            }
        )

    if output_tokens >= 20_000 and input_tokens and output_tokens / input_tokens >= 0.08:
        suggestions.append(
            {
                "id": "request-concise-outputs",
                "title": "Request concise outputs",
                "confidence": "medium",
                "body": "Output tokens are a visible part of spend. Ask for summaries first and detailed evidence only when deciding or reviewing.",
                "paste_target": "Global custom instructions",
                "suggested_text": "Default to concise final answers: say what changed, how it was verified, and any risk. Include detailed logs or long evidence only when asked or when needed for a decision.",
            }
        )

    return suggestions
329
+
330
+
232
331
  def build_skill_opportunities(facts: dict[str, Any]) -> list[dict[str, str]]:
233
332
  opportunities: list[dict[str, str]] = []
234
333
  for capsule in facts.get("project_capsules", []):
@@ -301,12 +400,124 @@ def _coaching_notes(suggestions: list[dict[str, str]], *, limit: int) -> list[st
301
400
  return [f"- [{item['confidence']}] {item['title']}: {item['body']}" for item in suggestions[:limit]]
302
401
 
303
402
 
403
+ def _suggestion_lines(suggestion: dict[str, Any]) -> list[str]:
404
+ lines = [
405
+ f"### {suggestion['title']}",
406
+ "",
407
+ f"- Confidence: {suggestion['confidence']}",
408
+ f"- Why: {suggestion['body']}",
409
+ ]
410
+ paste_target = suggestion.get("paste_target")
411
+ suggested_text = suggestion.get("suggested_text")
412
+ if paste_target and suggested_text:
413
+ lines.extend(
414
+ [
415
+ f"- Paste into: {paste_target}",
416
+ "",
417
+ "```md",
418
+ str(suggested_text),
419
+ "```",
420
+ ]
421
+ )
422
+ lines.append("")
423
+ return lines
424
+
425
+
426
+ def _token_efficiency_lines(facts: dict[str, Any], *, expert: bool) -> list[str]:
427
+ lines = ["", "## Token Efficiency", ""]
428
+ token_efficiency = facts.get("token_efficiency", {})
429
+ if not isinstance(token_efficiency, dict) or token_efficiency.get("status") != "observed":
430
+ lines.append("No token usage events were found in this window.")
431
+ return lines
432
+
433
+ usage = token_efficiency.get("usage", {})
434
+ if not isinstance(usage, dict):
435
+ lines.append("No token usage events were found in this window.")
436
+ return lines
437
+
438
+ input_tokens = int(usage.get("input_tokens", 0) or 0)
439
+ cached_tokens = int(usage.get("cached_input_tokens", 0) or 0)
440
+ uncached_tokens = int(usage.get("uncached_input_tokens", 0) or 0)
441
+ output_tokens = int(usage.get("output_tokens", 0) or 0)
442
+ reasoning_tokens = int(usage.get("reasoning_output_tokens", 0) or 0)
443
+ total_tokens = int(usage.get("total_tokens", 0) or 0)
444
+ cache_ratio = float(usage.get("cache_ratio", 0.0) or 0.0)
445
+ uncached_ratio = float(usage.get("uncached_ratio", 0.0) or 0.0)
446
+
447
+ lines.append("Plain English: cached input is repeated context Codex could reuse more cheaply. Uncached input is new context, and that is usually where the biggest savings are.")
448
+ lines.append("")
449
+ lines.append(
450
+ f"- Input: {_fmt_int(input_tokens)} "
451
+ f"({_fmt_int(cached_tokens)} cached, {_fmt_int(uncached_tokens)} uncached)"
452
+ )
453
+ lines.append(f"- Output: {_fmt_int(output_tokens)} ({_fmt_int(reasoning_tokens)} reasoning)")
454
+ lines.append(f"- Total: {_fmt_int(total_tokens)} across {_fmt_int(int(usage.get('events', 0) or 0))} token events")
455
+ lines.append(f"- Cache ratio: {cache_ratio:.1%}; uncached ratio: {uncached_ratio:.1%}")
456
+ lines.append(
457
+ f"- Per turn: {_fmt_float(usage.get('input_tokens_per_turn'))} input, "
458
+ f"{_fmt_float(usage.get('uncached_input_tokens_per_turn'))} uncached"
459
+ )
460
+
461
+ max_last_input = int(token_efficiency.get("max_last_input_tokens", 0) or 0)
462
+ max_last_uncached = int(token_efficiency.get("max_last_uncached_input_tokens", 0) or 0)
463
+ context_window = int(token_efficiency.get("max_model_context_window", 0) or 0)
464
+ if max_last_input:
465
+ context_note = f" of {_fmt_int(context_window)}" if context_window else ""
466
+ lines.append(
467
+ f"- Largest step: {_fmt_int(max_last_input)} input tokens{context_note}; "
468
+ f"{_fmt_int(max_last_uncached)} uncached"
469
+ )
470
+
471
+ token_suggestions = build_token_suggestions(facts)
472
+ if token_suggestions:
473
+ lines.extend(["", "Token-saving moves:"])
474
+ for item in token_suggestions[: 5 if expert else 3]:
475
+ lines.append(f"- [{item['confidence']}] {item['title']}: {item['body']}")
476
+ if item.get("suggested_text"):
477
+ lines.extend(
478
+ [
479
+ f" Paste into: {item.get('paste_target', 'instructions')}",
480
+ "",
481
+ "```md",
482
+ str(item["suggested_text"]),
483
+ "```",
484
+ "",
485
+ ]
486
+ )
487
+ else:
488
+ lines.extend(
489
+ [
490
+ "",
491
+ "Token-saving moves:",
492
+ "- No strong token-efficiency warning stood out. Keep routing routine work to cheaper models and reserve high effort for judgment-heavy turns.",
493
+ ]
494
+ )
495
+
496
+ if expert:
497
+ capsules = [item for item in facts.get("project_capsules", []) if isinstance(item, dict)]
498
+ token_capsules = [item for item in capsules if (item.get("token_usage") or {}).get("input_tokens")]
499
+ if token_capsules:
500
+ token_capsules.sort(key=lambda item: int((item.get("token_usage") or {}).get("input_tokens", 0)), reverse=True)
501
+ lines.extend(["", "Top token projects:"])
502
+ for item in token_capsules[:5]:
503
+ token_usage = item.get("token_usage") or {}
504
+ lines.append(
505
+ f"- `{item.get('project')}`: {_fmt_int(int(token_usage.get('input_tokens', 0) or 0))} input, "
506
+ f"{_fmt_int(int(token_usage.get('uncached_input_tokens', 0) or 0))} uncached, "
507
+ f"{token_usage.get('cache_ratio', 0):.1%} cached"
508
+ )
509
+
510
+ return lines
511
+
512
+
304
513
  def _instruction_playbook_lines(instruction_audit: dict[str, Any], *, expert: bool) -> list[str]:
305
514
  lines = ["", "## Instruction Playbook", ""]
306
515
  if not isinstance(instruction_audit, dict) or not instruction_audit:
307
516
  lines.append("No instruction audit was generated.")
308
517
  return lines
309
518
 
519
+ lines.append("Plain English: this checks whether your global custom instructions and project `AGENTS.md` files are helping Codex, getting stale, or leaking project-specific rules into every repo.")
520
+ lines.append("")
310
521
  lines.append(f"- Status: {instruction_audit.get('status', 'unknown')}")
311
522
  lines.append(f"- Files reviewed: {instruction_audit.get('files_reviewed', 0)}")
312
523
  lines.append(f"- Findings: {len(instruction_audit.get('findings', []))}")
@@ -325,9 +536,13 @@ def _instruction_playbook_lines(instruction_audit: dict[str, Any], *, expert: bo
325
536
 
326
537
  suggestions = [item for item in instruction_audit.get("suggestions", []) if isinstance(item, dict)]
327
538
  if suggestions:
328
- lines.extend(["", "Suggested playbook changes:"])
539
+ lines.extend(["", "Suggested playbook changes with pasteable examples:"])
329
540
  for item in suggestions[: 8 if expert else 4]:
330
541
  lines.append(f"- [{item.get('confidence', 'medium')}] {item.get('title')}: {item.get('body')}")
542
+ lines.append(f" Paste into: `{item.get('target', 'instruction file')}`")
543
+ suggested_text = str(item.get("suggested_text") or "").strip()
544
+ if suggested_text:
545
+ lines.extend(["", "```md", suggested_text, "```", ""])
331
546
 
332
547
  if expert:
333
548
  files = [item for item in instruction_audit.get("files", []) if isinstance(item, dict)]
@@ -343,6 +558,8 @@ def _instruction_playbook_lines(instruction_audit: dict[str, Any], *, expert: bo
343
558
 
344
559
 
345
560
  def _render_suggestion_patch(suggestion: dict[str, str]) -> str:
561
+ paste_target = suggestion.get("paste_target", "custom instructions or project AGENTS.md")
562
+ suggested_text = suggestion.get("suggested_text") or f"- {suggestion['body']}"
346
563
  return "\n".join(
347
564
  [
348
565
  f"# Suggested Codex Instruction Change: {suggestion['title']}",
@@ -357,8 +574,10 @@ def _render_suggestion_patch(suggestion: dict[str, str]) -> str:
357
574
  "",
358
575
  "## Suggested Text",
359
576
  "",
577
+ f"Paste into: {paste_target}",
578
+ "",
360
579
  "```md",
361
- f"- {suggestion['body']}",
580
+ suggested_text,
362
581
  "```",
363
582
  "",
364
583
  "## Rollback",
@@ -389,6 +608,8 @@ def _render_instruction_suggestion_patch(suggestion: dict[str, Any]) -> str:
389
608
  "",
390
609
  "## Suggested Text",
391
610
  "",
611
+ f"Paste into: {suggestion.get('target', 'instruction file')}",
612
+ "",
392
613
  "```md",
393
614
  suggested_text,
394
615
  "```",
@@ -407,3 +628,14 @@ def _confidence(value: float, *, high: float, medium: float) -> str:
407
628
  if value >= medium:
408
629
  return "medium"
409
630
  return "low"
631
+
632
+
633
+ def _fmt_int(value: int) -> str:
634
+ return f"{int(value):,}"
635
+
636
+
637
+ def _fmt_float(value: Any) -> str:
638
+ try:
639
+ return f"{float(value):,.1f}"
640
+ except (TypeError, ValueError):
641
+ return "0.0"