codex-coach 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codex-plugin/plugin.json +1 -1
- package/README.md +1 -0
- package/package.json +1 -1
- package/pyproject.toml +1 -1
- package/src/codex_coach/__init__.py +1 -1
- package/src/codex_coach/cli.py +4 -1
- package/src/codex_coach/install.py +22 -14
- package/src/codex_coach/parser.py +82 -2
- package/src/codex_coach/reports.py +235 -3
package/README.md
CHANGED
|
@@ -82,6 +82,7 @@ Suggest custom instruction changes
|
|
|
82
82
|
- Project capsules: redacted per-project workflow summaries with suggested local instructions.
|
|
83
83
|
- Prompt rewrites: safe templates for vague prompts without storing full prompt text.
|
|
84
84
|
- Confidence-scored suggestions: low, medium, or high confidence improvement notes.
|
|
85
|
+
- Token efficiency: cached vs uncached input, output, routing, and context-budget recommendations.
|
|
85
86
|
- Beginner and expert report modes.
|
|
86
87
|
- Skill opportunities: repeated workflow patterns that may deserve a reusable Codex skill.
|
|
87
88
|
- Real-time prompt linting through `codex-coach lint-prompt`.
|
package/package.json
CHANGED
package/pyproject.toml
CHANGED
package/src/codex_coach/cli.py
CHANGED
|
@@ -166,8 +166,11 @@ def _doctor(paths) -> int:
|
|
|
166
166
|
print(f"command: {command} {'OK' if command.exists() else 'not installed'}")
|
|
167
167
|
plugin = paths.home / "plugins" / "codex-coach" / ".codex-plugin" / "plugin.json"
|
|
168
168
|
print(f"plugin: {plugin} {'OK' if plugin.exists() else 'not installed'}")
|
|
169
|
-
skill = paths.
|
|
169
|
+
skill = paths.codex_home / "skills" / "codex-coach" / "SKILL.md"
|
|
170
|
+
legacy_skill = paths.home / ".agents" / "skills" / "codex-coach" / "SKILL.md"
|
|
170
171
|
print(f"skill: {skill} {'OK' if skill.exists() else 'not installed'}")
|
|
172
|
+
if legacy_skill.exists():
|
|
173
|
+
print(f"legacy_skill: {legacy_skill} duplicate")
|
|
171
174
|
return 0 if paths.codex_home.exists() else 1
|
|
172
175
|
|
|
173
176
|
|
|
@@ -19,7 +19,7 @@ def install_from_source(source_root: Path, paths: CoachPaths, *, schedule: str =
|
|
|
19
19
|
_copy_app(source_root, paths.app_dir)
|
|
20
20
|
_install_command(paths)
|
|
21
21
|
plugin_path = _install_plugin(source_root, paths.home)
|
|
22
|
-
skill_paths = _install_user_skills(source_root, paths
|
|
22
|
+
skill_paths = _install_user_skills(source_root, paths)
|
|
23
23
|
marketplace = _update_marketplace(paths.home, plugin_path)
|
|
24
24
|
scheduler = _write_scheduler(paths, schedule=schedule)
|
|
25
25
|
return {
|
|
@@ -90,20 +90,28 @@ def _install_plugin(source_root: Path, home: Path) -> Path:
|
|
|
90
90
|
return plugin_root
|
|
91
91
|
|
|
92
92
|
|
|
93
|
-
def _install_user_skills(source_root: Path,
|
|
93
|
+
def _install_user_skills(source_root: Path, paths: CoachPaths) -> list[Path]:
    """Install the bundled skill under the Codex home and return its location.

    The skill directory is read from ``source_root / "skills" / PLUGIN_NAME``
    and copied to ``paths.codex_home / "skills" / PLUGIN_NAME``. A previous
    copy at the destination is deleted first so the install is a clean
    replacement. Cleanup of the legacy duplicate is delegated to
    ``_remove_legacy_duplicate_skill``.

    Returns:
        A one-element list holding the installed skill directory.
    """
    destination = paths.codex_home / "skills" / PLUGIN_NAME

    # Replace rather than merge: stale files from an older version must not survive.
    if destination.exists():
        shutil.rmtree(destination)

    destination.parent.mkdir(parents=True, exist_ok=True)
    shutil.copytree(source_root / "skills" / PLUGIN_NAME, destination)

    _remove_legacy_duplicate_skill(paths.home, destination)
    return [destination]
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _remove_legacy_duplicate_skill(home: Path, canonical_target: Path) -> None:
    """Delete the legacy ``.agents/skills`` copy of this skill when it is ours.

    This is best-effort cleanup. The legacy directory is only removed when it
    exists, does not resolve to the canonical install location, and its
    ``SKILL.md`` contains the ``name: <PLUGIN_NAME>`` marker. Anything that
    cannot be positively identified is left untouched.

    Args:
        home: The user's home directory that may contain ``.agents/skills``.
        canonical_target: The skill directory that was just installed.
    """
    legacy_target = home / ".agents" / "skills" / PLUGIN_NAME
    # Equal resolved paths mean the legacy path is the canonical install
    # (for example via a symlink); removing it would delete the fresh copy.
    if legacy_target.resolve() == canonical_target.resolve() or not legacy_target.exists():
        return
    skill_file = legacy_target / "SKILL.md"
    try:
        skill_text = skill_file.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # Missing, unreadable, or not valid UTF-8: we cannot prove the
        # directory is ours, so leave it alone. UnicodeDecodeError is a
        # ValueError, not an OSError, and must be listed explicitly.
        return
    if f"name: {PLUGIN_NAME}" in skill_text:
        shutil.rmtree(legacy_target)
|
|
107
115
|
|
|
108
116
|
|
|
109
117
|
def _update_marketplace(home: Path, plugin_path: Path) -> Path:
|
|
@@ -58,9 +58,14 @@ class ScanAccumulator:
|
|
|
58
58
|
project_efforts: dict[str, Counter] = field(default_factory=lambda: defaultdict(Counter))
|
|
59
59
|
project_verification_tools: dict[str, Counter] = field(default_factory=lambda: defaultdict(Counter))
|
|
60
60
|
project_prompt_scores: dict[str, list[int]] = field(default_factory=lambda: defaultdict(list))
|
|
61
|
+
project_token_totals: dict[str, Counter] = field(default_factory=lambda: defaultdict(Counter))
|
|
61
62
|
prompt_scores: list[dict[str, Any]] = field(default_factory=list)
|
|
62
63
|
error_counts: Counter = field(default_factory=Counter)
|
|
63
64
|
verification_tools: Counter = field(default_factory=Counter)
|
|
65
|
+
token_totals: Counter = field(default_factory=Counter)
|
|
66
|
+
max_model_context_window: int = 0
|
|
67
|
+
max_last_input_tokens: int = 0
|
|
68
|
+
max_last_uncached_input_tokens: int = 0
|
|
64
69
|
compacted_sessions: set[str] = field(default_factory=set)
|
|
65
70
|
current_file_session: dict[str, str] = field(default_factory=dict)
|
|
66
71
|
|
|
@@ -90,6 +95,7 @@ class ScanAccumulator:
|
|
|
90
95
|
self.project_efforts[cwd],
|
|
91
96
|
self.project_prompt_scores[cwd],
|
|
92
97
|
verification_tool_calls,
|
|
98
|
+
self.project_token_totals[cwd],
|
|
93
99
|
)
|
|
94
100
|
)
|
|
95
101
|
projects.sort(key=lambda item: (item["user_messages"], item["tool_calls"], item["turns"]), reverse=True)
|
|
@@ -126,6 +132,13 @@ class ScanAccumulator:
|
|
|
126
132
|
"tools": dict(self.tool_counts.most_common()),
|
|
127
133
|
"verification_tools": dict(self.verification_tools.most_common()),
|
|
128
134
|
"errors": dict(self.error_counts.most_common()),
|
|
135
|
+
"token_efficiency": {
|
|
136
|
+
"status": "observed" if self.token_totals["token_count_events"] else "not_available",
|
|
137
|
+
"usage": _token_summary(self.token_totals, turns=self.totals["turns"]),
|
|
138
|
+
"max_model_context_window": self.max_model_context_window,
|
|
139
|
+
"max_last_input_tokens": self.max_last_input_tokens,
|
|
140
|
+
"max_last_uncached_input_tokens": self.max_last_uncached_input_tokens,
|
|
141
|
+
},
|
|
129
142
|
"prompt_quality": {
|
|
130
143
|
"average_score": avg_prompt,
|
|
131
144
|
"categories": dict(prompt_categories),
|
|
@@ -203,7 +216,7 @@ def _scan_file(path: Path, acc: ScanAccumulator, *, since_dt) -> None:
|
|
|
203
216
|
_handle_response_item(payload, acc, current_cwd)
|
|
204
217
|
continue
|
|
205
218
|
if event_type == "event_msg":
|
|
206
|
-
_handle_event_msg(payload, acc)
|
|
219
|
+
_handle_event_msg(payload, acc, current_cwd)
|
|
207
220
|
continue
|
|
208
221
|
if event_type == "compacted":
|
|
209
222
|
acc.totals["compactions"] += 1
|
|
@@ -305,7 +318,9 @@ def _handle_response_item(payload: dict[str, Any], acc: ScanAccumulator, cwd: st
|
|
|
305
318
|
acc.totals["reasoning_items"] += 1
|
|
306
319
|
|
|
307
320
|
|
|
308
|
-
def _handle_event_msg(payload: dict[str, Any], acc: ScanAccumulator) -> None:
|
|
321
|
+
def _handle_event_msg(payload: dict[str, Any], acc: ScanAccumulator, cwd: str) -> None:
    """Process one ``event_msg`` payload: record token usage and scan for errors.

    Payloads whose ``type`` is ``"token_count"`` are first passed to
    ``_handle_token_count``. Every payload, token events included, then has
    its ``message`` (or ``text``, or the payload itself as a fallback)
    flattened to text and scanned by ``_count_errors``.
    """
    is_token_event = payload.get("type") == "token_count"
    if is_token_event:
        _handle_token_count(payload, acc, cwd)

    # NOTE(review): token_count payloads also reach the error scan below,
    # where the whole payload is the fallback text. Confirm this is intended.
    raw_message = payload.get("message") or payload.get("text") or payload
    _count_errors(_content_text(raw_message), acc)
|
|
311
326
|
|
|
@@ -317,6 +332,7 @@ def _project_capsule(
|
|
|
317
332
|
efforts: Counter,
|
|
318
333
|
prompt_scores: list[int],
|
|
319
334
|
verification_tool_calls: int,
|
|
335
|
+
token_totals: Counter,
|
|
320
336
|
) -> dict[str, Any]:
|
|
321
337
|
prompt_average = round(sum(prompt_scores) / len(prompt_scores), 2) if prompt_scores else 0.0
|
|
322
338
|
workflow = _infer_workflow(tools, counts)
|
|
@@ -332,6 +348,7 @@ def _project_capsule(
|
|
|
332
348
|
"prompt_quality_average": prompt_average,
|
|
333
349
|
"top_tools": dict(tools.most_common(5)),
|
|
334
350
|
"effort_mix": dict(efforts.most_common()),
|
|
351
|
+
"token_usage": _token_summary(token_totals, turns=counts["turns"]),
|
|
335
352
|
"likely_workflow": workflow,
|
|
336
353
|
"recommended_instruction": instruction,
|
|
337
354
|
"skill_candidate": counts["turns"] >= 3 or counts["tool_calls"] >= 10,
|
|
@@ -382,6 +399,69 @@ def _content_text(value: Any) -> str:
|
|
|
382
399
|
return str(value)
|
|
383
400
|
|
|
384
401
|
|
|
402
|
+
def _handle_token_count(payload: dict[str, Any], acc: ScanAccumulator, cwd: str) -> None:
    """Fold one ``token_count`` event into the global and per-project totals.

    The event is ignored unless ``payload["info"]`` is a dict that carries a
    dict under ``"last_token_usage"``. Token fields are sanitised through
    ``_int_token``. Uncached input is derived as input minus cached input,
    floored at zero.

    Args:
        payload: The event payload.
        acc: Accumulator whose counters and running maxima are updated in place.
        cwd: Project key used for the per-project counter.
    """
    info = payload.get("info")
    last_usage = info.get("last_token_usage") if isinstance(info, dict) else None
    if not isinstance(last_usage, dict):
        return

    step_input = _int_token(last_usage.get("input_tokens"))
    step_cached = _int_token(last_usage.get("cached_input_tokens"))
    step_uncached = max(0, step_input - step_cached)

    increments = {
        "token_count_events": 1,
        "input_tokens": step_input,
        "cached_input_tokens": step_cached,
        "uncached_input_tokens": step_uncached,
        "output_tokens": _int_token(last_usage.get("output_tokens")),
        "reasoning_output_tokens": _int_token(last_usage.get("reasoning_output_tokens")),
        "total_tokens": _int_token(last_usage.get("total_tokens")),
    }
    # Counter.update adds to existing counts, so this accumulates per event.
    acc.token_totals.update(increments)
    acc.project_token_totals[cwd].update(increments)

    # Track the largest single step seen, plus the largest reported context window.
    if step_input > acc.max_last_input_tokens:
        acc.max_last_input_tokens = step_input
    if step_uncached > acc.max_last_uncached_input_tokens:
        acc.max_last_uncached_input_tokens = step_uncached
    window = _int_token(info.get("model_context_window"))
    if window > acc.max_model_context_window:
        acc.max_model_context_window = window
|
|
433
|
+
|
|
434
|
+
|
|
435
|
+
def _token_summary(tokens: Counter, *, turns: int) -> dict[str, Any]:
|
|
436
|
+
input_tokens = int(tokens["input_tokens"])
|
|
437
|
+
cached_input_tokens = int(tokens["cached_input_tokens"])
|
|
438
|
+
uncached_input_tokens = int(tokens["uncached_input_tokens"])
|
|
439
|
+
output_tokens = int(tokens["output_tokens"])
|
|
440
|
+
total_tokens = int(tokens["total_tokens"])
|
|
441
|
+
turn_count = max(1, int(turns or 0))
|
|
442
|
+
return {
|
|
443
|
+
"events": int(tokens["token_count_events"]),
|
|
444
|
+
"input_tokens": input_tokens,
|
|
445
|
+
"cached_input_tokens": cached_input_tokens,
|
|
446
|
+
"uncached_input_tokens": uncached_input_tokens,
|
|
447
|
+
"output_tokens": output_tokens,
|
|
448
|
+
"reasoning_output_tokens": int(tokens["reasoning_output_tokens"]),
|
|
449
|
+
"total_tokens": total_tokens,
|
|
450
|
+
"cache_ratio": round(cached_input_tokens / input_tokens, 3) if input_tokens else 0.0,
|
|
451
|
+
"uncached_ratio": round(uncached_input_tokens / input_tokens, 3) if input_tokens else 0.0,
|
|
452
|
+
"input_tokens_per_turn": round(input_tokens / turn_count, 1) if input_tokens else 0.0,
|
|
453
|
+
"uncached_input_tokens_per_turn": round(uncached_input_tokens / turn_count, 1) if uncached_input_tokens else 0.0,
|
|
454
|
+
"output_tokens_per_turn": round(output_tokens / turn_count, 1) if output_tokens else 0.0,
|
|
455
|
+
}
|
|
456
|
+
|
|
457
|
+
|
|
458
|
+
def _int_token(value: Any) -> int:
|
|
459
|
+
try:
|
|
460
|
+
return max(0, int(value or 0))
|
|
461
|
+
except (TypeError, ValueError):
|
|
462
|
+
return 0
|
|
463
|
+
|
|
464
|
+
|
|
385
465
|
def _looks_like_verification(tool_name: str, arguments: str) -> bool:
|
|
386
466
|
text = f"{tool_name} {arguments}".lower()
|
|
387
467
|
return any(marker in text for marker in VERIFY_RE)
|
|
@@ -41,11 +41,16 @@ def render_markdown_report(
|
|
|
41
41
|
f"- Compactions: {totals.get('compactions', 0)}",
|
|
42
42
|
f"- Prompt quality average: {prompt_quality.get('average_score', 0)}/10",
|
|
43
43
|
"",
|
|
44
|
+
"Plain English: this is a private local report about how Codex was used, where the sessions got expensive or repetitive, and what small instruction changes may improve the next run.",
|
|
45
|
+
"",
|
|
44
46
|
"## Top Coaching Notes",
|
|
45
47
|
"",
|
|
46
48
|
]
|
|
47
49
|
lines.extend(_coaching_notes(suggestions, limit=5 if expert else 3))
|
|
50
|
+
lines.extend(_token_efficiency_lines(facts, expert=expert))
|
|
48
51
|
lines.extend(["", "## Project Mix", ""])
|
|
52
|
+
lines.append("Plain English: these are the projects where Codex spent the most work in this window. High tool-call counts usually mean implementation, diagnosis, or verification-heavy sessions.")
|
|
53
|
+
lines.append("")
|
|
49
54
|
projects = facts.get("projects", [])[:8]
|
|
50
55
|
if projects:
|
|
51
56
|
lines.append("| Project | Sessions | Turns | User Messages | Tool Calls | Verification |")
|
|
@@ -60,6 +65,8 @@ def render_markdown_report(
|
|
|
60
65
|
lines.append("No project activity found.")
|
|
61
66
|
|
|
62
67
|
lines.extend(["", "## Project Capsules", ""])
|
|
68
|
+
lines.append("Plain English: a project capsule is a tiny memory card for a repo. Add one to that repo's `AGENTS.md` when Codex keeps needing to rediscover the same workflow.")
|
|
69
|
+
lines.append("")
|
|
63
70
|
capsules = facts.get("project_capsules", [])[:5]
|
|
64
71
|
if capsules:
|
|
65
72
|
for capsule in capsules:
|
|
@@ -70,6 +77,8 @@ def render_markdown_report(
|
|
|
70
77
|
lines.extend(_instruction_playbook_lines(facts.get("instruction_audit", {}), expert=expert))
|
|
71
78
|
|
|
72
79
|
lines.extend(["", "## Prompt Quality", ""])
|
|
80
|
+
lines.append("Plain English: short prompts are fine when the context is obvious. When Codex guesses wrong, add the target, symptom, and success state.")
|
|
81
|
+
lines.append("")
|
|
73
82
|
categories = prompt_quality.get("categories", {})
|
|
74
83
|
if categories:
|
|
75
84
|
for name in ("excellent", "good", "needs_work"):
|
|
@@ -88,8 +97,10 @@ def render_markdown_report(
|
|
|
88
97
|
)
|
|
89
98
|
|
|
90
99
|
lines.extend(["", "## Suggested Improvements", ""])
|
|
100
|
+
lines.append("Review these before pasting anything. Use global custom instructions for personal habits that should apply everywhere; use a project `AGENTS.md` for repo-specific commands, stack rules, or verification steps.")
|
|
101
|
+
lines.append("")
|
|
91
102
|
for suggestion in suggestions:
|
|
92
|
-
lines.
|
|
103
|
+
lines.extend(_suggestion_lines(suggestion))
|
|
93
104
|
|
|
94
105
|
skill_opportunities = build_skill_opportunities(facts)
|
|
95
106
|
if skill_opportunities:
|
|
@@ -141,6 +152,8 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
|
|
|
141
152
|
"title": "Right-size reasoning effort",
|
|
142
153
|
"confidence": _confidence(ratio, high=0.75, medium=0.6),
|
|
143
154
|
"body": "High reasoning dominates recent turns. Default simple status, search, and small edit tasks to medium; reserve high/xhigh for ambiguous debugging, architecture, security, or broad refactors.",
|
|
155
|
+
"paste_target": "Global custom instructions",
|
|
156
|
+
"suggested_text": "Use medium effort for routine status checks, targeted searches, formatting, small edits, and deterministic reports. Escalate to high or xhigh only for ambiguous debugging, architecture decisions, security review, broad refactors, or production-risk changes.",
|
|
144
157
|
}
|
|
145
158
|
)
|
|
146
159
|
|
|
@@ -154,6 +167,8 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
|
|
|
154
167
|
"title": "Verify before calling work done",
|
|
155
168
|
"confidence": "high" if ratio < 0.08 and tool_calls >= 20 else "medium",
|
|
156
169
|
"body": "Verification commands are a small share of tool use. Ask Codex to run the smallest meaningful test, build, lint, browser check, or runtime probe before final status.",
|
|
170
|
+
"paste_target": "Project AGENTS.md",
|
|
171
|
+
"suggested_text": "Before final status, run the smallest meaningful verification for the change: a focused test, build, lint/typecheck, browser check, or runtime probe. If verification cannot run, say exactly why and what risk remains.",
|
|
157
172
|
}
|
|
158
173
|
)
|
|
159
174
|
|
|
@@ -165,6 +180,8 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
|
|
|
165
180
|
"title": "Checkpoint long runs",
|
|
166
181
|
"confidence": "high" if compactions >= 3 else "medium",
|
|
167
182
|
"body": "Compactions appeared in the window. For long tasks, ask Codex to keep a small task ledger and validate durable files before resuming.",
|
|
183
|
+
"paste_target": "Global custom instructions or project AGENTS.md",
|
|
184
|
+
"suggested_text": "For long tasks, keep a short task ledger with completed, in-progress, and pending steps. After compaction or interruption, verify the current file state and last successful command before continuing.",
|
|
168
185
|
}
|
|
169
186
|
)
|
|
170
187
|
|
|
@@ -180,6 +197,8 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
|
|
|
180
197
|
"title": "Tighten ambiguous prompts",
|
|
181
198
|
"confidence": _confidence(ratio, high=0.15, medium=0.08),
|
|
182
199
|
"body": "A noticeable share of prompts are too short to identify the target. Include action, file/project, symptom, and success state when context is not obvious.",
|
|
200
|
+
"paste_target": "Global custom instructions",
|
|
201
|
+
"suggested_text": "When my prompt is vague, infer the likely task from the current repo and recent context. If the target or success state is still unclear, ask one concise question before making broad changes.",
|
|
183
202
|
}
|
|
184
203
|
)
|
|
185
204
|
|
|
@@ -191,6 +210,8 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
|
|
|
191
210
|
"title": "Use project capsules",
|
|
192
211
|
"confidence": "high" if len(projects) >= 6 else "medium",
|
|
193
212
|
"body": "Recent work spans several projects. Keep a short per-project AGENTS or context note so Codex does not rebuild project intent every time.",
|
|
213
|
+
"paste_target": "Each active project's AGENTS.md",
|
|
214
|
+
"suggested_text": "## Project Capsule\n- Purpose: <what this repo is for>\n- Stack: <main frameworks, runtime, package manager>\n- Entry points: <key files or commands>\n- Verify: <smallest reliable test/build/check>\n- Avoid: <repo-specific traps or risky commands>",
|
|
194
215
|
}
|
|
195
216
|
)
|
|
196
217
|
|
|
@@ -201,9 +222,13 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
|
|
|
201
222
|
"title": "Turn repeated workflows into skills",
|
|
202
223
|
"confidence": "medium",
|
|
203
224
|
"body": "At least one project shows repeated tool patterns. Consider a small user skill with the workflow steps, validation commands, and resume rules.",
|
|
225
|
+
"paste_target": "A Codex skill `SKILL.md` or project AGENTS.md",
|
|
226
|
+
"suggested_text": "Use this workflow when <trigger>. First read <specific files>. Then perform <steps>. Verify with <commands>. If interrupted, resume by checking <durable artifact or command output>.",
|
|
204
227
|
}
|
|
205
228
|
)
|
|
206
229
|
|
|
230
|
+
suggestions.extend(build_token_suggestions(facts))
|
|
231
|
+
|
|
207
232
|
instruction_audit = facts.get("instruction_audit", {})
|
|
208
233
|
instruction_findings = instruction_audit.get("findings", []) if isinstance(instruction_audit, dict) else []
|
|
209
234
|
if instruction_findings:
|
|
@@ -214,6 +239,8 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
|
|
|
214
239
|
"title": "Review instruction playbook",
|
|
215
240
|
"confidence": "high" if high else "medium",
|
|
216
241
|
"body": "Instruction files have review findings. Check for stale mode locks, project-specific global rules, missing AGENTS.md coverage, or secrets before changing user instructions.",
|
|
242
|
+
"paste_target": "Instruction review checklist",
|
|
243
|
+
"suggested_text": "Keep global instructions limited to durable personal preferences. Move repo-specific stack, commands, UI style, and deployment rules into that repo's AGENTS.md. Never store tokens, passwords, or API keys in instruction files.",
|
|
217
244
|
}
|
|
218
245
|
)
|
|
219
246
|
|
|
@@ -224,11 +251,83 @@ def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
|
|
|
224
251
|
"title": "Keep the current loop",
|
|
225
252
|
"confidence": "medium",
|
|
226
253
|
"body": "No strong coaching warnings stood out. Keep using explicit success states and ask for verification on user-facing or production-sensitive work.",
|
|
254
|
+
"paste_target": "Global custom instructions",
|
|
255
|
+
"suggested_text": "For user-facing or production-sensitive changes, finish with a short verification note that names the command or check that passed and any remaining risk.",
|
|
227
256
|
}
|
|
228
257
|
)
|
|
229
258
|
return suggestions
|
|
230
259
|
|
|
231
260
|
|
|
261
|
+
def build_token_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
    """Derive token-efficiency coaching suggestions from scan facts.

    Args:
        facts: Scan facts. Reads ``token_efficiency`` (with ``status`` and
            ``usage``), ``totals["turns"]`` and ``efforts``.

    Returns:
        Zero or more suggestion dicts with ``id``, ``title``, ``confidence``,
        ``body``, ``paste_target`` and ``suggested_text`` keys. The list is
        empty when no token usage was observed or the facts are malformed.
    """
    token_efficiency = facts.get("token_efficiency", {})
    if not isinstance(token_efficiency, dict) or token_efficiency.get("status") != "observed":
        return []

    usage = token_efficiency.get("usage", {})
    if not isinstance(usage, dict):
        return []

    suggestions: list[dict[str, str]] = []
    input_tokens = int(usage.get("input_tokens", 0) or 0)
    uncached_tokens = int(usage.get("uncached_input_tokens", 0) or 0)
    output_tokens = int(usage.get("output_tokens", 0) or 0)
    cache_ratio = float(usage.get("cache_ratio", 0.0) or 0.0)
    uncached_per_turn = float(usage.get("uncached_input_tokens_per_turn", 0.0) or 0.0)
    # Floor at one so the high-effort share below never divides by zero.
    turns = max(1, int((facts.get("totals") or {}).get("turns", 0) or 0))
    efforts = facts.get("efforts", {})
    if not isinstance(efforts, dict):
        # Guard like the other inputs: a missing or null effort mix means "no data".
        efforts = {}
    high_effort = sum(int(efforts.get(name, 0) or 0) for name in ("high", "xhigh"))

    # Large, mostly-cached input: the same context is being replayed repeatedly.
    if input_tokens >= 100_000 and cache_ratio >= 0.75:
        suggestions.append(
            {
                "id": "use-compact-context-artifacts",
                "title": "Use compact context artifacts",
                "confidence": _confidence(cache_ratio, high=0.85, medium=0.75),
                "body": "Most input is repeated cached context. Keep short project capsules, latest facts, and resume notes so routine coaching can start from compact artifacts instead of full history.",
                "paste_target": "Global custom instructions",
                "suggested_text": "Before re-reading a large repo or long history, first check existing summaries, reports, AGENTS.md, and recent task notes. Use those compact artifacts to choose the smallest next context to inspect.",
            }
        )

    # Heavy uncached input, in absolute terms or per turn.
    if uncached_tokens >= 50_000 or uncached_per_turn >= 12_000:
        suggestions.append(
            {
                "id": "cap-uncached-context",
                "title": "Cap uncached context",
                "confidence": "high" if uncached_per_turn >= 20_000 else "medium",
                "body": "Uncached input is the expensive part. Ask Codex to read one likely file first, summarize before widening, and prefer targeted searches over broad file dumps.",
                "paste_target": "Global custom instructions",
                "suggested_text": "Before broad exploration, identify the likely bottleneck and inspect the one most relevant file or targeted search result first. Widen only after explaining what is still unknown.",
            }
        )

    # High/xhigh effort used on at least a quarter of turns.
    if high_effort and high_effort / turns >= 0.25:
        suggestions.append(
            {
                "id": "route-routine-work-to-mini",
                "title": "Route routine work to mini or medium",
                "confidence": "high" if high_effort / turns >= 0.5 else "medium",
                "body": "High effort appears often enough to merit routing. Use mini/medium for scan, report, grep, formatting, and deterministic edits; escalate only for ambiguous debugging, architecture, security, and risky decisions.",
                "paste_target": "Global custom instructions",
                "suggested_text": "Prefer cheaper routine routing: use mini or medium reasoning for scanning, reports, greps, formatting, and deterministic small edits. Escalate only when the task needs judgment, tradeoff analysis, or high-risk debugging.",
            }
        )

    # Output is both large and a noticeable share of input.
    if output_tokens >= 20_000 and input_tokens and output_tokens / input_tokens >= 0.08:
        suggestions.append(
            {
                "id": "request-concise-outputs",
                "title": "Request concise outputs",
                "confidence": "medium",
                "body": "Output tokens are a visible part of spend. Ask for summaries first and detailed evidence only when deciding or reviewing.",
                "paste_target": "Global custom instructions",
                "suggested_text": "Default to concise final answers: say what changed, how it was verified, and any risk. Include detailed logs or long evidence only when asked or when needed for a decision.",
            }
        )

    return suggestions
|
|
329
|
+
|
|
330
|
+
|
|
232
331
|
def build_skill_opportunities(facts: dict[str, Any]) -> list[dict[str, str]]:
|
|
233
332
|
opportunities: list[dict[str, str]] = []
|
|
234
333
|
for capsule in facts.get("project_capsules", []):
|
|
@@ -301,12 +400,124 @@ def _coaching_notes(suggestions: list[dict[str, str]], *, limit: int) -> list[st
|
|
|
301
400
|
return [f"- [{item['confidence']}] {item['title']}: {item['body']}" for item in suggestions[:limit]]
|
|
302
401
|
|
|
303
402
|
|
|
403
|
+
def _suggestion_lines(suggestion: dict[str, Any]) -> list[str]:
|
|
404
|
+
lines = [
|
|
405
|
+
f"### {suggestion['title']}",
|
|
406
|
+
"",
|
|
407
|
+
f"- Confidence: {suggestion['confidence']}",
|
|
408
|
+
f"- Why: {suggestion['body']}",
|
|
409
|
+
]
|
|
410
|
+
paste_target = suggestion.get("paste_target")
|
|
411
|
+
suggested_text = suggestion.get("suggested_text")
|
|
412
|
+
if paste_target and suggested_text:
|
|
413
|
+
lines.extend(
|
|
414
|
+
[
|
|
415
|
+
f"- Paste into: {paste_target}",
|
|
416
|
+
"",
|
|
417
|
+
"```md",
|
|
418
|
+
str(suggested_text),
|
|
419
|
+
"```",
|
|
420
|
+
]
|
|
421
|
+
)
|
|
422
|
+
lines.append("")
|
|
423
|
+
return lines
|
|
424
|
+
|
|
425
|
+
|
|
426
|
+
def _token_efficiency_lines(facts: dict[str, Any], *, expert: bool) -> list[str]:
|
|
427
|
+
lines = ["", "## Token Efficiency", ""]
|
|
428
|
+
token_efficiency = facts.get("token_efficiency", {})
|
|
429
|
+
if not isinstance(token_efficiency, dict) or token_efficiency.get("status") != "observed":
|
|
430
|
+
lines.append("No token usage events were found in this window.")
|
|
431
|
+
return lines
|
|
432
|
+
|
|
433
|
+
usage = token_efficiency.get("usage", {})
|
|
434
|
+
if not isinstance(usage, dict):
|
|
435
|
+
lines.append("No token usage events were found in this window.")
|
|
436
|
+
return lines
|
|
437
|
+
|
|
438
|
+
input_tokens = int(usage.get("input_tokens", 0) or 0)
|
|
439
|
+
cached_tokens = int(usage.get("cached_input_tokens", 0) or 0)
|
|
440
|
+
uncached_tokens = int(usage.get("uncached_input_tokens", 0) or 0)
|
|
441
|
+
output_tokens = int(usage.get("output_tokens", 0) or 0)
|
|
442
|
+
reasoning_tokens = int(usage.get("reasoning_output_tokens", 0) or 0)
|
|
443
|
+
total_tokens = int(usage.get("total_tokens", 0) or 0)
|
|
444
|
+
cache_ratio = float(usage.get("cache_ratio", 0.0) or 0.0)
|
|
445
|
+
uncached_ratio = float(usage.get("uncached_ratio", 0.0) or 0.0)
|
|
446
|
+
|
|
447
|
+
lines.append("Plain English: cached input is repeated context Codex could reuse more cheaply. Uncached input is new context, and that is usually where the biggest savings are.")
|
|
448
|
+
lines.append("")
|
|
449
|
+
lines.append(
|
|
450
|
+
f"- Input: {_fmt_int(input_tokens)} "
|
|
451
|
+
f"({_fmt_int(cached_tokens)} cached, {_fmt_int(uncached_tokens)} uncached)"
|
|
452
|
+
)
|
|
453
|
+
lines.append(f"- Output: {_fmt_int(output_tokens)} ({_fmt_int(reasoning_tokens)} reasoning)")
|
|
454
|
+
lines.append(f"- Total: {_fmt_int(total_tokens)} across {_fmt_int(int(usage.get('events', 0) or 0))} token events")
|
|
455
|
+
lines.append(f"- Cache ratio: {cache_ratio:.1%}; uncached ratio: {uncached_ratio:.1%}")
|
|
456
|
+
lines.append(
|
|
457
|
+
f"- Per turn: {_fmt_float(usage.get('input_tokens_per_turn'))} input, "
|
|
458
|
+
f"{_fmt_float(usage.get('uncached_input_tokens_per_turn'))} uncached"
|
|
459
|
+
)
|
|
460
|
+
|
|
461
|
+
max_last_input = int(token_efficiency.get("max_last_input_tokens", 0) or 0)
|
|
462
|
+
max_last_uncached = int(token_efficiency.get("max_last_uncached_input_tokens", 0) or 0)
|
|
463
|
+
context_window = int(token_efficiency.get("max_model_context_window", 0) or 0)
|
|
464
|
+
if max_last_input:
|
|
465
|
+
context_note = f" of {_fmt_int(context_window)}" if context_window else ""
|
|
466
|
+
lines.append(
|
|
467
|
+
f"- Largest step: {_fmt_int(max_last_input)} input tokens{context_note}; "
|
|
468
|
+
f"{_fmt_int(max_last_uncached)} uncached"
|
|
469
|
+
)
|
|
470
|
+
|
|
471
|
+
token_suggestions = build_token_suggestions(facts)
|
|
472
|
+
if token_suggestions:
|
|
473
|
+
lines.extend(["", "Token-saving moves:"])
|
|
474
|
+
for item in token_suggestions[: 5 if expert else 3]:
|
|
475
|
+
lines.append(f"- [{item['confidence']}] {item['title']}: {item['body']}")
|
|
476
|
+
if item.get("suggested_text"):
|
|
477
|
+
lines.extend(
|
|
478
|
+
[
|
|
479
|
+
f" Paste into: {item.get('paste_target', 'instructions')}",
|
|
480
|
+
"",
|
|
481
|
+
"```md",
|
|
482
|
+
str(item["suggested_text"]),
|
|
483
|
+
"```",
|
|
484
|
+
"",
|
|
485
|
+
]
|
|
486
|
+
)
|
|
487
|
+
else:
|
|
488
|
+
lines.extend(
|
|
489
|
+
[
|
|
490
|
+
"",
|
|
491
|
+
"Token-saving moves:",
|
|
492
|
+
"- No strong token-efficiency warning stood out. Keep routing routine work to cheaper models and reserve high effort for judgment-heavy turns.",
|
|
493
|
+
]
|
|
494
|
+
)
|
|
495
|
+
|
|
496
|
+
if expert:
|
|
497
|
+
capsules = [item for item in facts.get("project_capsules", []) if isinstance(item, dict)]
|
|
498
|
+
token_capsules = [item for item in capsules if (item.get("token_usage") or {}).get("input_tokens")]
|
|
499
|
+
if token_capsules:
|
|
500
|
+
token_capsules.sort(key=lambda item: int((item.get("token_usage") or {}).get("input_tokens", 0)), reverse=True)
|
|
501
|
+
lines.extend(["", "Top token projects:"])
|
|
502
|
+
for item in token_capsules[:5]:
|
|
503
|
+
token_usage = item.get("token_usage") or {}
|
|
504
|
+
lines.append(
|
|
505
|
+
f"- `{item.get('project')}`: {_fmt_int(int(token_usage.get('input_tokens', 0) or 0))} input, "
|
|
506
|
+
f"{_fmt_int(int(token_usage.get('uncached_input_tokens', 0) or 0))} uncached, "
|
|
507
|
+
f"{token_usage.get('cache_ratio', 0):.1%} cached"
|
|
508
|
+
)
|
|
509
|
+
|
|
510
|
+
return lines
|
|
511
|
+
|
|
512
|
+
|
|
304
513
|
def _instruction_playbook_lines(instruction_audit: dict[str, Any], *, expert: bool) -> list[str]:
|
|
305
514
|
lines = ["", "## Instruction Playbook", ""]
|
|
306
515
|
if not isinstance(instruction_audit, dict) or not instruction_audit:
|
|
307
516
|
lines.append("No instruction audit was generated.")
|
|
308
517
|
return lines
|
|
309
518
|
|
|
519
|
+
lines.append("Plain English: this checks whether your global custom instructions and project `AGENTS.md` files are helping Codex, getting stale, or leaking project-specific rules into every repo.")
|
|
520
|
+
lines.append("")
|
|
310
521
|
lines.append(f"- Status: {instruction_audit.get('status', 'unknown')}")
|
|
311
522
|
lines.append(f"- Files reviewed: {instruction_audit.get('files_reviewed', 0)}")
|
|
312
523
|
lines.append(f"- Findings: {len(instruction_audit.get('findings', []))}")
|
|
@@ -325,9 +536,13 @@ def _instruction_playbook_lines(instruction_audit: dict[str, Any], *, expert: bo
|
|
|
325
536
|
|
|
326
537
|
suggestions = [item for item in instruction_audit.get("suggestions", []) if isinstance(item, dict)]
|
|
327
538
|
if suggestions:
|
|
328
|
-
lines.extend(["", "Suggested playbook changes:"])
|
|
539
|
+
lines.extend(["", "Suggested playbook changes with pasteable examples:"])
|
|
329
540
|
for item in suggestions[: 8 if expert else 4]:
|
|
330
541
|
lines.append(f"- [{item.get('confidence', 'medium')}] {item.get('title')}: {item.get('body')}")
|
|
542
|
+
lines.append(f" Paste into: `{item.get('target', 'instruction file')}`")
|
|
543
|
+
suggested_text = str(item.get("suggested_text") or "").strip()
|
|
544
|
+
if suggested_text:
|
|
545
|
+
lines.extend(["", "```md", suggested_text, "```", ""])
|
|
331
546
|
|
|
332
547
|
if expert:
|
|
333
548
|
files = [item for item in instruction_audit.get("files", []) if isinstance(item, dict)]
|
|
@@ -343,6 +558,8 @@ def _instruction_playbook_lines(instruction_audit: dict[str, Any], *, expert: bo
|
|
|
343
558
|
|
|
344
559
|
|
|
345
560
|
def _render_suggestion_patch(suggestion: dict[str, str]) -> str:
|
|
561
|
+
paste_target = suggestion.get("paste_target", "custom instructions or project AGENTS.md")
|
|
562
|
+
suggested_text = suggestion.get("suggested_text") or f"- {suggestion['body']}"
|
|
346
563
|
return "\n".join(
|
|
347
564
|
[
|
|
348
565
|
f"# Suggested Codex Instruction Change: {suggestion['title']}",
|
|
@@ -357,8 +574,10 @@ def _render_suggestion_patch(suggestion: dict[str, str]) -> str:
|
|
|
357
574
|
"",
|
|
358
575
|
"## Suggested Text",
|
|
359
576
|
"",
|
|
577
|
+
f"Paste into: {paste_target}",
|
|
578
|
+
"",
|
|
360
579
|
"```md",
|
|
361
|
-
|
|
580
|
+
suggested_text,
|
|
362
581
|
"```",
|
|
363
582
|
"",
|
|
364
583
|
"## Rollback",
|
|
@@ -389,6 +608,8 @@ def _render_instruction_suggestion_patch(suggestion: dict[str, Any]) -> str:
|
|
|
389
608
|
"",
|
|
390
609
|
"## Suggested Text",
|
|
391
610
|
"",
|
|
611
|
+
f"Paste into: {suggestion.get('target', 'instruction file')}",
|
|
612
|
+
"",
|
|
392
613
|
"```md",
|
|
393
614
|
suggested_text,
|
|
394
615
|
"```",
|
|
@@ -407,3 +628,14 @@ def _confidence(value: float, *, high: float, medium: float) -> str:
|
|
|
407
628
|
if value >= medium:
|
|
408
629
|
return "medium"
|
|
409
630
|
return "low"
|
|
631
|
+
|
|
632
|
+
|
|
633
|
+
def _fmt_int(value: int) -> str:
|
|
634
|
+
return f"{int(value):,}"
|
|
635
|
+
|
|
636
|
+
|
|
637
|
+
def _fmt_float(value: Any) -> str:
|
|
638
|
+
try:
|
|
639
|
+
return f"{float(value):,.1f}"
|
|
640
|
+
except (TypeError, ValueError):
|
|
641
|
+
return "0.0"
|