codex-coach 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codex-plugin/plugin.json +25 -0
- package/LICENSE +21 -0
- package/README.md +97 -0
- package/assets/brand/codex-coach-icon.png +0 -0
- package/assets/brand/codex-coach-icon.svg +49 -0
- package/assets/brand/codex-coach-logo.png +0 -0
- package/assets/brand/codex-coach-logo.svg +62 -0
- package/assets/examples/how-it-works.png +0 -0
- package/assets/examples/how-it-works.svg +71 -0
- package/assets/examples/project-capsules.png +0 -0
- package/assets/examples/project-capsules.svg +59 -0
- package/assets/examples/prompt-lint.png +0 -0
- package/assets/examples/prompt-lint.svg +54 -0
- package/bin/codex-coach.js +53 -0
- package/install.ps1 +18 -0
- package/install.sh +50 -0
- package/package.json +38 -0
- package/pyproject.toml +31 -0
- package/skills/codex-coach/SKILL.md +71 -0
- package/skills/codex-coach/agents/openai.yaml +8 -0
- package/skills/codex-coach/references/config-suggestions.md +35 -0
- package/skills/codex-coach/references/privacy.md +25 -0
- package/skills/codex-coach/references/prompt-rubric.md +21 -0
- package/src/codex_coach/__init__.py +3 -0
- package/src/codex_coach/cli.py +146 -0
- package/src/codex_coach/install.py +230 -0
- package/src/codex_coach/parser.py +395 -0
- package/src/codex_coach/paths.py +49 -0
- package/src/codex_coach/prompts.py +151 -0
- package/src/codex_coach/redaction.py +35 -0
- package/src/codex_coach/reports.py +284 -0
- package/src/codex_coach/timeutil.py +49 -0
|
@@ -0,0 +1,284 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from datetime import UTC, datetime
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def write_json_facts(facts: dict[str, Any], path: Path) -> None:
    """Serialize ``facts`` to ``path`` as indented, key-sorted JSON.

    Missing parent directories are created before writing. The file is
    written as UTF-8 and ends with a single trailing newline.

    Args:
        facts: JSON-serializable mapping to persist.
        path: Destination file; replaced if it already exists.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(facts, indent=2, sort_keys=True)
    path.write_text(f"{payload}\n", encoding="utf-8")
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def render_markdown_report(
    facts: dict[str, Any],
    *,
    generated_at: datetime | None = None,
    expert: bool = False,
    mode: str = "beginner",
) -> str:
    """Render the collected facts as a Markdown coaching report.

    Sections are emitted in a fixed order: header, summary, top coaching
    notes, project mix, project capsules, prompt quality, suggested
    improvements, optional skill opportunities, optional expert metrics,
    and a privacy footer.

    Args:
        facts: Mapping of collected facts; missing keys fall back to
            empty or zero values.
        generated_at: Timestamp shown in the header; defaults to the
            current UTC time.
        expert: When true, show up to five coaching notes instead of
            three and append the expert metrics section.
        mode: Passing ``"expert"`` enables expert output as well.

    Returns:
        The report text, with lines joined by newlines and a trailing
        newline.
    """
    if generated_at is None:
        generated_at = datetime.now(UTC)
    show_expert = bool(expert) or mode == "expert"
    totals = facts.get("totals", {})
    quality = facts.get("prompt_quality", {})
    suggestions = build_suggestions(facts)

    # (label, totals key) pairs for the summary bullets, in display order.
    summary_rows = (
        ("Sessions", "sessions"),
        ("Turns", "turns"),
        ("User messages", "user_messages"),
        ("Tool calls", "tool_calls"),
        ("Verification tool calls", "verification_tool_calls"),
        ("Errors detected", "errors"),
        ("Compactions", "compactions"),
    )
    out: list[str] = [
        "# Codex Coach Report",
        "",
        f"Generated: {generated_at.isoformat(timespec='seconds')}",
        f"Window: {facts.get('since') or 'all available local logs'}",
        f"Mode: {'expert' if show_expert else 'beginner'}",
        "",
        "## Summary",
        "",
    ]
    out += [f"- {label}: {totals.get(key, 0)}" for label, key in summary_rows]
    out.append(f"- Prompt quality average: {quality.get('average_score', 0)}/10")

    out += ["", "## Top Coaching Notes", ""]
    out += _coaching_notes(suggestions, limit=5 if show_expert else 3)

    # Project mix: a table of at most eight projects.
    out += ["", "## Project Mix", ""]
    top_projects = facts.get("projects", [])[:8]
    if not top_projects:
        out.append("No project activity found.")
    else:
        out.append("| Project | Sessions | Turns | User Messages | Tool Calls | Verification |")
        out.append("|---|---:|---:|---:|---:|---:|")
        count_keys = ("sessions", "turns", "user_messages", "tool_calls", "verification_tool_calls")
        for entry in top_projects:
            counts = " | ".join(f"{entry.get(key, 0)}" for key in count_keys)
            out.append(f"| `{entry.get('cwd')}` | {counts} |")

    # Project capsules: at most five one-line summaries.
    out += ["", "## Project Capsules", ""]
    top_capsules = facts.get("project_capsules", [])[:5]
    if top_capsules:
        out += [
            f"- `{item.get('project')}`: {item.get('likely_workflow')}. {item.get('recommended_instruction')}"
            for item in top_capsules
        ]
    else:
        out.append("No project capsules available.")

    out += ["", "## Prompt Quality", ""]
    buckets = quality.get("categories", {})
    if buckets:
        for bucket in ("excellent", "good", "needs_work"):
            label = bucket.replace("_", " ").title()
            out.append(f"- {label}: {buckets.get(bucket, 0)}")
    else:
        out.append("- No user prompts found.")

    weak_prompts = quality.get("examples_needing_work", [])
    if weak_prompts:
        out += ["", "Prompt rewrites to try, shown with redacted previews:"]
        for example in weak_prompts[:5]:
            gaps = ", ".join(example.get("missing", [])) or "none"
            out.append(
                f"- Score {example.get('score')}/10, missing {gaps}: "
                f"`{example.get('preview')}` -> {example.get('rewrite')}"
            )

    out += ["", "## Suggested Improvements", ""]
    out += [f"- [{entry['confidence']}] {entry['title']}: {entry['body']}" for entry in suggestions]

    skills = build_skill_opportunities(facts)
    if skills:
        out += ["", "## Skill Opportunities", ""]
        out += [f"- `{entry['project']}`: {entry['title']} - {entry['body']}" for entry in skills[:5]]

    if show_expert:
        out += ["", "## Expert Metrics", ""]
        expert_rows = (
            ("Models", "models"),
            ("Efforts", "efforts"),
            ("Tools", "tools"),
            ("Verification tools", "verification_tools"),
            ("Errors", "errors"),
        )
        for heading, key in expert_rows:
            out.append(f"- {heading}: `{facts.get(key, {})}`")

    out += [
        "",
        "## Privacy",
        "",
        "This report is generated from local Codex logs. Full prompt bodies and source code are not included by default.",
        "",
    ]
    return "\n".join(out)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def write_markdown_report(report: str, reports_dir: Path, *, generated_at: datetime | None = None) -> tuple[Path, Path]:
|
|
119
|
+
generated_at = generated_at or datetime.now(UTC)
|
|
120
|
+
reports_dir.mkdir(parents=True, exist_ok=True)
|
|
121
|
+
latest = reports_dir / "latest.md"
|
|
122
|
+
weekly = reports_dir / f"weekly-{generated_at.date().isoformat()}.md"
|
|
123
|
+
latest.write_text(report, encoding="utf-8")
|
|
124
|
+
weekly.write_text(report, encoding="utf-8")
|
|
125
|
+
return latest, weekly
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def build_suggestions(facts: dict[str, Any]) -> list[dict[str, str]]:
    """Derive coaching suggestions from the aggregated usage facts.

    Each heuristic inspects one signal (reasoning effort, verification
    share, compactions, weak prompts, project spread, skill candidates)
    and adds at most one suggestion. If none fire, a single
    "keep the current loop" suggestion is returned, so the result is
    never empty.

    Args:
        facts: Mapping of collected facts; missing keys count as zero or
            empty.

    Returns:
        Suggestions in heuristic order, each with the keys ``id``,
        ``title``, ``confidence`` and ``body``.
    """
    notes: list[dict[str, str]] = []

    def add(note_id: str, title: str, confidence: str, body: str) -> None:
        notes.append({"id": note_id, "title": title, "confidence": confidence, "body": body})

    totals = facts.get("totals", {})
    efforts = facts.get("efforts", {})

    # Reasoning effort: flag when high/xhigh covers at least 60% of five or more turns.
    turns = max(1, int(totals.get("turns", 0)))
    heavy_turns = int(efforts.get("high", 0)) + int(efforts.get("xhigh", 0))
    heavy_share = heavy_turns / turns
    if turns >= 5 and heavy_share >= 0.6:
        add(
            "right-size-reasoning",
            "Right-size reasoning effort",
            _confidence(heavy_share, high=0.75, medium=0.6),
            "High reasoning dominates recent turns. Default simple status, search, and small edit tasks to medium; reserve high/xhigh for ambiguous debugging, architecture, security, or broad refactors.",
        )

    # Verification: flag when under 12% of ten or more tool calls verify work.
    verification_calls = int(totals.get("verification_tool_calls", 0))
    tool_calls = max(1, int(totals.get("tool_calls", 0)))
    verification_share = verification_calls / tool_calls
    if tool_calls >= 10 and verification_share < 0.12:
        strong_signal = tool_calls >= 20 and verification_share < 0.08
        add(
            "verify-before-done",
            "Verify before calling work done",
            "high" if strong_signal else "medium",
            "Verification commands are a small share of tool use. Ask Codex to run the smallest meaningful test, build, lint, browser check, or runtime probe before final status.",
        )

    # Compactions: any occurrence triggers the note; three or more raise confidence.
    compactions = int(totals.get("compactions", 0))
    if compactions > 0:
        add(
            "checkpoint-long-runs",
            "Checkpoint long runs",
            "high" if compactions >= 3 else "medium",
            "Compactions appeared in the window. For long tasks, ask Codex to keep a small task ledger and validate durable files before resuming.",
        )

    # Prompt quality: flag when at least 8% of user messages need work.
    categories = facts.get("prompt_quality", {}).get("categories", {})
    weak_prompts = int(categories.get("needs_work", 0))
    user_messages = max(1, int(totals.get("user_messages", 0)))
    weak_share = weak_prompts / user_messages
    if weak_share >= 0.08:
        add(
            "tighten-ambiguous-prompts",
            "Tighten ambiguous prompts",
            _confidence(weak_share, high=0.15, medium=0.08),
            "A noticeable share of prompts are too short to identify the target. Include action, file/project, symptom, and success state when context is not obvious.",
        )

    # Project spread: four or more projects suggests per-project context notes.
    project_count = len(facts.get("projects", []))
    if project_count >= 4:
        add(
            "project-capsules",
            "Use project capsules",
            "high" if project_count >= 6 else "medium",
            "Recent work spans several projects. Keep a short per-project AGENTS or context note so Codex does not rebuild project intent every time.",
        )

    if build_skill_opportunities(facts):
        add(
            "make-repeated-workflows-skills",
            "Turn repeated workflows into skills",
            "medium",
            "At least one project shows repeated tool patterns. Consider a small user skill with the workflow steps, validation commands, and resume rules.",
        )

    if not notes:
        add(
            "keep-current-loop",
            "Keep the current loop",
            "medium",
            "No strong coaching warnings stood out. Keep using explicit success states and ask for verification on user-facing or production-sensitive work.",
        )
    return notes
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def build_skill_opportunities(facts: dict[str, Any]) -> list[dict[str, str]]:
    """Return one skill suggestion per project capsule flagged as a skill candidate.

    Capsules without a truthy ``skill_candidate`` value are skipped.

    Args:
        facts: Mapping of collected facts; ``project_capsules`` may be
            missing or null.

    Returns:
        A list of dicts with the keys ``project``, ``title`` and
        ``body``, in capsule order.
    """
    opportunities: list[dict[str, str]] = []
    for capsule in facts.get("project_capsules") or []:
        if not capsule.get("skill_candidate"):
            continue
        # ``dict.get`` only uses its default when the key is absent, so fall
        # back explicitly: a present-but-null or empty value must not be
        # rendered as the literal text "None" or as an empty name.
        project = str(capsule.get("project") or "<unknown>")
        workflow = str(capsule.get("likely_workflow") or "mixed workflow")
        opportunities.append(
            {
                "project": project,
                "title": "Create a project workflow skill",
                "body": (
                    f"Capture the {workflow} loop as a reusable skill: when to trigger, what context to gather, "
                    "which commands verify the work, and how to resume after interruption."
                ),
            }
        )
    return opportunities
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def write_suggestion_files(facts: dict[str, Any], suggestions_dir: Path) -> list[Path]:
    """Write one ``<id>.patch.md`` file per suggestion derived from ``facts``.

    ``suggestions_dir`` is created if needed; existing files with the
    same names are replaced.

    Args:
        facts: Mapping of collected facts passed to ``build_suggestions``.
        suggestions_dir: Directory that receives the patch files.

    Returns:
        Paths of the written files, in suggestion order.
    """
    suggestions_dir.mkdir(parents=True, exist_ok=True)

    def emit(entry: dict[str, str]) -> Path:
        destination = suggestions_dir / f"{entry['id']}.patch.md"
        destination.write_text(_render_suggestion_patch(entry), encoding="utf-8")
        return destination

    return [emit(entry) for entry in build_suggestions(facts)]
|
|
246
|
+
|
|
247
|
+
|
|
248
|
+
def _coaching_notes(suggestions: list[dict[str, str]], *, limit: int) -> list[str]:
    """Format the first ``limit`` suggestions as Markdown bullet lines.

    Each bullet has the form ``- [confidence] title: body``.
    """
    bullets: list[str] = []
    for entry in suggestions[:limit]:
        bullets.append("- [{}] {}: {}".format(entry["confidence"], entry["title"], entry["body"]))
    return bullets
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def _render_suggestion_patch(suggestion: dict[str, str]) -> str:
    """Render one suggestion as a reviewable Markdown "patch" document.

    The document has a heading, a review notice, a "Why" section with the
    confidence and body, the body again as a fenced bullet under
    "Suggested Text", and a rollback note. Every section is followed by
    a blank line, so the text ends with a newline.
    """
    title = suggestion["title"]
    confidence = suggestion["confidence"]
    body = suggestion["body"]

    sections = (
        [f"# Suggested Codex Instruction Change: {title}"],
        ["Review this suggestion before applying it. Codex Coach never edits your instructions automatically."],
        ["## Why"],
        [f"Confidence: {confidence}"],
        [body],
        ["## Suggested Text"],
        ["```md", f"- {body}", "```"],
        ["## Rollback"],
        ["Remove the added instruction if it does not improve your workflow after a week."],
    )
    rendered: list[str] = []
    for section in sections:
        rendered.extend(section)
        rendered.append("")  # blank separator after every section
    return "\n".join(rendered)
|
|
277
|
+
|
|
278
|
+
|
|
279
|
+
def _confidence(value: float, *, high: float, medium: float) -> str:
    """Map ``value`` to ``"high"``, ``"medium"`` or ``"low"``.

    The thresholds are inclusive and checked from ``high`` down; a value
    below ``medium`` yields ``"low"``.
    """
    for threshold, label in ((high, "high"), (medium, "medium")):
        if value >= threshold:
            return label
    return "low"
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from datetime import UTC, datetime, timedelta
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def utc_now() -> datetime:
|
|
7
|
+
return datetime.now(UTC)
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def parse_timestamp(value: object) -> datetime | None:
|
|
11
|
+
if not isinstance(value, str) or not value:
|
|
12
|
+
return None
|
|
13
|
+
text = value.strip()
|
|
14
|
+
if text.endswith("Z"):
|
|
15
|
+
text = text[:-1] + "+00:00"
|
|
16
|
+
try:
|
|
17
|
+
parsed = datetime.fromisoformat(text)
|
|
18
|
+
except ValueError:
|
|
19
|
+
return None
|
|
20
|
+
if parsed.tzinfo is None:
|
|
21
|
+
return parsed.replace(tzinfo=UTC)
|
|
22
|
+
return parsed.astimezone(UTC)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def parse_since(value: str | None, *, now: datetime | None = None) -> datetime | None:
|
|
26
|
+
if not value:
|
|
27
|
+
return None
|
|
28
|
+
now = now or utc_now()
|
|
29
|
+
text = value.strip().lower()
|
|
30
|
+
units = {
|
|
31
|
+
"h": "hours",
|
|
32
|
+
"hour": "hours",
|
|
33
|
+
"hours": "hours",
|
|
34
|
+
"d": "days",
|
|
35
|
+
"day": "days",
|
|
36
|
+
"days": "days",
|
|
37
|
+
"w": "weeks",
|
|
38
|
+
"week": "weeks",
|
|
39
|
+
"weeks": "weeks",
|
|
40
|
+
}
|
|
41
|
+
for suffix, unit in sorted(units.items(), key=lambda item: len(item[0]), reverse=True):
|
|
42
|
+
if text.endswith(suffix):
|
|
43
|
+
amount = text[: -len(suffix)].strip()
|
|
44
|
+
if amount.isdigit():
|
|
45
|
+
return now - timedelta(**{unit: int(amount)})
|
|
46
|
+
parsed = parse_timestamp(value)
|
|
47
|
+
if parsed is not None:
|
|
48
|
+
return parsed
|
|
49
|
+
raise ValueError(f"Unsupported --since value: {value!r}. Use values like 7d, 2w, 24h, or an ISO timestamp.")
|