codex-coach 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.codex-plugin/plugin.json +25 -0
- package/LICENSE +21 -0
- package/README.md +97 -0
- package/assets/brand/codex-coach-icon.png +0 -0
- package/assets/brand/codex-coach-icon.svg +49 -0
- package/assets/brand/codex-coach-logo.png +0 -0
- package/assets/brand/codex-coach-logo.svg +62 -0
- package/assets/examples/how-it-works.png +0 -0
- package/assets/examples/how-it-works.svg +71 -0
- package/assets/examples/project-capsules.png +0 -0
- package/assets/examples/project-capsules.svg +59 -0
- package/assets/examples/prompt-lint.png +0 -0
- package/assets/examples/prompt-lint.svg +54 -0
- package/bin/codex-coach.js +53 -0
- package/install.ps1 +18 -0
- package/install.sh +50 -0
- package/package.json +38 -0
- package/pyproject.toml +31 -0
- package/skills/codex-coach/SKILL.md +71 -0
- package/skills/codex-coach/agents/openai.yaml +8 -0
- package/skills/codex-coach/references/config-suggestions.md +35 -0
- package/skills/codex-coach/references/privacy.md +25 -0
- package/skills/codex-coach/references/prompt-rubric.md +21 -0
- package/src/codex_coach/__init__.py +3 -0
- package/src/codex_coach/cli.py +146 -0
- package/src/codex_coach/install.py +230 -0
- package/src/codex_coach/parser.py +395 -0
- package/src/codex_coach/paths.py +49 -0
- package/src/codex_coach/prompts.py +151 -0
- package/src/codex_coach/redaction.py +35 -0
- package/src/codex_coach/reports.py +284 -0
- package/src/codex_coach/timeutil.py +49 -0
|
@@ -0,0 +1,395 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
from collections import Counter, defaultdict
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Iterable
|
|
8
|
+
|
|
9
|
+
from .prompts import score_prompt
|
|
10
|
+
from .redaction import stable_hash
|
|
11
|
+
from .timeutil import parse_timestamp
|
|
12
|
+
|
|
13
|
+
# Lower-case substrings that flag a tool call as a verification step
# (tests, linters, type checkers, builds). Despite the ``_RE`` suffix these
# are plain substrings tested with ``in``, not regular expressions.
VERIFY_RE = (
    "test", "pytest", "vitest", "jest", "playwright",
    "lint", "typecheck", "tsc", "mypy", "ruff",
    "build",
    "cargo test", "go test", "gradle test",
)

# Lower-case substrings that flag a piece of text as reporting an error.
# Order matters to callers that stop at the first marker found.
ERROR_RE = (
    "error", "exception", "traceback",
    "failed", "failure", "timeout", "panic",
    "context_length_exceeded", "usage_limit_reached", "upstream_unavailable",
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
@dataclass
class ScanAccumulator:
    """Mutable tallies gathered while scanning session logs.

    One instance is filled in across all scanned files; ``to_facts`` then
    flattens the tallies into a plain dictionary.
    """

    # Label for the scan window; copied unchanged into the facts output.
    since: str | None = None
    files_scanned: int = 0
    malformed_lines: int = 0
    # Session id -> metadata captured for that session.
    sessions: dict[str, dict[str, Any]] = field(default_factory=dict)
    # Project label -> per-project event counters.
    projects: dict[str, Counter] = field(default_factory=lambda: defaultdict(Counter))
    # Project label -> ids of the sessions seen in that project.
    project_sessions: dict[str, set[str]] = field(default_factory=lambda: defaultdict(set))
    totals: Counter = field(default_factory=Counter)
    tool_counts: Counter = field(default_factory=Counter)
    model_counts: Counter = field(default_factory=Counter)
    effort_counts: Counter = field(default_factory=Counter)
    source_counts: Counter = field(default_factory=Counter)
    originator_counts: Counter = field(default_factory=Counter)
    project_tools: dict[str, Counter] = field(default_factory=lambda: defaultdict(Counter))
    project_efforts: dict[str, Counter] = field(default_factory=lambda: defaultdict(Counter))
    project_verification_tools: dict[str, Counter] = field(default_factory=lambda: defaultdict(Counter))
    project_prompt_scores: dict[str, list[int]] = field(default_factory=lambda: defaultdict(list))
    # One dict per scored user prompt (score, category, preview, ...).
    prompt_scores: list[dict[str, Any]] = field(default_factory=list)
    error_counts: Counter = field(default_factory=Counter)
    verification_tools: Counter = field(default_factory=Counter)
    compacted_sessions: set[str] = field(default_factory=set)
    current_file_session: dict[str, str] = field(default_factory=dict)

    def to_facts(self) -> dict[str, Any]:
        """Return the accumulated tallies as a schema-version-1 facts dict.

        Projects and project capsules are ordered from most to least active
        (user messages, then tool calls, then turns).
        """

        def activity(entry):
            # Sort key shared by the project rows and the capsules.
            return (entry["user_messages"], entry["tool_calls"], entry["turns"])

        project_rows = []
        capsules = []
        for label, tally in self.projects.items():
            # Work on a copy so the session count never leaks into the tally.
            with_sessions = tally.copy()
            with_sessions["sessions"] = len(self.project_sessions[label])
            verified_calls = sum(self.project_verification_tools[label].values())
            project_rows.append(
                {
                    "cwd": label,
                    "sessions": with_sessions["sessions"],
                    "turns": tally["turns"],
                    "user_messages": tally["user_messages"],
                    "tool_calls": tally["tool_calls"],
                    "verification_tool_calls": verified_calls,
                    "compactions": tally["compactions"],
                }
            )
            capsule = _project_capsule(
                label,
                with_sessions,
                self.project_tools[label],
                self.project_efforts[label],
                self.project_prompt_scores[label],
                verified_calls,
            )
            capsules.append(capsule)
        project_rows.sort(key=activity, reverse=True)
        capsules.sort(key=activity, reverse=True)

        scored = self.prompt_scores
        category_tally = Counter(entry["category"] for entry in scored)
        if scored:
            average_score = round(sum(int(entry["score"]) for entry in scored) / len(scored), 2)
        else:
            average_score = 0.0
        needs_work = [entry for entry in scored if entry["category"] == "needs_work"]

        totals: dict[str, Any] = {
            "files_scanned": self.files_scanned,
            "malformed_lines": self.malformed_lines,
            "sessions": len(self.sessions),
        }
        for key in (
            "turns",
            "user_messages",
            "assistant_messages",
            "tool_calls",
            "web_searches",
            "reasoning_items",
            "compactions",
        ):
            totals[key] = self.totals[key]
        totals["errors"] = sum(self.error_counts.values())
        totals["verification_tool_calls"] = sum(self.verification_tools.values())

        def ranked(counter):
            # Plain dict ordered from most to least frequent.
            return dict(counter.most_common())

        return {
            "schema_version": 1,
            "since": self.since,
            "totals": totals,
            "projects": project_rows,
            "project_capsules": capsules,
            "models": ranked(self.model_counts),
            "efforts": ranked(self.effort_counts),
            "sources": ranked(self.source_counts),
            "originators": ranked(self.originator_counts),
            "tools": ranked(self.tool_counts),
            "verification_tools": ranked(self.verification_tools),
            "errors": ranked(self.error_counts),
            "prompt_quality": {
                "average_score": average_score,
                "categories": dict(category_tally),
                "examples_needing_work": needs_work[:10],
                "rewrite_examples": needs_work[:5],
            },
            "compacted_session_ids": sorted(self.compacted_sessions),
        }
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def iter_log_paths(codex_home: Path) -> list[Path]:
    """Return the sorted ``.jsonl`` log files found under ``codex_home``.

    ``sessions/`` is searched recursively; ``archived_sessions/`` only at its
    top level. Missing directories are skipped and only regular files are
    returned.
    """
    search_plan = (
        (codex_home / "sessions", "**/*.jsonl"),
        (codex_home / "archived_sessions", "*.jsonl"),
    )
    found = [
        candidate
        for directory, pattern in search_plan
        if directory.exists()
        for candidate in directory.glob(pattern)
        if candidate.is_file()
    ]
    found.sort()
    return found
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def scan_logs(codex_home: Path, *, since_dt=None, since_label: str | None = None) -> dict[str, Any]:
    """Scan every log file under ``codex_home`` and return the facts dict.

    Args:
        codex_home: Directory holding the ``sessions`` / ``archived_sessions`` logs.
        since_dt: Optional cutoff; files last modified before it are skipped
            and it is forwarded to the per-file scan.
        since_label: Text stored as ``since`` in the result.
    """
    tally = ScanAccumulator(since=since_label)
    for log_path in iter_log_paths(codex_home):
        if since_dt is not None:
            try:
                too_old = log_path.stat().st_mtime < since_dt.timestamp()
            except OSError:
                # A file that cannot be stat'ed is recorded and skipped.
                tally.malformed_lines += 1
                continue
            if too_old:
                continue
        _scan_file(log_path, tally, since_dt=since_dt)
    return tally.to_facts()
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
def _scan_file(path: Path, acc: ScanAccumulator, *, since_dt) -> None:
    """Parse one JSONL session log and fold its events into ``acc``.

    Args:
        path: Log file to read; its stem is the fallback session id.
        acc: Accumulator updated in place.
        since_dt: Optional cutoff; events whose parsed timestamp is earlier
            are ignored. Events without a parseable timestamp are kept.

    An unreadable file, an undecodable line, or a line whose JSON value is
    not an object each add one to ``acc.malformed_lines``.
    """
    acc.files_scanned += 1
    fallback_session_id = path.stem
    current_session_id = fallback_session_id
    current_cwd = "<unknown>"

    try:
        raw_text = path.read_text(encoding="utf-8", errors="replace")
    except OSError:
        acc.malformed_lines += 1
        return

    # Split on "\n" only. str.splitlines() also breaks on U+0085, U+2028 and
    # U+2029, which may appear unescaped inside JSON strings and would cut a
    # valid record into malformed fragments.
    for line in raw_text.split("\n"):
        if not line.strip():
            continue
        try:
            event = json.loads(line)
        except json.JSONDecodeError:
            acc.malformed_lines += 1
            continue
        if not isinstance(event, dict):
            # Valid JSON but not an event object (e.g. a bare number or list).
            acc.malformed_lines += 1
            continue

        timestamp = parse_timestamp(event.get("timestamp"))
        if since_dt is not None and timestamp is not None and timestamp < since_dt:
            continue

        event_type = event.get("type")
        payload = event.get("payload") or {}
        if not isinstance(payload, dict):
            # Handlers call payload.get(); treat an unexpected shape as empty.
            payload = {}

        if event_type == "session_meta":
            current_session_id, current_cwd = _handle_session(payload, acc, fallback_session_id)
            continue
        if event_type == "turn_context":
            # A turn may switch working directory; keep the last known one otherwise.
            current_cwd = _project_label(str(payload.get("cwd") or current_cwd or "<unknown>"))
            acc.project_sessions[current_cwd].add(current_session_id)
            _handle_turn(payload, acc, current_cwd)
            continue
        if event_type == "response_item":
            _handle_response_item(payload, acc, current_cwd)
            continue
        if event_type == "event_msg":
            _handle_event_msg(payload, acc)
            continue
        if event_type == "compacted":
            acc.totals["compactions"] += 1
            acc.projects[current_cwd]["compactions"] += 1
            acc.compacted_sessions.add(current_session_id)
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def _handle_session(payload: dict[str, Any], acc: ScanAccumulator, fallback_session_id: str) -> tuple[str, str]:
    """Record a ``session_meta`` payload and return ``(session_id, project_label)``.

    The first metadata seen for a session id is kept; the source and
    originator counters are bumped on every call.
    """
    session_id = str(payload.get("id") or fallback_session_id)
    cwd = _project_label(str(payload.get("cwd") or "<unknown>"))
    source = payload.get("source")
    originator = payload.get("originator")

    if session_id not in acc.sessions:
        acc.sessions[session_id] = {
            "cwd": cwd,
            "timestamp": payload.get("timestamp"),
            "source": source,
            "originator": originator,
            "cli_version": payload.get("cli_version"),
        }

    if source:
        acc.source_counts[str(source)] += 1
    if originator:
        acc.originator_counts[str(originator)] += 1

    acc.project_sessions[cwd].add(session_id)
    return session_id, cwd
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def _project_label(cwd: str) -> str:
    """Turn a working directory into a ``"<name> [<hash>]"`` label.

    An empty value or the ``"<unknown>"`` sentinel yields ``"<unknown>"``.
    A path with no final component is labelled ``"project"``.
    """
    if cwd and cwd != "<unknown>":
        basename = Path(cwd).name or "project"
        return f"{basename} [{stable_hash(cwd)}]"
    return "<unknown>"
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
def _handle_turn(payload: dict[str, Any], acc: ScanAccumulator, cwd: str) -> None:
    """Count one turn for ``cwd`` and tally its model and effort, if present."""
    acc.totals["turns"] += 1
    acc.projects[cwd]["turns"] += 1

    model, effort = payload.get("model"), payload.get("effort")
    if model:
        acc.model_counts[str(model)] += 1
    if effort:
        effort_key = str(effort)
        acc.effort_counts[effort_key] += 1
        acc.project_efforts[cwd][effort_key] += 1
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _handle_response_item(payload: dict[str, Any], acc: ScanAccumulator, cwd: str) -> None:
    """Tally one ``response_item`` payload by its item type.

    Handles messages (user prompts are scored, assistant text is checked for
    error markers), function calls, function-call outputs, web searches, and
    reasoning items. Other types are ignored.
    """
    nested = payload.get("item")
    # Accept both wrapped ({"item": {...}}) and flat payloads.
    item = nested if isinstance(nested, dict) else payload
    kind = item.get("type")

    if kind == "message":
        role = item.get("role")
        text = _content_text(item.get("content"))
        if role == "user":
            acc.totals["user_messages"] += 1
            acc.projects[cwd]["user_messages"] += 1
            result = score_prompt(text)
            record = {
                "score": result.score,
                "category": result.category,
                "reason": result.reason,
                "preview": result.preview,
                "missing": list(result.missing),
                "rewrite": result.rewrite,
            }
            # Stats are merged last, so they override same-named keys above.
            record.update(result.stats)
            acc.prompt_scores.append(record)
            acc.project_prompt_scores[cwd].append(result.score)
        elif role == "assistant":
            acc.totals["assistant_messages"] += 1
            _count_errors(text, acc)

    elif kind == "function_call":
        tool = str(item.get("name") or "<unknown>")
        acc.totals["tool_calls"] += 1
        acc.projects[cwd]["tool_calls"] += 1
        acc.tool_counts[tool] += 1
        acc.project_tools[cwd][tool] += 1
        raw_args = item.get("arguments")
        if isinstance(raw_args, str):
            arg_text = raw_args
        else:
            arg_text = json.dumps(raw_args, default=str)
        if _looks_like_verification(tool, arg_text):
            acc.verification_tools[tool] += 1
            acc.project_verification_tools[cwd][tool] += 1

    elif kind == "function_call_output":
        output_text = _content_text(item.get("output") or item.get("content"))
        _count_errors(output_text, acc)

    elif kind == "web_search_call":
        acc.totals["web_searches"] += 1

    elif kind == "reasoning":
        acc.totals["reasoning_items"] += 1
|
|
306
|
+
|
|
307
|
+
|
|
308
|
+
def _handle_event_msg(payload: dict[str, Any], acc: ScanAccumulator) -> None:
    """Check an ``event_msg`` payload for error markers.

    The text comes from ``message``, then ``text``, then the payload itself.
    """
    raw = payload.get("message") or payload.get("text") or payload
    _count_errors(_content_text(raw), acc)
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _project_capsule(
    cwd: str,
    counts: Counter,
    tools: Counter,
    efforts: Counter,
    prompt_scores: list[int],
    verification_tool_calls: int,
) -> dict[str, Any]:
    """Build the per-project summary ("capsule") dictionary.

    Combines the raw counters with derived fields: average prompt score, the
    five most-used tools, the inferred workflow, a recommended instruction,
    and whether the project looks like a skill candidate.
    """
    if prompt_scores:
        prompt_average = round(sum(prompt_scores) / len(prompt_scores), 2)
    else:
        prompt_average = 0.0

    capsule: dict[str, Any] = {"project": cwd}
    for key in ("sessions", "turns", "user_messages", "tool_calls"):
        capsule[key] = counts[key]
    capsule["verification_tool_calls"] = verification_tool_calls
    capsule["compactions"] = counts["compactions"]
    capsule["prompt_quality_average"] = prompt_average
    capsule["top_tools"] = dict(tools.most_common(5))
    capsule["effort_mix"] = dict(efforts.most_common())
    capsule["likely_workflow"] = _infer_workflow(tools, counts)
    capsule["recommended_instruction"] = _project_instruction(counts, prompt_average, verification_tool_calls)
    capsule["skill_candidate"] = counts["turns"] >= 3 or counts["tool_calls"] >= 10
    return capsule
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def _infer_workflow(tools: Counter, counts: Counter) -> str:
    """Guess a project's dominant workflow from the tools it used.

    Rules are tried in order and the first match wins: browser/image tools,
    web tools, heavy ``exec_command`` use, repeated ``update_plan`` calls, no
    tool calls at all, and finally a generic fallback.
    """
    visual_tools = {"screenshot", "view_image"}
    for tool_name in tools:
        if tool_name in visual_tools or tool_name.startswith("browser_"):
            return "UI or browser verification"

    if "web_search" in tools or tools.get("web.run", 0):
        return "research and current-information checks"

    # "Terminal heavy" means at least half of all tool calls, and never fewer than two.
    exec_threshold = max(2, counts["tool_calls"] // 2)
    if tools.get("exec_command", 0) >= exec_threshold:
        return "terminal-heavy implementation or diagnosis"

    if tools.get("update_plan", 0) >= 2:
        return "multi-step planning and execution"

    if counts["tool_calls"] == 0:
        return "conversation and planning"

    return "mixed Codex workflow"
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def _project_instruction(counts: Counter, prompt_average: float, verification_tool_calls: int) -> str:
    """Pick the single most relevant coaching instruction for a project.

    Priority: compactions seen, then a low share of verification calls (under
    15% with at least six tool calls), then a non-zero prompt average below 5,
    then a general reminder.
    """
    if counts["compactions"] > 0:
        return "For long tasks in this project, keep a short durable checklist and validate files before resuming after compaction."

    observed_calls = counts["tool_calls"]
    if observed_calls >= 6:
        verified_share = verification_tool_calls / max(1, observed_calls)
        if verified_share < 0.15:
            return "Before final status in this project, run the smallest meaningful test, build, lint, browser check, or runtime probe."

    # A zero average means no prompts were scored, so give no prompt advice.
    if prompt_average and prompt_average < 5:
        return "Start prompts for this project with action, target file or subsystem, symptom or goal, and success state."

    return "Keep the project context compact: state the active subsystem, constraints, and the verification expected for user-facing changes."
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _content_text(value: Any) -> str:
    """Extract plain text from a string, a list of parts, a dict, or anything else.

    Lists are joined with newlines from their string items and from the
    ``text``/``content`` of their dict items; empty pieces and other item
    types are dropped. Dicts yield ``text``, ``content`` or ``message``.
    ``None`` yields an empty string; any other value is passed to ``str``.
    """
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, dict):
        return str(value.get("text") or value.get("content") or value.get("message") or "")
    if not isinstance(value, list):
        return str(value)

    pieces: list[str] = []
    for entry in value:
        if isinstance(entry, str):
            piece = entry
        elif isinstance(entry, dict):
            piece = str(entry.get("text") or entry.get("content") or "")
        else:
            continue
        if piece:
            pieces.append(piece)
    return "\n".join(pieces)
|
|
383
|
+
|
|
384
|
+
|
|
385
|
+
def _looks_like_verification(tool_name: str, arguments: str) -> bool:
    """Return True when the tool name or arguments contain a ``VERIFY_RE`` marker.

    Matching is a case-insensitive substring test.
    """
    haystack = f"{tool_name} {arguments}"
    haystack = haystack.lower()
    for marker in VERIFY_RE:
        if marker in haystack:
            return True
    return False
|
|
388
|
+
|
|
389
|
+
|
|
390
|
+
def _count_errors(text: str, acc: ScanAccumulator) -> None:
    """Bump the counter of the first ``ERROR_RE`` marker found in ``text``.

    Matching is case-insensitive. At most one marker is counted per call.
    """
    lowered = text.lower()
    first_hit = next((marker for marker in ERROR_RE if marker in lowered), None)
    if first_hit is not None:
        acc.error_counts[first_hit] += 1
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(frozen=True)
class CoachPaths:
    """Immutable bundle of the directories the tool reads from and writes to."""

    home: Path
    codex_home: Path
    coach_home: Path

    @property
    def reports_dir(self) -> Path:
        """Directory ``<coach_home>/reports``."""
        return self.coach_home.joinpath("reports")

    @property
    def facts_dir(self) -> Path:
        """Directory ``<coach_home>/facts``."""
        return self.coach_home.joinpath("facts")

    @property
    def suggestions_dir(self) -> Path:
        """Directory ``<coach_home>/suggestions``."""
        return self.coach_home.joinpath("suggestions")

    @property
    def config_file(self) -> Path:
        """File ``<coach_home>/config.toml``."""
        return self.coach_home.joinpath("config.toml")

    @property
    def app_dir(self) -> Path:
        """Directory ``<coach_home>/app``."""
        return self.coach_home.joinpath("app")

    def ensure_output_dirs(self) -> None:
        """Create the reports, facts and suggestions directories if missing."""
        for directory in (self.reports_dir, self.facts_dir, self.suggestions_dir):
            directory.mkdir(parents=True, exist_ok=True)
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def default_paths(
    *,
    home: str | Path | None = None,
    codex_home: str | Path | None = None,
    coach_home: str | Path | None = None,
) -> CoachPaths:
    """Resolve the home, Codex and coach directories.

    Each location comes from the explicit argument, then its environment
    variable (``HOME``, ``CODEX_HOME``, ``CODEX_COACH_HOME``), then a default:
    the user's home directory, ``<home>/.codex`` and ``<home>/.codex-coach``.
    A leading ``~`` is expanded in every result.
    """

    def pick(explicit, env_name, fallback):
        # The fallback is a callable so it is only evaluated when needed.
        chosen = explicit or os.environ.get(env_name) or fallback()
        return Path(chosen).expanduser()

    home_path = pick(home, "HOME", Path.home)
    codex_path = pick(codex_home, "CODEX_HOME", lambda: home_path / ".codex")
    coach_path = pick(coach_home, "CODEX_COACH_HOME", lambda: home_path / ".codex-coach")
    return CoachPaths(home=home_path, codex_home=codex_path, coach_home=coach_path)
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import re
|
|
4
|
+
from dataclasses import dataclass
|
|
5
|
+
|
|
6
|
+
from .redaction import redact_text, text_stats
|
|
7
|
+
|
|
8
|
+
PATH_HINT_RE = re.compile(r"(?:^|\s)(?:\.{0,2}/|~?/|[A-Za-z]:\\|[\w.-]+\.(?:py|ts|tsx|js|jsx|md|json|toml|yaml|yml|go|rs|java|cs|rb|php))")
|
|
9
|
+
URL_HINT_RE = re.compile(r"https?://")
|
|
10
|
+
ERROR_HINT_RE = re.compile(r"\b(error|exception|traceback|failed|failing|crash|stuck|timeout|panic|bug|regression|broken)\b", re.IGNORECASE)
|
|
11
|
+
SUCCESS_HINT_RE = re.compile(r"\b(success|done|verify|should|expected|acceptance|criteria|when|until|so that)\b", re.IGNORECASE)
|
|
12
|
+
COMMAND_HINT_RE = re.compile(r"`[^`]+`|\b(npm|pnpm|bun|uv|python|pytest|git|cargo|go test|make|docker|codex)\b", re.IGNORECASE)
|
|
13
|
+
ACTION_HINT_RE = re.compile(r"\b(run|implement|build|fix|debug|analyze|review|refactor|test|install|deploy|create|update|remove|delete|generate|explain)\b", re.IGNORECASE)
|
|
14
|
+
VAGUE_RE = re.compile(r"^(fix|run|update|delete|improve|optimi[sz]e|make better|where|why|ok|nice|worked|continue|proceed|do it|go)$", re.IGNORECASE)
|
|
15
|
+
VALID_CONTEXT_REPLY_RE = re.compile(r"^(yes|y|no|n|ok|okay|proceed|continue|go ahead|do it|run tests|commit|push|retry|try again|1|2|3|4|5|a|b|c|d)$", re.IGNORECASE)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass(frozen=True)
class PromptScore:
    """Immutable result of scoring a single prompt."""

    # Heuristic quality score.
    score: int
    # Quality bucket name, e.g. "needs_work" or "good".
    category: str
    # Short explanation of how the score was reached.
    reason: str
    # Redacted, shortened copy of the prompt text.
    preview: str
    # Names of the prompt ingredients judged to be absent.
    missing: tuple[str, ...]
    # Suggested rewrite template or note.
    rewrite: str
    # Extra statistics about the original text.
    stats: dict[str, int | str]
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def score_prompt(text: str) -> PromptScore:
    """Score a user prompt from 0 to 10 using simple textual heuristics.

    Special cases are handled first: empty prompts (score 0), machine-provided
    context blocks, short contextual replies and skill/plugin invocations
    (score 7, "good"), and one- or two-word vague prompts (score 3).

    Otherwise scoring starts at 4 and adds one point each for: at least eight
    words, an action verb, a path or URL, failure wording, success wording,
    and a command hint. It subtracts one point for fewer than four words and
    two for a vague verb with no target, failure or success detail. The
    result is clamped to 0..10 and bucketed: >= 8 "excellent", >= 5 "good",
    otherwise "needs_work".

    Args:
        text: Raw prompt text.

    Returns:
        A ``PromptScore`` with the score, category, reason, redacted preview,
        missing ingredients, rewrite suggestion and text statistics.
    """
    stripped = text.strip()
    lower = stripped.lower()
    words = [part for part in re.split(r"\s+", stripped) if part]
    score = 4
    reasons: list[str] = []

    if not stripped:
        missing_all = ("action", "target", "success state")
        # Build the rewrite from the tuple that is reported as missing.
        # Passing an empty tuple would return the "keep the prompt shape"
        # text, which contradicts a "needs_work" verdict.
        return PromptScore(
            0,
            "needs_work",
            "empty prompt",
            "",
            missing_all,
            _rewrite_prompt(missing_all),
            text_stats(text),
        )

    if stripped.startswith("<environment_context>") or stripped.startswith("# AGENTS.md instructions"):
        return PromptScore(
            7,
            "good",
            "machine-provided context block",
            redact_text(text),
            (),
            "No rewrite needed for machine-provided context blocks.",
            text_stats(text),
        )

    if VALID_CONTEXT_REPLY_RE.match(stripped):
        return PromptScore(
            7,
            "good",
            "brief response that is usually valid with conversation context",
            redact_text(text),
            (),
            "No rewrite needed when the previous Codex turn made the choice explicit.",
            text_stats(text),
        )

    if stripped.startswith("[$") and "](" in stripped:
        return PromptScore(
            7,
            "good",
            "Codex skill or plugin invocation",
            redact_text(text),
            (),
            "No rewrite needed when the invocation intentionally activates a Codex skill or plugin.",
            text_stats(text),
        )

    if len(words) <= 2 and VAGUE_RE.match(stripped):
        missing = ("target", "success state")
        return PromptScore(
            3,
            "needs_work",
            "very short prompt with ambiguous target or action",
            redact_text(text),
            missing,
            _rewrite_prompt(missing),
            text_stats(text),
        )

    has_action = bool(ACTION_HINT_RE.search(stripped))
    has_target = bool(PATH_HINT_RE.search(stripped) or URL_HINT_RE.search(stripped))
    has_failure = bool(ERROR_HINT_RE.search(stripped))
    has_success = bool(SUCCESS_HINT_RE.search(stripped))
    has_command = bool(COMMAND_HINT_RE.search(stripped))

    # Positive signals: one point each.
    if len(words) >= 8:
        score += 1
        reasons.append("clear length")
    if has_action:
        score += 1
        reasons.append("action stated")
    if has_target:
        score += 1
        reasons.append("file or path context")
    if has_failure:
        score += 1
        reasons.append("failure context")
    if has_success:
        score += 1
        reasons.append("success context")
    if has_command:
        score += 1
        reasons.append("command context")

    # Penalties.
    if len(words) < 4:
        score -= 1
        reasons.append("short")
    if re.search(r"\b(fix|update|delete|improve|optimi[sz]e|make)\b", lower) and not (
        has_target or has_failure or has_success
    ):
        score -= 2
        reasons.append("missing target detail")

    missing: list[str] = []
    if not has_action:
        missing.append("action")
    if not has_target and not has_failure and len(words) >= 3:
        missing.append("target")
    if (has_action or has_failure) and not has_success:
        missing.append("success state")
    if has_action and not has_command and any(verb in lower for verb in ("fix", "debug", "implement", "build", "refactor", "deploy")):
        missing.append("verification")

    score = max(0, min(10, score))
    if score >= 8:
        category = "excellent"
    elif score >= 5:
        category = "good"
    else:
        category = "needs_work"
    # Only the first four reasons are reported to keep the text short.
    reason = ", ".join(reasons[:4]) or "basic request"
    # dict.fromkeys removes duplicates while keeping insertion order.
    missing_tuple = tuple(dict.fromkeys(missing))
    return PromptScore(score, category, reason, redact_text(text), missing_tuple, _rewrite_prompt(missing_tuple), text_stats(text))
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def _rewrite_prompt(missing: tuple[str, ...]) -> str:
    """Return a rewrite hint for a prompt lacking the given ingredients.

    With nothing missing, the advice is to keep the prompt as it is.
    Otherwise a fill-in template is returned. It ends with a verification
    sentence when only "verification" (not "success state") is missing, and
    with a success sentence in every other case.
    """
    if not missing:
        return "Keep the prompt shape. Add a success check only if the task touches code, config, deploys, or user-visible behavior."

    only_verification = "verification" in missing and "success state" not in missing
    if only_verification:
        closing = "Verify with [smallest meaningful test/build/runtime check]."
    else:
        closing = "Success means [observable verification result]."

    template = (
        "[Action] in [project/file]",
        "because [symptom or goal]",
        "using [relevant command, link, or constraint]",
        closing,
    )
    return " ".join(template)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import hashlib
|
|
4
|
+
import re
|
|
5
|
+
|
|
6
|
+
# Filesystem-looking tokens: a bare "~", anything under /Users or /home, or
# an absolute path with at least three components.
PATH_RE = re.compile(r"(?<!\w)(?:~|/Users/[^ \n\t]+|/home/[^ \n\t]+|/[A-Za-z0-9_.-]+(?:/[A-Za-z0-9_.-]+){2,})")
# http(s) links up to whitespace or a closing bracket.
URL_RE = re.compile(r"https?://[^\s)>\]]+")
EMAIL_RE = re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b")
# Tokens starting with a common API-key prefix followed by 12+ key characters.
SECRET_RE = re.compile(r"\b(?:sk|pk|ghp|github_pat|xox[baprs])-?[A-Za-z0-9_\-]{12,}\b", re.IGNORECASE)
# Any unbroken run of 32+ identifier-like characters.
LONG_TOKEN_RE = re.compile(r"\b[A-Za-z0-9_\-]{32,}\b")
WHITESPACE_RE = re.compile(r"\s+")


def stable_hash(text: str) -> str:
    """Return the first 12 hex characters of the SHA-256 of ``text``.

    Characters that cannot be encoded as UTF-8 are dropped before hashing.
    """
    return hashlib.sha256(text.encode("utf-8", "ignore")).hexdigest()[:12]


def redact_text(text: str, *, max_chars: int = 96) -> str:
    """Return a redacted, single-line preview of ``text``.

    URLs, e-mail addresses, secret-looking tokens, paths and long opaque
    tokens are replaced with bracketed placeholders, and whitespace runs are
    collapsed to single spaces.

    Args:
        text: Raw text to redact.
        max_chars: Upper bound on the length of the returned preview.

    Returns:
        The redacted text. When it is longer than ``max_chars`` it is cut and
        ends with ``"..."``; the result never exceeds ``max_chars``.
    """
    text = URL_RE.sub("[url]", text)
    text = EMAIL_RE.sub("[email]", text)
    text = SECRET_RE.sub("[secret]", text)
    text = PATH_RE.sub("[path]", text)
    text = LONG_TOKEN_RE.sub("[token]", text)
    text = WHITESPACE_RE.sub(" ", text).strip()
    if len(text) <= max_chars:
        return text
    suffix = "..."
    if max_chars <= len(suffix):
        # No room for text plus the suffix; return a hard cut.
        return text[: max(max_chars, 0)]
    # Reserve room for the suffix so the preview stays within max_chars.
    return text[: max_chars - len(suffix)].rstrip() + suffix


def text_stats(text: str) -> dict[str, int | str]:
    """Return the character count, word count and stable hash of ``text``."""
    return {
        "chars": len(text),
        "words": len([part for part in WHITESPACE_RE.split(text.strip()) if part]),
        "hash": stable_hash(text),
    }
|