selftune 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. package/CHANGELOG.md +23 -0
  2. package/README.md +259 -0
  3. package/bin/selftune.cjs +29 -0
  4. package/cli/selftune/constants.ts +71 -0
  5. package/cli/selftune/eval/hooks-to-evals.ts +422 -0
  6. package/cli/selftune/evolution/audit.ts +44 -0
  7. package/cli/selftune/evolution/deploy-proposal.ts +244 -0
  8. package/cli/selftune/evolution/evolve.ts +406 -0
  9. package/cli/selftune/evolution/extract-patterns.ts +145 -0
  10. package/cli/selftune/evolution/propose-description.ts +146 -0
  11. package/cli/selftune/evolution/rollback.ts +242 -0
  12. package/cli/selftune/evolution/stopping-criteria.ts +69 -0
  13. package/cli/selftune/evolution/validate-proposal.ts +137 -0
  14. package/cli/selftune/grading/grade-session.ts +459 -0
  15. package/cli/selftune/hooks/prompt-log.ts +52 -0
  16. package/cli/selftune/hooks/session-stop.ts +54 -0
  17. package/cli/selftune/hooks/skill-eval.ts +73 -0
  18. package/cli/selftune/index.ts +104 -0
  19. package/cli/selftune/ingestors/codex-rollout.ts +416 -0
  20. package/cli/selftune/ingestors/codex-wrapper.ts +332 -0
  21. package/cli/selftune/ingestors/opencode-ingest.ts +565 -0
  22. package/cli/selftune/init.ts +297 -0
  23. package/cli/selftune/monitoring/watch.ts +328 -0
  24. package/cli/selftune/observability.ts +255 -0
  25. package/cli/selftune/types.ts +255 -0
  26. package/cli/selftune/utils/jsonl.ts +75 -0
  27. package/cli/selftune/utils/llm-call.ts +192 -0
  28. package/cli/selftune/utils/logging.ts +40 -0
  29. package/cli/selftune/utils/schema-validator.ts +47 -0
  30. package/cli/selftune/utils/seeded-random.ts +31 -0
  31. package/cli/selftune/utils/transcript.ts +260 -0
  32. package/package.json +29 -0
  33. package/skill/SKILL.md +120 -0
  34. package/skill/Workflows/Doctor.md +145 -0
  35. package/skill/Workflows/Evals.md +193 -0
  36. package/skill/Workflows/Evolve.md +159 -0
  37. package/skill/Workflows/Grade.md +157 -0
  38. package/skill/Workflows/Ingest.md +159 -0
  39. package/skill/Workflows/Initialize.md +125 -0
  40. package/skill/Workflows/Rollback.md +131 -0
  41. package/skill/Workflows/Watch.md +128 -0
  42. package/skill/references/grading-methodology.md +176 -0
  43. package/skill/references/invocation-taxonomy.md +144 -0
  44. package/skill/references/logs.md +168 -0
  45. package/skill/settings_snippet.json +41 -0
@@ -0,0 +1,332 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Codex CLI wrapper: codex-wrapper.ts
4
+ *
5
+ * Drop-in wrapper for `codex exec --json` that tees the JSONL event stream
6
+ * into our shared skill eval log format.
7
+ *
8
+ * Usage:
9
+ * bun codex-wrapper.ts --full-auto "make me a slide deck"
10
+ *
11
+ * The wrapper:
12
+ * 1. Runs `codex exec --json <your args>` as a subprocess
13
+ * 2. Streams stdout (JSONL events) to your terminal in real time
14
+ * 3. Parses events and writes to:
15
+ * ~/.claude/all_queries_log.jsonl
16
+ * ~/.claude/session_telemetry_log.jsonl
17
+ * ~/.claude/skill_usage_log.jsonl
18
+ */
19
+
20
+ import { existsSync, readdirSync, statSync } from "node:fs";
21
+ import { homedir } from "node:os";
22
+ import { join } from "node:path";
23
+ import { QUERY_LOG, SKILL_LOG, TELEMETRY_LOG } from "../constants.js";
24
+ import type { QueryLogRecord, SessionTelemetryRecord, SkillUsageRecord } from "../types.js";
25
+ import { appendJsonl } from "../utils/jsonl.js";
26
+
27
/** Default skill locations: the project-local `.codex/skills`, then the per-user one. */
const CODEX_SKILLS_DIRS = [
  join(process.cwd(), ".codex", "skills"),
  join(homedir(), ".codex", "skills"),
];

/**
 * Return the set of skill names installed in Codex skill directories.
 *
 * A skill is an immediate subdirectory that contains a `SKILL.md` file; the
 * subdirectory's name is used as the skill name.
 *
 * @param dirs - Directories to scan. Defaults to the project-local and
 *   per-user `.codex/skills` directories.
 * @returns The de-duplicated skill names found across all of `dirs`.
 */
export function findCodexSkillNames(dirs: readonly string[] = CODEX_SKILLS_DIRS): Set<string> {
  const names = new Set<string>();

  for (const dir of dirs) {
    let entries: string[];
    try {
      // Throws when `dir` is missing, is a plain file, or is unreadable; none
      // of those should prevent the remaining directories from being scanned.
      entries = readdirSync(dir);
    } catch {
      continue;
    }

    for (const entry of entries) {
      const skillDir = join(dir, entry);
      try {
        if (statSync(skillDir).isDirectory() && existsSync(join(skillDir, "SKILL.md"))) {
          names.add(entry);
        }
      } catch {
        // Skip broken symlinks or inaccessible entries
      }
    }
  }

  return names;
}
50
+
51
/**
 * Extract the user prompt from codex exec args.
 *
 * The prompt is the last positional argument. An argument is positional when
 * it does not start with `-`, or when it appears after a bare `--`
 * (end-of-options separator), in which case it is taken verbatim even if it
 * starts with `-`.
 *
 * @param args - Arguments that will be forwarded to `codex exec`.
 * @returns The prompt, or `""` when no positional argument is present.
 */
export function extractPromptFromArgs(args: string[]): string {
  const separatorIndex = args.indexOf("--");
  const optionArgs = separatorIndex === -1 ? args : args.slice(0, separatorIndex);
  const verbatimArgs = separatorIndex === -1 ? [] : args.slice(separatorIndex + 1);

  // Anything after `--` is positional by definition, so it wins when present.
  if (verbatimArgs.length > 0) {
    return verbatimArgs[verbatimArgs.length - 1] ?? "";
  }

  let prompt = "";
  for (const arg of optionArgs) {
    if (!arg.startsWith("-")) prompt = arg;
  }
  return prompt;
}
59
+
60
/** Telemetry distilled from one `codex exec --json` event stream. */
export interface ParsedCodexStream {
  /** Id taken from the `thread.started` event, or `"unknown"` if none was seen. */
  thread_id: string;

  /**
   * Count of completed items per kind. Keys are the item type
   * (`command_execution`, `file_change`, `web_search`, `reasoning`) or
   * `mcp:<tool>` for MCP tool calls.
   */
  tool_calls: Record<string, number>;
  /** Sum of every counter in `tool_calls`. */
  total_tool_calls: number;
  /** Non-empty, trimmed command strings of completed `command_execution` items. */
  bash_commands: string[];
  /** Installed skill names found (whole word, case-insensitive) in completed items. */
  skills_triggered: string[];

  /** Number of `turn.started` events. */
  assistant_turns: number;
  /** `turn.failed` events, `error` events, and commands with a non-zero exit code. */
  errors_encountered: number;

  /** Input tokens summed over `turn.completed` usage reports. */
  input_tokens: number;
  /** Output tokens summed over `turn.completed` usage reports. */
  output_tokens: number;

  /** First three agent messages (each cut to 500 characters) joined with `" | "`. */
  agent_summary: string;
  /** Combined length of all input lines handed to the parser. */
  transcript_chars: number;
}
73
+
74
/**
 * Parse Codex JSONL event lines and extract telemetry.
 *
 * Blank lines, lines that are not valid JSON, and JSON values that are not
 * objects (`null`, arrays, primitives) are skipped. Only `item.completed`
 * events contribute to tool counts, commands, agent messages and skill
 * detection; `item.started` / `item.updated` carry no final data and are
 * ignored.
 *
 * @param lines - Raw lines of the `codex exec --json` output.
 * @param skillNames - Installed skill names to look for in completed items.
 * @returns Aggregated metrics for the whole stream.
 */
export function parseJsonlStream(lines: string[], skillNames: Set<string>): ParsedCodexStream {
  let threadId = "unknown";
  const toolCalls: Record<string, number> = {};
  const bashCommands: string[] = [];
  // A Set keeps first-seen order and removes the need for repeated `includes` scans.
  const skillsTriggered = new Set<string>();
  const agentMessages: string[] = [];
  let errors = 0;
  let turns = 0;
  let inputTokens = 0;
  let outputTokens = 0;

  const countTool = (key: string): void => {
    toolCalls[key] = (toolCalls[key] ?? 0) + 1;
  };

  // Precompile a whole-word, case-insensitive regex per skill name.
  const regexSpecials = /[.*+?^${}()|[\]\\]/g;
  const skillPatterns = Array.from(skillNames, (name) => ({
    name,
    pattern: new RegExp(`\\b${name.replace(regexSpecials, "\\$&")}\\b`, "i"),
  }));

  for (const rawLine of lines) {
    const line = rawLine.trim();
    if (!line) continue;

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch {
      continue;
    }
    // `JSON.parse("null")` succeeds; reading `.type` from it would throw.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) continue;
    const event = parsed as Record<string, unknown>;

    const etype = (event.type as string) ?? "";

    switch (etype) {
      case "thread.started":
        threadId = (event.thread_id as string) ?? "unknown";
        break;

      case "turn.started":
        turns += 1;
        break;

      case "turn.completed": {
        const usage = (event.usage as Record<string, number>) ?? {};
        inputTokens += usage.input_tokens ?? 0;
        outputTokens += usage.output_tokens ?? 0;
        break;
      }

      case "turn.failed":
      case "error":
        errors += 1;
        break;

      case "item.completed": {
        const item = (event.item as Record<string, unknown>) ?? {};
        const itemType = (item.item_type as string) ?? (item.type as string) ?? "";

        if (itemType === "command_execution") {
          countTool("command_execution");
          const cmd = ((item.command as string) ?? "").trim();
          if (cmd) bashCommands.push(cmd);
          // A present, non-zero exit code marks the command as failed.
          if (item.exit_code !== undefined && (item.exit_code as number) !== 0) {
            errors += 1;
          }
        } else if (itemType === "file_change") {
          countTool("file_change");
        } else if (itemType === "mcp_tool_call") {
          countTool(`mcp:${(item.tool as string) ?? "unknown"}`);
        } else if (itemType === "web_search") {
          countTool("web_search");
        } else if (itemType === "agent_message") {
          const text = (item.text as string) ?? "";
          if (text) agentMessages.push(text.slice(0, 500));
        } else if (itemType === "reasoning") {
          countTool("reasoning");
        }

        // Join with a newline so a name cannot match across the text/command seam.
        const textContent = [(item.text as string) ?? "", (item.command as string) ?? ""].join(
          "\n",
        );
        for (const { name, pattern } of skillPatterns) {
          if (!skillsTriggered.has(name) && pattern.test(textContent)) {
            skillsTriggered.add(name);
          }
        }
        break;
      }

      default:
        break;
    }
  }

  return {
    thread_id: threadId,
    tool_calls: toolCalls,
    total_tool_calls: Object.values(toolCalls).reduce((a, b) => a + b, 0),
    bash_commands: bashCommands,
    skills_triggered: Array.from(skillsTriggered),
    assistant_turns: turns,
    errors_encountered: errors,
    input_tokens: inputTokens,
    output_tokens: outputTokens,
    agent_summary: agentMessages.slice(0, 3).join(" | "),
    transcript_chars: lines.reduce((sum, l) => sum + l.length, 0),
  };
}
175
+
176
/**
 * Record the user prompt as one JSONL entry in the query log.
 *
 * Empty prompts and prompts shorter than four characters are not logged.
 *
 * @param prompt - The prompt text passed to Codex.
 * @param sessionId - Session identifier stored with the entry.
 * @param logPath - Destination file; defaults to `QUERY_LOG`.
 */
export function logQuery(prompt: string, sessionId: string, logPath: string = QUERY_LOG): void {
  const tooShort = !prompt || prompt.length < 4;
  if (!tooShort) {
    const entry: QueryLogRecord = {
      timestamp: new Date().toISOString(),
      session_id: sessionId,
      query: prompt,
      source: "codex",
    };
    appendJsonl(logPath, entry);
  }
}
187
+
188
/**
 * Record the session metrics as one JSONL entry in the telemetry log.
 *
 * The entry starts with the session envelope (timestamp, ids, cwd, prompt,
 * source) and is followed by every field of `metrics`.
 *
 * @param metrics - Parsed stream metrics without the thread id.
 * @param prompt - The prompt text, stored as `last_user_query`.
 * @param sessionId - Session identifier stored with the entry.
 * @param cwd - Working directory the session ran in.
 * @param logPath - Destination file; defaults to `TELEMETRY_LOG`.
 */
export function logTelemetry(
  metrics: Omit<ParsedCodexStream, "thread_id">,
  prompt: string,
  sessionId: string,
  cwd: string,
  logPath: string = TELEMETRY_LOG,
): void {
  const envelope = {
    timestamp: new Date().toISOString(),
    session_id: sessionId,
    cwd,
    // The wrapper has no transcript file of its own, so the path is left empty.
    transcript_path: "",
    last_user_query: prompt,
    source: "codex",
  };
  const entry: SessionTelemetryRecord = { ...envelope, ...metrics };
  appendJsonl(logPath, entry);
}
207
+
208
/**
 * Record one triggered skill as a JSONL entry in the skill usage log.
 *
 * @param skillName - Name of the skill that was detected.
 * @param prompt - The prompt text that led to the trigger.
 * @param sessionId - Session identifier stored with the entry.
 * @param logPath - Destination file; defaults to `SKILL_LOG`.
 */
export function logSkillTrigger(
  skillName: string,
  prompt: string,
  sessionId: string,
  logPath: string = SKILL_LOG,
): void {
  const now = new Date().toISOString();
  // Synthetic path marker in place of a real file path.
  const syntheticPath = "(codex:" + skillName + ")";

  const entry: SkillUsageRecord = {
    timestamp: now,
    session_id: sessionId,
    skill_name: skillName,
    skill_path: syntheticPath,
    query: prompt,
    triggered: true,
    source: "codex",
  };
  appendJsonl(logPath, entry);
}
226
+
227
// --- CLI main ---

/** True when `e` looks like a "command not found" failure from spawning a process. */
function isMissingExecutable(e: unknown): boolean {
  if (!(e instanceof Error)) return false;
  const code = (e as { code?: unknown }).code;
  return code === "ENOENT" || e.message.includes("ENOENT");
}

/**
 * Start the Codex subprocess with piped stdout and inherited stderr.
 * Exits with status 1 and a hint when the `codex` executable cannot be found.
 */
function spawnCodex(cmd: string[]) {
  try {
    return Bun.spawn(cmd, {
      stdout: "pipe",
      stderr: "inherit",
    });
  } catch (e) {
    // Only a failure of the spawn itself may be reported as "codex not found";
    // an ENOENT raised later (e.g. while writing logs) has a different cause.
    if (isMissingExecutable(e)) {
      process.stderr.write(
        "[codex-wrapper] Error: `codex` not found in PATH. Is Codex CLI installed?\n",
      );
      process.exit(1);
    }
    throw e;
  }
}

/**
 * CLI entry point: run `codex exec --json <args>`, mirror its stdout to the
 * terminal, then parse the collected JSONL events and append query, telemetry
 * and skill-trigger records to the logs.
 *
 * The process exits with the Codex exit code. A failure to write the logs is
 * reported on stderr but does not replace that exit code.
 */
export async function cliMain(): Promise<void> {
  const extraArgs = process.argv.slice(2);

  if (extraArgs.length === 0) {
    process.stderr.write("Usage: codex-wrapper.ts [codex exec flags] <prompt>\n");
    process.stderr.write("  Wraps `codex exec --json` and logs skill eval telemetry.\n");
    process.exit(1);
  }

  const prompt = extractPromptFromArgs(extraArgs);
  const skillNames = findCodexSkillNames();
  const cwd = process.cwd();

  // Always pass --json exactly once, even when the caller supplied it too.
  const cmd = ["codex", "exec", "--json", ...extraArgs.filter((a) => a !== "--json")];

  const proc = spawnCodex(cmd);
  const reader = proc.stdout.getReader();
  const decoder = new TextDecoder();
  const collectedLines: string[] = [];
  let buffer = "";

  // Echo a decoded chunk to the terminal and move its complete lines into
  // `collectedLines`; the trailing partial line stays in `buffer`.
  const consume = (chunk: string): void => {
    if (!chunk) return;
    process.stdout.write(chunk);
    buffer += chunk;
    const parts = buffer.split("\n");
    buffer = parts.pop() ?? "";
    for (const part of parts) {
      const trimmed = part.trim();
      if (trimmed) collectedLines.push(trimmed);
    }
  };

  while (true) {
    const { done, value } = await reader.read();
    if (done) break;
    consume(decoder.decode(value, { stream: true }));
  }
  // Flush bytes the streaming decoder may still be holding (split multi-byte char).
  consume(decoder.decode());

  const tail = buffer.trim();
  if (tail) collectedLines.push(tail);

  await proc.exited;

  // parseJsonlStream already extracts the thread id from the same lines, so no
  // separate pass over the stream is needed to obtain the session id.
  const metrics = parseJsonlStream(collectedLines, skillNames);
  const { thread_id: sessionId, ...metricsWithoutThread } = metrics;

  try {
    logQuery(prompt, sessionId);
    logTelemetry(metricsWithoutThread, prompt, sessionId, cwd);
    for (const skillName of metrics.skills_triggered) {
      logSkillTrigger(skillName, prompt, sessionId);
    }
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    process.stderr.write(`[codex-wrapper] Warning: failed to write telemetry logs: ${reason}\n`);
  }

  // After `exited` resolves, a null exit code means the child did not exit
  // normally (e.g. it was killed by a signal); never report that as success.
  process.exit(proc.exitCode ?? 1);
}
325
+
326
// Entry point: run the CLI only when this module is executed directly,
// not when it is imported by another module.
if (import.meta.main) {
  const reportAndExit = (err: unknown): never => {
    console.error(err);
    process.exit(1);
  };
  cliMain().catch(reportAndExit);
}