osborn 0.8.6 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,425 @@
1
+ /**
2
+ * Prompt-review CLI for Osborn dev-logged sessions.
3
+ *
4
+ * Workflow:
5
+ * 1. Find the newest captured session under `.osborn/dev-logs/`
6
+ * (or accept a path arg).
7
+ * 2. Extract voice mode + provider from the log via regex.
8
+ * 3. Dynamic-import the current prompts from `../src/prompts.ts` so the
9
+ * reviewer sees the LATEST version of each prompt (not a snapshot).
10
+ * 4. Extract the pipeline fast-brain `buildSystemPrompt` function source
11
+ * from `../src/pipeline-fastbrain.ts` (which is module-private and
12
+ * cannot be dynamic-imported as a value).
13
+ * 5. Build a markdown review brief: session summary + raw log + active
14
+ * prompts + review task instructions.
15
+ * 6. Always write the brief to a file under `.osborn/dev-logs/` and pass
16
+ * `claude` a short pointer message instead of the full brief text.
17
+ * 7. Spawn `claude` CLI with `stdio: 'inherit'` and `--add-dir agent/src`
18
+ * so the reviewing Claude can Read / Grep / Edit the prompt files.
19
+ *
20
+ * Usage:
21
+ * npm run review # review the latest log
22
+ * npm run review <path-to-log-file> # review a specific log
23
+ *
24
+ * This script is OUT-OF-LOOP from the agent — removing it has zero impact
25
+ * on runtime behavior. Pair with `npm run dev:logged` (scripts/dev-logged.ts)
26
+ * to capture sessions.
27
+ */
28
+
29
import { spawn } from 'node:child_process'
import {
  existsSync,
  mkdirSync,
  readFileSync,
  readdirSync,
  statSync,
  writeFileSync,
} from 'node:fs'
import { dirname, join, resolve } from 'node:path'
import { fileURLToPath, pathToFileURL } from 'node:url'
39
+
40
+ // ============================================================================
41
+ // Paths
42
+ // ============================================================================
43
+
44
// ESM has no built-in __dirname, so derive it from this module's URL.
const thisScriptFile = fileURLToPath(import.meta.url)
const __dirname = dirname(thisScriptFile)

// Layout: agent/scripts/<this file>, with agent/src as a sibling directory.
const agentDir = resolve(__dirname, '..')
const agentSrcDir = join(agentDir, 'src')
// Directory holding the captured dev logs.
const logDir = join(agentDir, '.osborn', 'dev-logs')
49
+
50
+ // ============================================================================
51
+ // Step 1 — find the log file
52
+ // ============================================================================
53
+
54
/**
 * Locate the most recently modified `.log` file directly inside `logDir`.
 *
 * Entries that are not regular files (for example a directory whose name ends
 * in `.log`) and entries that cannot be stat'ed (a dangling symlink, or a file
 * removed between the directory listing and the stat call) are skipped rather
 * than crashing the script or being returned as a bogus log.
 *
 * @returns Path of the newest log file (joined onto `logDir`), or `null` when
 *   the directory is missing or contains no usable `.log` file.
 */
function findLatestLog(): string | null {
  if (!existsSync(logDir)) return null

  let newestPath: string | null = null
  let newestMtime = Number.NEGATIVE_INFINITY

  for (const name of readdirSync(logDir)) {
    if (!name.endsWith('.log')) continue

    const candidate = join(logDir, name)
    // `throwIfNoEntry: false` makes statSync return undefined instead of
    // throwing ENOENT for entries that vanished or point nowhere.
    const stats = statSync(candidate, { throwIfNoEntry: false })
    if (!stats?.isFile()) continue

    // Strict `>` keeps the earliest-listed entry when two mtimes tie.
    if (stats.mtimeMs > newestMtime) {
      newestMtime = stats.mtimeMs
      newestPath = candidate
    }
  }

  return newestPath
}
65
+
66
/** Print each line to stderr, then terminate the script with exit code 1. */
function exitWithError(...lines: string[]): never {
  for (const line of lines) console.error(line)
  process.exit(1)
}

// An explicit path argument wins; otherwise fall back to the newest captured log.
const argLogPath = process.argv[2]
const logPath = argLogPath ? resolve(argLogPath) : findLatestLog()

if (!logPath) {
  exitWithError(
    '❌ No dev log found.',
    ` Run \`npm run dev:logged\` to capture a session first.`,
    ` Log dir: ${logDir}`,
  )
}

if (!existsSync(logPath)) {
  exitWithError(`❌ Log file not found: ${logPath}`)
}

console.log(`📖 Reading ${logPath}`)
const logContent = readFileSync(logPath, 'utf-8')
83
+
84
+ // ============================================================================
85
+ // Step 2 — extract voice mode, provider, working dir
86
+ // ============================================================================
87
+
88
/** First capture group of `pattern` within the log, or `undefined` if it never matches. */
const firstCapture = (pattern: RegExp): string | undefined => logContent.match(pattern)?.[1]

/** Number of times a global (`/g`) `pattern` occurs in the log. */
const occurrences = (pattern: RegExp): number => logContent.match(pattern)?.length ?? 0

// Voice mode: prefer the explicit "from frontend" line, then the mode banner,
// and finally fall back to pipeline (the default mode).
const voiceMode =
  firstCapture(/🎙️ Using voice mode from frontend: (\w+)/)?.toLowerCase() ??
  firstCapture(/🎯 (DIRECT|PIPELINE|REALTIME) MODE/i)?.toLowerCase() ??
  'pipeline'

const provider = firstCapture(/🎙️ Using provider from frontend: (\w+)/) ?? null

// Working dir: the frontend-supplied value takes precedence over the startup
// cwd line; this process's own cwd is the last resort.
const workingDir =
  firstCapture(/📂 Working directory from frontend: ([^\n]+)/)?.trim() ??
  firstCapture(/📂 Working directory \(cwd\): ([^\n]+)/)?.trim() ??
  process.cwd()

// Approximate count: sums the three log markers that denote a user turn.
const userTurnCount = [/📝 User \(/g, /📝 Text \(/g, /📥 \[pipeline\] chat\(\) call/g]
  .map(occurrences)
  .reduce((total, count) => total + count, 0)

console.log(` Mode: ${voiceMode}, Provider: ${provider ?? 'unknown'}, User turns: ~${userTurnCount}`)
111
+
112
+ // ============================================================================
113
+ // Step 3 — dynamic-import current prompts
114
+ // ============================================================================
115
+
116
/** One labelled prompt (or prompt-source excerpt) to embed in the review brief. */
interface PromptSection {
  label: string
  text: string
}

/**
 * Load the prompts that apply to the given voice mode from the CURRENT
 * `src/prompts.ts` (dynamic import, so edits are picked up without a rebuild).
 *
 * A missing export is rendered as the placeholder `(export not found)` rather
 * than throwing, so a renamed prompt never aborts the whole review. An export
 * that is expected to be a function but is not is treated the same way.
 *
 * @param mode Lower-cased voice mode: `direct`, `pipeline`, or `realtime`.
 * @param wd   Working directory passed to the prompt-builder functions.
 * @returns The prompt sections for `mode`; empty when `prompts.ts` is missing
 *   or the mode is not recognised (a warning is printed in the latter case).
 */
async function loadPrompts(mode: string, wd: string): Promise<PromptSection[]> {
  const sections: PromptSection[] = []

  const promptsPath = join(agentSrcDir, 'prompts.ts')
  if (!existsSync(promptsPath)) {
    console.error(`❌ Cannot find ${promptsPath}`)
    return sections
  }

  // tsx ESM loader handles .ts imports at runtime. Use file:// URL to be safe
  // across platforms.
  const promptsModule: Record<string, unknown> = await import(pathToFileURL(promptsPath).href)

  const NOT_FOUND = '(export not found)'

  // Invoke an exported prompt builder with the working directory.
  const callExport = (name: string): string => {
    const candidate = promptsModule[name]
    if (typeof candidate !== 'function') return NOT_FOUND
    const result: unknown = candidate(wd)
    return result == null ? NOT_FOUND : String(result)
  }

  // Read an exported prompt constant.
  const readExport = (name: string): string => {
    const value = promptsModule[name]
    return value == null ? NOT_FOUND : String(value)
  }

  switch (mode) {
    case 'direct':
      sections.push({
        label: 'Direct Mode Research Prompt (Claude SDK — agent/src/prompts.ts: getDirectModeResearchPrompt)',
        text: callExport('getDirectModeResearchPrompt'),
      })
      break

    case 'pipeline':
      sections.push({
        label: 'Research System Prompt (Claude SDK — agent/src/prompts.ts: getResearchSystemPrompt)',
        text: callExport('getResearchSystemPrompt'),
      })
      // The pipeline fast brain prompt builder is module-private, so its
      // source text is extracted instead of being imported as a value.
      sections.push({
        label: 'Pipeline Fast Brain — buildSystemPrompt function source (agent/src/pipeline-fastbrain.ts)',
        text:
          extractPipelineFastBrainSource() ??
          '(extraction failed — pipeline-fastbrain.ts format may have changed)',
      })
      break

    case 'realtime':
      sections.push({
        label: 'Realtime Voice Model Instructions (agent/src/prompts.ts: getRealtimeInstructions)',
        text: callExport('getRealtimeInstructions'),
      })
      sections.push({
        label: 'Research System Prompt (Claude SDK sub-research — agent/src/prompts.ts: getResearchSystemPrompt)',
        text: callExport('getResearchSystemPrompt'),
      })
      sections.push({
        label: 'Fast Brain System Prompt (agent/src/prompts.ts: FAST_BRAIN_SYSTEM_PROMPT)',
        text: readExport('FAST_BRAIN_SYSTEM_PROMPT'),
      })
      break

    default:
      // Without this, an unexpected mode produced an empty "Active prompts"
      // section in the brief with no hint as to why.
      console.warn(`⚠️ Unrecognised voice mode "${mode}" — no prompts loaded for the brief.`)
  }

  return sections
}
167
+
168
+ // ============================================================================
169
+ // buildSystemPrompt extractor (brace-match, since it's not exported)
170
+ // ============================================================================
171
+
172
/**
 * Scan forward from `from` (the index just after an already-consumed opening
 * delimiter) and return the index just past its matching closing delimiter.
 *
 * Purely character-based: delimiters inside strings or comments are counted
 * like any other.
 *
 * @returns Index one past the matching `close`, or -1 if the text ends first.
 */
function indexPastMatchingClose(text: string, from: number, open: string, close: string): number {
  let depth = 1
  let pos = from
  for (; pos < text.length && depth > 0; pos += 1) {
    const ch = text[pos]
    if (ch === open) depth += 1
    else if (ch === close) depth -= 1
  }
  return depth === 0 ? pos : -1
}

/**
 * Pull the source text of `buildSystemPrompt` out of `src/pipeline-fastbrain.ts`.
 *
 * The text is located by delimiter matching, because the function is not
 * exported and so cannot be imported as a value.
 *
 * @returns The text from the `function` keyword through the body's closing
 *   brace, or `null` if the file or function is missing or the delimiters do
 *   not balance.
 */
function extractPipelineFastBrainSource(): string | null {
  const pfbPath = join(agentSrcDir, 'pipeline-fastbrain.ts')
  if (!existsSync(pfbPath)) return null
  const src = readFileSync(pfbPath, 'utf-8')

  const fnStart = src.indexOf('function buildSystemPrompt')
  if (fnStart < 0) return null

  // The parameter list must be skipped as a unit before looking for the body:
  // TypeScript parameter types can contain braces
  // (e.g. `chatHistory?: { role: string; content: string }[]`), and a plain
  // brace search would mistake the first of those for the function body.
  const paramsOpen = src.indexOf('(', fnStart)
  if (paramsOpen < 0) return null
  const afterParams = indexPastMatchingClose(src, paramsOpen + 1, '(', ')')
  if (afterParams < 0) return null

  // Searching from after the `)` also steps over an optional `: ReturnType`.
  const bodyOpen = src.indexOf('{', afterParams)
  if (bodyOpen < 0) return null

  // Braces inside strings/comments are not special-cased; balanced `${...}`
  // interpolations in template literals do not upset the count.
  const fnEnd = indexPastMatchingClose(src, bodyOpen + 1, '{', '}')
  if (fnEnd < 0) return null

  return src.slice(fnStart, fnEnd)
}
222
+
223
+ // ============================================================================
224
+ // Step 4 — build markdown review brief
225
+ // ============================================================================
226
+
227
/**
 * Wrap `text` in a markdown code fence that the text itself cannot close.
 *
 * A fence is only terminated by a run of backticks at least as long as the
 * opening run, so the fence is made one backtick longer than the longest run
 * found in `text` (minimum three). Session logs and prompts routinely contain
 * ``` blocks of their own; a fixed three-backtick fence would end early and
 * spill the remainder into the surrounding markdown.
 */
function fencedBlock(text: string): string {
  const body = String(text)
  const longestRun = (body.match(/`+/g) ?? []).reduce((max, run) => Math.max(max, run.length), 0)
  const fence = '`'.repeat(Math.max(3, longestRun + 1))
  return `${fence}\n${body}\n${fence}`
}

const prompts = await loadPrompts(voiceMode, workingDir)
const timestamp = new Date().toISOString()

// One "### label" heading plus fenced body per prompt, separated by blank lines.
const promptSections = prompts
  .map((p) => `### ${p.label}\n\n${fencedBlock(p.text)}`)
  .join('\n\n')

// The full review brief: session summary, raw log, active prompts, and the
// phased review instructions for the reviewing Claude session.
const brief = `# Osborn Prompt Review

## Session summary

- **Voice mode**: ${voiceMode}
- **Provider**: ${provider ?? 'unknown'}
- **Working dir**: ${workingDir}
- **User turns (approx)**: ${userTurnCount}
- **Log path**: ${logPath}
- **Review generated**: ${timestamp}

## Raw session log

The following is the full untruncated terminal output from an \`npm run dev:logged\` session with Osborn, a voice AI research assistant. Pattern guide:

- \`📝 User (conv_item, N chars): "..."\` / \`📝 Text (N chars): "..."\` — what the user said (voice) or typed (data channel)
- \`📥 [pipeline] chat() call #N (chars): "..."\` — pipeline-mode user turn entering the Claude SDK
- \`💬 Agent (conv_item, N chars): "..."\` — what the agent said back via the voice model
- \`💬 Claude text (N chars): ...\` / \`📋 Claude result (N chars): ...\` — Claude SDK streaming output / final result
- \`🧠⚡ [FAST_BRAIN TYPE +Nms]: "..."\` / \`🧠⚡ [pipeline-fb] AFC: ...\` — fast brain classifications / responses
- \`🔧 Claude: X\` / \`✅ Done: X\` — tool calls
- \`🎙️ / 🎯\` — voice mode + provider markers

${fencedBlock(logContent)}

## Active prompts (at review time)

These are the prompts currently in \`agent/src/prompts.ts\` (and the function source from \`agent/src/pipeline-fastbrain.ts\` for pipeline mode) that shaped behavior during this \`${voiceMode}\`-mode session. They are loaded dynamically at review time, so they reflect the CURRENT state of the files — not a snapshot.

${promptSections}

## Review task

**This is a design audit, not a rule-checking pass.** Reading each prompt section in isolation and grepping the log for violations is the wrong approach and produces shallow, checklist-style output. The prompts are a system — they interlock, depend on each other, and only make sense in the context of what the user is trying to build. Your job is to understand that system as a whole, share your understanding with the user, and THEN grade the session against it.

Do this in phases, in order. Do not skip ahead.

### Phase 1 — Build a mental model of the system

Before you touch the session log, understand Osborn as a system end-to-end.

1. **Read every prompt section above in full.** Don't skim. Notice how each prompt assumes things about what the other layers do.
2. **Read \`agent/CLAUDE.md\`** via the Read tool. It describes the voice mode architecture (direct / pipeline / realtime), sub-agent orchestration (researcher / reasoner / writer), the fast brain middle tier, and the layered responsibility model. The prompts only make sense in this context.
3. **Grep briefly** for how prompts are selected at runtime: look at \`agent/src/claude-llm.ts\` around line 880-920 and \`agent/src/index.ts\` around the fast brain routing. Understand how a turn actually flows through the layers — not just the static prompt text.

Then write down, as the first content of your response to the user, an explicit mental model:
- What does a user turn flow look like end-to-end? (STT → fast brain classification → Claude SDK orchestrator → sub-agents or direct response → TTS back to the user)
- What's each prompt's intended role? How do they interlock?
- What would an "ideal" turn look like, if everything worked perfectly?

### Phase 2 — Develop a theory of user intent

Based on the session log AND your mental model, form a short theory: **what does THIS user want Osborn to be?** What's their ideal experience? Read between the lines of how they talk to the agent, what they interrupt, what they repeat, what they correct. 2-3 sentences max.

### Phase 3 — STOP and confirm with the user before grading

Show the user your mental model and your theory of their intent. Ask: "Does this match how you think about it? Anything I'm missing or getting wrong?" **Wait for a response.** Do not proceed to auditing until the user has confirmed or corrected your understanding.

This is the most important phase. Skip it and you'll be grading the session against your assumptions instead of the user's. The whole point of this review is to have a conversation about the design, not to hand-deliver a list of violations.

### Phase 4 — Walk the log through the confirmed model

Only now, read the raw session log turn by turn. For each meaningful turn, ask three questions in this exact order:
- **What should have happened here**, given the system as designed and the user's confirmed intent?
- **What actually happened?**
- **Why the gap?**

Categorize each gap by root cause:
- **Expression gap** — the intent is present in a prompt but worded weakly. Fix: tighten wording.
- **Omission gap** — the intent isn't in any prompt at all. Fix: add new guidance, considering where it belongs.
- **Conflict gap** — two prompts contradict each other or leave a gap between them. Fix: reconcile.
- **Architecture gap** — the system design can't actually produce the intended behavior. Fix: flag and discuss. Do NOT patch a prompt to work around an architectural problem.
- **Model miss** — the prompt says it clearly and the model ignored it. Fix: usually can't be fixed with tighter wording; flag and move on.

### Phase 5 — Propose changes holistically, one at a time

Do NOT batch-propose a list of six edits. Each proposed change should be brought to the user individually with:
1. The gap you're trying to close and its category (from Phase 4)
2. The exact before/after prompt text
3. How the change interacts with OTHER prompt sections — does it conflict with anything? Does it create a new gap? Is it addressing root cause or a symptom?
4. A pause for user confirmation before you use Edit

### Phase 6 — Apply edits

Only after the user has explicitly agreed to a specific change, use the Edit tool on:
- \`${agentSrcDir}/prompts.ts\`
- \`${agentSrcDir}/pipeline-fastbrain.ts\`

---

**Start with Phase 1 right now.** Don't jump to the log. Don't start summarizing the session. The very first thing in your response should be your mental model of the Osborn system as you understand it after reading the prompts, CLAUDE.md, and the relevant source.
`
328
+
329
+ // ============================================================================
330
+ // Step 5 — write brief to file, print manual fallback, spawn claude with pointer
331
+ // ============================================================================
332
+
333
// Always write the brief to a file and hand claude a short pointer to it,
// rather than passing the brief itself on the command line:
//   - Argv size limits (ARG_MAX) are platform-dependent, and the
//     `npm → tsx → spawn` chain adds layers that can fail on large args.
//   - Quoting a ~50KB multi-line markdown string is fragile.
//   - NOTE(review): the original author also observed that a huge positional
//     prompt made claude print and exit instead of opening a chat — confirm
//     against the current CLI's behaviour.
// Compact UTC stamp, YYYYMMDDHHMMSS, derived from the ISO timestamp.
const briefTs = new Date().toISOString().replace(/[-:T.]/g, '').slice(0, 14)
const briefPath = join(logDir, `review-brief-${briefTs}.md`)

// When a log path is passed explicitly, the log may live outside logDir and
// logDir itself may not exist yet. Create it so the write below cannot fail
// with ENOENT. `recursive: true` makes this a no-op if it already exists.
mkdirSync(logDir, { recursive: true })
writeFileSync(briefPath, brief, 'utf-8')

const briefKB = Math.round(Buffer.byteLength(brief, 'utf-8') / 1024)
console.log(`📄 Brief written: ${briefPath}`)
console.log(` Size: ${briefKB}KB`)
348
+
349
// REVIEW_DRY_RUN=1 previews the brief on stdout and exits successfully
// without spawning claude — handy for testing the brief generation.
const dryRunRequested = process.env.REVIEW_DRY_RUN === '1'
if (dryRunRequested) {
  const preview = [
    '\n=== REVIEW_DRY_RUN=1 — brief below (claude NOT spawned) ===\n',
    brief,
    '\n=== end of brief ===',
  ]
  for (const chunk of preview) console.log(chunk)
  process.exit(0)
}
357
+
358
// Initial message handed to claude on its command line. It only points at the
// brief file and asks claude to follow the review instructions inside it, so
// it stays short regardless of how large the brief is.
const pointerMessage = [
  `I've captured a prompt-review brief for an Osborn voice session at ${briefPath}.`,
  `Please Read that file in full and follow the "Review task" instructions at the bottom.`,
].join(' ')
364
+
365
// Manual fallback: if the auto-spawn further down does not land cleanly
// (terminal state weirdness, claude CLI version differences, etc.), the user
// can copy these commands into a fresh terminal instead.
const manualInstructions = [
  ``,
  `─────────────────────────────────────────────────────────────`,
  `If the auto-launch doesn't work, run these commands manually:`,
  ``,
  ` cd ${agentDir}`,
  ` claude --add-dir ${agentSrcDir}`,
  ``,
  `Then inside the claude session, type or paste:`,
  ``,
  ` Read ${briefPath} and follow the "Review task" at the bottom.`,
  `─────────────────────────────────────────────────────────────`,
  ``,
]
for (const line of manualInstructions) console.log(line)
380
+
381
// REVIEW_NO_LAUNCH=1 opts out of the auto-launch — for users who prefer manual
// control, or when nesting under tsx/npm causes stdio weirdness.
const skipAutoLaunch = process.env.REVIEW_NO_LAUNCH === '1'
if (skipAutoLaunch) {
  console.log(`REVIEW_NO_LAUNCH=1 — not auto-launching claude. Use the manual commands above.`)
  process.exit(0)
}

// Announce what is about to be run, and how to disable it next time.
for (const line of [
  `🚀 Auto-launching: claude --add-dir ${agentSrcDir} "<pointer message>"`,
  ` (set REVIEW_NO_LAUNCH=1 to skip auto-launch next time)`,
  ``,
]) {
  console.log(line)
}
391
+
392
// Launch the claude CLI attached to this terminal (`stdio: 'inherit'`), with
// the agent source directory added so the session can open the prompt files,
// and mirror claude's exit status as this script's own.
try {
  const claude = spawn('claude', ['--add-dir', agentSrcDir, pointerMessage], {
    stdio: 'inherit',
    cwd: agentDir,
  })

  // `pid` is undefined when the process could not be spawned; the 'error'
  // handler below reports that case, so don't print a misleading PID line.
  if (claude.pid !== undefined) {
    console.log(` claude PID: ${claude.pid}\n`)
  }

  claude.on('error', (err: Error & { code?: string }) => {
    if (err.code === 'ENOENT') {
      console.error(`\n❌ \`claude\` CLI not found on PATH.`)
      console.error(` Install with: npm install -g @anthropic-ai/claude-code`)
      console.error(` Or use the manual commands printed above.`)
      process.exit(1)
    }
    console.error(`\n❌ Failed to spawn claude: ${err.message}`)
    console.error(` Try the manual commands printed above.`)
    process.exit(1)
  })

  claude.on('exit', (code, signal) => {
    if (code !== 0 || signal) {
      console.log(`\n📝 claude exited (code=${code ?? 'null'} signal=${signal ?? 'null'})`)
      if (code !== 0 && signal === null) {
        console.log(` If the session didn't render properly, try the manual commands above.`)
      }
    }
    // `code` is null only when claude was terminated by a signal. That is not
    // a successful run, so report failure instead of defaulting to 0.
    process.exit(code ?? 1)
  })
} catch (err: unknown) {
  // spawn() can throw synchronously, e.g. on invalid arguments.
  const message = err instanceof Error ? err.message : String(err)
  console.error(`❌ Unexpected error: ${message}`)
  console.error(` Try the manual commands printed above.`)
  process.exit(1)
}