osborn 0.8.6 → 0.8.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,425 @@
1
+ /**
2
+ * Prompt-review CLI for Osborn dev-logged sessions.
3
+ *
4
+ * Workflow:
5
+ * 1. Find the newest captured session under `.osborn/dev-logs/`
6
+ * (or accept a path arg).
7
+ * 2. Extract voice mode + provider from the log via regex.
8
+ * 3. Dynamic-import the current prompts from `../src/prompts.ts` so the
9
+ * reviewer sees the LATEST version of each prompt (not a snapshot).
10
+ * 4. Extract the pipeline fast-brain `buildSystemPrompt` function source
11
+ * from `../src/pipeline-fastbrain.ts` (which is module-private and
12
+ * cannot be dynamic-imported as a value).
13
+ * 5. Build a markdown review brief: session summary + raw log + active
14
+ * prompts + review task instructions.
15
+ * 6. Write the brief to a file next to the dev logs — always, because a
16
+ * large brief is unreliable as an argv argument — and pass a short pointer.
17
+ * 7. Spawn `claude` CLI with `stdio: 'inherit'` and `--add-dir agent/src`
18
+ * so the reviewing Claude can Read / Grep / Edit the prompt files.
19
+ *
20
+ * Usage:
21
+ * npm run review # review the latest log
22
+ * npm run review <path-to-log-file> # review a specific log
23
+ *
24
+ * This script is OUT-OF-LOOP from the agent — removing it has zero impact
25
+ * on runtime behavior. Pair with `npm run dev:logged` (scripts/dev-logged.ts)
26
+ * to capture sessions.
27
+ */
28
+
29
+ import { spawn } from 'node:child_process'
30
+ import {
31
+ readFileSync,
32
+ readdirSync,
33
+ statSync,
34
+ existsSync,
35
+ writeFileSync,
36
+ } from 'node:fs'
37
+ import { join, dirname, resolve } from 'node:path'
38
+ import { fileURLToPath, pathToFileURL } from 'node:url'
39
+
40
+ // ============================================================================
41
+ // Paths
42
+ // ============================================================================
43
+
44
+ const __dirname = dirname(fileURLToPath(import.meta.url))
45
+ // scripts/ is sibling to src/, both under agent/
46
+ const agentDir = resolve(__dirname, '..')
47
+ const agentSrcDir = join(agentDir, 'src')
48
+ const logDir = join(agentDir, '.osborn', 'dev-logs')
49
+
50
+ // ============================================================================
51
+ // Step 1 — find the log file
52
+ // ============================================================================
53
+
54
/**
 * Locate the most recently modified `.log` file in a dev-log directory.
 *
 * Only regular files are considered. Entries that cannot be stat'd (for
 * example a dangling symlink, or a file removed while scanning) and
 * directories that merely end in `.log` are skipped instead of aborting the
 * whole lookup.
 *
 * @param dir Directory to scan. Defaults to the module-level `logDir`.
 * @returns Path of the newest `.log` file, or `null` when the directory does
 *          not exist or contains no usable log file.
 */
function findLatestLog(dir: string = logDir): string | null {
  if (!existsSync(dir)) return null

  let newestPath: string | null = null
  let newestMtime = -Infinity

  for (const name of readdirSync(dir)) {
    if (!name.endsWith('.log')) continue
    const candidate = join(dir, name)

    let mtime: number
    try {
      const info = statSync(candidate)
      if (!info.isFile()) continue
      mtime = info.mtimeMs
    } catch {
      // Unreadable entry — ignore it rather than crash the CLI.
      continue
    }

    // Strict `>` keeps the first entry on ties, like a stable descending sort.
    if (mtime > newestMtime) {
      newestMtime = mtime
      newestPath = candidate
    }
  }

  return newestPath
}
65
+
66
// An explicit CLI argument wins; otherwise fall back to the newest capture.
const argLogPath = process.argv[2]
const logPath = argLogPath ? resolve(argLogPath) : findLatestLog()

if (!logPath) {
  // Nothing was passed and nothing has been captured yet.
  const help = [
    '❌ No dev log found.',
    ` Run \`npm run dev:logged\` to capture a session first.`,
    ` Log dir: ${logDir}`,
  ]
  for (const line of help) console.error(line)
  process.exit(1)
}

const logIsMissing = !existsSync(logPath)
if (logIsMissing) {
  console.error(`❌ Log file not found: ${logPath}`)
  process.exit(1)
}

// The whole log is held in memory; it is embedded verbatim in the brief.
console.log(`📖 Reading ${logPath}`)
const logContent = readFileSync(logPath, 'utf-8')
83
+
84
+ // ============================================================================
85
+ // Step 2 — extract voice mode, provider, working dir
86
+ // ============================================================================
87
+
88
/** First capture group of `pattern` in the session log, or `undefined` when it does not match. */
const firstGroup = (pattern: RegExp): string | undefined => pattern.exec(logContent)?.[1]

// Voice mode: the frontend metadata line is preferred, then the mode banner,
// then the pipeline default.
const voiceMode =
  firstGroup(/🎙️ Using voice mode from frontend: (\w+)/)?.toLowerCase() ??
  firstGroup(/🎯 (DIRECT|PIPELINE|REALTIME) MODE/i)?.toLowerCase() ??
  'pipeline'

const provider = firstGroup(/🎙️ Using provider from frontend: (\w+)/) ?? null

// Working directory: the frontend-supplied value overrides the startup cwd
// line; the reviewer's own cwd is the last resort.
const workingDir =
  firstGroup(/📂 Working directory from frontend: ([^\n]+)/)?.trim() ??
  firstGroup(/📂 Working directory \(cwd\): ([^\n]+)/)?.trim() ??
  process.cwd()

// Approximate turn count: occurrences of the three user-turn log markers.
const userTurnCount = [/📝 User \(/g, /📝 Text \(/g, /📥 \[pipeline\] chat\(\) call/g].reduce(
  (total, marker) => total + (logContent.match(marker)?.length ?? 0),
  0,
)

console.log(` Mode: ${voiceMode}, Provider: ${provider ?? 'unknown'}, User turns: ~${userTurnCount}`)
111
+
112
+ // ============================================================================
113
+ // Step 3 — dynamic-import current prompts
114
+ // ============================================================================
115
+
116
/** One titled prompt excerpt rendered into the review brief. */
interface PromptSection {
  label: string
  text: string
}

/**
 * Load the prompts relevant to a voice mode from the current `src/prompts.ts`.
 *
 * The module is imported at call time, so the text reflects the files as they
 * are now rather than as they were when the session was recorded.
 *
 * @param mode Voice mode: `direct`, `pipeline` or `realtime`. Any other value
 *             yields no sections.
 * @param wd   Working directory handed to the prompt builder functions.
 * @returns The prompt sections for `mode`, or an empty list when `prompts.ts`
 *          cannot be found.
 */
async function loadPrompts(mode: string, wd: string): Promise<PromptSection[]> {
  const promptsPath = join(agentSrcDir, 'prompts.ts')
  if (!existsSync(promptsPath)) {
    console.error(`❌ Cannot find ${promptsPath}`)
    return []
  }

  // The tsx ESM loader resolves .ts at runtime; a file:// URL keeps the
  // import portable across platforms.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- shape of prompts.ts is not known here
  const promptsModule: any = await import(pathToFileURL(promptsPath).href)

  const notFound = '(export not found)'
  /** Call a prompt-builder export with `wd`, falling back to a placeholder. */
  const fromBuilder = (exportName: string): string => promptsModule[exportName]?.(wd) ?? notFound

  switch (mode) {
    case 'direct':
      return [
        {
          label: 'Direct Mode Research Prompt (Claude SDK — agent/src/prompts.ts: getDirectModeResearchPrompt)',
          text: fromBuilder('getDirectModeResearchPrompt'),
        },
      ]

    case 'pipeline': {
      const research: PromptSection = {
        label: 'Research System Prompt (Claude SDK — agent/src/prompts.ts: getResearchSystemPrompt)',
        text: fromBuilder('getResearchSystemPrompt'),
      }
      // The fast-brain prompt builder is module-private, so its source text is
      // extracted instead of being called.
      const fastBrainSource = extractPipelineFastBrainSource()
      const fastBrain: PromptSection = {
        label: 'Pipeline Fast Brain — buildSystemPrompt function source (agent/src/pipeline-fastbrain.ts)',
        text: fastBrainSource ?? '(extraction failed — pipeline-fastbrain.ts format may have changed)',
      }
      return [research, fastBrain]
    }

    case 'realtime':
      return [
        {
          label: 'Realtime Voice Model Instructions (agent/src/prompts.ts: getRealtimeInstructions)',
          text: fromBuilder('getRealtimeInstructions'),
        },
        {
          label: 'Research System Prompt (Claude SDK sub-research — agent/src/prompts.ts: getResearchSystemPrompt)',
          text: fromBuilder('getResearchSystemPrompt'),
        },
        {
          label: 'Fast Brain System Prompt (agent/src/prompts.ts: FAST_BRAIN_SYSTEM_PROMPT)',
          text: promptsModule.FAST_BRAIN_SYSTEM_PROMPT ?? notFound,
        },
      ]

    default:
      return []
  }
}
167
+
168
+ // ============================================================================
169
+ // buildSystemPrompt extractor (brace-match, since it's not exported)
170
+ // ============================================================================
171
+
172
/**
 * Extract the source text of the `buildSystemPrompt` function declaration.
 *
 * The function is not exported from `pipeline-fastbrain.ts`, so it cannot be
 * imported as a value; its text is located by a lightweight scan instead.
 *
 * The scan:
 *  - matches the declaration by whole identifier, so a longer name such as
 *    `buildSystemPromptHelper` is not picked up;
 *  - skips the parameter list, whose type annotations may contain braces;
 *  - steps over an object-typed return annotation (`): { … }[] {`);
 *  - ignores brackets inside string literals, template-literal text and
 *    comments.
 *
 * Regex literals are NOT recognised; a regex containing quotes or unbalanced
 * brackets inside the function can still confuse the scan.
 *
 * @param pfbPath File to read. Defaults to `pipeline-fastbrain.ts` under the
 *                module-level `agentSrcDir`.
 * @returns The text from the `function` keyword through the closing brace of
 *          the body, or `null` when the file or declaration is missing or the
 *          brackets cannot be balanced.
 */
function extractPipelineFastBrainSource(
  pfbPath: string = join(agentSrcDir, 'pipeline-fastbrain.ts'),
): string | null {
  if (!existsSync(pfbPath)) return null
  const src = readFileSync(pfbPath, 'utf-8')

  const declaration = /\bfunction\s+buildSystemPrompt\b/.exec(src)
  if (declaration === null) return null
  const startIdx = declaration.index

  // Parameter list: match the parentheses first so braces in parameter types
  // are never mistaken for the function body.
  const openParenIdx = src.indexOf('(', startIdx + declaration[0].length)
  if (openParenIdx === -1) return null
  const closeParenIdx = findMatchingClose(src, openParenIdx)
  if (closeParenIdx === -1) return null

  // Body: the first brace group after the parameters that is not followed by
  // more type syntax. A group followed by `[`, `|`, `&`, `>`, `,` or another
  // `{` belongs to the return-type annotation (heuristic).
  let openBraceIdx = src.indexOf('{', closeParenIdx + 1)
  while (openBraceIdx !== -1) {
    const closeBraceIdx = findMatchingClose(src, openBraceIdx)
    if (closeBraceIdx === -1) return null
    if (!continuesTypeAnnotation(src, closeBraceIdx + 1)) {
      return src.substring(startIdx, closeBraceIdx + 1)
    }
    openBraceIdx = src.indexOf('{', closeBraceIdx + 1)
  }
  return null
}

/**
 * Index of the bracket closing the `(`, `{` or `[` at `openIdx`, or -1.
 * Only brackets of the same kind are counted; strings, template literals and
 * comments are skipped.
 */
function findMatchingClose(src: string, openIdx: number): number {
  const open = src.charAt(openIdx)
  const close = open === '(' ? ')' : open === '{' ? '}' : open === '[' ? ']' : ''
  if (close === '') return -1

  let depth = 0
  let i = openIdx
  while (i < src.length) {
    const ch = src.charAt(i)
    const next = src.charAt(i + 1)

    if (ch === '/' && next === '/') {
      const eol = src.indexOf('\n', i)
      if (eol === -1) return -1
      i = eol + 1
    } else if (ch === '/' && next === '*') {
      const end = src.indexOf('*/', i + 2)
      if (end === -1) return -1
      i = end + 2
    } else if (ch === "'" || ch === '"') {
      i = skipQuoted(src, i)
    } else if (ch === '`') {
      i = skipTemplate(src, i)
      if (i === -1) return -1
    } else {
      if (ch === open) {
        depth++
      } else if (ch === close) {
        depth--
        if (depth === 0) return i
      }
      i++
    }
  }
  return -1
}

/**
 * Index just past the quoted string starting at `start`. An unterminated
 * string ends at the line break (or end of input) so scanning can continue.
 */
function skipQuoted(src: string, start: number): number {
  const quote = src.charAt(start)
  let i = start + 1
  while (i < src.length) {
    const ch = src.charAt(i)
    if (ch === '\\') {
      i += 2
      continue
    }
    if (ch === quote) return i + 1
    if (ch === '\n') return i
    i++
  }
  return src.length
}

/**
 * Index just past the template literal starting at `start`, or -1 when it is
 * unterminated. `${ … }` interpolations are scanned as code.
 */
function skipTemplate(src: string, start: number): number {
  let i = start + 1
  while (i < src.length) {
    const ch = src.charAt(i)
    if (ch === '\\') {
      i += 2
    } else if (ch === '`') {
      return i + 1
    } else if (ch === '$' && src.charAt(i + 1) === '{') {
      const end = findMatchingClose(src, i + 1)
      if (end === -1) return -1
      i = end + 1
    } else {
      i++
    }
  }
  return -1
}

/** True when the next non-blank character at or after `from` continues a type expression. */
function continuesTypeAnnotation(src: string, from: number): boolean {
  let i = from
  while (i < src.length && /\s/.test(src.charAt(i))) i++
  return i < src.length && '[|&>,{'.includes(src.charAt(i))
}
222
+
223
+ // ============================================================================
224
+ // Step 4 — build markdown review brief
225
+ // ============================================================================
226
+
227
const prompts = await loadPrompts(voiceMode, workingDir)
const timestamp = new Date().toISOString()

/**
 * Pick a Markdown code fence that `text` cannot close by accident.
 *
 * A fence is only closed by a backtick run at least as long as the opening
 * one, so the fence is one backtick longer than the longest run in `text`
 * (minimum three). Session logs and prompts often contain their own ```
 * blocks, which would otherwise end the fence early.
 */
function codeFenceFor(text: string): string {
  // String() guards against a non-string export from the dynamically imported prompts module.
  const runs = String(text).match(/`+/g) ?? []
  const longest = runs.reduce((max, run) => Math.max(max, run.length), 0)
  return '`'.repeat(Math.max(3, longest + 1))
}

// One fenced section per active prompt, each under its own heading.
const promptSections = prompts
  .map((p) => {
    const fence = codeFenceFor(p.text)
    return `### ${p.label}\n\n${fence}\n${p.text}\n${fence}`
  })
  .join('\n\n')

const logFence = codeFenceFor(logContent)

// The full review brief: session summary, raw log, active prompts, and the
// phased review instructions for the reviewing Claude session.
const brief = `# Osborn Prompt Review

## Session summary

- **Voice mode**: ${voiceMode}
- **Provider**: ${provider ?? 'unknown'}
- **Working dir**: ${workingDir}
- **User turns (approx)**: ${userTurnCount}
- **Log path**: ${logPath}
- **Review generated**: ${timestamp}

## Raw session log

The following is the full untruncated terminal output from an \`npm run dev:logged\` session with Osborn, a voice AI research assistant. Pattern guide:

- \`📝 User (conv_item, N chars): "..."\` / \`📝 Text (N chars): "..."\` — what the user said (voice) or typed (data channel)
- \`📥 [pipeline] chat() call #N (chars): "..."\` — pipeline-mode user turn entering the Claude SDK
- \`💬 Agent (conv_item, N chars): "..."\` — what the agent said back via the voice model
- \`💬 Claude text (N chars): ...\` / \`📋 Claude result (N chars): ...\` — Claude SDK streaming output / final result
- \`🧠⚡ [FAST_BRAIN TYPE +Nms]: "..."\` / \`🧠⚡ [pipeline-fb] AFC: ...\` — fast brain classifications / responses
- \`🔧 Claude: X\` / \`✅ Done: X\` — tool calls
- \`🎙️ / 🎯\` — voice mode + provider markers

${logFence}
${logContent}
${logFence}

## Active prompts (at review time)

These are the prompts currently in \`agent/src/prompts.ts\` (and the function source from \`agent/src/pipeline-fastbrain.ts\` for pipeline mode) that shaped behavior during this \`${voiceMode}\`-mode session. They are loaded dynamically at review time, so they reflect the CURRENT state of the files — not a snapshot.

${promptSections}

## Review task

**This is a design audit, not a rule-checking pass.** Reading each prompt section in isolation and grepping the log for violations is the wrong approach and produces shallow, checklist-style output. The prompts are a system — they interlock, depend on each other, and only make sense in the context of what the user is trying to build. Your job is to understand that system as a whole, share your understanding with the user, and THEN grade the session against it.

Do this in phases, in order. Do not skip ahead.

### Phase 1 — Build a mental model of the system

Before you touch the session log, understand Osborn as a system end-to-end.

1. **Read every prompt section above in full.** Don't skim. Notice how each prompt assumes things about what the other layers do.
2. **Read \`agent/CLAUDE.md\`** via the Read tool. It describes the voice mode architecture (direct / pipeline / realtime), sub-agent orchestration (researcher / reasoner / writer), the fast brain middle tier, and the layered responsibility model. The prompts only make sense in this context.
3. **Grep briefly** for how prompts are selected at runtime: look at \`agent/src/claude-llm.ts\` around line 880-920 and \`agent/src/index.ts\` around the fast brain routing. Understand how a turn actually flows through the layers — not just the static prompt text.

Then write down, as the first content of your response to the user, an explicit mental model:
- What does a user turn flow look like end-to-end? (STT → fast brain classification → Claude SDK orchestrator → sub-agents or direct response → TTS back to the user)
- What's each prompt's intended role? How do they interlock?
- What would an "ideal" turn look like, if everything worked perfectly?

### Phase 2 — Develop a theory of user intent

Based on the session log AND your mental model, form a short theory: **what does THIS user want Osborn to be?** What's their ideal experience? Read between the lines of how they talk to the agent, what they interrupt, what they repeat, what they correct. 2-3 sentences max.

### Phase 3 — STOP and confirm with the user before grading

Show the user your mental model and your theory of their intent. Ask: "Does this match how you think about it? Anything I'm missing or getting wrong?" **Wait for a response.** Do not proceed to auditing until the user has confirmed or corrected your understanding.

This is the most important phase. Skip it and you'll be grading the session against your assumptions instead of the user's. The whole point of this review is to have a conversation about the design, not to hand-deliver a list of violations.

### Phase 4 — Walk the log through the confirmed model

Only now, read the raw session log turn by turn. For each meaningful turn, ask three questions in this exact order:
- **What should have happened here**, given the system as designed and the user's confirmed intent?
- **What actually happened?**
- **Why the gap?**

Categorize each gap by root cause:
- **Expression gap** — the intent is present in a prompt but worded weakly. Fix: tighten wording.
- **Omission gap** — the intent isn't in any prompt at all. Fix: add new guidance, considering where it belongs.
- **Conflict gap** — two prompts contradict each other or leave a gap between them. Fix: reconcile.
- **Architecture gap** — the system design can't actually produce the intended behavior. Fix: flag and discuss. Do NOT patch a prompt to work around an architectural problem.
- **Model miss** — the prompt says it clearly and the model ignored it. Fix: usually can't be fixed with tighter wording; flag and move on.

### Phase 5 — Propose changes holistically, one at a time

Do NOT batch-propose a list of six edits. Each proposed change should be brought to the user individually with:
1. The gap you're trying to close and its category (from Phase 4)
2. The exact before/after prompt text
3. How the change interacts with OTHER prompt sections — does it conflict with anything? Does it create a new gap? Is it addressing root cause or a symptom?
4. A pause for user confirmation before you use Edit

### Phase 6 — Apply edits

Only after the user has explicitly agreed to a specific change, use the Edit tool on:
- \`${agentSrcDir}/prompts.ts\`
- \`${agentSrcDir}/pipeline-fastbrain.ts\`

---

**Start with Phase 1 right now.** Don't jump to the log. Don't start summarizing the session. The very first thing in your response should be your mental model of the Osborn system as you understand it after reading the prompts, CLAUDE.md, and the relevant source.
`
328
+
329
+ // ============================================================================
330
+ // Step 5 — write brief to file, print manual fallback, spawn claude with pointer
331
+ // ============================================================================
332
+
333
+ // Always write the brief to a file. Passing a 50KB brief as argv has several
334
+ // problems:
335
+ // - `claude "query"` in Claude Code CLI is NON-INTERACTIVE print mode, not
336
+ // interactive-with-initial-message. A huge positional arg makes claude
337
+ // print and exit, not open a chat.
338
+ // - Argv size limits (ARG_MAX) are platform-dependent and the `npm → tsx →
339
+ // spawn` chain adds layers that can fail silently on large args.
340
+ // - Shell-quoting a 50KB multi-line markdown string is fragile.
341
+ // File + short pointer is dramatically more reliable.
342
// Compact UTC timestamp (YYYYMMDDHHMMSS) so each brief gets its own file name.
const briefTs = new Date().toISOString().replace(/[-:T.]/g, '').slice(0, 14)
const briefPath = join(logDir, `review-brief-${briefTs}.md`)

// When the log was passed as a path argument, the dev-log directory may not
// exist yet. Create it first so writing the brief cannot fail with ENOENT.
const { mkdirSync } = await import('node:fs')
mkdirSync(logDir, { recursive: true })

writeFileSync(briefPath, brief, 'utf-8')
const briefKB = Math.round(Buffer.byteLength(brief, 'utf-8') / 1024)
console.log(`📄 Brief written: ${briefPath}`)
console.log(` Size: ${briefKB}KB`)
348
+
349
// REVIEW_DRY_RUN=1 previews the brief on stdout and stops before claude is
// spawned. Handy for testing the brief builder.
const dryRunRequested = process.env.REVIEW_DRY_RUN === '1'
if (dryRunRequested) {
  const preview = [
    '\n=== REVIEW_DRY_RUN=1 — brief below (claude NOT spawned) ===\n',
    brief,
    '\n=== end of brief ===',
  ]
  for (const chunk of preview) console.log(chunk)
  process.exit(0)
}

// The only text handed to claude on argv: a short pointer to the brief file,
// which keeps the argument small regardless of the brief's size.
const pointerMessage = [
  `I've captured a prompt-review brief for an Osborn voice session at ${briefPath}. `,
  `Please Read that file in full and follow the "Review task" instructions at the bottom.`,
].join('')

// Manual fallback, printed every time: if the auto-launch misbehaves, the
// user can copy these commands into a fresh terminal.
const divider = `─────────────────────────────────────────────────────────────`
const manualInstructions = [
  ``,
  divider,
  `If the auto-launch doesn't work, run these commands manually:`,
  ``,
  ` cd ${agentDir}`,
  ` claude --add-dir ${agentSrcDir}`,
  ``,
  `Then inside the claude session, type or paste:`,
  ``,
  ` Read ${briefPath} and follow the "Review task" at the bottom.`,
  divider,
  ``,
]
manualInstructions.forEach((line) => console.log(line))

// REVIEW_NO_LAUNCH=1 stops here for users who prefer to start claude themselves.
const launchDisabled = process.env.REVIEW_NO_LAUNCH === '1'
if (launchDisabled) {
  console.log(`REVIEW_NO_LAUNCH=1 — not auto-launching claude. Use the manual commands above.`)
  process.exit(0)
}

const launchBanner = [
  `🚀 Auto-launching: claude --add-dir ${agentSrcDir} "<pointer message>"`,
  ` (set REVIEW_NO_LAUNCH=1 to skip auto-launch next time)`,
  ``,
]
launchBanner.forEach((line) => console.log(line))
391
+
392
// Launch the claude CLI attached to this terminal, pointed at the brief.
try {
  const claude = spawn('claude', ['--add-dir', agentSrcDir, pointerMessage], {
    stdio: 'inherit',
    cwd: agentDir,
  })

  // `pid` is undefined when the process could not be started; the 'error'
  // handler below reports that case.
  if (claude.pid !== undefined) {
    console.log(` claude PID: ${claude.pid}\n`)
  }

  claude.on('error', (err: NodeJS.ErrnoException) => {
    if (err.code === 'ENOENT') {
      console.error(`\n❌ \`claude\` CLI not found on PATH.`)
      console.error(` Install with: npm install -g @anthropic-ai/claude-code`)
      console.error(` Or use the manual commands printed above.`)
      process.exit(1)
    }
    console.error(`\n❌ Failed to spawn claude: ${err.message}`)
    console.error(` Try the manual commands printed above.`)
    process.exit(1)
  })

  claude.on('exit', (code, signal) => {
    if (code !== 0 || signal) {
      console.log(`\n📝 claude exited (code=${code ?? 'null'} signal=${signal ?? 'null'})`)
      if (code !== 0 && signal === null) {
        console.log(` If the session didn't render properly, try the manual commands above.`)
      }
    }
    // `code` is null when claude was terminated by a signal. Report that as a
    // failure instead of exiting 0.
    process.exit(code ?? 1)
  })
} catch (err: unknown) {
  // spawn() can throw synchronously, e.g. on invalid arguments.
  const reason = err instanceof Error ? err.message : String(err)
  console.error(`❌ Unexpected error: ${reason}`)
  console.error(` Try the manual commands printed above.`)
  process.exit(1)
}
@@ -1,9 +0,0 @@
1
- {
2
- "permissions": {
3
- "allow": [
4
- "Bash(ps:*)",
5
- "Bash(osascript:*)",
6
- "Bash(curl -s http://localhost:3000)"
7
- ]
8
- }
9
- }
@@ -1,29 +0,0 @@
1
- # Skill: Markdown to PDF
2
-
3
- Export Markdown documents as formatted PDF files.
4
-
5
- ## When to use
6
- When the user wants to create a PDF from a Markdown file, spec, or research findings.
7
-
8
- ## How to execute
9
-
10
- Option 1 — Using md-to-pdf (best quality):
11
- ```bash
12
- npx --yes md-to-pdf "<MARKDOWN_PATH>"
13
- ```
14
- This creates a PDF alongside the source file with the same name.
15
-
16
- Option 2 — Using pandoc (if available):
17
- ```bash
18
- pandoc "<MARKDOWN_PATH>" -o "<OUTPUT_PATH>.pdf" --pdf-engine=wkhtmltopdf
19
- ```
20
-
21
- Option 3 — Using markdown-pdf:
22
- ```bash
23
- npx --yes markdown-pdf "<MARKDOWN_PATH>" -o "<OUTPUT_PATH>.pdf"
24
- ```
25
-
26
- ## Output
27
- - Save the PDF to the session workspace (e.g., `library/{name}.pdf`)
28
- - Confirm the output path and file size to the user
29
- - If the source is spec.md, name the output `spec-export.pdf`
@@ -1,28 +0,0 @@
1
- # Skill: PDF to Markdown
2
-
3
- Convert PDF documents to readable Markdown text.
4
-
5
- ## When to use
6
- When the user provides a PDF file path and wants to read, search, or work with its contents.
7
-
8
- ## How to execute
9
-
10
- Option 1 — Using the built-in Read tool:
11
- The Read tool can directly read PDF files. Use `pages` parameter for large PDFs (max 20 pages per request).
12
-
13
- Option 2 — Full extraction via CLI (for better formatting or batch processing):
14
- ```bash
15
- npx --yes pdf-parse-cli "<PDF_PATH>"
16
- ```
17
-
18
- Option 3 — Using pdftotext (if available):
19
- ```bash
20
- pdftotext -layout "<PDF_PATH>" -
21
- ```
22
-
23
- ## Output
24
- Save the converted content to the session workspace as `library/{filename}.md` with:
25
- - Document title and source path at the top
26
- - Preserved heading structure where detectable
27
- - Tables converted to Markdown tables where possible
28
- - Page numbers as section markers
@@ -1,90 +0,0 @@
1
- # Skill: Playwright Browser Automation
2
-
3
- Automate web browser interactions — navigate pages, click buttons, fill forms, take screenshots, and extract content.
4
-
5
- ## When to use
6
- - Navigate to a URL and interact with it
7
- - Click buttons or links by their text or role
8
- - Fill form fields and submit data
9
- - Take screenshots of web pages
10
- - Extract text or structured data from pages
11
- - Automate multi-step web workflows (e.g. join a room, test a UI flow)
12
-
13
- ## How to execute
14
-
15
- Uses `@playwright/cli` via npx — no global install needed. Token-efficient: uses element references (e.g. `e15`) instead of pixel coordinates.
16
-
17
- ### First time only — install browser binaries
18
- ```bash
19
- npx playwright install chromium
20
- ```
21
-
22
- ### Step 1 — Open a URL
23
- ```bash
24
- npx @playwright/cli open http://localhost:3000
25
- ```
26
-
27
- ### Step 2 — Get page structure and element references
28
- ```bash
29
- npx @playwright/cli snapshot
30
- ```
31
- Returns an accessibility tree with element IDs like e1, e2, e15. Use these in subsequent commands.
32
-
33
- ### Step 3 — Interact with elements
34
- ```bash
35
- npx @playwright/cli click e15
36
- npx @playwright/cli fill e3 "some text"
37
- npx @playwright/cli press e3 Enter
38
- npx @playwright/cli select e7 "optionValue"
39
- npx @playwright/cli check e9
40
- npx @playwright/cli hover e12
41
- ```
42
-
43
- ### Take a screenshot
44
- ```bash
45
- npx @playwright/cli screenshot --path=/tmp/page.png
46
- ```
47
-
48
- ### Take a screenshot at a specific viewport size (mobile check)
49
- ```bash
50
- npx @playwright/cli screenshot --viewport-size=375,812 --path=/tmp/page-mobile.png
51
- ```
52
- Common mobile sizes: `375,812` (iPhone 14), `390,844` (iPhone 14 Pro), `412,915` (Pixel 7), `768,1024` (iPad).
53
-
54
- ### Close the browser
55
- ```bash
56
- npx @playwright/cli close
57
- ```
58
-
59
- ### Named sessions (persistent state across commands)
60
- ```bash
61
- npx @playwright/cli -s=myflow open http://localhost:3000
62
- npx @playwright/cli -s=myflow snapshot
63
- npx @playwright/cli -s=myflow fill e3 "abc123"
64
- npx @playwright/cli -s=myflow click e5
65
- npx @playwright/cli -s=myflow close
66
- ```
67
-
68
- ## Complete example — join Osborn voice room
69
- ```bash
70
- npx @playwright/cli open http://localhost:3000
71
- npx @playwright/cli snapshot
72
- npx @playwright/cli fill e3 "abc123"
73
- npx @playwright/cli click e4
74
- npx @playwright/cli screenshot --path=/tmp/osborn-joined.png
75
- npx @playwright/cli close
76
- ```
77
-
78
- ## Complete example — check mobile layout
79
- ```bash
80
- npx @playwright/cli open http://localhost:3000
81
- npx @playwright/cli screenshot --viewport-size=375,812 --path=/tmp/mobile-375.png
82
- npx @playwright/cli close
83
- ```
84
-
85
- ## Notes
86
- - Runs headless by default. Add --headed to see the browser window.
87
- - Install browsers first if needed: npx playwright install chromium
88
- - Element IDs are session-scoped — run snapshot again after page changes
89
- - Use `--viewport-size=WIDTH,HEIGHT` to simulate mobile screen sizes (e.g. `375,812` for iPhone 14)
90
- - Use `--storage-state=/tmp/state.json` to save and restore session state (cookies, localStorage) across runs