osborn 0.8.27 → 0.8.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -554,15 +554,15 @@ async function main() {
  * If interrupted: gather spoken text + JSONL context. Does NOT send to Claude yet —
  * that happens when the user's transcribed message arrives via chat().
  */
- async function handleSpeechDone(handle, fullText) {
+ async function handleSpeechDone(handle, fullText, fullBlockText) {
  if (!handle.interrupted) {
  lastInterruption = null;
  return;
  }
- // fullText is what was being spoken when interrupted (passed from tts_say handler).
- // No word-level cutoff for say() only generateReply pipeline has that — but Claude
- // knows its own output from JSONL, so the full block is enough context.
- console.log(`🔇 Speech interrupted. Was speaking (${fullText.length} chars): "${fullText}"`);
+ // fullText is the synchronized (word-accurate) transcript of what was actually spoken
+ // before the interruption. fullBlockText is the original full TTS segment text.
+ const fullBlockLen = fullBlockText?.length ?? fullText.length;
+ console.log(`🔇 Speech interrupted. Heard (${fullText.length} chars): "${fullText}" [full block was ${fullBlockLen} chars]`);
  // Read last 10 assistant messages from JSONL (Claude's full untruncated output).
  // SessionMessage.text is pre-joined from all text content blocks.
  let recentMessages = '';
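The comment above about reading "the last 10 assistant messages from JSONL" describes a plain tail-read over a line-delimited log. A minimal sketch of that pattern, assuming one `{ role, text }` object per line (the real session-file path and `SessionMessage` shape are osborn internals not shown in this diff):

```js
// Hypothetical sketch of the JSONL tail-read the comment describes.
// Assumes one JSON object per line with { role, text } fields.
const fs = require('fs');

function lastAssistantMessages(jsonlPath, n = 10) {
  return fs.readFileSync(jsonlPath, 'utf8')
    .split('\n')
    .filter(Boolean)                          // drop the trailing blank line
    .map((line) => JSON.parse(line))
    .filter((msg) => msg.role === 'assistant')
    .slice(-n)                                // keep only the last n messages
    .map((msg) => msg.text)
    .join('\n\n');
}
```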
@@ -1109,10 +1109,62 @@ async function main() {
  if (handle && typeof handle.addDoneCallback === 'function') {
  // SpeechHandle — track it and register interruption callback
  currentSpeechHandle = handle;
+ // Wall-clock timer: capture when audio actually starts playing (first frame)
+ // Used as fallback if LiveKit's playbackPosition is 0 (race condition)
+ let playbackStartedAt = null;
+ const audioOutputRef = currentSession?._activity?.agentSession?.output?.audio;
+ if (audioOutputRef && typeof audioOutputRef.on === 'function') {
+ const onPlaybackStarted = () => {
+ playbackStartedAt = Date.now();
+ console.log(`🔊 [${sayId}] audio first frame out (playbackStarted)`);
+ audioOutputRef.off('playbackStarted', onPlaybackStarted);
+ };
+ audioOutputRef.on('playbackStarted', onPlaybackStarted);
+ }
  handle.addDoneCallback((sh) => {
  if (sh.interrupted) {
  console.log(`🔇 [${sayId}] session.say INTERRUPTED`);
- handleSpeechDone(sh, data.text);
+ const audioOutput = currentSession?._activity?.agentSession?.output?.audio;
+ const sdkTranscript = audioOutput?.lastPlaybackEvent?.synchronizedTranscript;
+ const sdkPlaybackSec = audioOutput?.lastPlaybackEvent?.playbackPosition ?? 0;
+ let spokenText;
+ let method;
+ if (sdkTranscript) {
+ // Best case: LiveKit gave us word-accurate transcript (requires alignedTranscript TTS)
+ spokenText = sdkTranscript;
+ method = 'sdk-transcript';
+ }
+ else if (sdkPlaybackSec > 0) {
+ // Second: LiveKit gave us playback duration — estimate chars from it
+ const CHARS_PER_SEC = 14;
+ const charCount = Math.min(Math.round(sdkPlaybackSec * CHARS_PER_SEC), data.text.length);
+ const slicePoint = data.text.lastIndexOf(' ', charCount) || charCount;
+ spokenText = slicePoint > 0 ? data.text.slice(0, slicePoint) : data.text;
+ method = 'sdk-position';
+ }
+ else if (playbackStartedAt !== null) {
+ // Third: use our wall-clock timer from first audio frame
+ const elapsedSec = (Date.now() - playbackStartedAt) / 1000;
+ const CHARS_PER_SEC = 14;
+ const charCount = Math.min(Math.round(elapsedSec * CHARS_PER_SEC), data.text.length);
+ const slicePoint = data.text.lastIndexOf(' ', charCount) || charCount;
+ spokenText = slicePoint > 0 ? data.text.slice(0, slicePoint) : data.text;
+ method = 'wall-clock';
+ }
+ else {
+ // Fallback: interrupt fired before first frame — pass full block
+ spokenText = data.text;
+ method = 'full-block-fallback';
+ }
+ console.log('🔇 Interruption estimate:', JSON.stringify({
+ method,
+ sdkPlaybackSec,
+ isSynced: !!sdkTranscript,
+ spokenChars: spokenText.length,
+ fullChars: data.text.length,
+ heard: spokenText.slice(0, 80) + (spokenText.length > 80 ? '...' : '')
+ }));
+ handleSpeechDone(sh, spokenText, data.text);
  }
  else {
  console.log(`✅ [${sayId}] session.say DONE`);
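The added block above is a four-way fallback: use LiveKit's word-accurate `synchronizedTranscript` when the TTS provides one, otherwise estimate from the SDK's `playbackPosition`, otherwise from a wall-clock timer started at the first audio frame, otherwise pass the full block. Factored out of the diff into a standalone function (the name `estimateSpokenText` and its plain-object argument are illustrative, not part of the package), the chain looks like this:

```js
// Standalone restatement of the fallback chain added in this version.
// ~14 chars of text per second of speech is the heuristic the diff uses.
const CHARS_PER_SEC = 14;

function estimateSpokenText(fullText, { sdkTranscript, sdkPlaybackSec, playbackStartedAt }) {
  // 1. Word-accurate transcript straight from the SDK.
  if (sdkTranscript) return { spokenText: sdkTranscript, method: 'sdk-transcript' };

  // 2./3. Elapsed seconds from the SDK's playback position, else our wall clock.
  const elapsedSec = sdkPlaybackSec > 0
    ? sdkPlaybackSec
    : (playbackStartedAt !== null ? (Date.now() - playbackStartedAt) / 1000 : null);

  // 4. Interrupted before the first frame ever played: report the whole block.
  if (elapsedSec === null) return { spokenText: fullText, method: 'full-block-fallback' };

  // Convert time to characters, then back off to the previous word boundary.
  const charCount = Math.min(Math.round(elapsedSec * CHARS_PER_SEC), fullText.length);
  const lastSpace = fullText.lastIndexOf(' ', charCount);
  const slicePoint = lastSpace > 0 ? lastSpace : charCount;
  return {
    spokenText: slicePoint > 0 ? fullText.slice(0, slicePoint) : fullText,
    method: sdkPlaybackSec > 0 ? 'sdk-position' : 'wall-clock',
  };
}
```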
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "osborn",
- "version": "0.8.27",
+ "version": "0.8.29",
  "description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
  "type": "module",
  "bin": {
@@ -1,9 +0,0 @@
- {
- "permissions": {
- "allow": [
- "Bash(ps:*)",
- "Bash(osascript:*)",
- "Bash(curl -s http://localhost:3000)"
- ]
- }
- }
@@ -1,114 +0,0 @@
- # Skill: Browser Apply — Step-by-Step Workday Application
-
- Automate Workday job applications interactively, one step at a time. Each step takes a screenshot, confirms what's on screen, fills the current page, and waits before proceeding.
-
- **This skill uses the Playwright MCP tools** (`mcp__playwright__browser_*`) for direct browser control — no scripts needed.
-
- ## When to Use
- - Any Workday ATS application (`*.wd1.myworkdayjobs.com`)
- - Any multi-step JS-heavy job application form
- - When you want visible, confirmable progress at each step
-
- ## Key Principle: Step-by-Step, Not One Big Script
-
- Do NOT write a monolithic automation script. Instead:
- 1. Navigate to the URL
- 2. Take a screenshot — confirm what's on screen
- 3. Fill only the current step's fields
- 4. Take another screenshot — confirm fields filled correctly
- 5. Ask the user "Ready for next step?" before clicking Next
- 6. Click Next, wait for page load, screenshot again
- 7. Repeat for each step
-
- This approach catches rendering issues, unexpected fields, and errors before they cascade.
-
- ## Step-by-Step Execution Pattern
-
- ### Step 0 — Open the browser
- Use: `mcp__playwright__browser_navigate` with the applyManually URL
-
- Then: `mcp__playwright__browser_take_screenshot` — show the user what loaded
-
- ### Step 1 — Create Account / Sign In
- Take a snapshot with `mcp__playwright__browser_snapshot` to see element refs.
- Fill fields using `mcp__playwright__browser_fill_form` or individual `mcp__playwright__browser_type` calls.
- Screenshot to confirm. Then ask user before clicking Create Account / Sign In.
-
- ### Step 2 — Start Application
- If "Start Your Application" screen appears with Apply Manually button:
- Screenshot it. Click "Apply Manually" using `mcp__playwright__browser_click`.
- Screenshot after.
-
- ### Step 3 — My Information
- Snapshot → fill each field → screenshot → ask user before clicking Next.
-
- Fields to fill:
- - First Name, Last Name, Phone
- - Address, City, State (dropdown), Zip
- - Work authorization: Yes
- - Sponsorship: No
-
- ### Step 4 — My Experience
- Snapshot → click Add for each job entry → fill title/company/dates/description → save each → screenshot.
- Then add education entries.
- Ask user before clicking Next.
-
- ### Step 5 — Application Questions
- Snapshot to see all questions. Fill each one. **Always confirm salary expectation with user before filling** — never guess. Screenshot. Ask before Next.
-
- ### Step 6 — Voluntary Disclosures
- Select "I do not wish to answer" / "Prefer not to disclose" for all. Screenshot. Ask before Next.
-
- ### Step 7 — Self Identify
- Fill name and date. Select disability option. Screenshot. Ask before Next.
-
- ### Step 8 — Review
- Screenshot the full review page. Confirm with user before clicking Submit.
-
- ### Step 9 — Confirm submission
- Screenshot the confirmation dialog. Save it.
-
- ## Candidate Data (Osborn Ojure)
-
- - Email: osbornojure@gmail.com
- - Password: Workday2026!
- - First: Osborn, Last: Ojure
- - Phone: 3127185561
- - Address: 1234 N Michigan Ave, Chicago, IL 60601
-
- Jobs:
- 1. Meta API Consultant at Prehype / Audos — April 2024 to Present
- 2. Full Stack Developer, Freelance — January 2016 to Present
-
- Education:
- 1. A.S. Information Systems
- 2. B.S. Psychology
-
- ## Workday data-automation-id Selector Reference
-
- | Field | Selector |
- |---|---|
- | Email | `input[type="email"]` |
- | Password | `input[type="password"]` |
- | First name | `[data-automation-id="legalNameSection_firstName"]` |
- | Last name | `[data-automation-id="legalNameSection_lastName"]` |
- | Phone | `[data-automation-id="phone-number"]` |
- | Address | `[data-automation-id="addressSection_addressLine1"]` |
- | City | `[data-automation-id="addressSection_city"]` |
- | Zip | `[data-automation-id="addressSection_postalCode"]` |
- | Job title | `[data-automation-id="jobTitle"]` |
- | Company | `[data-automation-id="company"]` |
- | Description | `[data-automation-id="description"]` |
- | Next button | `[data-automation-id="bottom-navigation-next-btn"]` |
- | Create Account | `[data-automation-id="click_filter"][aria-label="Create Account"]` |
-
- ## Critical Rules
- - headless: false always (Workday renders blank in headless)
- - Confirm salary with user before every submission — never auto-fill
- - After "You already applied to this job" error — that confirms a previous submission worked
- - Use `{ force: true }` on Workday buttons — overlay click filters block normal clicks
- - Always wait for networkidle or waitForSelector after navigation before interacting
-
- ## Playwright Install Location
- Run scripts from: `/Users/newupgrade/Desktop/Developer/osborn/frontend`
- (playwright is in `node_modules` there)
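The last two Critical Rules in the deleted skill above (force clicks, waiting for networkidle) are the Workday-specific workarounds a script actually has to encode. A minimal Playwright sketch of both, using a placeholder URL and the Next-button selector from the skill's reference table:

```js
// Sketch: the two Workday workarounds from the Critical Rules.
const { chromium } = require('playwright');

(async () => {
  const browser = await chromium.launch({ headless: false }); // rule: Workday renders blank in headless
  const page = await browser.newPage();
  await page.goto('https://example.wd1.myworkdayjobs.com/careers', { waitUntil: 'networkidle' });

  // Workday's overlay click filters swallow normal clicks, so force the click through.
  await page.click('[data-automation-id="bottom-navigation-next-btn"]', { force: true });

  // Let the next step finish rendering before interacting again.
  await page.waitForLoadState('networkidle');

  await browser.close();
})();
```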
@@ -1,29 +0,0 @@
- # Skill: Markdown to PDF
-
- Export Markdown documents as formatted PDF files.
-
- ## When to use
- When the user wants to create a PDF from a Markdown file, spec, or research findings.
-
- ## How to execute
-
- Option 1 — Using md-to-pdf (best quality):
- ```bash
- npx --yes md-to-pdf "<MARKDOWN_PATH>"
- ```
- This creates a PDF alongside the source file with the same name.
-
- Option 2 — Using pandoc (if available):
- ```bash
- pandoc "<MARKDOWN_PATH>" -o "<OUTPUT_PATH>.pdf" --pdf-engine=wkhtmltopdf
- ```
-
- Option 3 — Using markdown-pdf:
- ```bash
- npx --yes markdown-pdf "<MARKDOWN_PATH>" -o "<OUTPUT_PATH>.pdf"
- ```
-
- ## Output
- - Save the PDF to the session workspace (e.g., `library/{name}.pdf`)
- - Confirm the output path and file size to the user
- - If the source is spec.md, name the output `spec-export.pdf`
@@ -1,28 +0,0 @@
- # Skill: PDF to Markdown
-
- Convert PDF documents to readable Markdown text.
-
- ## When to use
- When the user provides a PDF file path and wants to read, search, or work with its contents.
-
- ## How to execute
-
- Option 1 — Using the built-in Read tool:
- The Read tool can directly read PDF files. Use `pages` parameter for large PDFs (max 20 pages per request).
-
- Option 2 — Full extraction via CLI (for better formatting or batch processing):
- ```bash
- npx --yes pdf-parse-cli "<PDF_PATH>"
- ```
-
- Option 3 — Using pdftotext (if available):
- ```bash
- pdftotext -layout "<PDF_PATH>" -
- ```
-
- ## Output
- Save the converted content to the session workspace as `library/{filename}.md` with:
- - Document title and source path at the top
- - Preserved heading structure where detectable
- - Tables converted to Markdown tables where possible
- - Page numbers as section markers
@@ -1,90 +0,0 @@
- # Skill: Playwright Browser Automation
-
- Automate web browser interactions — navigate pages, click buttons, fill forms, take screenshots, and extract content.
-
- ## When to use
- - Navigate to a URL and interact with it
- - Click buttons or links by their text or role
- - Fill form fields and submit data
- - Take screenshots of web pages
- - Extract text or structured data from pages
- - Automate multi-step web workflows (e.g. join a room, test a UI flow)
-
- ## How to execute
-
- Uses `@playwright/cli` via npx — no global install needed. Token-efficient: uses element references (e.g. `e15`) instead of pixel coordinates.
-
- ### First time only — install browser binaries
- ```bash
- npx playwright install chromium
- ```
-
- ### Step 1 — Open a URL
- ```bash
- npx @playwright/cli open https://localhost:3000
- ```
-
- ### Step 2 — Get page structure and element references
- ```bash
- npx @playwright/cli snapshot
- ```
- Returns an accessibility tree with element IDs like e1, e2, e15. Use these in subsequent commands.
-
- ### Step 3 — Interact with elements
- ```bash
- npx @playwright/cli click e15
- npx @playwright/cli fill e3 "some text"
- npx @playwright/cli press e3 Enter
- npx @playwright/cli select e7 "optionValue"
- npx @playwright/cli check e9
- npx @playwright/cli hover e12
- ```
-
- ### Take a screenshot
- ```bash
- npx @playwright/cli screenshot --path=/tmp/page.png
- ```
-
- ### Take a screenshot at a specific viewport size (mobile check)
- ```bash
- npx @playwright/cli screenshot --viewport-size=375,812 --path=/tmp/page-mobile.png
- ```
- Common mobile sizes: `375,812` (iPhone 14), `390,844` (iPhone 14 Pro), `412,915` (Pixel 7), `768,1024` (iPad).
-
- ### Close the browser
- ```bash
- npx @playwright/cli close
- ```
-
- ### Named sessions (persistent state across commands)
- ```bash
- npx @playwright/cli -s=myflow open https://localhost:3000
- npx @playwright/cli -s=myflow snapshot
- npx @playwright/cli -s=myflow fill e3 "abc123"
- npx @playwright/cli -s=myflow click e5
- npx @playwright/cli -s=myflow close
- ```
-
- ## Complete example — join Osborn voice room
- ```bash
- npx @playwright/cli open http://localhost:3000
- npx @playwright/cli snapshot
- npx @playwright/cli fill e3 "abc123"
- npx @playwright/cli click e4
- npx @playwright/cli screenshot --path=/tmp/osborn-joined.png
- npx @playwright/cli close
- ```
-
- ## Complete example — check mobile layout
- ```bash
- npx @playwright/cli open http://localhost:3000
- npx @playwright/cli screenshot --viewport-size=375,812 --path=/tmp/mobile-375.png
- npx @playwright/cli close
- ```
-
- ## Notes
- - Runs headless by default. Add --headed to see the browser window.
- - Install browsers first if needed: npx playwright install chromium
- - Element IDs are session-scoped — run snapshot again after page changes
- - Use `--viewport-size=WIDTH,HEIGHT` to simulate mobile screen sizes (e.g. `375,812` for iPhone 14)
- - Use `--storage-state=/tmp/state.json` to save and restore session state (cookies, localStorage) across runs
@@ -1,232 +0,0 @@
- # Skill: shadcn/ui Components
-
- Add and configure shadcn/ui components in a Next.js or React project.
-
- ## When to use
- When the user wants to add UI components (buttons, dialogs, cards, forms, tables, etc.) using shadcn/ui — the copy-paste component library built on Radix UI and Tailwind CSS.
-
- ## Setup (first time only)
-
- Initialize shadcn in the project root (where package.json lives):
- ```bash
- npx shadcn@latest init
- ```
-
- This creates `components.json` and sets up `src/components/ui/`. Answer the prompts to match your project's style preferences.
-
- ## Add a component
-
- ```bash
- npx shadcn@latest add <component-name>
- ```
-
- ## Add multiple components at once
- ```bash
- npx shadcn@latest add button card dialog input form
- ```
-
- ## Commonly used components
-
- | Component | Install name | Description |
- |-----------|-------------|-------------|
- | Button | `button` | Clickable button with variants |
- | Card | `card` | Container with header/content/footer |
- | Input | `input` | Text input field |
- | Label | `label` | Form label |
- | Textarea | `textarea` | Multi-line text input |
- | Select | `select` | Dropdown select |
- | Checkbox | `checkbox` | Checkbox with label |
- | Switch | `switch` | Toggle switch |
- | Dialog | `dialog` | Modal dialog |
- | Sheet | `sheet` | Slide-in panel (drawer) |
- | Popover | `popover` | Floating content panel |
- | Tooltip | `tooltip` | Hover tooltip |
- | Dropdown Menu | `dropdown-menu` | Contextual dropdown |
- | Command | `command` | Command palette / search |
- | Badge | `badge` | Small status indicator |
- | Avatar | `avatar` | User avatar with fallback |
- | Separator | `separator` | Visual divider |
- | Table | `table` | Data table |
- | Tabs | `tabs` | Tabbed interface |
- | Accordion | `accordion` | Collapsible sections |
- | Sonner | `sonner` | Toast notifications (preferred over toast) |
- | Alert | `alert` | Inline alert message |
- | Alert Dialog | `alert-dialog` | Confirmation dialog |
- | Progress | `progress` | Progress bar |
- | Skeleton | `skeleton` | Loading placeholder |
- | Scroll Area | `scroll-area` | Custom scrollbar container |
- | Calendar | `calendar` | Date picker calendar |
- | Form | `form` | React Hook Form integration |
- | Slider | `slider` | Range slider |
- | Toggle | `toggle` | Toggle button |
- | Navigation Menu | `navigation-menu` | Site navigation |
- | Breadcrumb | `breadcrumb` | Page navigation breadcrumbs |
- | Collapsible | `collapsible` | Expandable/collapsible section |
- | Context Menu | `context-menu` | Right-click context menu |
- | Menubar | `menubar` | Application menu bar |
- | Resizable | `resizable` | Resizable panel groups |
-
- ## Import pattern
-
- ```tsx
- import { Button } from "@/components/ui/button"
- import { Card, CardContent, CardHeader, CardTitle, CardDescription, CardFooter } from "@/components/ui/card"
- import { Input } from "@/components/ui/input"
- import { Label } from "@/components/ui/label"
- import {
- Dialog,
- DialogContent,
- DialogDescription,
- DialogHeader,
- DialogTitle,
- DialogTrigger,
- } from "@/components/ui/dialog"
- ```
-
- ## Example — Button variants
-
- ```tsx
- <Button>Default</Button>
- <Button variant="destructive">Delete</Button>
- <Button variant="outline">Cancel</Button>
- <Button variant="ghost">Ghost</Button>
- <Button variant="link">Link</Button>
- <Button size="sm">Small</Button>
- <Button size="lg">Large</Button>
- <Button disabled>Disabled</Button>
- ```
-
- ## Example — Card
-
- ```tsx
- <Card>
- <CardHeader>
- <CardTitle>Card Title</CardTitle>
- <CardDescription>Card description goes here</CardDescription>
- </CardHeader>
- <CardContent>
- <p>Card content here</p>
- </CardContent>
- <CardFooter>
- <Button>Action</Button>
- </CardFooter>
- </Card>
- ```
-
- ## Example — Form with validation (React Hook Form + Zod)
-
- ```bash
- npx shadcn@latest add form input
- npm install zod react-hook-form @hookform/resolvers
- ```
-
- ```tsx
- import { useForm } from "react-hook-form"
- import { zodResolver } from "@hookform/resolvers/zod"
- import * as z from "zod"
- import { Form, FormControl, FormField, FormItem, FormLabel, FormMessage } from "@/components/ui/form"
- import { Input } from "@/components/ui/input"
- import { Button } from "@/components/ui/button"
-
- const formSchema = z.object({
- email: z.string().email("Invalid email address"),
- password: z.string().min(8, "Password must be at least 8 characters"),
- })
-
- export function LoginForm() {
- const form = useForm<z.infer<typeof formSchema>>({
- resolver: zodResolver(formSchema),
- defaultValues: { email: "", password: "" },
- })
-
- function onSubmit(values: z.infer<typeof formSchema>) {
- console.log(values)
- }
-
- return (
- <Form {...form}>
- <form onSubmit={form.handleSubmit(onSubmit)} className="space-y-4">
- <FormField
- control={form.control}
- name="email"
- render={({ field }) => (
- <FormItem>
- <FormLabel>Email</FormLabel>
- <FormControl>
- <Input placeholder="you@example.com" {...field} />
- </FormControl>
- <FormMessage />
- </FormItem>
- )}
- />
- <FormField
- control={form.control}
- name="password"
- render={({ field }) => (
- <FormItem>
- <FormLabel>Password</FormLabel>
- <FormControl>
- <Input type="password" {...field} />
- </FormControl>
- <FormMessage />
- </FormItem>
- )}
- />
- <Button type="submit" className="w-full">Sign in</Button>
- </form>
- </Form>
- )
- }
- ```
-
- ## Example — Toast notifications (Sonner)
-
- ```bash
- npx shadcn@latest add sonner
- ```
-
- ```tsx
- // In your root layout — add the Toaster once
- import { Toaster } from "@/components/ui/sonner"
- export default function RootLayout({ children }) {
- return (
- <html>
- <body>
- {children}
- <Toaster />
- </body>
- </html>
- )
- }
-
- // Then anywhere in your app
- import { toast } from "sonner"
-
- toast("Event created")
- toast.success("Profile saved")
- toast.error("Something went wrong")
- toast.promise(saveData(), {
- loading: "Saving...",
- success: "Saved!",
- error: "Failed to save",
- })
- ```
-
- ## The cn() utility
-
- shadcn uses `clsx` + `tailwind-merge` via a `cn()` helper for conditional classes:
-
- ```tsx
- import { cn } from "@/lib/utils"
-
- <div className={cn("base-class", isActive && "active-class", className)} />
- ```
-
- ## Notes
- - Components are **copied into your project** (not a node_modules dependency) — edit them freely
- - Requires **Tailwind CSS** configured in the project
- - Uses **Radix UI** primitives for accessibility
- - Components land in `src/components/ui/` by default
- - Run `npx shadcn@latest diff` to see if upstream components have changed
- - Check `components.json` for path and style config
- - Use Sonner (`sonner`) instead of the legacy `toast` component for notifications
Binary file
@@ -1,24 +0,0 @@
- # Skill: YouTube Transcript
-
- Fetch and save transcripts from YouTube videos.
-
- ## When to use
- When the user asks to get a transcript, subtitles, captions, or summary from a YouTube video URL.
-
- ## How to execute
-
- yt-dlp is installed on this system. Use this exact command:
-
- ```bash
- yt-dlp --skip-download --write-auto-sub --sub-lang en --convert-subs srt -o "/tmp/yt-%(id)s" "<VIDEO_URL>"
- ```
-
- This downloads auto-generated English subtitles as an SRT file to /tmp/yt-{video-id}.en.srt
-
- Then read the SRT file and strip the timing markers to get clean transcript text.
-
- ## Output
- Save the cleaned transcript to the session workspace as `library/youtube-{video-id}-transcript.md` with:
- - Video title and URL at the top
- - Cleaned transcript text (strip SRT timing markers and duplicate lines)
- - Key timestamps preserved as section markers if meaningful breaks exist
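The "strip the timing markers" step in the deleted skill above is mechanical; a small sketch of what it involves (the function name and dedup strategy are illustrative, not part of the skill):

```js
// Sketch: clean yt-dlp's SRT output into plain transcript text.
const fs = require('fs');

function srtToText(srtPath) {
  const out = [];
  for (const raw of fs.readFileSync(srtPath, 'utf8').split(/\r?\n/)) {
    if (/^\d+$/.test(raw)) continue;                             // cue index
    if (/^\d{2}:\d{2}:\d{2}[,.]\d{3} --> /.test(raw)) continue;  // timing marker
    const line = raw.trim();
    if (line && line !== out[out.length - 1]) out.push(line);    // skip blanks + repeated lines
  }
  return out.join(' ');
}

// usage: srtToText('/tmp/yt-VIDEOID.en.srt')
```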
@@ -1,50 +0,0 @@
- const { chromium } = require('playwright');
-
- (async () => {
- const browser = await chromium.launch({ headless: false });
- const page = await browser.newPage();
-
- await page.goto('https://caresource.wd1.myworkdayjobs.com/CareSource/job/Remote/AI-Developer_R10487/apply', { waitUntil: 'networkidle' });
- await page.waitForTimeout(2000);
-
- // Click Apply Manually
- console.log('Clicking Apply Manually...');
- await page.click('text=Apply Manually');
- await page.waitForTimeout(3000);
-
- // Fill in email
- console.log('Filling email field...');
- const emailInput = await page.$('input[type="email"]');
- if (emailInput) {
- await emailInput.fill('osbornojure@gmail.com');
- console.log('Email filled: osbornojure@gmail.com');
- }
-
- // Fill in password
- console.log('Filling password fields...');
- const passwordInputs = await page.$$('input[type="password"]');
- if (passwordInputs.length >= 2) {
- await passwordInputs[0].fill('workday2026!');
- await passwordInputs[1].fill('workday2026!');
- console.log('Passwords filled');
- }
-
- await page.waitForTimeout(1000);
-
- // Click Create Account
- const createBtn = await page.$('text=Create Account');
- if (createBtn) {
- console.log('Clicking Create Account...');
- await createBtn.click();
- await page.waitForTimeout(3000);
- }
-
- // Take screenshot
- await page.screenshot({ path: '/tmp/caresource-step1.png' });
- console.log('Screenshot saved to /tmp/caresource-step1.png');
-
- // Get current page text
- const text = await page.evaluate(() => document.body.innerText);
- console.log('\n--- PAGE CONTENT ---');
- console.log(text);
- })();
@@ -1,34 +0,0 @@
- import { chromium } from 'playwright';
-
- (async () => {
- const browser = await chromium.launch({ headless: false });
- const page = await browser.newPage();
-
- console.log('🌐 Navigating to CareSource...');
- await page.goto('https://caresource.wd1.myworkdayjobs.com/CareSource/job/Remote/AI-Developer_R10487/apply', { waitUntil: 'networkidle' });
- await page.waitForTimeout(2000);
-
- console.log('📋 Clicking Apply Manually...');
- await page.click('text=Apply Manually');
- await page.waitForTimeout(3000);
-
- console.log('📧 Filling email...');
- const emailInput = await page.$('input[type="email"]');
- if (emailInput) {
- await emailInput.fill('osbornojure@gmail.com');
- }
-
- console.log('🔐 Filling passwords...');
- const passwordInputs = await page.$$('input[type="password"]');
- if (passwordInputs.length >= 2) {
- await passwordInputs[0].fill('workday2026!');
- await passwordInputs[1].fill('workday2026!');
- }
-
- await page.waitForTimeout(1000);
- await page.screenshot({ path: '/tmp/caresource-step1.png' });
-
- console.log('✅ Account form filled. Screenshot saved.');
- console.log('\n📝 Browser is now open. You can review before submitting.');
- console.log('Keep this window open and ready to proceed.\n');
- })();
@@ -1,92 +0,0 @@
- /**
- * Conversation Brain - Gemini 2.5 Pro powered conversation manager
- *
- * This is the "smart brain" that:
- * 1. Keeps conversation alive with relevant questions
- * 2. Builds context until we understand what user wants
- * 3. Dispatches background research agents
- * 4. Receives progress updates and decides when to execute
- * 5. Handles direct commands immediately
- */
- export interface ConversationMessage {
- role: 'user' | 'assistant' | 'system';
- content: string;
- timestamp: Date;
- }
- export interface ResearchTask {
- id: string;
- query: string;
- status: 'pending' | 'running' | 'completed' | 'failed';
- result?: string;
- startedAt?: Date;
- completedAt?: Date;
- }
- export interface BrainDecision {
- action: 'speak' | 'research' | 'execute' | 'clarify' | 'direct_command';
- speech?: string;
- researchQueries?: string[];
- executeTask?: string;
- directCommand?: string;
- reasoning?: string;
- }
- export interface BrainState {
- conversationHistory: ConversationMessage[];
- userGoal: string | null;
- userGoalConfidence: number;
- pendingResearch: ResearchTask[];
- completedResearch: ResearchTask[];
- readyToExecute: boolean;
- executionPlan: string | null;
- }
- export declare class ConversationBrain {
- private llm;
- private state;
- private workingDir;
- private onSpeak;
- private onStateChange;
- constructor(config: {
- workingDir: string;
- onSpeak: (text: string) => Promise<void>;
- onStateChange: (state: string) => Promise<void>;
- });
- /**
- * Process user input and decide what to do
- */
- processUserInput(input: string): Promise<BrainDecision>;
- /**
- * Receive research results from background agents
- */
- receiveResearchResult(taskId: string, result: string, success: boolean): void;
- /**
- * Check if we should provide a status update
- */
- shouldProvideUpdate(): boolean;
- /**
- * Generate a status update based on completed research
- */
- generateStatusUpdate(): Promise<string | null>;
- /**
- * Get current state for debugging/display
- */
- getState(): BrainState;
- /**
- * Reset conversation state
- */
- reset(): void;
- private buildAnalysisPrompt;
- private parseDecision;
- private updateState;
- /**
- * Create research tasks for background agents
- */
- getPendingResearchTasks(): ResearchTask[];
- /**
- * Mark a research task as running
- */
- markResearchRunning(taskId: string): void;
- }
- export declare function createConversationBrain(config: {
- workingDir: string;
- onSpeak: (text: string) => Promise<void>;
- onStateChange: (state: string) => Promise<void>;
- }): ConversationBrain;
@@ -1,360 +0,0 @@
- /**
- * Conversation Brain - Gemini 2.5 Pro powered conversation manager
- *
- * This is the "smart brain" that:
- * 1. Keeps conversation alive with relevant questions
- * 2. Builds context until we understand what user wants
- * 3. Dispatches background research agents
- * 4. Receives progress updates and decides when to execute
- * 5. Handles direct commands immediately
- */
- import { llm } from '@livekit/agents';
- import * as google from '@livekit/agents-plugin-google';
- // ============================================================
- // Conversation Brain Class
- // ============================================================
- export class ConversationBrain {
- llm;
- state;
- workingDir;
- onSpeak;
- onStateChange;
- constructor(config) {
- // Use Gemini 2.0 Flash for brain decisions - it's faster and less likely to conflict
- // with the Gemini Realtime voice session which uses a different model
- this.llm = new google.LLM({
- model: 'gemini-2.0-flash',
- // Set lower temperature for more consistent decisions
- temperature: 0.3,
- });
- // Add error handler to prevent unhandled rejection crashes
- this.llm.on('error', (err) => {
- const errorMsg = err.error?.message || String(err);
- // Only log non-abort errors
- if (!errorMsg.includes('aborted') && !errorMsg.includes('AbortError')) {
- console.error('🧠 [Brain LLM] Error:', errorMsg);
- }
- });
- this.workingDir = config.workingDir;
- this.onSpeak = config.onSpeak;
- this.onStateChange = config.onStateChange;
- this.state = {
- conversationHistory: [],
- userGoal: null,
- userGoalConfidence: 0,
- pendingResearch: [],
- completedResearch: [],
- readyToExecute: false,
- executionPlan: null,
- };
- }
- /**
- * Process user input and decide what to do
- */
- async processUserInput(input) {
- // Add to conversation history
- this.state.conversationHistory.push({
- role: 'user',
- content: input,
- timestamp: new Date(),
- });
- // Build the analysis prompt
- const prompt = this.buildAnalysisPrompt(input);
- let stream = null;
- let streamError = null;
- try {
- // Call Gemini 2.5 Pro for decision
- const chatCtx = new llm.ChatContext();
- chatCtx.addMessage({ role: 'user', content: prompt });
- let response = '';
- stream = this.llm.chat({ chatCtx });
- // Create a promise that wraps the stream iteration with proper error handling
- const collectResponse = async () => {
- let result = '';
- try {
- for await (const chunk of stream) {
- if (chunk.delta?.content) {
- result += chunk.delta.content;
- }
- }
- }
- catch (e) {
- streamError = e;
- throw e;
- }
- return result;
- };
- // Race the stream against a timeout to prevent hanging
- const timeoutPromise = new Promise((_, reject) => {
- setTimeout(() => reject(new Error('Brain timeout after 30s')), 30000);
- });
- response = await Promise.race([collectResponse(), timeoutPromise]);
- // Parse the decision
- const decision = this.parseDecision(response);
- // Update state based on decision
- this.updateState(decision);
- // Add assistant response to history
- if (decision.speech) {
- this.state.conversationHistory.push({
- role: 'assistant',
- content: decision.speech,
- timestamp: new Date(),
- });
- }
- return decision;
- }
- catch (err) {
- const errorMsg = err.message || String(err);
- // Only log if not an abort (which is expected when user interrupts)
- if (!errorMsg.includes('aborted')) {
- console.error('🧠 [Brain] Stream error:', errorMsg);
- }
- // Close stream if it exists to prevent further errors
- if (stream) {
- try {
- stream.close();
- }
- catch { }
- }
- return {
- action: 'speak',
- speech: "I'm having trouble processing that. Could you try again?",
- };
- }
- }
- /**
- * Receive research results from background agents
- */
- receiveResearchResult(taskId, result, success) {
- const task = this.state.pendingResearch.find(t => t.id === taskId);
- if (task) {
- task.status = success ? 'completed' : 'failed';
- task.result = result;
- task.completedAt = new Date();
- // Move to completed
- this.state.pendingResearch = this.state.pendingResearch.filter(t => t.id !== taskId);
- this.state.completedResearch.push(task);
- console.log(`🧠 Research completed: ${taskId.substring(0, 8)}... (${this.state.completedResearch.length} done)`);
- }
- }
- /**
- * Check if we should provide a status update
- */
- shouldProvideUpdate() {
- // Provide update if research just completed
- return this.state.completedResearch.length > 0 &&
- this.state.pendingResearch.length === 0 &&
- !this.state.readyToExecute;
- }
- /**
- * Generate a status update based on completed research
- */
- async generateStatusUpdate() {
- if (this.state.completedResearch.length === 0)
- return null;
- const researchSummary = this.state.completedResearch
- .map(r => `- ${r.query}: ${r.result?.substring(0, 200) || 'No result'}`)
- .join('\n');
- const prompt = `Based on completed research, provide a brief conversational status update.
-
- RESEARCH RESULTS:
- ${researchSummary}
-
- USER GOAL (so far): ${this.state.userGoal || 'Still understanding...'}
-
- Generate a 1-2 sentence update that:
- 1. Summarizes what you learned
- 2. Either asks a clarifying question OR proposes next steps
- 3. Sounds natural and conversational
-
- Just the update text, no JSON.`;
- let stream = null;
- try {
- const chatCtx = new llm.ChatContext();
- chatCtx.addMessage({ role: 'user', content: prompt });
- let response = '';
- stream = this.llm.chat({ chatCtx });
- for await (const chunk of stream) {
- if (chunk.delta?.content) {
- response += chunk.delta.content;
- }
- }
- return response.trim();
- }
- catch (err) {
- console.error('🧠 [Brain] Status update error:', err.message || err);
- if (stream) {
- try {
- stream.close();
- }
- catch { }
- }
- return null;
- }
- }
- /**
- * Get current state for debugging/display
- */
- getState() {
- return { ...this.state };
- }
- /**
- * Reset conversation state
- */
- reset() {
- this.state = {
- conversationHistory: [],
- userGoal: null,
- userGoalConfidence: 0,
- pendingResearch: [],
- completedResearch: [],
- readyToExecute: false,
- executionPlan: null,
- };
- }
- // ============================================================
- // Private Methods
- // ============================================================
- buildAnalysisPrompt(userInput) {
- const recentHistory = this.state.conversationHistory.slice(-6)
- .map(m => `${m.role.toUpperCase()}: ${m.content}`)
- .join('\n');
- const researchContext = this.state.completedResearch.length > 0
- ? `\nCOMPLETED RESEARCH:\n${this.state.completedResearch.map(r => `- ${r.query}: ${r.result?.substring(0, 300)}`).join('\n')}`
- : '';
- const pendingContext = this.state.pendingResearch.length > 0
- ? `\nPENDING RESEARCH: ${this.state.pendingResearch.map(r => r.query).join(', ')}`
- : '';
- return `You are the brain of a voice AI coding assistant. Analyze this input and decide what to do.
-
- WORKING DIRECTORY: ${this.workingDir}
-
- CAPABILITIES:
- - Full internet access (web search, fetch URLs, API calls)
- - Read/write files in the working directory
- - Run shell commands (npm, git, etc.)
- - Search and analyze codebases
-
- CONVERSATION HISTORY:
- ${recentHistory}
-
- CURRENT USER INPUT: "${userInput}"
-
- CURRENT UNDERSTANDING:
- - User Goal: ${this.state.userGoal || 'Unknown - still gathering context'}
- - Confidence: ${Math.round(this.state.userGoalConfidence * 100)}%
- - Ready to Execute: ${this.state.readyToExecute}
- ${researchContext}
- ${pendingContext}
-
- DECIDE WHAT TO DO:
-
- 1. DIRECT_COMMAND - If user gives a simple, clear command:
- - "read file X" → direct_command
- - "run npm test" → direct_command
- - "show me the package.json" → direct_command
- - "search the web for X" → direct_command
- - "look up X online" → direct_command
-
- 2. CLARIFY - If request is ambiguous, ask a specific question to understand better
-
- 3. RESEARCH - If we need more info, start background research (2-3 queries max)
- - Search codebase, read docs, explore files
- - Search the web for information
- - Keep conversation going while research runs
-
- 4. EXECUTE - ONLY if we have HIGH confidence (>80%) about what user wants
- - Must have clear plan
- - Only ONE execution task at a time
-
- 5. SPEAK - Just respond conversationally (greetings, status, etc.)
-
- IMPORTANT: Keep speech SHORT and conversational. No markdown formatting.
-
- Respond in JSON:
- {
- "action": "speak" | "research" | "execute" | "clarify" | "direct_command",
- "speech": "What to say to user RIGHT NOW (keep it short, conversational, NO markdown)",
- "researchQueries": ["query1", "query2"], // Only if action=research
- "executeTask": "detailed task description", // Only if action=execute
- "directCommand": "simple command to run", // Only if action=direct_command
- "updatedGoal": "What we think user wants now",
- "goalConfidence": 0.0-1.0,
- "reasoning": "Brief explanation"
- }`;
- }
- parseDecision(response) {
- try {
- // Extract JSON from response
- const jsonMatch = response.match(/\{[\s\S]*\}/);
- if (!jsonMatch) {
- return {
- action: 'speak',
- speech: response.substring(0, 200),
- };
- }
- const parsed = JSON.parse(jsonMatch[0]);
- // Update goal tracking
- if (parsed.updatedGoal) {
- this.state.userGoal = parsed.updatedGoal;
- }
- if (typeof parsed.goalConfidence === 'number') {
- this.state.userGoalConfidence = parsed.goalConfidence;
- }
- return {
- action: parsed.action || 'speak',
- speech: parsed.speech,
- researchQueries: parsed.researchQueries,
- executeTask: parsed.executeTask,
- directCommand: parsed.directCommand,
- reasoning: parsed.reasoning,
- };
- }
- catch (err) {
- console.error('Failed to parse brain decision:', err);
- return {
- action: 'speak',
- speech: "Let me think about that...",
- };
- }
- }
- updateState(decision) {
- // Track research tasks
- if (decision.action === 'research' && decision.researchQueries) {
- for (const query of decision.researchQueries) {
- this.state.pendingResearch.push({
- id: `research-${Date.now()}-${Math.random().toString(36).substring(2, 6)}`,
- query,
- status: 'pending',
- });
- }
- }
- // Mark ready to execute
- if (decision.action === 'execute') {
- this.state.readyToExecute = true;
- this.state.executionPlan = decision.executeTask || null;
- }
- }
- /**
- * Create research tasks for background agents
- */
- getPendingResearchTasks() {
- return this.state.pendingResearch.filter(t => t.status === 'pending');
- }
- /**
- * Mark a research task as running
- */
- markResearchRunning(taskId) {
- const task = this.state.pendingResearch.find(t => t.id === taskId);
- if (task) {
- task.status = 'running';
- task.startedAt = new Date();
- }
- }
- }
- // ============================================================
- // Factory function
- // ============================================================
- export function createConversationBrain(config) {
- return new ConversationBrain(config);
- }
@@ -1,15 +0,0 @@
- interface AcknowledgmentResult {
- acknowledgment: string;
- isCodingTask: boolean;
- intent: 'code' | 'chat' | 'question' | 'permission';
- }
- /**
- * Fast LLM for immediate acknowledgment and intent classification
- * Target: < 200ms response time
- */
- export declare function getAcknowledgment(userInput: string): Promise<AcknowledgmentResult>;
- /**
- * Summarize Claude's response for voice output
- */
- export declare function summarizeForVoice(claudeResponse: string): Promise<string>;
- export {};
package/dist/fast-llm.js DELETED
@@ -1,81 +0,0 @@
- import Groq from 'groq-sdk';
- const groq = new Groq({
- apiKey: process.env.GROQ_API_KEY,
- });
- /**
- * Fast LLM for immediate acknowledgment and intent classification
- * Target: < 200ms response time
- */
- export async function getAcknowledgment(userInput) {
- const response = await groq.chat.completions.create({
- model: 'llama-3.3-70b-versatile', // Fast model
- messages: [
- {
- role: 'system',
- content: `You are Osborn, a voice-enabled coding assistant. Classify user requests:
-
- CODING TASKS (isCodingTask: true) - Anything involving:
- - Files: read, write, create, edit, list, find, search
- - Directories: what directory, current folder, list files, project structure
- - Code: fix bugs, refactor, explain code, run tests
- - Terminal: run commands, install packages, git operations
- - Project: what's in this project, show me files, analyze codebase
-
- NOT CODING (isCodingTask: false):
- - General chat: hi, how are you, tell me a joke
- - Off-topic: weather, news, recipes
- - Permission responses: yes, no, approve, deny
-
- Respond in JSON:
- {
- "acknowledgment": "brief friendly response",
- "isCodingTask": true/false,
- "intent": "code|chat|question|permission"
- }
-
- Examples:
- - "What directory are we in?" → {"acknowledgment": "Let me check", "isCodingTask": true, "intent": "code"}
- - "What files are in this project?" → {"acknowledgment": "I'll list them", "isCodingTask": true, "intent": "code"}
- - "Read package.json" → {"acknowledgment": "On it", "isCodingTask": true, "intent": "code"}
- - "Fix the bug" → {"acknowledgment": "Let me look", "isCodingTask": true, "intent": "code"}
- - "Hello" → {"acknowledgment": "Hi there!", "isCodingTask": false, "intent": "chat"}
- - "Yes, do it" → {"acknowledgment": "Got it", "isCodingTask": false, "intent": "permission"}`,
- },
- {
- role: 'user',
- content: userInput,
- },
- ],
- temperature: 0.3,
- max_tokens: 150,
- response_format: { type: 'json_object' },
- });
- const content = response.choices[0]?.message?.content || '{}';
- return JSON.parse(content);
- }
- /**
- * Summarize Claude's response for voice output
- */
- export async function summarizeForVoice(claudeResponse) {
- if (claudeResponse.length < 200) {
- return claudeResponse;
- }
- const response = await groq.chat.completions.create({
- model: 'llama-3.3-70b-versatile',
- messages: [
- {
- role: 'system',
- content: `Summarize this coding assistant response for voice output.
- Keep it under 2 sentences. Focus on what was done and the result.
- Don't include code - just describe what happened.`,
- },
- {
- role: 'user',
- content: claudeResponse,
- },
- ],
- temperature: 0.3,
- max_tokens: 100,
- });
- return response.choices[0]?.message?.content || claudeResponse;
- }