cursor-buddy 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,10 +7,17 @@ import { LanguageModel, SpeechModel, Tool, TranscriptionModel } from "ai";
7
7
  interface CursorBuddyHandlerConfig {
8
8
  /** AI SDK language model for chat (e.g., openai("gpt-4o")) */
9
9
  model: LanguageModel;
10
- /** AI SDK speech model for TTS (e.g., openai.speech("tts-1")) */
11
- speechModel: SpeechModel;
12
- /** AI SDK transcription model (e.g., openai.transcription("whisper-1")) */
13
- transcriptionModel: TranscriptionModel;
10
+ modelProviderMetadata?: Record<string, any>;
11
+ /**
12
+ * AI SDK speech model for TTS (e.g., openai.speech("tts-1")).
13
+ * Optional when clients use browser-only speech.
14
+ */
15
+ speechModel?: SpeechModel;
16
+ /**
17
+ * AI SDK transcription model (e.g., openai.transcription("whisper-1")).
18
+ * Optional when clients use browser-only transcription.
19
+ */
20
+ transcriptionModel?: TranscriptionModel;
14
21
  /**
15
22
  * System prompt for the AI. Can be a string or a function that receives
16
23
  * the default prompt and returns a modified version.
@@ -34,4 +41,4 @@ interface CursorBuddyHandler {
34
41
  }
35
42
  //#endregion
36
43
  export { CursorBuddyHandlerConfig as n, CursorBuddyHandler as t };
37
- //# sourceMappingURL=types-L97cq8UK.d.mts.map
44
+ //# sourceMappingURL=types-BxBhjZju.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"types-BxBhjZju.d.mts","names":[],"sources":["../src/server/types.ts"],"mappings":";;;;;AAKA;UAAiB,wBAAA;;EAEf,KAAA,EAAO,aAAA;EACP,qBAAA,GAAwB,MAAA;EAMV;;;;EAAd,WAAA,GAAc,WAAA;EAeA;;;;EATd,kBAAA,GAAqB,kBAAA;EANrB;;;;EAYA,MAAA,cAAoB,GAAA;IAAO,aAAA;EAAA;EAG3B;EAAA,KAAA,GAAQ,MAAA,SAAe,IAAA;EAAA;EAGvB,UAAA;AAAA;;AAMF;;UAAiB,kBAAA;EAEI;EAAnB,OAAA,GAAU,OAAA,EAAS,OAAA,KAAY,OAAA,CAAQ,QAAA;EAAR;EAG/B,MAAA,EAAQ,wBAAA;AAAA"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cursor-buddy",
3
- "version": "0.0.2",
3
+ "version": "0.0.4",
4
4
  "description": "AI-powered cursor companion for web apps",
5
5
  "type": "module",
6
6
  "license": "MIT",
package/README.md DELETED
@@ -1,344 +0,0 @@
1
- # cursor-buddy
2
-
3
- AI-powered cursor companion for web apps. Push-to-talk voice assistant that can see your screen and point at things.
4
-
5
- ## Features
6
-
7
- - **Push-to-talk voice input** — Hold a hotkey to speak, release to send
8
- - **Screenshot context** — AI sees your current viewport
9
- - **Voice responses** — Text-to-speech playback
10
- - **Cursor pointing** — AI can point at UI elements it references
11
- - **Voice interruption** — Start talking again to cut off current response
12
- - **Framework agnostic** — Core client works without React, adapter-based architecture
13
- - **Customizable** — CSS variables, custom components, headless mode
14
-
15
- ## Installation
16
-
17
- ```bash
18
- npm install cursor-buddy
19
- # or
20
- pnpm add cursor-buddy
21
- ```
22
-
23
- ## Quick Start
24
-
25
- ### 1. Server Setup
26
-
27
- Create an API route that handles chat, transcription, and TTS.
28
-
29
- ```ts
30
- // lib/cursor-buddy.ts
31
- import { createCursorBuddyHandler } from "cursor-buddy/server"
32
- import { openai } from "@ai-sdk/openai"
33
-
34
- export const cursorBuddy = createCursorBuddyHandler({
35
- model: openai("gpt-4o"),
36
- speechModel: openai.speech("tts-1"),
37
- transcriptionModel: openai.transcription("whisper-1"),
38
- })
39
- ```
40
-
41
- #### Next.js App Router
42
-
43
- ```ts
44
- // app/api/cursor-buddy/[...path]/route.ts
45
- import { toNextJsHandler } from "cursor-buddy/server/next"
46
- import { cursorBuddy } from "@/lib/cursor-buddy"
47
-
48
- export const { GET, POST } = toNextJsHandler(cursorBuddy)
49
- ```
50
-
51
- ### 2. Client Setup
52
-
53
- Add the `<CursorBuddy />` component to your app.
54
-
55
- ```tsx
56
- // app/layout.tsx
57
- import { CursorBuddy } from "cursor-buddy/react"
58
-
59
- export default function RootLayout({ children }) {
60
- return (
61
- <html>
62
- <body>
63
- {children}
64
- <CursorBuddy endpoint="/api/cursor-buddy" />
65
- </body>
66
- </html>
67
- )
68
- }
69
- ```
70
-
71
- That's it! Hold **Ctrl+Alt** to speak, release to send.
72
-
73
- ## Server Configuration
74
-
75
- ```ts
76
- createCursorBuddyHandler({
77
- // Required
78
- model: LanguageModel, // AI SDK chat model
79
- speechModel: SpeechModel, // AI SDK speech model
80
- transcriptionModel: TranscriptionModel, // AI SDK transcription model
81
-
82
- // Optional
83
- system: string | ((ctx) => string), // Custom system prompt
84
- tools: Record<string, Tool>, // AI SDK tools
85
- maxHistory: number, // Max conversation history (default: 10)
86
- })
87
- ```
88
-
89
- ### Custom System Prompt
90
-
91
- ```ts
92
- createCursorBuddyHandler({
93
- model: openai("gpt-4o"),
94
- speechModel: openai.speech("tts-1"),
95
- transcriptionModel: openai.transcription("whisper-1"),
96
-
97
- // Extend the default prompt
98
- system: ({ defaultPrompt }) => `
99
- ${defaultPrompt}
100
-
101
- You are helping users navigate a project management dashboard.
102
- The sidebar contains: Projects, Tasks, Calendar, Settings.
103
- `,
104
- })
105
- ```
106
-
107
- ## Client Configuration
108
-
109
- ```tsx
110
- <CursorBuddy
111
- // Required
112
- endpoint="/api/cursor-buddy"
113
-
114
- // Optional
115
- hotkey="ctrl+alt" // Push-to-talk hotkey (default: "ctrl+alt")
116
- container={element} // Portal container (default: document.body)
117
-
118
- // Custom components
119
- cursor={(props) => <CustomCursor {...props} />}
120
- speechBubble={(props) => <CustomBubble {...props} />}
121
- waveform={(props) => <CustomWaveform {...props} />}
122
-
123
- // Callbacks
124
- onTranscript={(text) => {}} // Called when speech is transcribed
125
- onResponse={(text) => {}} // Called when AI responds
126
- onPoint={(target) => {}} // Called when AI points at element
127
- onStateChange={(state) => {}} // Called on state change
128
- onError={(error) => {}} // Called on error
129
- />
130
- ```
131
-
132
- ## Customization
133
-
134
- ### CSS Variables
135
-
136
- Cursor buddy styles are customizable via CSS variables. Override them in your stylesheet:
137
-
138
- ```css
139
- :root {
140
- /* Cursor colors by state */
141
- --cursor-buddy-color-idle: #3b82f6;
142
- --cursor-buddy-color-listening: #ef4444;
143
- --cursor-buddy-color-processing: #eab308;
144
- --cursor-buddy-color-responding: #22c55e;
145
-
146
- /* Speech bubble */
147
- --cursor-buddy-bubble-bg: #ffffff;
148
- --cursor-buddy-bubble-text: #1f2937;
149
- --cursor-buddy-bubble-radius: 8px;
150
- --cursor-buddy-bubble-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
151
-
152
- /* Waveform */
153
- --cursor-buddy-waveform-color: #ef4444;
154
- }
155
- ```
156
-
157
- ### Custom Components
158
-
159
- Replace default components with your own:
160
-
161
- ```tsx
162
- import { CursorBuddy, type CursorRenderProps } from "cursor-buddy/react"
163
-
164
- function MyCursor({ state, rotation, scale }: CursorRenderProps) {
165
- return (
166
- <div style={{ transform: `rotate(${rotation}rad) scale(${scale})` }}>
167
- {state === "listening" ? "🎤" : "👆"}
168
- </div>
169
- )
170
- }
171
-
172
- <CursorBuddy
173
- endpoint="/api/cursor-buddy"
174
- cursor={(props) => <MyCursor {...props} />}
175
- />
176
- ```
177
-
178
- ## Headless Mode
179
-
180
- For full control, use the provider and hook directly:
181
-
182
- ```tsx
183
- import {
184
- CursorBuddyProvider,
185
- useCursorBuddy
186
- } from "cursor-buddy/react"
187
-
188
- function App() {
189
- return (
190
- <CursorBuddyProvider endpoint="/api/cursor-buddy">
191
- <MyCustomUI />
192
- </CursorBuddyProvider>
193
- )
194
- }
195
-
196
- function MyCustomUI() {
197
- const {
198
- state, // "idle" | "listening" | "processing" | "responding"
199
- transcript, // Latest user speech
200
- response, // Latest AI response
201
- audioLevel, // 0-1, for waveform visualization
202
- isEnabled,
203
- isPointing,
204
- error,
205
-
206
- // Actions
207
- startListening,
208
- stopListening,
209
- setEnabled,
210
- pointAt, // Manually point at coordinates
211
- dismissPointing,
212
- reset,
213
- } = useCursorBuddy()
214
-
215
- return (
216
- <div>
217
- <p>State: {state}</p>
218
- <button
219
- onMouseDown={startListening}
220
- onMouseUp={stopListening}
221
- >
222
- Hold to speak
223
- </button>
224
- </div>
225
- )
226
- }
227
- ```
228
-
229
- ## Framework-Agnostic Usage
230
-
231
- For non-React environments, use the core client directly:
232
-
233
- ```ts
234
- import { CursorBuddyClient } from "cursor-buddy"
235
-
236
- const client = new CursorBuddyClient("/api/cursor-buddy", {
237
- onStateChange: (state) => console.log("State:", state),
238
- onTranscript: (text) => console.log("Transcript:", text),
239
- onResponse: (text) => console.log("Response:", text),
240
- onError: (err) => console.error("Error:", err),
241
- })
242
-
243
- // Subscribe to state changes
244
- client.subscribe(() => {
245
- const snapshot = client.getSnapshot()
246
- console.log(snapshot)
247
- })
248
-
249
- // Trigger voice interaction
250
- client.startListening()
251
- // ... user speaks ...
252
- client.stopListening()
253
- ```
254
-
255
- ## Render Props Types
256
-
257
- ```ts
258
- interface CursorRenderProps {
259
- state: "idle" | "listening" | "processing" | "responding"
260
- isPointing: boolean
261
- rotation: number // Radians, direction of travel
262
- scale: number // 1.0 normal, up to 1.3 during flight
263
- }
264
-
265
- interface SpeechBubbleRenderProps {
266
- text: string
267
- isVisible: boolean
268
- }
269
-
270
- interface WaveformRenderProps {
271
- audioLevel: number // 0-1
272
- isListening: boolean
273
- }
274
- ```
275
-
276
- ## API Reference
277
-
278
- ### Core Exports (`cursor-buddy`)
279
-
280
- | Export | Description |
281
- |--------|-------------|
282
- | `CursorBuddyClient` | Framework-agnostic client class |
283
- | `VoiceState` | Type: `"idle" \| "listening" \| "processing" \| "responding"` |
284
- | `PointingTarget` | Type: `{ x: number, y: number, label: string }` |
285
- | `Point` | Type: `{ x: number, y: number }` |
286
-
287
- ### Server Exports (`cursor-buddy/server`)
288
-
289
- | Export | Description |
290
- |--------|-------------|
291
- | `createCursorBuddyHandler` | Create the main request handler |
292
- | `DEFAULT_SYSTEM_PROMPT` | Default system prompt for reference |
293
- | `CursorBuddyHandlerConfig` | Type for handler configuration |
294
- | `CursorBuddyHandler` | Return type of `createCursorBuddyHandler` |
295
-
296
- ### Server Adapters (`cursor-buddy/server/next`)
297
-
298
- | Export | Description |
299
- |--------|-------------|
300
- | `toNextJsHandler` | Convert handler to Next.js App Router format |
301
-
302
- ### React Exports (`cursor-buddy/react`)
303
-
304
- | Export | Description |
305
- |--------|-------------|
306
- | `CursorBuddy` | Drop-in component with built-in UI |
307
- | `CursorBuddyProvider` | Headless provider for custom UI |
308
- | `useCursorBuddy` | Hook to access state and actions |
309
-
310
- ### Types (`cursor-buddy/react`)
311
-
312
- | Export | Description |
313
- |--------|-------------|
314
- | `CursorBuddyProps` | Props for `<CursorBuddy />` |
315
- | `CursorBuddyProviderProps` | Props for `<CursorBuddyProvider />` |
316
- | `UseCursorBuddyReturn` | Return type of `useCursorBuddy()` |
317
- | `CursorRenderProps` | Props passed to custom cursor |
318
- | `SpeechBubbleRenderProps` | Props passed to custom speech bubble |
319
- | `WaveformRenderProps` | Props passed to custom waveform |
320
-
321
- ## How It Works
322
-
323
- 1. User holds the hotkey (Ctrl+Alt)
324
- 2. Microphone captures audio, waveform shows audio level
325
- 3. User releases hotkey
326
- 4. Screenshot of viewport is captured
327
- 5. Audio is transcribed via AI SDK
328
- 6. Screenshot and capture metadata are sent to the AI model
329
- 7. AI responds with text, optionally including a `[POINT:x,y:label]` tag expressed in screenshot-image coordinates
330
- 8. Response is spoken via TTS
331
- 9. If a pointing tag is present, its coordinates are mapped back to the live viewport and the cursor animates to the target location
332
- 10. **If user presses hotkey again at any point, current response is interrupted**
333
-
334
- ## TODOs
335
-
336
- - [ ] More test coverage for internal services
337
- - [ ] Add `muted` prop for TTS control
338
- - [ ] Faster transcription -> chat -> TTS flow (e.g., a single endpoint instead of three separate calls)
339
- - [ ] Composition pattern for custom components
340
- - [ ] Better hotkey registration code
341
-
342
- ## License
343
-
344
- MIT