cursor-buddy 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -34,11 +34,7 @@ type VoiceEvent = {
34
34
  } | {
35
35
  type: "HOTKEY_RELEASED";
36
36
  } | {
37
- type: "TRANSCRIPTION_COMPLETE";
38
- transcript: string;
39
- } | {
40
- type: "AI_RESPONSE_COMPLETE";
41
- response: string;
37
+ type: "RESPONSE_STARTED";
42
38
  } | {
43
39
  type: "TTS_COMPLETE";
44
40
  } | {
@@ -90,6 +86,55 @@ interface AnnotatedScreenshotResult extends ScreenshotResult {
90
86
  /** Text description of markers for AI context */
91
87
  markerContext: string;
92
88
  }
89
+ /**
90
+ * Selects where media processing happens.
91
+ */
92
+ type CursorBuddyMediaMode = "auto" | "browser" | "server";
93
+ /**
94
+ * Controls how user speech is transcribed before it is sent to the chat model.
95
+ */
96
+ interface CursorBuddyTranscriptionConfig {
97
+ /**
98
+ * Selects where transcription happens.
99
+ *
100
+ * - "auto": Try browser speech recognition first, then fall back to server
101
+ * transcription if browser recognition is unavailable, fails, or does not
102
+ * produce a final transcript.
103
+ * - "browser": Require browser speech recognition. If it is unavailable or
104
+ * fails, the turn errors and no server fallback is attempted.
105
+ * - "server": Skip browser speech recognition and always use server
106
+ * transcription.
107
+ *
108
+ * @default "auto"
109
+ */
110
+ mode?: CursorBuddyMediaMode;
111
+ }
112
+ /**
113
+ * Controls how assistant speech is synthesized before it is played back.
114
+ */
115
+ interface CursorBuddySpeechConfig {
116
+ /**
117
+ * Selects where speech synthesis happens.
118
+ *
119
+ * - "auto": Try browser speech synthesis first, then fall back to server
120
+ * synthesis if browser speech is unavailable or fails.
121
+ * - "browser": Require browser speech synthesis. If it is unavailable or
122
+ * fails, the turn errors and no server fallback is attempted.
123
+ * - "server": Skip browser speech synthesis and always use server TTS.
124
+ *
125
+ * @default "server"
126
+ */
127
+ mode?: CursorBuddyMediaMode;
128
+ /**
129
+ * Whether speech may start before the full chat response is available.
130
+ *
131
+ * When enabled, completed sentence segments are spoken as soon as they are
132
+ * ready. When disabled, speech waits for the full chat response first.
133
+ *
134
+ * @default false
135
+ */
136
+ allowStreaming?: boolean;
137
+ }
93
138
  /**
94
139
  * Public contract for voice capture used by the core client.
95
140
  */
@@ -106,6 +151,24 @@ interface AudioPlaybackPort {
106
151
  play(blob: Blob, signal?: AbortSignal): Promise<void>;
107
152
  stop(): void;
108
153
  }
154
+ /**
155
+ * Public contract for browser-side live transcription.
156
+ */
157
+ interface LiveTranscriptionPort {
158
+ isAvailable(): boolean;
159
+ start(): Promise<void>;
160
+ stop(): Promise<string>;
161
+ onPartial(callback: (text: string) => void): void;
162
+ dispose(): void;
163
+ }
164
+ /**
165
+ * Public contract for browser-side speech synthesis.
166
+ */
167
+ interface BrowserSpeechPort {
168
+ isAvailable(): boolean;
169
+ speak(text: string, signal?: AbortSignal): Promise<void>;
170
+ stop(): void;
171
+ }
109
172
  /**
110
173
  * Public contract for screenshot capture used by the core client.
111
174
  */
@@ -129,6 +192,8 @@ interface PointerControllerPort {
129
192
  interface CursorBuddyServices {
130
193
  voiceCapture?: VoiceCapturePort;
131
194
  audioPlayback?: AudioPlaybackPort;
195
+ liveTranscription?: LiveTranscriptionPort;
196
+ browserSpeech?: BrowserSpeechPort;
132
197
  screenCapture?: ScreenCapturePort;
133
198
  pointerController?: PointerControllerPort;
134
199
  }
@@ -169,6 +234,19 @@ interface WaveformRenderProps {
169
234
  * Configuration options for CursorBuddyClient
170
235
  */
171
236
  interface CursorBuddyClientOptions {
237
+ /**
238
+ * Transcription configuration.
239
+ *
240
+ * If omitted, Cursor Buddy uses `{ mode: "auto" }`.
241
+ */
242
+ transcription?: CursorBuddyTranscriptionConfig;
243
+ /**
244
+ * Speech configuration.
245
+ *
246
+ * If omitted, Cursor Buddy uses
247
+ * `{ mode: "server", allowStreaming: false }`.
248
+ */
249
+ speech?: CursorBuddySpeechConfig;
172
250
  /** Callback when transcript is ready */
173
251
  onTranscript?: (text: string) => void;
174
252
  /** Callback when AI responds */
@@ -186,6 +264,11 @@ interface CursorBuddyClientOptions {
186
264
  interface CursorBuddySnapshot {
187
265
  /** Current voice state */
188
266
  state: VoiceState;
267
+ /**
268
+ * In-progress transcript while the user is speaking.
269
+ * Populated only when browser transcription is active.
270
+ */
271
+ liveTranscript: string;
189
272
  /** Latest transcribed user speech */
190
273
  transcript: string;
191
274
  /** Latest AI response (stripped of POINT tags) */
@@ -213,14 +296,18 @@ declare class CursorBuddyClient {
213
296
  private options;
214
297
  private voiceCapture;
215
298
  private audioPlayback;
299
+ private browserSpeech;
300
+ private liveTranscription;
216
301
  private screenCapture;
217
302
  private pointerController;
218
303
  private stateMachine;
304
+ private liveTranscript;
219
305
  private transcript;
220
306
  private response;
221
307
  private error;
222
308
  private abortController;
223
309
  private historyCommittedForTurn;
310
+ private speechProviderForTurn;
224
311
  private cachedSnapshot;
225
312
  private listeners;
226
313
  constructor(endpoint: string, options?: CursorBuddyClientOptions, services?: CursorBuddyServices);
@@ -275,11 +362,101 @@ declare class CursorBuddyClient {
275
362
  */
276
363
  private commitPartialHistory;
277
364
  private transcribe;
278
- private chat;
279
- private speak;
365
+ /**
366
+ * Stream the chat response, keep the visible text updated, and feed complete
367
+ * speech segments into the TTS queue as soon as they are ready.
368
+ */
369
+ private chatAndSpeak;
370
+ /**
371
+ * Request server-side TTS audio for one text segment.
372
+ */
373
+ private synthesizeSpeech;
374
+ /**
375
+ * Resolve the initial speech provider for this turn.
376
+ *
377
+ * Decision tree:
378
+ * 1. In `server` mode, always synthesize on the server.
379
+ * 2. In `browser` mode, require browser speech support up front.
380
+ * 3. In `auto` mode, prefer browser speech when available and keep that
381
+ * choice cached so later segments stay on the same provider unless a
382
+ * browser failure forces a one-way fallback to the server.
383
+ */
384
+ private prepareSpeechMode;
385
+ /**
386
+ * Prepare a playback task for one text segment.
387
+ *
388
+ * The queue calls this eagerly so server synthesis can overlap with the
389
+ * currently playing segment, but the returned task is still executed in the
390
+ * original enqueue order.
391
+ */
392
+ private prepareSpeechSegment;
393
+ /**
394
+ * Synthesize server audio immediately and return a playback task that reuses
395
+ * the prepared blob later.
396
+ */
397
+ private prepareServerSpeechTask;
398
+ /**
399
+ * Return a browser playback task for one text segment.
400
+ */
401
+ private prepareBrowserSpeechTask;
402
+ /**
403
+ * Prepare a playback task for `auto` mode.
404
+ *
405
+ * We prefer the browser for low latency, but if browser speech fails for any
406
+ * segment we permanently switch the remainder of the turn to server TTS so
407
+ * later segments do not keep retrying the failing browser path.
408
+ */
409
+ private prepareAutoSpeechTask;
410
+ /**
411
+ * Read the current provider choice for `auto` mode, lazily defaulting to the
412
+ * browser when supported and the server otherwise.
413
+ */
414
+ private getAutoSpeechProvider;
280
415
  private handleError;
416
+ /**
417
+ * Resolve the effective transcription mode for the current client.
418
+ */
419
+ private getTranscriptionMode;
420
+ /**
421
+ * Resolve the effective speech mode for the current client.
422
+ */
423
+ private getSpeechMode;
424
+ /**
425
+ * Decide whether speech should start before the full chat response is ready.
426
+ */
427
+ private isSpeechStreamingEnabled;
428
+ /**
429
+ * Decide whether this turn should attempt browser speech recognition.
430
+ */
431
+ private shouldAttemptBrowserTranscription;
432
+ /**
433
+ * Decide whether browser speech recognition is mandatory for this turn.
434
+ */
435
+ private isBrowserTranscriptionRequired;
436
+ /**
437
+ * Start the recorder and browser speech recognition together.
438
+ *
439
+ * The recorder always runs so we keep waveform updates and preserve a raw
440
+ * audio backup for server fallback in `auto` mode.
441
+ */
442
+ private beginListeningSession;
443
+ /**
444
+ * Stop browser speech recognition and return the best final transcript it
445
+ * produced for this turn.
446
+ */
447
+ private stopLiveTranscription;
448
+ /**
449
+ * Choose the transcript that should drive the turn.
450
+ *
451
+ * Decision tree:
452
+ * 1. Use the browser transcript when it is available.
453
+ * 2. In browser-only mode, fail if the browser produced nothing usable.
454
+ * 3. In auto/server modes, fall back to the recorded audio upload.
455
+ */
456
+ private resolveTranscript;
457
+ private updateResponse;
281
458
  private notify;
282
459
  }
283
460
  //#endregion
284
- export { Point as a, VoiceEvent as c, CursorRenderProps as i, VoiceState as l, CursorBuddyClientOptions as n, PointingTarget as o, CursorBuddySnapshot as r, SpeechBubbleRenderProps as s, CursorBuddyClient as t, WaveformRenderProps as u };
285
- //# sourceMappingURL=client-CPQnk2_x.d.mts.map
461
+ export { CursorBuddySnapshot as a, CursorRenderProps as c, SpeechBubbleRenderProps as d, VoiceEvent as f, CursorBuddyMediaMode as i, Point as l, WaveformRenderProps as m, BrowserSpeechPort as n, CursorBuddySpeechConfig as o, VoiceState as p, CursorBuddyClientOptions as r, CursorBuddyTranscriptionConfig as s, CursorBuddyClient as t, PointingTarget as u };
462
+ //# sourceMappingURL=client-DJRU6dKB.d.mts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"client-DJRU6dKB.d.mts","names":[],"sources":["../src/core/utils/elements.ts","../src/core/types.ts","../src/core/client.ts"],"mappings":";;AAgEA;;;;;;UAAiB,aAAA;EAMf;EAJA,EAAA;EAMA;EAJA,OAAA,EAAS,OAAA;EAIE;EAFX,IAAA,EAAM,OAAA;EAQa;EANnB,WAAA;AAAA;;;;KAMU,SAAA,GAAY,GAAA,SAAY,aAAA;;;;AAdpC;;KC7DY,UAAA;;;;KAKA,UAAA;EACN,IAAA;AAAA;EACA,IAAA;AAAA;EACA,IAAA;AAAA;EACA,IAAA;AAAA;EACA,IAAA;EAAe,KAAA,EAAO,KAAA;AAAA;;;AAV5B;;;;UAkBiB,cAAA;EAbL;EAeV,CAAA;;EAEA,CAAA;EAhBI;EAkBJ,KAAA;AAAA;;;AAaF;UAAiB,KAAA;EACf,CAAA;EACA,CAAA;AAAA;AAMF;;;AAAA,UAAiB,gBAAA;EAEf;EAAA,SAAA;EAIA;EAFA,KAAA;EAMA;EAJA,MAAA;EAIc;EAFd,aAAA;EAWyC;EATzC,cAAA;AAAA;;;;UASe,yBAAA,SAAkC,gBAAA;EAIpC;EAFb,SAAA,EAFyC,SAAA;EAkBX;EAd9B,aAAA;AAAA;;;;KAcU,oBAAA;;;;UAKK,8BAAA;EAgDA;;;;;;;;;;;;;EAlCf,IAAA,GAAO,oBAAA;AAAA;;;;UAMQ,uBAAA;EAsCA;;;;;;;;;;;EA1Bf,IAAA,GAAO,oBAAA;EA2BU;;;;;AAOnB;;;EAxBE,cAAA;AAAA;;;;UAMe,gBAAA;EACf,KAAA,IAAS,OAAA;EACT,IAAA,IAAQ,OAAA,CAAQ,IAAA;EAChB,OAAA,CAAQ,QAAA,GAAW,KAAA;EACnB,OAAA;AAAA;;AAyBF;;UAnBiB,iBAAA;EACf,IAAA,CAAK,IAAA,EAAM,IAAA,EAAM,MAAA,GAAS,WAAA,GAAc,OAAA;EACxC,IAAA;AAAA;;;;UAMe,qBAAA;EACf,WAAA;EACA,KAAA,IAAS,OAAA;EACT,IAAA,IAAQ,OAAA;EACR,SAAA,CAAU,QAAA,GAAW,IAAA;EACrB,OAAA;AAAA;;;;UAMe,iBAAA;EACf,WAAA;EACA,KAAA,CAAM,IAAA,UAAc,MAAA,GAAS,WAAA,GAAc,OAAA;EAC3C,IAAA;AAAA;;;;UAMe,iBAAA;EACf,OAAA,IAAW,OAAA,CAAQ,gBAAA;EACnB,gBAAA,IAAoB,OAAA,CAAQ,yBAAA;AAAA;;;;UAMb,qBAAA;EACf,OAAA,CAAQ,MAAA,EAAQ,cAAA;EAChB,OAAA;EACA,UAAA;EACA,SAAA,CAAU,QAAA;EACV,oBAAA;AAAA;;;;UAMe,mBAAA;EACf,YAAA,GAAe,gBAAA;EACf,aAAA,GAAgB,iBAAA;EAChB,iBAAA,GAAoB,qBAAA;EACpB,aAAA,GAAgB,iBAAA;EAChB,aAAA,GAAgB,iBAAA;EAChB,iBAAA,GAAoB,qBAAA;AAAA;;;;UAML,iBAAA;EAXA;EAaf,KAAA,EAAO,UAAA;EAZS;EAchB,UAAA;EAboB;EAepB,QAAA;EAdgB;EAgBhB,KAAA;AAAA;;;;UAMe,uBAAA;EAdA;EAgBf,IAAA;;EAEA,SAAA;EAhBA;EAkBA,OAAA;AAAA;;;;UAMe,mBAAA;EAZA;EAcf,UAAA;;EAEA,WAAA;AAAA;;;;UAMe,wBAAA;EAVA;;;;;EAgBf,aAAA,GAAgB,8BAAA;EANuB;;;;;;EAavC,MAAA,GAAS,uBAAA;EAUc;EARvB,YAAA,IAAgB,IAAA;EAThB;EAWA,UAAA,IAAc,IAAA;EAJd;EAMA,OAAA,IAAW,MAAA,EAAQ,cAAA;EAJnB;EAMA,aAAA,IAAiB,KAAA,EAAO,UAAA;EAJxB;EAMA,OAAA,IAAW,KAAA,EAAO,KAAA;AAAA;;;;UAMH,mBAAA;EARE;EAUjB,KAAA,EAAO,UAAA;EARW;;;;EAalB,cAAA;EAPkC;EASlC,UAAA;EAIY;EAFZ,QAAA;EATO;EAWP,KAAA,EAAO,KAAA;EAJP;EAMA,UAAA;EAFA;EAIA,SAAA;AAAA;;;;;;;;;;;;cC9LW,iBAAA;EAAA,QACH,QAAA;EAAA,QACA,OAAA;EAAA,QAGA,YAAA;EAAA,QACA,aAAA;EAAA,QACA,aAAA;EAAA,QACA,iBAAA;EAAA,QACA,aAAA;EAAA,QACA,iBAAA;EAAA,QACA,YAAA;EAAA,QAGA,cAAA;EAAA,QACA,UAAA;EAAA,QACA,QAAA;EAAA,QACA,KAAA;EAAA,QACA,eAAA;EAAA,QACA,uBAAA;EAAA,QACA,qBAAA;EAAA,QAGA,cAAA;EAAA,QAGA,SAAA;cAGN,QAAA,UACA,OAAA,GAAS,wBAAA,EACT,QAAA,GAAU,mBAAA;EDxHR;;;;ECmKJ,cAAA,CAAA;EDhK0B;;;ECiMpB,aAAA,CAAA,GAAiB,OAAA;EDzLM;;;ECiT7B,UAAA,CAAW,OAAA;ED7SX;;;ECqTA,OAAA,CAAQ,CAAA,UAAW,CAAA,UAAW,KAAA;EDtSf;;;EC6Sf,eAAA,CAAA;ED3SC;AAMH;;EC4SE,KAAA,CAAA;ED5S+B;;;;EC4T/B,oBAAA,CAAA;EDlTA;;;ECyTA,SAAA,CAAU,QAAA;EDhT+B;;;;ECyTzC,WAAA,CAAA,GAAe,mBAAA;EDvTN;;;EAAA,QC8TD,aAAA;EAAA,QAcA,KAAA;ED5TsB;;;;AAKhC;EALgC,QCgVtB,oBAAA;EAAA,QAcM,UAAA;ED3Ud;;AAMF;;EANE,QCiWc,YAAA;ED/Ua;;;EAAA,QCsbb,gBAAA;ED5aA;;AAMhB;;;;;;;;EANgB,QCwcN,iBAAA;EDjcC;;;;;;;EAAA,QC8dK,oBAAA;ED3dP;;AAMT;;EANS,QC6eO,uBAAA;EDteH;;;EAAA,QCkfG,wBAAA;EDlfiC;;;;;;;EAAA,QCggBjC,qBAAA;ED/fV;;AAMN;;EANM,QCwiBI,qBAAA;EAAA,QAYA,WAAA;ED7iBR;;;EAAA,QCwjBQ,oBAAA;EDtjBA;;;EAAA,QC6jBA,aAAA;ED3jBR;;;EAAA,QCkkBQ,wBAAA;ED5jBwB;;;EAAA,QCmkBxB,iCAAA;EDjkBR;;;EAAA,QCwkBQ,8BAAA;EDxkBmC;;;;AAO7C;;EAP6C,QCklB7B,qBAAA;ED1kBK;;;;EAAA,QCwnBL,qBAAA;EDvnBa;;;;;;;;EAAA,QCopBb,iBAAA;EAAA,QAmBN,cAAA;EAAA,QAOA,MAAA;AAAA"}