cursor-buddy 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,890 @@
1
+ import html2canvas from "html2canvas-pro";
2
+ import { atom } from "nanostores";
3
+ //#region src/core/state-machine.ts
4
+ /**
5
+ * State transition table for the voice interaction flow.
6
+ * Maps current state + event type to next state.
7
+ */
8
const transitions = {
  // Nothing in progress: only the hotkey can start a session.
  idle: {
    HOTKEY_PRESSED: "listening"
  },
  // Recording: releasing the hotkey hands the audio over for processing.
  listening: {
    HOTKEY_RELEASED: "processing",
    ERROR: "idle"
  },
  // Waiting on the AI: the hotkey interrupts and starts a new recording.
  processing: {
    AI_RESPONSE_COMPLETE: "responding",
    HOTKEY_PRESSED: "listening",
    ERROR: "idle"
  },
  // Speaking the answer: the hotkey interrupts here as well.
  responding: {
    TTS_COMPLETE: "idle",
    HOTKEY_PRESSED: "listening",
    ERROR: "idle"
  }
};
25
+ /**
26
+ * Create a simple typed state machine for the voice interaction flow.
27
+ *
28
+ * States: idle -> listening -> processing -> responding -> idle
29
+ *
30
+ * Supports interruption: pressing hotkey during processing or responding
31
+ * immediately transitions back to listening.
32
+ */
33
/**
 * Create the state machine that drives the voice interaction flow.
 *
 * Transitions are looked up in the module-level `transitions` table using
 * own properties only, so an event type that happens to match an inherited
 * Object.prototype member (e.g. "constructor") or an unknown current state
 * is rejected instead of corrupting the state or throwing.
 *
 * @param initial - State the machine starts in (defaults to "idle")
 * @returns Object with:
 *   - getState(): current state
 *   - transition(event): applies `event.type`; returns true if the state changed,
 *     false if the event is not valid in the current state
 *   - subscribe(listener): registers a change listener, returns an unsubscribe function
 *   - reset(): forces the state back to "idle" and notifies listeners
 */
function createStateMachine(initial = "idle") {
  let state = initial;
  const listeners = /* @__PURE__ */ new Set();
  function notify() {
    listeners.forEach((listener) => listener());
  }
  function lookupNextState(eventType) {
    if (!Object.hasOwn(transitions, state)) return void 0;
    const row = transitions[state];
    return Object.hasOwn(row, eventType) ? row[eventType] : void 0;
  }
  return {
    getState: () => state,
    transition: (event) => {
      const nextState = lookupNextState(event.type);
      if (!nextState) return false;
      state = nextState;
      notify();
      return true;
    },
    subscribe: (listener) => {
      listeners.add(listener);
      return () => listeners.delete(listener);
    },
    reset: () => {
      // Always returns to "idle", regardless of the initial state passed in.
      state = "idle";
      notify();
    }
  };
}
58
+ //#endregion
59
+ //#region src/core/utils/audio-worklet.ts
60
+ /**
61
+ * AudioWorklet processor code for voice capture.
62
+ * Inlined as a blob URL to avoid separate file serving requirements.
63
+ */
64
// Source of the AudioWorklet processor, kept as a string so it can be loaded
// from a blob URL. The processor posts two messages per processed input block:
// a copy of the first channel's samples ("audio") and their RMS ("level").
const workletCode = `
    class AudioCaptureProcessor extends AudioWorkletProcessor {
      constructor() {
        super()
        this.isRecording = true
      }

      process(inputs) {
        if (!this.isRecording) return false

        const input = inputs[0]
        if (input && input.length > 0) {
          const channelData = input[0]

          // Send audio data to main thread
          this.port.postMessage({
            type: "audio",
            data: new Float32Array(channelData)
          })

          // Calculate RMS for audio level visualization
          let sum = 0
          for (let i = 0; i < channelData.length; i++) {
            sum += channelData[i] * channelData[i]
          }
          const rms = Math.sqrt(sum / channelData.length)
          this.port.postMessage({ type: "level", rms })
        }

        return true
      }
    }

    registerProcessor("audio-capture-processor", AudioCaptureProcessor)
  `;
// Blob URL for the worklet source; created on first use and then reused.
let cachedBlobURL = null;
/**
 * Return a blob URL that serves the worklet processor source.
 * The URL is created once and the same string is returned on later calls.
 */
function createWorkletBlobURL() {
  if (cachedBlobURL) return cachedBlobURL;
  const source = new Blob([workletCode], { type: "application/javascript" });
  cachedBlobURL = URL.createObjectURL(source);
  return cachedBlobURL;
}
111
+ //#endregion
112
+ //#region src/core/utils/audio.ts
113
+ /**
114
+ * Audio conversion utilities for voice capture.
115
+ * Converts Float32 audio data to WAV format for server transcription.
116
+ */
117
+ /**
118
+ * Merge multiple Float32Array chunks into a single array
119
+ */
120
/**
 * Concatenate audio chunks, in order, into one Float32Array.
 *
 * @param chunks - Array of Float32Array chunks
 * @returns A new Float32Array holding every sample of every chunk
 */
function mergeAudioChunks(chunks) {
  let sampleCount = 0;
  for (const chunk of chunks) sampleCount += chunk.length;
  const merged = new Float32Array(sampleCount);
  let writeIndex = 0;
  for (const chunk of chunks) {
    merged.set(chunk, writeIndex);
    writeIndex += chunk.length;
  }
  return merged;
}
130
+ /**
131
+ * Convert Float32 audio data to 16-bit PCM
132
+ */
133
/**
 * Write float samples as little-endian signed 16-bit PCM.
 *
 * Each sample is clamped to [-1, 1]; negative values are scaled by 32768 and
 * non-negative values by 32767.
 *
 * @param output - DataView to write into
 * @param offset - Byte offset of the first sample
 * @param input - Float samples to convert
 */
function floatTo16BitPCM(output, offset, input) {
  for (let index = 0; index < input.length; index += 1) {
    const clamped = Math.max(-1, Math.min(1, input[index]));
    const scaled = clamped < 0 ? clamped * 32768 : clamped * 32767;
    output.setInt16(offset + index * 2, scaled, true);
  }
}
139
+ /**
140
+ * Write a string to a DataView
141
+ */
142
/**
 * Write each UTF-16 code unit of `string` as one byte, starting at `offset`.
 *
 * @param view - DataView to write into
 * @param offset - Byte offset of the first character
 * @param string - Text to write
 */
function writeString(view, offset, string) {
  let index = 0;
  while (index < string.length) {
    view.setUint8(offset + index, string.charCodeAt(index));
    index += 1;
  }
}
145
+ /**
146
+ * Encode Float32 audio data as a WAV file
147
+ */
148
/**
 * Build a mono, 16-bit PCM WAV file from float samples.
 *
 * @param samples - Float samples (converted with floatTo16BitPCM)
 * @param sampleRate - Sample rate written into the header
 * @returns Blob of type "audio/wav": 44-byte header followed by the PCM data
 */
function encodeWAV(samples, sampleRate) {
  const HEADER_BYTES = 44;
  const channelCount = 1;
  const bitDepth = 16;
  const sampleBytes = bitDepth / 8;
  const frameBytes = channelCount * sampleBytes;
  const payloadBytes = samples.length * sampleBytes;
  const wavBuffer = new ArrayBuffer(HEADER_BYTES + payloadBytes);
  const wav = new DataView(wavBuffer);

  // RIFF container header.
  writeString(wav, 0, "RIFF");
  wav.setUint32(4, 36 + payloadBytes, true);
  writeString(wav, 8, "WAVE");

  // "fmt " chunk: 16-byte body, format tag 1.
  writeString(wav, 12, "fmt ");
  wav.setUint32(16, 16, true);
  wav.setUint16(20, 1, true);
  wav.setUint16(22, channelCount, true);
  wav.setUint32(24, sampleRate, true);
  wav.setUint32(28, sampleRate * frameBytes, true);
  wav.setUint16(32, frameBytes, true);
  wav.setUint16(34, bitDepth, true);

  // "data" chunk: length followed by the samples.
  writeString(wav, 36, "data");
  wav.setUint32(40, payloadBytes, true);
  floatTo16BitPCM(wav, HEADER_BYTES, samples);

  return new Blob([wavBuffer], { type: "audio/wav" });
}
172
+ //#endregion
173
+ //#region src/core/services/voice-capture.ts
174
// Sample rate requested from the microphone, used for the AudioContext and
// written into the WAV header.
const SAMPLE_RATE = 16e3;
// Multiplier applied to the raw RMS before it is capped at 1 for display.
const AUDIO_LEVEL_BOOST = 10.2;
/**
 * Stop the tracks of `stream`, disconnect `workletNode` and close
 * `audioContext`. Any argument may be null.
 *
 * @returns Promise that settles when the context (if any) has closed
 */
function releaseCaptureResources(stream, workletNode, audioContext) {
  stream?.getTracks().forEach((track) => track.stop());
  workletNode?.disconnect();
  return audioContext ? audioContext.close() : Promise.resolve();
}
/**
 * Framework-agnostic service for voice capture using AudioWorkletNode.
 *
 * start() is asynchronous (permission prompt, worklet module load). Each call
 * to start(), stop() or dispose() bumps `generation`; a start() that finds the
 * generation changed after one of its awaits releases whatever it acquired
 * instead of installing it, so the microphone is never left open by a stop()
 * that raced with start().
 */
var VoiceCaptureService = class {
  audioContext = null;
  workletNode = null;
  stream = null;
  chunks = [];
  levelCallback = null;
  /** Incremented by start/stop/dispose; identifies the current capture session. */
  generation = 0;
  /**
   * Register a callback to receive audio level updates (0-1).
   * Called once per "level" message from the worklet while recording, and
   * with 0 when recording stops.
   */
  onLevel(callback) {
    this.levelCallback = callback;
  }
  /**
   * Start recording audio from the microphone.
   *
   * Any session that is still active is released first. If stop() or
   * dispose() is called before this method finishes, it resolves without
   * recording and releases the resources it had acquired.
   *
   * @throws Error if microphone access is denied or the audio graph cannot
   *   be set up; acquired resources are released before rethrowing
   */
  async start() {
    const generation = ++this.generation;
    const previous = this.detachResources();
    // Best-effort: a failure to close the old context must not block a new recording.
    releaseCaptureResources(previous.stream, previous.workletNode, previous.audioContext).catch(() => {});
    // Each session owns its own array so late messages from an old worklet
    // can never end up in a newer recording.
    const chunks = [];
    this.chunks = chunks;
    let stream = null;
    let audioContext = null;
    let workletNode = null;
    try {
      stream = await navigator.mediaDevices.getUserMedia({ audio: {
        sampleRate: SAMPLE_RATE,
        channelCount: 1,
        echoCancellation: true,
        noiseSuppression: true
      } });
      if (generation === this.generation) {
        audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
        await audioContext.audioWorklet.addModule(createWorkletBlobURL());
      }
      if (generation === this.generation) {
        const source = audioContext.createMediaStreamSource(stream);
        workletNode = new AudioWorkletNode(audioContext, "audio-capture-processor");
        const node = workletNode;
        node.port.onmessage = (event) => {
          const { type, data, rms } = event.data;
          if (type === "audio") chunks.push(data);
          else if (type === "level" && this.levelCallback && this.workletNode === node) {
            const boostedLevel = Math.min(rms * AUDIO_LEVEL_BOOST, 1);
            this.levelCallback(boostedLevel);
          }
        };
        source.connect(node);
        this.stream = stream;
        this.audioContext = audioContext;
        this.workletNode = node;
        return;
      }
    } catch (err) {
      // Cleanup is best-effort; the original failure is what the caller needs.
      await releaseCaptureResources(stream, workletNode, audioContext).catch(() => {});
      throw err;
    }
    // Superseded by stop()/dispose()/another start(): drop what was acquired.
    await releaseCaptureResources(stream, workletNode, audioContext).catch(() => {});
  }
  /**
   * Stop recording and return the captured audio as a WAV blob.
   * Also cancels a start() that is still in progress.
   *
   * @returns WAV blob of the samples captured so far (header only if none)
   */
  async stop() {
    this.generation += 1;
    const { stream, workletNode, audioContext } = this.detachResources();
    await releaseCaptureResources(stream, workletNode, audioContext);
    this.levelCallback?.(0);
    const wavBlob = encodeWAV(mergeAudioChunks(this.chunks), SAMPLE_RATE);
    this.chunks = [];
    return wavBlob;
  }
  /**
   * Clean up all resources, discard captured audio and drop the level callback.
   * Also cancels a start() that is still in progress.
   */
  dispose() {
    this.generation += 1;
    const { stream, workletNode, audioContext } = this.detachResources();
    // Fire-and-forget: dispose is synchronous and there is nobody to report to.
    releaseCaptureResources(stream, workletNode, audioContext).catch(() => {});
    this.chunks = [];
    this.levelCallback = null;
  }
  /**
   * Clear the resource fields and return their previous values, so that the
   * same resources cannot be released twice.
   */
  detachResources() {
    const { stream, workletNode, audioContext } = this;
    this.stream = null;
    this.workletNode = null;
    this.audioContext = null;
    return { stream, workletNode, audioContext };
  }
};
263
+ //#endregion
264
+ //#region src/core/services/audio-playback.ts
265
+ /**
266
+ * Framework-agnostic service for audio playback with abort support.
267
+ */
268
/**
 * Framework-agnostic service for audio playback with abort support.
 *
 * Only one clip plays at a time. The promise returned by play() always
 * settles: it resolves when the clip ends or is stopped/aborted, and rejects
 * when the clip fails to play.
 */
var AudioPlaybackService = class {
  audio = null;
  currentUrl = null;
  /** Settles the promise of the clip that is currently playing, if any. */
  settlePlayback = null;
  /**
   * Play audio from a blob. Stops any currently playing audio first.
   * @param blob - Audio blob to play
   * @param signal - Optional AbortSignal to cancel playback
   * @returns Promise that resolves when playback completes or is stopped
   *   (via stop(), a later play(), or the abort signal), and rejects if the
   *   audio element reports an error or refuses to start
   */
  async play(blob, signal) {
    this.stop();
    if (signal?.aborted) return;
    const url = URL.createObjectURL(blob);
    const audio = new Audio(url);
    this.currentUrl = url;
    this.audio = audio;
    return new Promise((resolve, reject) => {
      const onAbort = () => this.stop();
      const settle = (error) => {
        signal?.removeEventListener("abort", onAbort);
        if (error) reject(error);
        else resolve();
      };
      const finish = (error) => {
        // Ignore callbacks from a clip that was already stopped or replaced;
        // otherwise a late rejection would revoke the URL of the newer clip.
        if (this.audio !== audio) return;
        audio.onended = null;
        audio.onerror = null;
        this.audio = null;
        this.settlePlayback = null;
        this.cleanup();
        settle(error);
      };
      this.settlePlayback = settle;
      signal?.addEventListener("abort", onAbort, { once: true });
      audio.onended = () => finish();
      audio.onerror = () => finish(/* @__PURE__ */ new Error("Audio playback failed"));
      audio.play().catch((err) => finish(err));
    });
  }
  /**
   * Stop any currently playing audio and resolve its pending play() promise.
   */
  stop() {
    const audio = this.audio;
    if (audio) {
      audio.pause();
      audio.onended = null;
      audio.onerror = null;
      this.audio = null;
    }
    const settle = this.settlePlayback;
    this.settlePlayback = null;
    this.cleanup();
    settle?.();
  }
  /** Revoke the object URL of the current clip, if there is one. */
  cleanup() {
    if (this.currentUrl) {
      URL.revokeObjectURL(this.currentUrl);
      this.currentUrl = null;
    }
  }
};
327
+ //#endregion
328
+ //#region src/core/utils/screenshot.ts
329
// Widest screenshot, in pixels, produced by this module.
const MAX_WIDTH = 1280;
/**
 * Read the current viewport size from `window`.
 *
 * @returns `{ viewportWidth, viewportHeight }` taken from innerWidth/innerHeight
 */
function getCaptureMetrics() {
  const { innerWidth: viewportWidth, innerHeight: viewportHeight } = window;
  return { viewportWidth, viewportHeight };
}
336
+ /**
337
+ * Resize canvas to max width while maintaining aspect ratio
338
+ */
339
/**
 * Shrink a canvas to `maxWidth`, keeping its aspect ratio.
 *
 * @param canvas - Source canvas
 * @param maxWidth - Maximum allowed width in pixels
 * @returns The same canvas when it already fits, otherwise a new canvas of
 *   width `maxWidth` with the source drawn scaled into it (left blank if no
 *   2D context is available)
 */
function resizeCanvas(canvas, maxWidth) {
  if (canvas.width <= maxWidth) return canvas;
  const ratio = maxWidth / canvas.width;
  const target = document.createElement("canvas");
  target.width = maxWidth;
  target.height = Math.round(canvas.height * ratio);
  const context = target.getContext("2d");
  if (!context) return target;
  context.drawImage(canvas, 0, 0, target.width, target.height);
  return target;
}
349
+ /**
350
+ * Create a fallback canvas when screenshot capture fails.
351
+ * Returns a simple gray canvas with an error message.
352
+ */
353
/**
 * Build the placeholder used when the real screenshot cannot be taken.
 *
 * The canvas is as wide as the viewport, capped at MAX_WIDTH, with the
 * viewport's aspect ratio. It is filled light gray with a centered
 * "Screenshot unavailable" message when a 2D context is available.
 */
function createFallbackCanvas() {
  const placeholder = document.createElement("canvas");
  placeholder.width = Math.min(window.innerWidth, MAX_WIDTH);
  placeholder.height = Math.round(window.innerHeight / window.innerWidth * placeholder.width);
  const context = placeholder.getContext("2d");
  if (!context) return placeholder;
  // Background.
  context.fillStyle = "#f0f0f0";
  context.fillRect(0, 0, placeholder.width, placeholder.height);
  // Centered message.
  context.fillStyle = "#666";
  context.font = "16px sans-serif";
  context.textAlign = "center";
  context.fillText("Screenshot unavailable", placeholder.width / 2, placeholder.height / 2);
  return placeholder;
}
368
+ /**
369
+ * Capture a screenshot of the current viewport.
370
+ * Uses html2canvas to render the DOM to a canvas, then exports as JPEG.
371
+ * Falls back to a placeholder if capture fails (e.g., due to unsupported CSS).
372
+ */
373
/**
 * Take a screenshot of the part of the page that is currently visible.
 *
 * Renders document.body with html2canvas, cropped to the viewport at the
 * current scroll offset. If rendering throws, the placeholder from
 * createFallbackCanvas() is used instead. The result is capped at MAX_WIDTH
 * and exported as a JPEG data URL (quality 0.8).
 *
 * @returns `{ imageData, width, height, viewportWidth, viewportHeight }` where
 *   width/height describe the exported image and viewport* the live viewport
 */
async function captureViewport() {
  const { viewportWidth, viewportHeight } = getCaptureMetrics();
  let rendered;
  try {
    const renderOptions = {
      scale: 1,
      useCORS: true,
      logging: false,
      width: viewportWidth,
      height: viewportHeight,
      x: window.scrollX,
      y: window.scrollY
    };
    rendered = await html2canvas(document.body, renderOptions);
  } catch {
    rendered = createFallbackCanvas();
  }
  const output = resizeCanvas(rendered, MAX_WIDTH);
  const imageData = output.toDataURL("image/jpeg", .8);
  return {
    imageData,
    width: output.width,
    height: output.height,
    viewportWidth,
    viewportHeight
  };
}
398
+ //#endregion
399
+ //#region src/core/services/screen-capture.ts
400
+ /**
401
+ * Framework-agnostic service for capturing viewport screenshots.
402
+ */
403
/**
 * Framework-agnostic screenshot service: a thin, replaceable wrapper
 * around captureViewport().
 */
var ScreenCaptureService = class {
  /**
   * Capture a screenshot of the current viewport.
   * @returns Promise of the captureViewport() result (image data and dimensions)
   */
  async capture() {
    const screenshot = await captureViewport();
    return screenshot;
  }
};
412
+ //#endregion
413
+ //#region src/core/atoms.ts
414
+ /**
415
+ * Nanostores atoms for reactive values that don't need state machine semantics.
416
+ * These update frequently (e.g., 60fps audio levels) and are framework-agnostic.
417
+ */
418
// Audio level of the current recording; starts at 0.
const $audioLevel = atom(0);
// Position of the user's cursor.
const $cursorPosition = atom({ x: 0, y: 0 });
// Position, rotation and scale of the buddy.
const $buddyPosition = atom({ x: 0, y: 0 });
const $buddyRotation = atom(0);
const $buddyScale = atom(1);
// Target the buddy is pointing at, or null when it is not pointing.
const $pointingTarget = atom(null);
// Whether the buddy is enabled.
const $isEnabled = atom(true);
// Store created without a binding; nothing in this module can reference it.
atom(false);
// Conversation turns sent back to the chat endpoint as history.
const $conversationHistory = atom([]);
433
+ //#endregion
434
+ //#region src/core/bezier.ts
435
+ /**
436
+ * Bezier flight animation for cursor pointing.
437
+ */
438
+ /**
439
+ * Quadratic bezier curve: B(t) = (1-t)²P₀ + 2(1-t)t·P₁ + t²P₂
440
+ */
441
/**
 * Evaluate a quadratic bezier curve at parameter `t`.
 *
 * @param p0 - Start point
 * @param p1 - Control point
 * @param p2 - End point
 * @param t - Curve parameter (0 gives p0, 1 gives p2)
 * @returns Point `{ x, y }` on the curve
 */
function quadraticBezier(p0, p1, p2, t) {
  const u = 1 - t;
  // Bernstein weights of the three points.
  const startWeight = u * u;
  const controlWeight = 2 * u * t;
  const endWeight = t * t;
  const blend = (a, b, c) => startWeight * a + controlWeight * b + endWeight * c;
  return {
    x: blend(p0.x, p1.x, p2.x),
    y: blend(p0.y, p1.y, p2.y)
  };
}
448
+ /**
449
+ * Bezier tangent (derivative): B'(t) = 2(1-t)(P₁-P₀) + 2t(P₂-P₁)
450
+ */
451
/**
 * Evaluate the derivative of a quadratic bezier curve at parameter `t`.
 *
 * @param p0 - Start point
 * @param p1 - Control point
 * @param p2 - End point
 * @param t - Curve parameter
 * @returns Tangent vector `{ x, y }` (not normalized)
 */
function bezierTangent(p0, p1, p2, t) {
  const towardControl = 2 * (1 - t);
  const towardEnd = 2 * t;
  const component = (start, control, end) => towardControl * (control - start) + towardEnd * (end - control);
  return {
    x: component(p0.x, p1.x, p2.x),
    y: component(p0.y, p1.y, p2.y)
  };
}
458
+ /**
459
+ * Ease-in-out cubic for smooth acceleration/deceleration
460
+ */
461
/**
 * Cubic ease-in-out: accelerates through the first half of the range and
 * decelerates through the second.
 *
 * @param t - Linear progress
 * @returns Eased progress (0 at t = 0, 0.5 at t = 0.5, 1 at t = 1)
 */
function easeInOutCubic(t) {
  if (t < .5) {
    return 4 * t * t * t;
  }
  const remaining = -2 * t + 2;
  return 1 - remaining ** 3 / 2;
}
464
+ /**
465
+ * Animate cursor along a parabolic bezier arc from start to end.
466
+ * Used when the AI points at a UI element.
467
+ *
468
+ * @param from - Starting position
469
+ * @param to - Target position
470
+ * @param durationMs - Flight duration in milliseconds
471
+ * @param callbacks - Frame and completion callbacks
472
+ * @returns Cancel function to stop the animation
473
+ */
474
/**
 * Animate along a quadratic bezier arc from `from` to `to`.
 *
 * The control point sits above the midpoint (20% of the travel distance above
 * the higher endpoint). Each frame reports the eased position, the heading of
 * the curve's tangent in radians, and a scale that swells to 1.3 mid-flight.
 *
 * @param from - Starting position `{ x, y }`
 * @param to - Target position `{ x, y }`
 * @param durationMs - Flight duration in milliseconds; a value that is not
 *   greater than zero completes on the first frame
 * @param callbacks - `onFrame(position, rotation, scale)` and `onComplete()`
 * @returns Cancel function; after it is called no further callbacks run, even
 *   when it is invoked from inside `onFrame`
 */
function animateBezierFlight(from, to, durationMs, callbacks) {
  const startTime = performance.now();
  const distance = Math.hypot(to.x - from.x, to.y - from.y);
  const controlPoint = {
    x: (from.x + to.x) / 2,
    y: Math.min(from.y, to.y) - distance * .2
  };
  const hasDuration = durationMs > 0;
  let animationFrameId;
  let cancelled = false;
  function animate(now) {
    if (cancelled) return;
    const elapsed = now - startTime;
    // Clamp at both ends: the frame timestamp can be slightly earlier than
    // startTime, which would otherwise extrapolate the curve backwards.
    const linearProgress = hasDuration ? Math.min(Math.max(elapsed / durationMs, 0), 1) : 1;
    const easedProgress = easeInOutCubic(linearProgress);
    const position = quadraticBezier(from, controlPoint, to, easedProgress);
    const tangent = bezierTangent(from, controlPoint, to, easedProgress);
    const rotation = Math.atan2(tangent.y, tangent.x);
    const scale = 1 + Math.sin(linearProgress * Math.PI) * .3;
    callbacks.onFrame(position, rotation, scale);
    // onFrame may have cancelled the flight; do not schedule or complete then.
    if (cancelled) return;
    if (linearProgress < 1) animationFrameId = requestAnimationFrame(animate);
    else callbacks.onComplete();
  }
  animationFrameId = requestAnimationFrame(animate);
  return () => {
    cancelled = true;
    cancelAnimationFrame(animationFrameId);
  };
}
497
+ //#endregion
498
+ //#region src/core/services/pointer-controller.ts
499
// How long the buddy stays anchored on a target before it is released.
const POINTING_LOCK_TIMEOUT_MS = 1e4;
/**
 * Drives the buddy's pointing behavior.
 *
 * Modes: "follow" (buddy mirrors the cursor), "flying" (animating toward a
 * target) and "anchored" (resting on the target until released or until
 * POINTING_LOCK_TIMEOUT_MS elapses). Positions, rotation and scale are
 * published through the module's atoms; listeners are told about mode changes.
 */
var PointerController = class {
  mode = "follow";
  cancelAnimation = null;
  releaseTimeout = null;
  listeners = /* @__PURE__ */ new Set();
  /**
   * Fly the buddy to `target` and anchor it there.
   * Any earlier flight or anchor is released first, which snaps the buddy
   * back to the cursor, so the flight starts from the cursor position.
   */
  pointAt(target) {
    this.release();
    this.mode = "flying";
    $pointingTarget.set(target);
    const origin = $buddyPosition.get();
    const { x, y } = target;
    const destination = { x, y };
    const flightDurationMs = 800;
    const onFrame = (position, rotation, scale) => {
      $buddyPosition.set(position);
      $buddyRotation.set(rotation);
      $buddyScale.set(scale);
    };
    const onComplete = () => {
      this.cancelAnimation = null;
      this.mode = "anchored";
      // Land exactly on the target with a neutral pose.
      $buddyPosition.set(destination);
      $buddyRotation.set(0);
      $buddyScale.set(1);
      this.scheduleRelease();
      this.notify();
    };
    this.cancelAnimation = animateBezierFlight(origin, destination, flightDurationMs, { onFrame, onComplete });
    this.notify();
  }
  /**
   * Cancel any flight or pending auto-release and return to follow mode,
   * snapping the buddy to the cursor with a neutral pose.
   */
  release() {
    const stopFlight = this.cancelAnimation;
    if (stopFlight) {
      stopFlight();
      this.cancelAnimation = null;
    }
    if (this.releaseTimeout) {
      clearTimeout(this.releaseTimeout);
      this.releaseTimeout = null;
    }
    this.mode = "follow";
    $pointingTarget.set(null);
    $buddyPosition.set($cursorPosition.get());
    $buddyRotation.set(0);
    $buddyScale.set(1);
    this.notify();
  }
  /**
   * Whether the buddy is flying to or anchored on a target.
   */
  isPointing() {
    return this.mode !== "follow";
  }
  /**
   * Current mode: "follow", "flying" or "anchored".
   */
  getMode() {
    return this.mode;
  }
  /**
   * Register a listener for pointer state changes.
   * @returns Function that removes the listener
   */
  subscribe(listener) {
    this.listeners.add(listener);
    return () => this.listeners.delete(listener);
  }
  /**
   * Mirror the cursor position onto the buddy; does nothing unless the
   * controller is in follow mode. Call this on cursor position changes.
   */
  updateFollowPosition() {
    if (this.mode !== "follow") return;
    $buddyPosition.set($cursorPosition.get());
    $buddyRotation.set(0);
    $buddyScale.set(1);
  }
  /** Arm the timer that releases the anchor after POINTING_LOCK_TIMEOUT_MS. */
  scheduleRelease() {
    const onTimeout = () => {
      this.releaseTimeout = null;
      this.release();
    };
    this.releaseTimeout = setTimeout(onTimeout, POINTING_LOCK_TIMEOUT_MS);
  }
  /** Invoke every registered listener. */
  notify() {
    for (const listener of this.listeners) listener();
  }
};
599
+ //#endregion
600
+ //#region src/core/pointing.ts
601
+ /**
602
+ * Parses [POINT:x,y:label] tags from AI responses.
603
+ * Format matches the Swift Clicky app for consistency.
604
+ */
605
// Matches a trailing [POINT:x,y:label] tag; x and y are unsigned integers and
// only whitespace may follow the tag.
const POINTING_TAG_REGEX = /\[POINT:(\d+),(\d+):([^\]]+)\]\s*$/;
/**
 * Read the pointing target from the end of a response.
 *
 * @param response - Full response text
 * @returns `{ x, y, label }` with numeric coordinates and a trimmed label, or
 *   null when the text does not end with a valid POINT tag
 */
function parsePointingTag(response) {
  const found = response.match(POINTING_TAG_REGEX);
  if (found === null) return null;
  const [, rawX, rawY, rawLabel] = found;
  const x = parseInt(rawX, 10);
  const y = parseInt(rawY, 10);
  const label = rawLabel.trim();
  return { x, y, label };
}
619
+ /**
620
+ * Remove POINT tag from response text for display/TTS.
621
+ */
622
/**
 * Return the response without its trailing POINT tag, trimmed of
 * surrounding whitespace. Text without a tag is only trimmed.
 */
function stripPointingTag(response) {
  const withoutTag = response.replace(POINTING_TAG_REGEX, "");
  return withoutTag.trim();
}
625
+ //#endregion
626
+ //#region src/core/client.ts
627
/**
 * Limit `value` to the range [min, max].
 */
function clamp(value, min, max) {
  const atLeastMin = Math.max(value, min);
  return Math.min(atLeastMin, max);
}
630
/**
 * Convert a point given in screenshot pixels into viewport pixels.
 *
 * @param target - Point `{ x, y, ... }` in screenshot coordinates
 * @param screenshot - `{ width, height, viewportWidth, viewportHeight }`
 * @returns Copy of `target` with x/y scaled, rounded and clamped to the
 *   viewport; `target` itself when the screenshot has no positive size
 */
function mapPointToViewport(target, screenshot) {
  const { width, height, viewportWidth, viewportHeight } = screenshot;
  if (width <= 0 || height <= 0) return target;
  const horizontalScale = viewportWidth / width;
  const verticalScale = viewportHeight / height;
  // Largest valid coordinate on each axis (never below 0).
  const lastColumn = Math.max(viewportWidth - 1, 0);
  const lastRow = Math.max(viewportHeight - 1, 0);
  return {
    ...target,
    x: clamp(Math.round(target.x * horizontalScale), 0, lastColumn),
    y: clamp(Math.round(target.y * verticalScale), 0, lastRow)
  };
}
640
+ /**
641
+ * Framework-agnostic client for cursor buddy voice interactions.
642
+ *
643
+ * Manages the complete voice interaction flow:
644
+ * idle -> listening -> processing -> responding -> idle
645
+ *
646
+ * Supports interruption: pressing hotkey during any state aborts
647
+ * in-flight work and immediately transitions to listening.
648
+ */
649
/**
 * Framework-agnostic client for cursor buddy voice interactions.
 *
 * Manages the complete voice interaction flow:
 * idle -> listening -> processing -> responding -> idle
 *
 * Supports interruption: pressing the hotkey during processing or responding
 * aborts in-flight work and immediately transitions to listening. Every
 * session owns an AbortController; work belonging to an aborted session never
 * updates state or reports errors.
 */
var CursorBuddyClient = class {
  endpoint;
  options;
  voiceCapture;
  audioPlayback;
  screenCapture;
  pointerController;
  stateMachine;
  transcript = "";
  response = "";
  error = null;
  abortController = null;
  cachedSnapshot;
  listeners = /* @__PURE__ */ new Set();
  /**
   * @param endpoint - Base URL; `/transcribe`, `/chat` and `/tts` are appended
   * @param options - Optional callbacks: onStateChange, onTranscript,
   *   onResponse, onPoint, onError
   * @param services - Optional replacements for voiceCapture, audioPlayback,
   *   screenCapture and pointerController
   */
  constructor(endpoint, options = {}, services = {}) {
    this.endpoint = endpoint;
    this.options = options;
    this.voiceCapture = services.voiceCapture ?? new VoiceCaptureService();
    this.audioPlayback = services.audioPlayback ?? new AudioPlaybackService();
    this.screenCapture = services.screenCapture ?? new ScreenCaptureService();
    this.pointerController = services.pointerController ?? new PointerController();
    this.stateMachine = createStateMachine();
    this.cachedSnapshot = this.buildSnapshot();
    this.voiceCapture.onLevel((level) => $audioLevel.set(level));
    this.stateMachine.subscribe(() => {
      this.options.onStateChange?.(this.stateMachine.getState());
      this.notify();
    });
    this.pointerController.subscribe(() => this.notify());
  }
  /**
   * Start listening for voice input.
   * Aborts any in-flight work from previous session.
   * Does nothing when a recording is already in progress (for example when a
   * held key auto-repeats), so capture is never started twice.
   */
  startListening() {
    if (this.stateMachine.getState() === "listening") return;
    this.abort();
    this.transcript = "";
    this.response = "";
    this.error = null;
    this.pointerController.release();
    this.stateMachine.transition({ type: "HOTKEY_PRESSED" });
    this.notify();
    const controller = new AbortController();
    this.abortController = controller;
    this.voiceCapture.start().catch((err) => {
      // A failure that arrives after this session was aborted is stale.
      if (controller.signal.aborted) return;
      this.handleError(err instanceof Error ? err : /* @__PURE__ */ new Error("Unknown error"));
    });
  }
  /**
   * Stop listening and process the voice input: transcribe the recording,
   * send it with a screenshot to the chat endpoint, point at the target the
   * response names (if any) and speak the response.
   * Does nothing unless the client is currently listening.
   */
  async stopListening() {
    if (this.stateMachine.getState() !== "listening") return;
    this.stateMachine.transition({ type: "HOTKEY_RELEASED" });
    const signal = this.abortController?.signal;
    try {
      const [audioBlob, screenshot] = await Promise.all([this.voiceCapture.stop(), this.screenCapture.capture()]);
      if (signal?.aborted) return;
      const transcript = await this.transcribe(audioBlob, signal);
      if (signal?.aborted) return;
      this.transcript = transcript;
      this.options.onTranscript?.(transcript);
      this.notify();
      const response = await this.chat(transcript, screenshot, signal);
      if (signal?.aborted) return;
      const pointTarget = parsePointingTag(response);
      const cleanResponse = stripPointingTag(response);
      this.response = cleanResponse;
      this.stateMachine.transition({
        type: "AI_RESPONSE_COMPLETE",
        response: cleanResponse
      });
      this.options.onResponse?.(cleanResponse);
      const newHistory = [
        ...$conversationHistory.get(),
        {
          role: "user",
          content: transcript
        },
        {
          role: "assistant",
          content: cleanResponse
        }
      ];
      $conversationHistory.set(newHistory);
      if (pointTarget) {
        // The tag is in screenshot pixels; convert before pointing.
        const mappedTarget = mapPointToViewport(pointTarget, screenshot);
        this.options.onPoint?.(mappedTarget);
        this.pointerController.pointAt(mappedTarget);
      }
      if (cleanResponse) await this.speak(cleanResponse, signal);
      if (signal?.aborted) return;
      this.stateMachine.transition({ type: "TTS_COMPLETE" });
    } catch (err) {
      // Errors caused by aborting the session are expected; drop them.
      if (signal?.aborted) return;
      this.handleError(err instanceof Error ? err : /* @__PURE__ */ new Error("Unknown error"));
    }
  }
  /**
   * Enable or disable the buddy.
   */
  setEnabled(enabled) {
    $isEnabled.set(enabled);
    this.notify();
  }
  /**
   * Manually point at coordinates.
   */
  pointAt(x, y, label) {
    this.pointerController.pointAt({
      x,
      y,
      label
    });
  }
  /**
   * Dismiss the current pointing target.
   */
  dismissPointing() {
    this.pointerController.release();
  }
  /**
   * Reset to idle state and stop any in-progress work, including a
   * recording that is still running.
   */
  reset() {
    const wasListening = this.stateMachine.getState() === "listening";
    this.abort();
    if (wasListening) {
      // The recording is being thrown away, so its result and any error
      // while stopping are deliberately ignored.
      this.voiceCapture.stop().catch(() => {});
    }
    this.transcript = "";
    this.response = "";
    this.error = null;
    this.pointerController.release();
    this.stateMachine.reset();
    this.notify();
  }
  /**
   * Update buddy position to follow cursor.
   * Call this on cursor position changes.
   */
  updateCursorPosition() {
    this.pointerController.updateFollowPosition();
  }
  /**
   * Subscribe to state changes.
   * @returns Function that removes the listener
   */
  subscribe(listener) {
    this.listeners.add(listener);
    return () => this.listeners.delete(listener);
  }
  /**
   * Get current state snapshot for React's useSyncExternalStore.
   * Returns a cached object to ensure referential stability.
   */
  getSnapshot() {
    return this.cachedSnapshot;
  }
  /**
   * Build a new snapshot object.
   */
  buildSnapshot() {
    return {
      state: this.stateMachine.getState(),
      transcript: this.transcript,
      response: this.response,
      error: this.error,
      isPointing: this.pointerController.isPointing(),
      isEnabled: $isEnabled.get()
    };
  }
  /** Abort the current session's requests, stop playback and zero the audio level. */
  abort() {
    this.abortController?.abort();
    this.abortController = null;
    this.audioPlayback.stop();
    $audioLevel.set(0);
  }
  /**
   * POST the recording to `${endpoint}/transcribe`.
   * @returns The `text` field of the JSON response
   * @throws Error("Transcription failed") on a non-OK response
   */
  async transcribe(blob, signal) {
    const formData = new FormData();
    formData.append("audio", blob, "recording.wav");
    const response = await fetch(`${this.endpoint}/transcribe`, {
      method: "POST",
      body: formData,
      signal
    });
    if (!response.ok) throw new Error("Transcription failed");
    const { text } = await response.json();
    return text;
  }
  /**
   * POST the transcript, screenshot and history to `${endpoint}/chat` and
   * read the streamed text reply, publishing the partial response (without
   * POINT tag) after every chunk.
   * @returns The full response text, POINT tag included
   * @throws Error("Chat request failed") on a non-OK response,
   *   Error("No response body") when the response has no readable body
   */
  async chat(transcript, screenshot, signal) {
    const history = $conversationHistory.get();
    const response = await fetch(`${this.endpoint}/chat`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        screenshot: screenshot.imageData,
        capture: {
          width: screenshot.width,
          height: screenshot.height
        },
        transcript,
        history
      }),
      signal
    });
    if (!response.ok) throw new Error("Chat request failed");
    const reader = response.body?.getReader();
    if (!reader) throw new Error("No response body");
    const decoder = new TextDecoder();
    let fullResponse = "";
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      const chunk = decoder.decode(value, { stream: true });
      fullResponse += chunk;
      this.response = stripPointingTag(fullResponse);
      this.notify();
    }
    // Flush bytes the streaming decoder is still holding back.
    const tail = decoder.decode();
    if (tail) {
      fullResponse += tail;
      this.response = stripPointingTag(fullResponse);
      this.notify();
    }
    return fullResponse;
  }
  /**
   * POST the text to `${endpoint}/tts` and play the returned audio.
   * @throws Error("TTS request failed") on a non-OK response
   */
  async speak(text, signal) {
    const response = await fetch(`${this.endpoint}/tts`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ text }),
      signal
    });
    if (!response.ok) throw new Error("TTS request failed");
    const audioBlob = await response.blob();
    await this.audioPlayback.play(audioBlob, signal);
  }
  /** Record the error, send ERROR to the state machine and inform onError. */
  handleError(err) {
    this.error = err;
    this.stateMachine.transition({
      type: "ERROR",
      error: err
    });
    this.options.onError?.(err);
    this.notify();
  }
  /** Rebuild the cached snapshot, then invoke every listener. */
  notify() {
    this.cachedSnapshot = this.buildSnapshot();
    this.listeners.forEach((listener) => listener());
  }
};
887
+ //#endregion
888
+ export { $buddyScale as a, $buddyRotation as i, $audioLevel as n, $cursorPosition as o, $buddyPosition as r, $pointingTarget as s, CursorBuddyClient as t };
889
+
890
+ //# sourceMappingURL=client-Bd33JD8T.mjs.map