cursor-buddy 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/{client-DKZY5bI1.d.mts → client-CPQnk2_x.d.mts} +80 -109
- package/dist/client-CPQnk2_x.d.mts.map +1 -0
- package/dist/{client-Bd33JD8T.mjs → client-DAa4L2fE.mjs} +995 -481
- package/dist/client-DAa4L2fE.mjs.map +1 -0
- package/dist/index.d.mts +1 -1
- package/dist/index.mjs +1 -1
- package/dist/react/index.d.mts +21 -21
- package/dist/react/index.d.mts.map +1 -1
- package/dist/react/index.mjs +98 -83
- package/dist/react/index.mjs.map +1 -1
- package/dist/server/index.d.mts +1 -1
- package/dist/server/index.mjs +24 -11
- package/dist/server/index.mjs.map +1 -1
- package/package.json +1 -1
- package/README.md +0 -344
- package/dist/client-Bd33JD8T.mjs.map +0 -1
- package/dist/client-DKZY5bI1.d.mts.map +0 -1
|
@@ -1,350 +1,624 @@
|
|
|
1
|
-
import html2canvas from "html2canvas-pro";
|
|
2
1
|
import { atom } from "nanostores";
|
|
3
|
-
|
|
2
|
+
import html2canvas from "html2canvas-pro";
|
|
3
|
+
//#region src/core/atoms.ts
|
|
4
4
|
/**
|
|
5
|
-
*
|
|
6
|
-
*
|
|
5
|
+
* Nanostores atoms for reactive values that don't need state machine semantics.
|
|
6
|
+
* These update frequently (e.g., 60fps audio levels) and are framework-agnostic.
|
|
7
7
|
*/
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
8
|
+
const $audioLevel = atom(0);
|
|
9
|
+
const $cursorPosition = atom({
|
|
10
|
+
x: 0,
|
|
11
|
+
y: 0
|
|
12
|
+
});
|
|
13
|
+
const $buddyPosition = atom({
|
|
14
|
+
x: 0,
|
|
15
|
+
y: 0
|
|
16
|
+
});
|
|
17
|
+
const $buddyRotation = atom(0);
|
|
18
|
+
const $buddyScale = atom(1);
|
|
19
|
+
const $pointingTarget = atom(null);
|
|
20
|
+
const $isEnabled = atom(true);
|
|
21
|
+
atom(false);
|
|
22
|
+
const $conversationHistory = atom([]);
|
|
23
|
+
//#endregion
|
|
24
|
+
//#region src/core/pointing.ts
|
|
25
25
|
/**
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
* States: idle -> listening -> processing -> responding -> idle
|
|
26
|
+
* Parses POINT tags from AI responses.
|
|
29
27
|
*
|
|
30
|
-
* Supports
|
|
31
|
-
*
|
|
28
|
+
* Supports two formats:
|
|
29
|
+
* - Marker-based: [POINT:5:label] - 3 parts, references a numbered marker
|
|
30
|
+
* - Coordinate-based: [POINT:640,360:label] - 4 parts, raw pixel coordinates
|
|
32
31
|
*/
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
32
|
+
const POINTING_TAG_REGEX = /\[POINT:(\d+)(?:,(\d+))?:([^\]]+)\]\s*$/;
|
|
33
|
+
/**
|
|
34
|
+
* Parse pointing tag into structured result.
|
|
35
|
+
* Returns null if no valid POINT tag is found at the end.
|
|
36
|
+
*/
|
|
37
|
+
function parsePointingTagRaw(response) {
|
|
38
|
+
const match = response.match(POINTING_TAG_REGEX);
|
|
39
|
+
if (!match) return null;
|
|
40
|
+
const first = Number.parseInt(match[1], 10);
|
|
41
|
+
const second = match[2] ? Number.parseInt(match[2], 10) : null;
|
|
42
|
+
const label = match[3].trim();
|
|
43
|
+
if (second !== null) return {
|
|
44
|
+
type: "coordinates",
|
|
45
|
+
x: first,
|
|
46
|
+
y: second,
|
|
47
|
+
label
|
|
48
|
+
};
|
|
39
49
|
return {
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
if (!nextState) return false;
|
|
44
|
-
state = nextState;
|
|
45
|
-
notify();
|
|
46
|
-
return true;
|
|
47
|
-
},
|
|
48
|
-
subscribe: (listener) => {
|
|
49
|
-
listeners.add(listener);
|
|
50
|
-
return () => listeners.delete(listener);
|
|
51
|
-
},
|
|
52
|
-
reset: () => {
|
|
53
|
-
state = "idle";
|
|
54
|
-
notify();
|
|
55
|
-
}
|
|
50
|
+
type: "marker",
|
|
51
|
+
markerId: first,
|
|
52
|
+
label
|
|
56
53
|
};
|
|
57
54
|
}
|
|
58
|
-
//#endregion
|
|
59
|
-
//#region src/core/utils/audio-worklet.ts
|
|
60
55
|
/**
|
|
61
|
-
*
|
|
62
|
-
* Inlined as a blob URL to avoid separate file serving requirements.
|
|
56
|
+
* Remove POINT tag from response text for display/TTS.
|
|
63
57
|
*/
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
constructor() {
|
|
67
|
-
super()
|
|
68
|
-
this.isRecording = true
|
|
69
|
-
}
|
|
70
|
-
|
|
71
|
-
process(inputs) {
|
|
72
|
-
if (!this.isRecording) return false
|
|
73
|
-
|
|
74
|
-
const input = inputs[0]
|
|
75
|
-
if (input && input.length > 0) {
|
|
76
|
-
const channelData = input[0]
|
|
77
|
-
|
|
78
|
-
// Send audio data to main thread
|
|
79
|
-
this.port.postMessage({
|
|
80
|
-
type: "audio",
|
|
81
|
-
data: new Float32Array(channelData)
|
|
82
|
-
})
|
|
83
|
-
|
|
84
|
-
// Calculate RMS for audio level visualization
|
|
85
|
-
let sum = 0
|
|
86
|
-
for (let i = 0; i < channelData.length; i++) {
|
|
87
|
-
sum += channelData[i] * channelData[i]
|
|
88
|
-
}
|
|
89
|
-
const rms = Math.sqrt(sum / channelData.length)
|
|
90
|
-
this.port.postMessage({ type: "level", rms })
|
|
91
|
-
}
|
|
92
|
-
|
|
93
|
-
return true
|
|
94
|
-
}
|
|
58
|
+
function stripPointingTag(response) {
  // The shared pattern is anchored to the end of the text, so only a trailing tag is dropped.
  const withoutTag = response.replace(POINTING_TAG_REGEX, "");
  return withoutTag.trim();
}
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
`;
|
|
99
|
-
let cachedBlobURL = null;
|
|
61
|
+
//#endregion
|
|
62
|
+
//#region src/core/services/audio-playback.ts
|
|
100
63
|
/**
|
|
101
|
-
*
|
|
102
|
-
* Caches the URL to avoid creating multiple blobs.
|
|
64
|
+
* Framework-agnostic service for audio playback with abort support.
|
|
103
65
|
*/
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
66
|
+
var AudioPlaybackService = class {
|
|
67
|
+
audio = null;
|
|
68
|
+
currentUrl = null;
|
|
69
|
+
settlePlayback = null;
|
|
70
|
+
removeAbortListener = null;
|
|
71
|
+
/**
|
|
72
|
+
* Play audio from a blob. Stops any currently playing audio first.
|
|
73
|
+
* @param blob - Audio blob to play
|
|
74
|
+
* @param signal - Optional AbortSignal to cancel playback
|
|
75
|
+
* @returns Promise that resolves when playback completes
|
|
76
|
+
*/
|
|
77
|
+
async play(blob, signal) {
|
|
78
|
+
this.stop();
|
|
79
|
+
if (signal?.aborted) return;
|
|
80
|
+
const url = URL.createObjectURL(blob);
|
|
81
|
+
this.currentUrl = url;
|
|
82
|
+
this.audio = new Audio(url);
|
|
83
|
+
return new Promise((resolve, reject) => {
|
|
84
|
+
if (!this.audio) {
|
|
85
|
+
this.cleanup();
|
|
86
|
+
resolve();
|
|
87
|
+
return;
|
|
88
|
+
}
|
|
89
|
+
let settled = false;
|
|
90
|
+
const audio = this.audio;
|
|
91
|
+
const settle = (outcome, error) => {
|
|
92
|
+
if (settled) return;
|
|
93
|
+
settled = true;
|
|
94
|
+
if (this.settlePlayback === settle) this.settlePlayback = null;
|
|
95
|
+
this.removeAbortListener?.();
|
|
96
|
+
this.removeAbortListener = null;
|
|
97
|
+
if (this.audio === audio) {
|
|
98
|
+
this.audio.onended = null;
|
|
99
|
+
this.audio.onerror = null;
|
|
100
|
+
this.audio = null;
|
|
101
|
+
}
|
|
102
|
+
this.cleanup();
|
|
103
|
+
if (outcome === "resolve") {
|
|
104
|
+
resolve();
|
|
105
|
+
return;
|
|
106
|
+
}
|
|
107
|
+
reject(error ?? /* @__PURE__ */ new Error("Audio playback failed"));
|
|
108
|
+
};
|
|
109
|
+
this.settlePlayback = settle;
|
|
110
|
+
const abortHandler = () => {
|
|
111
|
+
audio.pause();
|
|
112
|
+
settle("resolve");
|
|
113
|
+
};
|
|
114
|
+
if (signal) {
|
|
115
|
+
signal.addEventListener("abort", abortHandler, { once: true });
|
|
116
|
+
this.removeAbortListener = () => {
|
|
117
|
+
signal.removeEventListener("abort", abortHandler);
|
|
118
|
+
};
|
|
119
|
+
}
|
|
120
|
+
this.audio.onended = () => {
|
|
121
|
+
settle("resolve");
|
|
122
|
+
};
|
|
123
|
+
this.audio.onerror = () => {
|
|
124
|
+
settle("reject", /* @__PURE__ */ new Error("Audio playback failed"));
|
|
125
|
+
};
|
|
126
|
+
this.audio.play().catch((err) => {
|
|
127
|
+
settle("reject", err instanceof Error ? err : new Error(String(err)));
|
|
128
|
+
});
|
|
129
|
+
});
|
|
108
130
|
}
|
|
109
|
-
|
|
110
|
-
|
|
131
|
+
/**
|
|
132
|
+
* Stop any currently playing audio.
|
|
133
|
+
*/
|
|
134
|
+
stop() {
|
|
135
|
+
if (this.audio) this.audio.pause();
|
|
136
|
+
if (this.settlePlayback) {
|
|
137
|
+
const settlePlayback = this.settlePlayback;
|
|
138
|
+
this.settlePlayback = null;
|
|
139
|
+
settlePlayback("resolve");
|
|
140
|
+
return;
|
|
141
|
+
}
|
|
142
|
+
this.removeAbortListener?.();
|
|
143
|
+
this.removeAbortListener = null;
|
|
144
|
+
if (this.audio) {
|
|
145
|
+
this.audio.onended = null;
|
|
146
|
+
this.audio.onerror = null;
|
|
147
|
+
this.audio = null;
|
|
148
|
+
}
|
|
149
|
+
this.cleanup();
|
|
150
|
+
}
|
|
151
|
+
cleanup() {
|
|
152
|
+
if (this.currentUrl) {
|
|
153
|
+
URL.revokeObjectURL(this.currentUrl);
|
|
154
|
+
this.currentUrl = null;
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
};
|
|
111
158
|
//#endregion
|
|
112
|
-
//#region src/core/
|
|
159
|
+
//#region src/core/bezier.ts
|
|
113
160
|
/**
|
|
114
|
-
*
|
|
115
|
-
* Converts Float32 audio data to WAV format for server transcription.
|
|
161
|
+
* Bezier flight animation for cursor pointing.
|
|
116
162
|
*/
|
|
117
163
|
/**
|
|
118
|
-
*
|
|
164
|
+
* Quadratic bezier curve: B(t) = (1-t)²P₀ + 2(1-t)t·P₁ + t²P₂
|
|
119
165
|
*/
|
|
120
|
-
function
|
|
121
|
-
const
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
offset += chunk.length;
|
|
127
|
-
}
|
|
128
|
-
return result;
|
|
166
|
+
function quadraticBezier(p0, p1, p2, t) {
  const u = 1 - t;
  // Weights applied to the start, control and end points respectively.
  const startWeight = u * u;
  const controlWeight = 2 * u * t;
  const endWeight = t * t;
  const blend = (a, b, c) => startWeight * a + controlWeight * b + endWeight * c;
  return {
    x: blend(p0.x, p1.x, p2.x),
    y: blend(p0.y, p1.y, p2.y)
  };
}
|
|
130
173
|
/**
|
|
131
|
-
*
|
|
174
|
+
* Bezier tangent (derivative): B'(t) = 2(1-t)(P₁-P₀) + 2t(P₂-P₁)
|
|
132
175
|
*/
|
|
133
|
-
function
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
176
|
+
function bezierTangent(p0, p1, p2, t) {
  // Factors multiplying the (control - start) and (end - control) segments.
  const leadingFactor = 2 * (1 - t);
  const trailingFactor = 2 * t;
  const derive = (a, b, c) => leadingFactor * (b - a) + trailingFactor * (c - b);
  return {
    x: derive(p0.x, p1.x, p2.x),
    y: derive(p0.y, p1.y, p2.y)
  };
}
|
|
139
183
|
/**
|
|
140
|
-
*
|
|
184
|
+
* Ease-in-out cubic for smooth acceleration/deceleration
|
|
141
185
|
*/
|
|
142
|
-
function
|
|
143
|
-
|
|
186
|
+
function easeInOutCubic(t) {
  // First half of the range: cubic ramp-up.
  if (t < .5) {
    return 4 * t * t * t;
  }
  // Second half: the same cubic evaluated on the mirrored input.
  const mirrored = -2 * t + 2;
  return 1 - mirrored ** 3 / 2;
}
|
|
145
189
|
/**
|
|
146
|
-
*
|
|
190
|
+
* Animate cursor along a parabolic bezier arc from start to end.
|
|
191
|
+
* Used when the AI points at a UI element.
|
|
192
|
+
*
|
|
193
|
+
* @param from - Starting position
|
|
194
|
+
* @param to - Target position
|
|
195
|
+
* @param durationMs - Flight duration in milliseconds
|
|
196
|
+
* @param callbacks - Frame and completion callbacks
|
|
197
|
+
* @returns Cancel function to stop the animation
|
|
147
198
|
*/
|
|
148
|
-
function
|
|
149
|
-
const
|
|
150
|
-
const
|
|
151
|
-
const
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
return new Blob([buffer], { type: "audio/wav" });
|
|
199
|
+
function animateBezierFlight(from, to, durationMs, callbacks) {
  const startTime = performance.now();
  const distance = Math.hypot(to.x - from.x, to.y - from.y);
  // Control point sits above the higher of the two endpoints, lifted by 20% of the travel distance.
  const controlPoint = {
    x: (from.x + to.x) / 2,
    y: Math.min(from.y, to.y) - distance * .2
  };
  let animationFrameId;
  function animate(now) {
    // A frame timestamp can be earlier than the performance.now() reading taken
    // above; without this clamp the first frame would be drawn behind `from`.
    const elapsed = Math.max(now - startTime, 0);
    // A zero, negative or NaN duration cannot be divided by (it produced
    // NaN/-Infinity progress and could keep the loop alive forever); treat it
    // as an instant flight that lands on `to` in a single frame.
    const linearProgress = durationMs > 0 ? Math.min(elapsed / durationMs, 1) : 1;
    const easedProgress = easeInOutCubic(linearProgress);
    const position = quadraticBezier(from, controlPoint, to, easedProgress);
    const tangent = bezierTangent(from, controlPoint, to, easedProgress);
    // Heading of travel in radians, taken from the curve's tangent.
    const rotation = Math.atan2(tangent.y, tangent.x);
    // Scale swells up to 1.3 mid-flight and is back at 1 at both ends.
    const scale = 1 + Math.sin(linearProgress * Math.PI) * .3;
    callbacks.onFrame(position, rotation, scale);
    if (linearProgress < 1) animationFrameId = requestAnimationFrame(animate);
    else callbacks.onComplete();
  }
  animationFrameId = requestAnimationFrame(animate);
  // Cancels whichever frame request is currently pending.
  return () => cancelAnimationFrame(animationFrameId);
}
|
|
172
222
|
//#endregion
|
|
173
|
-
//#region src/core/services/
|
|
174
|
-
const
|
|
175
|
-
const AUDIO_LEVEL_BOOST = 10.2;
|
|
223
|
+
//#region src/core/services/pointer-controller.ts
|
|
224
|
+
const POINTING_LOCK_TIMEOUT_MS = 1e4;
|
|
176
225
|
/**
|
|
177
|
-
*
|
|
226
|
+
* Controller for cursor pointing behavior.
|
|
227
|
+
* Manages the pointer state machine (follow -> flying -> anchored -> follow)
|
|
228
|
+
* and cursor animation.
|
|
178
229
|
*/
|
|
179
|
-
var
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
levelCallback = null;
|
|
185
|
-
/**
|
|
186
|
-
* Register a callback to receive audio level updates (0-1).
|
|
187
|
-
* Called at ~60fps during recording for waveform visualization.
|
|
188
|
-
*/
|
|
189
|
-
onLevel(callback) {
|
|
190
|
-
this.levelCallback = callback;
|
|
191
|
-
}
|
|
230
|
+
var PointerController = class {
|
|
231
|
+
mode = "follow";
|
|
232
|
+
cancelAnimation = null;
|
|
233
|
+
releaseTimeout = null;
|
|
234
|
+
listeners = /* @__PURE__ */ new Set();
|
|
192
235
|
/**
|
|
193
|
-
*
|
|
194
|
-
* @throws Error if microphone access is denied
|
|
236
|
+
* Animate cursor to point at a target.
|
|
195
237
|
*/
|
|
196
|
-
|
|
197
|
-
this.
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
this.stream = stream;
|
|
205
|
-
const audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
|
|
206
|
-
this.audioContext = audioContext;
|
|
207
|
-
const workletURL = createWorkletBlobURL();
|
|
208
|
-
await audioContext.audioWorklet.addModule(workletURL);
|
|
209
|
-
const source = audioContext.createMediaStreamSource(stream);
|
|
210
|
-
const workletNode = new AudioWorkletNode(audioContext, "audio-capture-processor");
|
|
211
|
-
this.workletNode = workletNode;
|
|
212
|
-
workletNode.port.onmessage = (event) => {
|
|
213
|
-
const { type, data, rms } = event.data;
|
|
214
|
-
if (type === "audio") this.chunks.push(data);
|
|
215
|
-
else if (type === "level" && this.levelCallback) {
|
|
216
|
-
const boostedLevel = Math.min(rms * AUDIO_LEVEL_BOOST, 1);
|
|
217
|
-
this.levelCallback(boostedLevel);
|
|
218
|
-
}
|
|
238
|
+
pointAt(target) {
|
|
239
|
+
this.release();
|
|
240
|
+
this.mode = "flying";
|
|
241
|
+
$pointingTarget.set(target);
|
|
242
|
+
const startPos = $buddyPosition.get();
|
|
243
|
+
const endPos = {
|
|
244
|
+
x: target.x,
|
|
245
|
+
y: target.y
|
|
219
246
|
};
|
|
220
|
-
|
|
247
|
+
this.cancelAnimation = animateBezierFlight(startPos, endPos, 800, {
|
|
248
|
+
onFrame: (position, rotation, scale) => {
|
|
249
|
+
$buddyPosition.set(position);
|
|
250
|
+
$buddyRotation.set(rotation);
|
|
251
|
+
$buddyScale.set(scale);
|
|
252
|
+
},
|
|
253
|
+
onComplete: () => {
|
|
254
|
+
this.cancelAnimation = null;
|
|
255
|
+
this.mode = "anchored";
|
|
256
|
+
$buddyPosition.set(endPos);
|
|
257
|
+
$buddyRotation.set(0);
|
|
258
|
+
$buddyScale.set(1);
|
|
259
|
+
this.scheduleRelease();
|
|
260
|
+
this.notify();
|
|
261
|
+
}
|
|
262
|
+
});
|
|
263
|
+
this.notify();
|
|
221
264
|
}
|
|
222
265
|
/**
|
|
223
|
-
*
|
|
266
|
+
* Release the cursor from pointing mode back to follow mode.
|
|
224
267
|
*/
|
|
225
|
-
|
|
226
|
-
if (this.
|
|
227
|
-
this.
|
|
228
|
-
this.
|
|
229
|
-
}
|
|
230
|
-
if (this.workletNode) {
|
|
231
|
-
this.workletNode.disconnect();
|
|
232
|
-
this.workletNode = null;
|
|
268
|
+
release() {
|
|
269
|
+
if (this.cancelAnimation) {
|
|
270
|
+
this.cancelAnimation();
|
|
271
|
+
this.cancelAnimation = null;
|
|
233
272
|
}
|
|
234
|
-
if (this.
|
|
235
|
-
|
|
236
|
-
this.
|
|
273
|
+
if (this.releaseTimeout) {
|
|
274
|
+
clearTimeout(this.releaseTimeout);
|
|
275
|
+
this.releaseTimeout = null;
|
|
237
276
|
}
|
|
238
|
-
this.
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
277
|
+
this.mode = "follow";
|
|
278
|
+
$pointingTarget.set(null);
|
|
279
|
+
$buddyPosition.set($cursorPosition.get());
|
|
280
|
+
$buddyRotation.set(0);
|
|
281
|
+
$buddyScale.set(1);
|
|
282
|
+
this.notify();
|
|
242
283
|
}
|
|
243
284
|
/**
|
|
244
|
-
*
|
|
285
|
+
* Check if cursor is currently pointing (flying or anchored).
|
|
245
286
|
*/
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
this.stream.getTracks().forEach((track) => track.stop());
|
|
249
|
-
this.stream = null;
|
|
250
|
-
}
|
|
251
|
-
if (this.workletNode) {
|
|
252
|
-
this.workletNode.disconnect();
|
|
253
|
-
this.workletNode = null;
|
|
254
|
-
}
|
|
255
|
-
if (this.audioContext) {
|
|
256
|
-
this.audioContext.close();
|
|
257
|
-
this.audioContext = null;
|
|
258
|
-
}
|
|
259
|
-
this.chunks = [];
|
|
260
|
-
this.levelCallback = null;
|
|
287
|
+
isPointing() {
|
|
288
|
+
return this.mode !== "follow";
|
|
261
289
|
}
|
|
262
|
-
};
|
|
263
|
-
//#endregion
|
|
264
|
-
//#region src/core/services/audio-playback.ts
|
|
265
|
-
/**
|
|
266
|
-
* Framework-agnostic service for audio playback with abort support.
|
|
267
|
-
*/
|
|
268
|
-
var AudioPlaybackService = class {
|
|
269
|
-
audio = null;
|
|
270
|
-
currentUrl = null;
|
|
271
290
|
/**
|
|
272
|
-
*
|
|
273
|
-
* @param blob - Audio blob to play
|
|
274
|
-
* @param signal - Optional AbortSignal to cancel playback
|
|
275
|
-
* @returns Promise that resolves when playback completes
|
|
291
|
+
* Get current pointer mode.
|
|
276
292
|
*/
|
|
277
|
-
|
|
278
|
-
this.
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
return
|
|
286
|
-
if (!this.audio) {
|
|
287
|
-
this.cleanup();
|
|
288
|
-
resolve();
|
|
289
|
-
return;
|
|
290
|
-
}
|
|
291
|
-
this.audio.onended = () => {
|
|
292
|
-
signal?.removeEventListener("abort", abortHandler);
|
|
293
|
-
this.cleanup();
|
|
294
|
-
resolve();
|
|
295
|
-
};
|
|
296
|
-
this.audio.onerror = () => {
|
|
297
|
-
signal?.removeEventListener("abort", abortHandler);
|
|
298
|
-
this.cleanup();
|
|
299
|
-
reject(/* @__PURE__ */ new Error("Audio playback failed"));
|
|
300
|
-
};
|
|
301
|
-
this.audio.play().catch((err) => {
|
|
302
|
-
signal?.removeEventListener("abort", abortHandler);
|
|
303
|
-
this.cleanup();
|
|
304
|
-
reject(err);
|
|
305
|
-
});
|
|
306
|
-
});
|
|
293
|
+
getMode() {
|
|
294
|
+
return this.mode;
|
|
295
|
+
}
|
|
296
|
+
/**
|
|
297
|
+
* Subscribe to pointer state changes.
|
|
298
|
+
*/
|
|
299
|
+
subscribe(listener) {
|
|
300
|
+
this.listeners.add(listener);
|
|
301
|
+
return () => this.listeners.delete(listener);
|
|
307
302
|
}
|
|
308
303
|
/**
|
|
309
|
-
*
|
|
304
|
+
* Update buddy position to follow cursor when in follow mode.
|
|
305
|
+
* Call this on cursor position changes.
|
|
310
306
|
*/
|
|
311
|
-
|
|
312
|
-
if (this.
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
this.audio = null;
|
|
307
|
+
updateFollowPosition() {
|
|
308
|
+
if (this.mode === "follow") {
|
|
309
|
+
$buddyPosition.set($cursorPosition.get());
|
|
310
|
+
$buddyRotation.set(0);
|
|
311
|
+
$buddyScale.set(1);
|
|
317
312
|
}
|
|
318
|
-
this.cleanup();
|
|
319
313
|
}
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
this.
|
|
324
|
-
}
|
|
314
|
+
scheduleRelease() {
|
|
315
|
+
this.releaseTimeout = setTimeout(() => {
|
|
316
|
+
this.releaseTimeout = null;
|
|
317
|
+
this.release();
|
|
318
|
+
}, POINTING_LOCK_TIMEOUT_MS);
|
|
319
|
+
}
|
|
320
|
+
notify() {
|
|
321
|
+
this.listeners.forEach((listener) => listener());
|
|
322
|
+
}
|
|
323
|
+
};
|
|
324
|
+
//#endregion
|
|
325
|
+
//#region src/core/utils/elements.ts
|
|
326
|
+
/**
|
|
327
|
+
* Element discovery for annotated screenshots.
|
|
328
|
+
* Finds visible interactive elements and assigns marker IDs.
|
|
329
|
+
*/
|
|
330
|
+
/** Max characters for element descriptions passed to the model. */
|
|
331
|
+
const MAX_DESCRIPTION_LENGTH = 50;
|
|
332
|
+
/** Pixels tolerance for grouping elements into the same visual row. */
|
|
333
|
+
const ROW_TOLERANCE_PX = 20;
|
|
334
|
+
/**
|
|
335
|
+
* Interactive element selectors - elements users would want to click/interact with.
|
|
336
|
+
* Mirrors accessibility roles from agent-browser but using CSS selectors.
|
|
337
|
+
*/
|
|
338
|
+
const INTERACTIVE_SELECTORS = [
|
|
339
|
+
"button",
|
|
340
|
+
"[role=\"button\"]",
|
|
341
|
+
"input[type=\"button\"]",
|
|
342
|
+
"input[type=\"submit\"]",
|
|
343
|
+
"input[type=\"reset\"]",
|
|
344
|
+
"a[href]",
|
|
345
|
+
"[role=\"link\"]",
|
|
346
|
+
"input:not([type=\"hidden\"])",
|
|
347
|
+
"textarea",
|
|
348
|
+
"select",
|
|
349
|
+
"[role=\"textbox\"]",
|
|
350
|
+
"[role=\"searchbox\"]",
|
|
351
|
+
"[role=\"combobox\"]",
|
|
352
|
+
"[role=\"listbox\"]",
|
|
353
|
+
"[role=\"slider\"]",
|
|
354
|
+
"[role=\"spinbutton\"]",
|
|
355
|
+
"[role=\"checkbox\"]",
|
|
356
|
+
"[role=\"radio\"]",
|
|
357
|
+
"[role=\"switch\"]",
|
|
358
|
+
"[role=\"menuitem\"]",
|
|
359
|
+
"[role=\"menuitemcheckbox\"]",
|
|
360
|
+
"[role=\"menuitemradio\"]",
|
|
361
|
+
"[role=\"option\"]",
|
|
362
|
+
"[role=\"tab\"]",
|
|
363
|
+
"[role=\"treeitem\"]",
|
|
364
|
+
"video",
|
|
365
|
+
"audio",
|
|
366
|
+
"[data-cursor-buddy-interactive]"
|
|
367
|
+
];
|
|
368
|
+
/**
 * Report whether an element counts as visible in the viewport.
 *
 * The element is rejected when its rect has no area, when the rect lies
 * entirely outside the window's inner bounds, or when its computed style has
 * `visibility: hidden`, `display: none` or an opacity that parses to 0.
 *
 * @param element - Element to inspect
 * @param rect - Bounding rect to test; measured from the element when omitted
 * @returns true when none of the rejection rules apply
 */
function isElementVisible(element, rect = element.getBoundingClientRect()) {
  const isCollapsed = rect.width <= 0 || rect.height <= 0;
  if (isCollapsed) return false;
  const isOffscreen = rect.bottom < 0 || rect.top > window.innerHeight || rect.right < 0 || rect.left > window.innerWidth;
  if (isOffscreen) return false;
  // Style lookup is only performed once the geometry checks have passed.
  const { visibility, display, opacity } = window.getComputedStyle(element);
  const isStyledHidden = visibility === "hidden" || display === "none";
  return !isStyledHidden && Number.parseFloat(opacity) !== 0;
}
|
|
379
|
+
/**
 * Cap a description string at MAX_DESCRIPTION_LENGTH characters.
 *
 * @param value - Description text
 * @returns The text unchanged when short enough, otherwise its leading part
 */
function truncateDescription(value) {
  const limit = MAX_DESCRIPTION_LENGTH;
  return value.length > limit ? value.slice(0, limit) : value;
}
|
|
382
|
+
/**
 * Build a short description of an element.
 *
 * Preference order: `aria-label`; then, by tag, trimmed text for buttons and
 * links, `placeholder` (or "<type> input") for inputs and textareas, `alt`
 * (or "image") for images; finally the `role` attribute, else the tag name.
 * Text taken from the element is passed through truncateDescription.
 *
 * @param element - Element to describe
 * @returns Description string
 */
function describeElement(element) {
  const tagName = element.tagName.toLowerCase();
  const readAttribute = (name) => element.getAttribute(name);
  const accessibleLabel = readAttribute("aria-label");
  if (accessibleLabel) return truncateDescription(accessibleLabel);
  switch (tagName) {
    case "button":
    case "a": {
      const visibleText = element.textContent?.trim();
      if (visibleText) return truncateDescription(visibleText);
      // No usable text: fall through to the role/tag fallback below.
      break;
    }
    case "input":
    case "textarea": {
      const hint = readAttribute("placeholder");
      if (hint) return truncateDescription(hint);
      const inputType = readAttribute("type") || "text";
      return `${inputType} input`;
    }
    case "img": {
      const altText = readAttribute("alt");
      return altText ? truncateDescription(altText) : "image";
    }
    default:
      break;
  }
  return readAttribute("role") || tagName;
}
|
|
407
|
+
/**
 * Find every element matching INTERACTIVE_SELECTORS that passes
 * isElementVisible, paired with its bounding rect.
 *
 * Results are ordered by row (rect top bucketed by ROW_TOLERANCE_PX) and then
 * by the rect's left edge within a row.
 *
 * @returns Array of `{ element, rect }` entries in that order
 */
function collectVisibleInteractiveElements() {
  const candidates = document.querySelectorAll(INTERACTIVE_SELECTORS.join(","));
  const rowOf = ({ rect }) => Math.floor(rect.top / ROW_TOLERANCE_PX);
  return Array.from(candidates, (element) => ({
    element,
    rect: element.getBoundingClientRect()
  }))
    .filter(({ element, rect }) => isElementVisible(element, rect))
    .sort((a, b) => {
      const rowDelta = rowOf(a) - rowOf(b);
      return rowDelta !== 0 ? rowDelta : a.rect.left - b.rect.left;
    });
}
|
|
426
|
+
/**
 * Build the marker map for the currently visible interactive elements.
 *
 * Each element returned by collectVisibleInteractiveElements gets the next
 * sequential id (starting at 1) and is stored under that id together with its
 * rect and a description from describeElement.
 *
 * @returns Map of marker id to `{ id, element, rect, description }`
 */
function createMarkerMap() {
  const markerMap = /* @__PURE__ */ new Map();
  let nextId = 1;
  for (const { element, rect } of collectVisibleInteractiveElements()) {
    markerMap.set(nextId, {
      id: nextId,
      element,
      rect,
      description: describeElement(element)
    });
    nextId += 1;
  }
  return markerMap;
}
|
|
444
|
+
/**
 * Compute the midpoint of an element's bounding rect, in the same viewport
 * coordinates the rect uses, rounded to whole pixels.
 *
 * @param element - Element to measure
 * @returns `{ x, y }` of the rounded center
 */
function getElementCenter(element) {
  const { left, top, width, height } = element.getBoundingClientRect();
  const x = Math.round(left + width / 2);
  const y = Math.round(top + height / 2);
  return { x, y };
}
|
|
454
|
+
/**
 * Turn a marker id into the current center point of its element.
 *
 * @param markerMap - Map of marker id to marker entry
 * @param markerId - Id to look up
 * @returns The element's center, or null when the id is unknown, the element
 *   is no longer contained in the document, or it fails isElementVisible
 */
function resolveMarkerToCoordinates(markerMap, markerId) {
  const marker = markerMap.get(markerId);
  if (!marker) return null;
  const { element } = marker;
  // The rect stored on the marker is not reused; the element is re-checked and re-measured now.
  const isStillPointable = document.contains(element) && isElementVisible(element);
  return isStillPointable ? getElementCenter(element) : null;
}
|
|
465
|
+
//#endregion
|
|
466
|
+
//#region src/core/utils/annotations.ts
|
|
467
|
+
const DEFAULT_STYLE = {
|
|
468
|
+
borderColor: "rgba(255, 0, 0, 0.8)",
|
|
469
|
+
labelBackground: "rgba(255, 0, 0, 0.9)",
|
|
470
|
+
labelColor: "#ffffff",
|
|
471
|
+
borderWidth: 2,
|
|
472
|
+
fontSize: 11,
|
|
473
|
+
labelPadding: 4
|
|
326
474
|
};
|
|
475
|
+
/**
 * Outline every marker's rect on the given context and draw its id in a
 * filled, rounded badge at the rect's top-left corner.
 * Draws directly on the supplied context; context state is saved before and
 * restored after.
 *
 * @param ctx Canvas 2D context to draw on
 * @param markers Marker map whose values carry `rect` and `id`
 * @param style Optional overrides merged over DEFAULT_STYLE
 */
function drawAnnotations(ctx, markers, style = {}) {
  const { borderColor, labelBackground, labelColor, borderWidth, fontSize, labelPadding } = {
    ...DEFAULT_STYLE,
    ...style
  };
  const badgeFont = `bold ${fontSize}px monospace`;
  const badgeHeight = fontSize + labelPadding;
  ctx.save();
  for (const { rect, id } of markers.values()) {
    // Outline around the element.
    ctx.strokeStyle = borderColor;
    ctx.lineWidth = borderWidth;
    ctx.strokeRect(rect.left, rect.top, rect.width, rect.height);
    // Badge sizing depends on the rendered width of the id text.
    const text = String(id);
    ctx.font = badgeFont;
    const badgeWidth = ctx.measureText(text).width + labelPadding * 2;
    const badgeX = rect.left - borderWidth;
    // Badge goes above the rect unless there is too little room at the top, then just inside it.
    const badgeY = rect.top < badgeHeight + 4 ? rect.top + 2 : rect.top - badgeHeight;
    ctx.fillStyle = labelBackground;
    ctx.beginPath();
    ctx.roundRect(badgeX, badgeY, badgeWidth, badgeHeight, 2);
    ctx.fill();
    ctx.fillStyle = labelColor;
    ctx.textBaseline = "top";
    ctx.fillText(text, badgeX + labelPadding, badgeY + labelPadding / 2);
  }
  ctx.restore();
}
|
|
512
|
+
/**
 * Produce a new canvas holding a copy of the source canvas with marker
 * annotations drawn on top. Drawing happens on the new canvas only.
 *
 * @param sourceCanvas Canvas to copy
 * @param markers Marker map passed to drawAnnotations
 * @returns The newly created, annotated canvas
 * @throws Error when a 2D context cannot be obtained for the new canvas
 */
function createAnnotatedCanvas(sourceCanvas, markers) {
  const annotated = document.createElement("canvas");
  Object.assign(annotated, {
    width: sourceCanvas.width,
    height: sourceCanvas.height
  });
  const context = annotated.getContext("2d");
  if (!context) {
    throw new Error("Failed to get canvas 2D context");
  }
  // Copy the pixels first so the annotations end up on top of them.
  context.drawImage(sourceCanvas, 0, 0);
  drawAnnotations(context, markers);
  return annotated;
}
|
|
530
|
+
/**
 * Render the marker map as text for inclusion in an AI prompt: a heading
 * line followed by one "id: description" line per marker.
 *
 * @param markers Marker map whose values carry `id` and `description`
 * @returns The formatted listing, or a fixed notice when the map is empty
 */
function generateMarkerContext(markers) {
  if (markers.size === 0) return "No interactive elements detected.";
  const heading = "Interactive elements (use marker number to point):";
  const entries = Array.from(markers.values(), ({ id, description }) => ` ${id}: ${description}`);
  return [heading, ...entries].join("\n");
}
|
|
327
543
|
//#endregion
|
|
328
544
|
//#region src/core/utils/screenshot.ts
|
|
329
|
-
const
|
|
545
|
+
const CLONE_RESOURCE_TIMEOUT_MS = 3e3;
|
|
330
546
|
/**
 * Read the window's current inner dimensions.
 *
 * @returns `{ viewportWidth, viewportHeight }` taken from `window.innerWidth`
 *   and `window.innerHeight`
 */
function getCaptureMetrics() {
  const { innerWidth, innerHeight } = window;
  return {
    viewportWidth: innerWidth,
    viewportHeight: innerHeight
  };
}
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
return
|
|
552
|
+
/**
 * Wait for two consecutive animation frames of the document's window.
 *
 * @param doc - Document whose `defaultView` supplies requestAnimationFrame
 * @returns Promise resolved inside the second frame callback, or an
 *   already-resolved promise when the document has no view or the view lacks
 *   requestAnimationFrame
 */
function waitForNextPaint(doc) {
  const view = doc.defaultView;
  if (!view?.requestAnimationFrame) return Promise.resolve();
  return new Promise((resolve) => {
    const onSecondFrame = () => {
      resolve();
    };
    const onFirstFrame = () => {
      view.requestAnimationFrame(onSecondFrame);
    };
    view.requestAnimationFrame(onFirstFrame);
  });
}
|
|
561
|
+
/**
 * Tell whether a stylesheet link has finished loading.
 *
 * @param link - Link element to inspect
 * @returns false when the link has no `sheet`; true when the sheet's
 *   `cssRules` can be read, or when reading them throws a DOMException named
 *   "SecurityError"; false for any other thrown error
 */
function isStylesheetReady(link) {
  const { sheet } = link;
  if (!sheet) return false;
  try {
    // Only the access matters: a thrown error is what distinguishes the cases.
    void sheet.cssRules;
  } catch (error) {
    return error instanceof DOMException && error.name === "SecurityError";
  }
  return true;
}
|
|
571
|
+
/**
 * Wait until a stylesheet link is ready, has errored, or the
 * CLONE_RESOURCE_TIMEOUT_MS timeout elapses, whichever comes first.
 *
 * @param link - Link element to wait on
 * @returns Promise that always resolves (never rejects) once the wait ends
 */
function waitForStylesheetLink(link) {
  if (isStylesheetReady(link)) return Promise.resolve();
  return new Promise((resolve) => {
    let settled = false;
    let timeoutId = 0;
    const finish = () => {
      if (settled) return;
      settled = true;
      window.clearTimeout(timeoutId);
      link.removeEventListener("load", handleReady);
      // Must name the same callback that was registered for "error" below,
      // otherwise that listener stays attached after a load or timeout finish.
      link.removeEventListener("error", finish);
      resolve();
    };
    const handleReady = () => {
      if (isStylesheetReady(link)) {
        finish();
        return;
      }
      // Not ready yet: look once more on the next animation frame. If it is
      // still not ready then, the timeout or error listener ends the wait.
      window.requestAnimationFrame(() => {
        if (isStylesheetReady(link)) finish();
      });
    };
    timeoutId = window.setTimeout(finish, CLONE_RESOURCE_TIMEOUT_MS);
    link.addEventListener("load", handleReady, { once: true });
    link.addEventListener("error", finish, { once: true });
    // Re-check immediately after the listeners and timeout are in place.
    handleReady();
  });
}
|
|
599
|
+
/**
 * Wait for a document's styling resources to settle: first every
 * `<link rel="stylesheet" href>` (via waitForStylesheetLink, in parallel),
 * then the document's `fonts.ready` promise when present, then
 * waitForNextPaint.
 *
 * @param doc - Document to wait on
 * @returns Promise resolved once all three stages have completed
 */
async function waitForClonedDocumentStyles(doc) {
  const linkWaits = [...doc.querySelectorAll("link[rel=\"stylesheet\"][href]")].map((link) => waitForStylesheetLink(link));
  await Promise.all(linkWaits);
  const fontsReady = doc.fonts?.ready;
  if (fontsReady) await fontsReady;
  await waitForNextPaint(doc);
}
|
|
605
|
+
/**
 * Build the options object passed to `html2canvas` for a viewport capture.
 *
 * The capture is sized to the viewport reported by `captureMetrics` and
 * positioned at the window's current scroll offset.
 *
 * @param {{ viewportWidth: number, viewportHeight: number }} captureMetrics
 * @returns {object} html2canvas options, including an `onclone` hook that
 *   waits for the cloned document's styles.
 */
function getHtml2CanvasOptions(captureMetrics) {
	const { viewportWidth, viewportHeight } = captureMetrics;
	const { scrollX, scrollY } = window;
	return {
		scale: 1,
		useCORS: true,
		logging: false,
		width: viewportWidth,
		height: viewportHeight,
		windowWidth: viewportWidth,
		windowHeight: viewportHeight,
		x: scrollX,
		y: scrollY,
		scrollX,
		scrollY,
		async onclone(doc) {
			await waitForClonedDocumentStyles(doc);
		}
	};
}
|
|
349
623
|
/**
|
|
350
624
|
* Create a fallback canvas when screenshot capture fails.
|
|
@@ -352,8 +626,8 @@ function resizeCanvas(canvas, maxWidth) {
|
|
|
352
626
|
*/
|
|
353
627
|
function createFallbackCanvas() {
|
|
354
628
|
const canvas = document.createElement("canvas");
|
|
355
|
-
canvas.width =
|
|
356
|
-
canvas.height =
|
|
629
|
+
canvas.width = window.innerWidth;
|
|
630
|
+
canvas.height = window.innerHeight;
|
|
357
631
|
const ctx = canvas.getContext("2d");
|
|
358
632
|
if (ctx) {
|
|
359
633
|
ctx.fillStyle = "#f0f0f0";
|
|
@@ -374,27 +648,44 @@ async function captureViewport() {
|
|
|
374
648
|
const captureMetrics = getCaptureMetrics();
|
|
375
649
|
let canvas;
|
|
376
650
|
try {
|
|
377
|
-
canvas = await html2canvas(document.body,
|
|
378
|
-
scale: 1,
|
|
379
|
-
useCORS: true,
|
|
380
|
-
logging: false,
|
|
381
|
-
width: captureMetrics.viewportWidth,
|
|
382
|
-
height: captureMetrics.viewportHeight,
|
|
383
|
-
x: window.scrollX,
|
|
384
|
-
y: window.scrollY
|
|
385
|
-
});
|
|
651
|
+
canvas = await html2canvas(document.body, getHtml2CanvasOptions(captureMetrics));
|
|
386
652
|
} catch {
|
|
387
653
|
canvas = createFallbackCanvas();
|
|
388
654
|
}
|
|
389
|
-
const resized = resizeCanvas(canvas, MAX_WIDTH);
|
|
390
655
|
return {
|
|
391
|
-
imageData:
|
|
392
|
-
width:
|
|
393
|
-
height:
|
|
656
|
+
imageData: canvas.toDataURL("image/png"),
|
|
657
|
+
width: canvas.width,
|
|
658
|
+
height: canvas.height,
|
|
394
659
|
viewportWidth: captureMetrics.viewportWidth,
|
|
395
660
|
viewportHeight: captureMetrics.viewportHeight
|
|
396
661
|
};
|
|
397
662
|
}
|
|
663
|
+
/**
|
|
664
|
+
* Capture an annotated screenshot of the current viewport.
|
|
665
|
+
* Interactive elements are marked with numbered labels.
|
|
666
|
+
* Returns both the annotated image and a marker map for resolving IDs.
|
|
667
|
+
*/
|
|
668
|
+
async function captureAnnotatedViewport() {
	const captureMetrics = getCaptureMetrics();
	const markerMap = createMarkerMap();
	// An async wrapper turns synchronous failures (building the options, the
	// html2canvas call itself) into rejections, so one fallback covers them all.
	const renderPage = async () => html2canvas(document.body, getHtml2CanvasOptions(captureMetrics));
	const sourceCanvas = await renderPage().catch(() => createFallbackCanvas());
	// Only draw overlays when there is at least one marker to show.
	const hasMarkers = markerMap.size > 0;
	const canvas = hasMarkers ? createAnnotatedCanvas(sourceCanvas, markerMap) : sourceCanvas;
	const markerContext = generateMarkerContext(markerMap);
	const { viewportWidth, viewportHeight } = captureMetrics;
	return {
		imageData: canvas.toDataURL("image/png"),
		width: canvas.width,
		height: canvas.height,
		viewportWidth,
		viewportHeight,
		markerMap,
		markerContext
	};
}
|
|
398
689
|
//#endregion
|
|
399
690
|
//#region src/core/services/screen-capture.ts
|
|
400
691
|
/**
|
|
@@ -408,233 +699,418 @@ var ScreenCaptureService = class {
|
|
|
408
699
|
async capture() {
|
|
409
700
|
return captureViewport();
|
|
410
701
|
}
|
|
702
|
+
/**
|
|
703
|
+
* Capture an annotated screenshot with marker overlays.
|
|
704
|
+
* Interactive elements are marked with numbered labels.
|
|
705
|
+
* @returns Annotated screenshot result with marker map
|
|
706
|
+
*/
|
|
707
|
+
async captureAnnotated() {
|
|
708
|
+
return captureAnnotatedViewport();
|
|
709
|
+
}
|
|
411
710
|
};
|
|
412
711
|
//#endregion
|
|
413
|
-
//#region src/core/
|
|
712
|
+
//#region src/core/utils/audio.ts
|
|
414
713
|
/**
|
|
415
|
-
*
|
|
416
|
-
*
|
|
714
|
+
* Audio conversion utilities for voice capture.
|
|
715
|
+
* Converts Float32 audio data to WAV format for server transcription.
|
|
417
716
|
*/
|
|
418
|
-
const $audioLevel = atom(0);
|
|
419
|
-
const $cursorPosition = atom({
|
|
420
|
-
x: 0,
|
|
421
|
-
y: 0
|
|
422
|
-
});
|
|
423
|
-
const $buddyPosition = atom({
|
|
424
|
-
x: 0,
|
|
425
|
-
y: 0
|
|
426
|
-
});
|
|
427
|
-
const $buddyRotation = atom(0);
|
|
428
|
-
const $buddyScale = atom(1);
|
|
429
|
-
const $pointingTarget = atom(null);
|
|
430
|
-
const $isEnabled = atom(true);
|
|
431
|
-
atom(false);
|
|
432
|
-
const $conversationHistory = atom([]);
|
|
433
|
-
//#endregion
|
|
434
|
-
//#region src/core/bezier.ts
|
|
435
717
|
/**
|
|
436
|
-
*
|
|
718
|
+
* Merge multiple Float32Array chunks into a single array
|
|
437
719
|
*/
|
|
720
|
+
function mergeAudioChunks(chunks) {
	// First pass: size the destination so it is allocated exactly once.
	let totalLength = 0;
	for (const chunk of chunks) totalLength += chunk.length;
	const merged = new Float32Array(totalLength);
	// Second pass: copy each chunk behind the previous one.
	chunks.reduce((writeOffset, chunk) => {
		merged.set(chunk, writeOffset);
		return writeOffset + chunk.length;
	}, 0);
	return merged;
}
|
|
438
730
|
/**
|
|
439
|
-
*
|
|
731
|
+
* Convert Float32 audio data to 16-bit PCM
|
|
440
732
|
*/
|
|
441
|
-
function
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
};
|
|
733
|
+
function floatTo16BitPCM(output, offset, input) {
	for (let index = 0; index < input.length; index += 1) {
		// Limit each sample to [-1, 1] before scaling.
		const clamped = Math.min(1, Math.max(-1, input[index]));
		// Negative samples scale by 32768 and the rest by 32767, so both ends of
		// the signed 16-bit range are reachable.
		const scale = clamped < 0 ? 32768 : 32767;
		// Little-endian, two bytes per sample.
		output.setInt16(offset + index * 2, clamped * scale, true);
	}
}
|
|
448
739
|
/**
|
|
449
|
-
*
|
|
740
|
+
* Write a string to a DataView
|
|
450
741
|
*/
|
|
451
|
-
function
|
|
452
|
-
|
|
453
|
-
return {
|
|
454
|
-
x: 2 * oneMinusT * (p1.x - p0.x) + 2 * t * (p2.x - p1.x),
|
|
455
|
-
y: 2 * oneMinusT * (p1.y - p0.y) + 2 * t * (p2.y - p1.y)
|
|
456
|
-
};
|
|
742
|
+
function writeString(view, offset, string) {
	// One byte per UTF-16 code unit, written from `offset` onwards.
	string.split("").forEach((unit, index) => {
		view.setUint8(offset + index, unit.charCodeAt(0));
	});
}
|
|
458
745
|
/**
|
|
459
|
-
*
|
|
746
|
+
* Encode Float32 audio data as a WAV file
|
|
460
747
|
*/
|
|
461
|
-
function
|
|
462
|
-
|
|
748
|
+
function encodeWAV(samples, sampleRate) {
	const HEADER_BYTES = 44;
	const channelCount = 1;
	const bitDepth = 16;
	const bytesPerSample = bitDepth / 8;
	const frameBytes = channelCount * bytesPerSample;
	const payloadBytes = samples.length * bytesPerSample;
	const wav = new ArrayBuffer(HEADER_BYTES + payloadBytes);
	const view = new DataView(wav);
	// Header fields in file order: [kind, byte offset, value].
	const headerFields = [
		["tag", 0, "RIFF"],
		["u32", 4, 36 + payloadBytes],
		["tag", 8, "WAVE"],
		["tag", 12, "fmt "],
		["u32", 16, 16],
		["u16", 20, 1],
		["u16", 22, channelCount],
		["u32", 24, sampleRate],
		["u32", 28, sampleRate * frameBytes],
		["u16", 32, frameBytes],
		["u16", 34, bitDepth],
		["tag", 36, "data"],
		["u32", 40, payloadBytes]
	];
	for (const [kind, at, value] of headerFields) {
		if (kind === "tag") writeString(view, at, value);
		else if (kind === "u32") view.setUint32(at, value, true);
		else view.setUint16(at, value, true);
	}
	// Sample data follows the 44-byte header as 16-bit PCM.
	floatTo16BitPCM(view, HEADER_BYTES, samples);
	return new Blob([wav], { type: "audio/wav" });
}
|
|
772
|
+
//#endregion
|
|
773
|
+
//#region src/core/utils/audio-worklet.ts
|
|
464
774
|
/**
|
|
465
|
-
*
|
|
466
|
-
*
|
|
467
|
-
*
|
|
468
|
-
* @param from - Starting position
|
|
469
|
-
* @param to - Target position
|
|
470
|
-
* @param durationMs - Flight duration in milliseconds
|
|
471
|
-
* @param callbacks - Frame and completion callbacks
|
|
472
|
-
* @returns Cancel function to stop the animation
|
|
775
|
+
* AudioWorklet processor code for voice capture.
|
|
776
|
+
* Inlined as a blob URL to avoid separate file serving requirements.
|
|
473
777
|
*/
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
778
|
+
const workletCode = `
|
|
779
|
+
class AudioCaptureProcessor extends AudioWorkletProcessor {
|
|
780
|
+
constructor() {
|
|
781
|
+
super()
|
|
782
|
+
this.isRecording = true
|
|
783
|
+
this.audioChunkSize = 2048
|
|
784
|
+
this.audioBuffer = new Float32Array(this.audioChunkSize)
|
|
785
|
+
this.audioBufferIndex = 0
|
|
786
|
+
this.levelFramesPerUpdate = 4
|
|
787
|
+
this.levelFrameCount = 0
|
|
788
|
+
this.levelRmsSum = 0
|
|
789
|
+
this.levelPeak = 0
|
|
790
|
+
|
|
791
|
+
this.port.onmessage = (event) => {
|
|
792
|
+
if (event.data?.type === "flush") {
|
|
793
|
+
this.flushAudio()
|
|
794
|
+
this.flushLevel()
|
|
795
|
+
this.port.postMessage({ type: "flush-complete" })
|
|
796
|
+
}
|
|
797
|
+
}
|
|
798
|
+
}
|
|
799
|
+
|
|
800
|
+
flushAudio() {
|
|
801
|
+
if (this.audioBufferIndex === 0) return
|
|
802
|
+
|
|
803
|
+
const chunk = this.audioBuffer.slice(0, this.audioBufferIndex)
|
|
804
|
+
this.port.postMessage({
|
|
805
|
+
type: "audio",
|
|
806
|
+
data: chunk
|
|
807
|
+
})
|
|
808
|
+
this.audioBufferIndex = 0
|
|
809
|
+
}
|
|
810
|
+
|
|
811
|
+
flushLevel() {
|
|
812
|
+
if (this.levelFrameCount === 0) return
|
|
813
|
+
|
|
814
|
+
this.port.postMessage({
|
|
815
|
+
type: "level",
|
|
816
|
+
rms: this.levelRmsSum / this.levelFrameCount,
|
|
817
|
+
peak: this.levelPeak
|
|
818
|
+
})
|
|
819
|
+
|
|
820
|
+
this.levelFrameCount = 0
|
|
821
|
+
this.levelRmsSum = 0
|
|
822
|
+
this.levelPeak = 0
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
process(inputs) {
|
|
826
|
+
if (!this.isRecording) return false
|
|
827
|
+
|
|
828
|
+
const input = inputs[0]
|
|
829
|
+
if (input && input.length > 0) {
|
|
830
|
+
const channelData = input[0]
|
|
831
|
+
let sum = 0
|
|
832
|
+
let peak = 0
|
|
833
|
+
for (let i = 0; i < channelData.length; i++) {
|
|
834
|
+
const sample = channelData[i]
|
|
835
|
+
sum += sample * sample
|
|
836
|
+
const absolute = Math.abs(sample)
|
|
837
|
+
if (absolute > peak) peak = absolute
|
|
838
|
+
}
|
|
839
|
+
|
|
840
|
+
this.levelRmsSum += Math.sqrt(sum / channelData.length)
|
|
841
|
+
this.levelPeak = Math.max(this.levelPeak, peak)
|
|
842
|
+
this.levelFrameCount += 1
|
|
843
|
+
|
|
844
|
+
if (this.levelFrameCount >= this.levelFramesPerUpdate) {
|
|
845
|
+
this.flushLevel()
|
|
846
|
+
}
|
|
847
|
+
|
|
848
|
+
let readOffset = 0
|
|
849
|
+
while (readOffset < channelData.length) {
|
|
850
|
+
const remaining = this.audioBuffer.length - this.audioBufferIndex
|
|
851
|
+
const copyLength = Math.min(remaining, channelData.length - readOffset)
|
|
852
|
+
|
|
853
|
+
this.audioBuffer.set(
|
|
854
|
+
channelData.subarray(readOffset, readOffset + copyLength),
|
|
855
|
+
this.audioBufferIndex
|
|
856
|
+
)
|
|
857
|
+
|
|
858
|
+
this.audioBufferIndex += copyLength
|
|
859
|
+
readOffset += copyLength
|
|
860
|
+
|
|
861
|
+
if (this.audioBufferIndex >= this.audioBuffer.length) {
|
|
862
|
+
this.flushAudio()
|
|
863
|
+
}
|
|
864
|
+
}
|
|
865
|
+
}
|
|
866
|
+
|
|
867
|
+
return true
|
|
868
|
+
}
|
|
869
|
+
}
|
|
870
|
+
|
|
871
|
+
registerProcessor("audio-capture-processor", AudioCaptureProcessor)
|
|
872
|
+
`;
|
|
873
|
+
// Blob URL for the worklet source; created on first use and then reused.
let cachedBlobURL = null;
/**
 * Return a blob URL that serves the audio worklet processor source.
 *
 * The URL is built once from `workletCode` and memoised in `cachedBlobURL`,
 * so repeated calls hand back the same string.
 *
 * @returns {string} The cached blob URL.
 */
function createWorkletBlobURL() {
	cachedBlobURL ||= URL.createObjectURL(new Blob([workletCode], { type: "application/javascript" }));
	return cachedBlobURL;
}
|
|
497
885
|
//#endregion
|
|
498
|
-
//#region src/core/services/
|
|
499
|
-
const
|
|
886
|
+
//#region src/core/services/voice-capture.ts
|
|
887
|
+
// Sample rate in Hz, used for the microphone request, the AudioContext and
// the WAV header.
const SAMPLE_RATE = 16_000;
// Amount subtracted from the incoming level before it is scaled, so very
// quiet input reads as zero.
const AUDIO_LEVEL_NOISE_GATE = 0.0005;
// Gain applied inside the logarithmic level curve.
const AUDIO_LEVEL_INPUT_GAIN = 600;
// Fraction of the gap closed per update when the level is rising.
const AUDIO_LEVEL_ATTACK = 0.7;
// Fraction of the gap closed per update when the level is falling or steady.
const AUDIO_LEVEL_RELEASE = 0.25;
|
|
892
|
+
/**
 * Constrain `value` to the range [`min`, `max`].
 *
 * @param {number} value
 * @param {number} min - Lower bound, applied first.
 * @param {number} max - Upper bound, applied second.
 * @returns {number}
 */
function clamp$1(value, min, max) {
	const raisedToMin = Math.max(value, min);
	return Math.min(raisedToMin, max);
}
|
|
895
|
+
/**
 * Map a raw level reading onto a 0-1 scale.
 *
 * The noise gate is subtracted first (never going below zero), the result is
 * passed through a logarithmic curve, and the output is clamped to [0, 1].
 *
 * @param {number} rms - Raw level reading.
 * @returns {number} Normalised level between 0 and 1.
 */
function normalizeAudioLevel(rms) {
	const gatedRms = Math.max(0, rms - AUDIO_LEVEL_NOISE_GATE);
	const boosted = Math.log1p(gatedRms * AUDIO_LEVEL_INPUT_GAIN);
	const fullScale = Math.log1p(AUDIO_LEVEL_INPUT_GAIN);
	return clamp$1(boosted / fullScale, 0, 1);
}
|
|
899
|
+
/**
 * Move `current` part of the way towards `target`.
 *
 * Rising levels use the attack factor; falling or equal levels use the
 * release factor.
 *
 * @param {number} current - Level currently displayed.
 * @param {number} target - Level to approach.
 * @returns {number} The next displayed level.
 */
function smoothAudioLevel(current, target) {
	const isRising = target > current;
	const factor = isRising ? AUDIO_LEVEL_ATTACK : AUDIO_LEVEL_RELEASE;
	const gap = target - current;
	return current + gap * factor;
}
|
|
500
903
|
/**
|
|
501
|
-
*
|
|
502
|
-
* Manages the pointer state machine (follow -> flying -> anchored -> follow)
|
|
503
|
-
* and cursor animation.
|
|
904
|
+
* Framework-agnostic service for voice capture using AudioWorkletNode.
|
|
504
905
|
*/
|
|
505
|
-
var
|
|
506
|
-
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
this.mode = "flying";
|
|
516
|
-
$pointingTarget.set(target);
|
|
517
|
-
const startPos = $buddyPosition.get();
|
|
518
|
-
const endPos = {
|
|
519
|
-
x: target.x,
|
|
520
|
-
y: target.y
|
|
521
|
-
};
|
|
522
|
-
this.cancelAnimation = animateBezierFlight(startPos, endPos, 800, {
|
|
523
|
-
onFrame: (position, rotation, scale) => {
|
|
524
|
-
$buddyPosition.set(position);
|
|
525
|
-
$buddyRotation.set(rotation);
|
|
526
|
-
$buddyScale.set(scale);
|
|
527
|
-
},
|
|
528
|
-
onComplete: () => {
|
|
529
|
-
this.cancelAnimation = null;
|
|
530
|
-
this.mode = "anchored";
|
|
531
|
-
$buddyPosition.set(endPos);
|
|
532
|
-
$buddyRotation.set(0);
|
|
533
|
-
$buddyScale.set(1);
|
|
534
|
-
this.scheduleRelease();
|
|
535
|
-
this.notify();
|
|
536
|
-
}
|
|
537
|
-
});
|
|
538
|
-
this.notify();
|
|
539
|
-
}
|
|
540
|
-
/**
|
|
541
|
-
* Release the cursor from pointing mode back to follow mode.
|
|
542
|
-
*/
|
|
543
|
-
release() {
|
|
544
|
-
if (this.cancelAnimation) {
|
|
545
|
-
this.cancelAnimation();
|
|
546
|
-
this.cancelAnimation = null;
|
|
547
|
-
}
|
|
548
|
-
if (this.releaseTimeout) {
|
|
549
|
-
clearTimeout(this.releaseTimeout);
|
|
550
|
-
this.releaseTimeout = null;
|
|
551
|
-
}
|
|
552
|
-
this.mode = "follow";
|
|
553
|
-
$pointingTarget.set(null);
|
|
554
|
-
$buddyPosition.set($cursorPosition.get());
|
|
555
|
-
$buddyRotation.set(0);
|
|
556
|
-
$buddyScale.set(1);
|
|
557
|
-
this.notify();
|
|
558
|
-
}
|
|
906
|
+
var VoiceCaptureService = class {
	audioContext = null;
	workletNode = null;
	sourceNode = null;
	silentGainNode = null;
	stream = null;
	// Float32Array chunks received from the worklet since the last start().
	chunks = [];
	levelCallback = null;
	// Smoothed 0-1 level last handed to levelCallback.
	visualLevel = 0;
	// Resolver for an in-flight flushPendingAudio() call, if any.
	flushResolve = null;
	/**
	 * Register a callback to receive audio level updates (0-1).
	 * It is invoked each time the worklet posts a "level" message, and once
	 * with 0 when recording stops.
	 * @param {(level: number) => void} callback
	 */
	onLevel(callback) {
		this.levelCallback = callback;
	}
	/**
	 * Start recording audio from the microphone.
	 *
	 * If any setup step fails after the microphone or AudioContext has been
	 * acquired, everything acquired so far is released before the error is
	 * rethrown, so a failed start never leaves the microphone open.
	 * @throws Error if microphone access is denied or the audio graph cannot be built
	 */
	async start() {
		this.chunks = [];
		this.visualLevel = 0;
		try {
			const stream = await navigator.mediaDevices.getUserMedia({ audio: {
				sampleRate: SAMPLE_RATE,
				channelCount: 1,
				echoCancellation: true,
				noiseSuppression: true
			} });
			this.stream = stream;
			const audioContext = new AudioContext({ sampleRate: SAMPLE_RATE });
			this.audioContext = audioContext;
			await audioContext.resume();
			const workletURL = createWorkletBlobURL();
			await audioContext.audioWorklet.addModule(workletURL);
			const source = audioContext.createMediaStreamSource(stream);
			this.sourceNode = source;
			const workletNode = new AudioWorkletNode(audioContext, "audio-capture-processor");
			this.workletNode = workletNode;
			// The worklet is routed to the destination through a zero-gain node,
			// so the graph is connected end to end but nothing is audible.
			const silentGainNode = audioContext.createGain();
			silentGainNode.gain.value = 0;
			this.silentGainNode = silentGainNode;
			workletNode.port.onmessage = (event) => {
				const { type, data, rms, peak } = event.data;
				if (type === "audio") this.chunks.push(data);
				else if (type === "level" && this.levelCallback) {
					// Blend RMS with a scaled peak so short transients still register.
					const targetLevel = normalizeAudioLevel(Math.max(rms ?? 0, (peak ?? 0) * .6));
					this.visualLevel = smoothAudioLevel(this.visualLevel, targetLevel);
					this.levelCallback(this.visualLevel);
				} else if (type === "flush-complete") {
					this.flushResolve?.();
					this.flushResolve = null;
				}
			};
			source.connect(workletNode);
			workletNode.connect(silentGainNode);
			silentGainNode.connect(audioContext.destination);
		} catch (error) {
			// A cleanup failure must not mask the error that caused it.
			await this.releaseAudioGraph().catch(() => {});
			throw error;
		}
	}
	/**
	 * Stop recording and return the captured audio as a WAV blob.
	 * @returns {Promise<Blob>} WAV encoding of everything captured since start()
	 */
	async stop() {
		// Ask the worklet for its partially filled buffer before tearing down.
		await this.flushPendingAudio();
		await this.releaseAudioGraph();
		this.visualLevel = 0;
		this.levelCallback?.(0);
		const wavBlob = encodeWAV(mergeAudioChunks(this.chunks), SAMPLE_RATE);
		this.chunks = [];
		return wavBlob;
	}
	/**
	 * Clean up all resources.
	 */
	dispose() {
		// dispose() is synchronous and has nowhere to report a failed close(), so
		// the rejection is deliberately ignored instead of left unhandled. The
		// tracks and nodes are released synchronously before this call returns.
		this.releaseAudioGraph().catch(() => {});
		this.chunks = [];
		this.visualLevel = 0;
		this.flushResolve = null;
		this.levelCallback = null;
	}
	/**
	 * Internal: stop the microphone tracks, disconnect every node and close the
	 * AudioContext. Safe to call when nothing has been acquired. All fields are
	 * cleared synchronously; the returned promise settles when close() does.
	 * @returns {Promise<void>}
	 */
	async releaseAudioGraph() {
		if (this.stream) {
			for (const track of this.stream.getTracks()) track.stop();
			this.stream = null;
		}
		for (const field of ["sourceNode", "workletNode", "silentGainNode"]) {
			this[field]?.disconnect();
			this[field] = null;
		}
		const audioContext = this.audioContext;
		this.audioContext = null;
		if (audioContext) await audioContext.close();
	}
	/**
	 * Internal: ask the worklet to post any buffered audio and wait for its
	 * "flush-complete" reply, giving up after 50 ms so stop() cannot hang.
	 * @returns {Promise<void>}
	 */
	async flushPendingAudio() {
		if (!this.workletNode) return;
		await new Promise((resolve) => {
			const timeoutId = setTimeout(() => {
				this.flushResolve = null;
				resolve();
			}, 50);
			this.flushResolve = () => {
				clearTimeout(timeoutId);
				resolve();
			};
			this.workletNode?.port.postMessage({ type: "flush" });
		});
	}
};
|
|
599
1040
|
//#endregion
|
|
600
|
-
//#region src/core/
|
|
1041
|
+
//#region src/core/state-machine.ts
|
|
601
1042
|
/**
|
|
602
|
-
*
|
|
603
|
-
*
|
|
1043
|
+
* State transition table for the voice interaction flow.
|
|
1044
|
+
* Maps current state + event type to next state.
|
|
604
1045
|
*/
|
|
605
|
-
const
|
|
1046
|
+
const transitions = {
	// Outer keys are the current state, inner keys are event types, and the
	// values are the next state. A missing entry means the event is ignored.
	idle: { HOTKEY_PRESSED: "listening" },
	listening: {
		HOTKEY_RELEASED: "processing",
		ERROR: "idle"
	},
	processing: {
		AI_RESPONSE_COMPLETE: "responding",
		// Interruption: pressing the hotkey mid-processing starts a new turn.
		HOTKEY_PRESSED: "listening",
		ERROR: "idle"
	},
	responding: {
		TTS_COMPLETE: "idle",
		// Interruption: pressing the hotkey mid-response starts a new turn.
		HOTKEY_PRESSED: "listening",
		ERROR: "idle"
	}
};
|
|
606
1063
|
/**
|
|
607
|
-
*
|
|
608
|
-
*
|
|
1064
|
+
* Create a simple typed state machine for the voice interaction flow.
|
|
1065
|
+
*
|
|
1066
|
+
* States: idle -> listening -> processing -> responding -> idle
|
|
1067
|
+
*
|
|
1068
|
+
* Supports interruption: pressing hotkey during processing or responding
|
|
1069
|
+
* immediately transitions back to listening.
|
|
609
1070
|
*/
|
|
610
|
-
function
|
|
611
|
-
|
|
612
|
-
|
|
1071
|
+
function createStateMachine(initial = "idle") {
	let state = initial;
	const listeners = /* @__PURE__ */ new Set();
	function notify() {
		listeners.forEach((listener) => listener());
	}
	return {
		/** @returns {string} The current state. */
		getState: () => state,
		/**
		 * Apply an event to the machine.
		 * @param {{ type: string }} event
		 * @returns {boolean} `true` if the state changed, `false` if the event
		 *   is not valid in the current state.
		 */
		transition: (event) => {
			const row = transitions[state];
			// Only consult the table's own entries: inherited keys such as
			// "toString" must not count as transitions, and an unknown current
			// state has no valid events at all.
			if (!row || !Object.hasOwn(row, event.type)) return false;
			const nextState = row[event.type];
			if (!nextState) return false;
			state = nextState;
			notify();
			return true;
		},
		/**
		 * Register a change listener.
		 * @param {() => void} listener
		 * @returns {() => boolean} Unsubscribe function.
		 */
		subscribe: (listener) => {
			listeners.add(listener);
			return () => listeners.delete(listener);
		},
		/** Force the machine back to "idle" and notify listeners. */
		reset: () => {
			state = "idle";
			notify();
		}
	};
}
|
|
619
|
-
/**
|
|
620
|
-
* Remove POINT tag from response text for display/TTS.
|
|
621
|
-
*/
|
|
622
|
-
function stripPointingTag(response) {
|
|
623
|
-
return response.replace(POINTING_TAG_REGEX, "").trim();
|
|
624
|
-
}
|
|
625
1096
|
//#endregion
|
|
626
1097
|
//#region src/core/client.ts
|
|
627
1098
|
/**
 * Constrain `value` to the range [`min`, `max`].
 *
 * @param {number} value
 * @param {number} min - Lower bound, applied first.
 * @param {number} max - Upper bound, applied second.
 * @returns {number}
 */
function clamp(value, min, max) {
	const raisedToMin = Math.max(value, min);
	return Math.min(raisedToMin, max);
}
|
|
630
|
-
|
|
631
|
-
|
|
1101
|
+
/**
|
|
1102
|
+
* Map coordinate-based pointing from screenshot space to viewport space.
|
|
1103
|
+
*/
|
|
1104
|
+
function mapCoordinatesToViewport(x, y, screenshot) {
	const { width, height, viewportWidth, viewportHeight } = screenshot;
	// Without a positive image size there is nothing to scale by, so the point
	// is returned untouched.
	if (width <= 0 || height <= 0) return {
		x,
		y
	};
	// Scale one axis from image space to viewport space, round to a whole
	// pixel, and keep the result inside [0, viewportSize - 1].
	const project = (value, viewportSize, imageSize) => {
		const scale = viewportSize / imageSize;
		return clamp(Math.round(value * scale), 0, Math.max(viewportSize - 1, 0));
	};
	return {
		x: project(x, viewportWidth, width),
		y: project(y, viewportHeight, height)
	};
}
|
|
640
1116
|
/**
|
|
@@ -658,6 +1134,7 @@ var CursorBuddyClient = class {
|
|
|
658
1134
|
response = "";
|
|
659
1135
|
error = null;
|
|
660
1136
|
abortController = null;
|
|
1137
|
+
historyCommittedForTurn = false;
|
|
661
1138
|
cachedSnapshot;
|
|
662
1139
|
listeners = /* @__PURE__ */ new Set();
|
|
663
1140
|
constructor(endpoint, options = {}, services = {}) {
|
|
@@ -685,6 +1162,7 @@ var CursorBuddyClient = class {
|
|
|
685
1162
|
this.transcript = "";
|
|
686
1163
|
this.response = "";
|
|
687
1164
|
this.error = null;
|
|
1165
|
+
this.historyCommittedForTurn = false;
|
|
688
1166
|
this.pointerController.release();
|
|
689
1167
|
this.stateMachine.transition({ type: "HOTKEY_PRESSED" });
|
|
690
1168
|
this.notify();
|
|
@@ -699,7 +1177,7 @@ var CursorBuddyClient = class {
|
|
|
699
1177
|
this.stateMachine.transition({ type: "HOTKEY_RELEASED" });
|
|
700
1178
|
const signal = this.abortController?.signal;
|
|
701
1179
|
try {
|
|
702
|
-
const [audioBlob, screenshot] = await Promise.all([this.voiceCapture.stop(), this.screenCapture.
|
|
1180
|
+
const [audioBlob, screenshot] = await Promise.all([this.voiceCapture.stop(), this.screenCapture.captureAnnotated()]);
|
|
703
1181
|
if (signal?.aborted) return;
|
|
704
1182
|
const transcript = await this.transcribe(audioBlob, signal);
|
|
705
1183
|
if (signal?.aborted) return;
|
|
@@ -708,7 +1186,7 @@ var CursorBuddyClient = class {
|
|
|
708
1186
|
this.notify();
|
|
709
1187
|
const response = await this.chat(transcript, screenshot, signal);
|
|
710
1188
|
if (signal?.aborted) return;
|
|
711
|
-
const
|
|
1189
|
+
const parsed = parsePointingTagRaw(response);
|
|
712
1190
|
const cleanResponse = stripPointingTag(response);
|
|
713
1191
|
this.response = cleanResponse;
|
|
714
1192
|
this.stateMachine.transition({
|
|
@@ -728,10 +1206,21 @@ var CursorBuddyClient = class {
|
|
|
728
1206
|
}
|
|
729
1207
|
];
|
|
730
1208
|
$conversationHistory.set(newHistory);
|
|
1209
|
+
this.historyCommittedForTurn = true;
|
|
1210
|
+
let pointTarget = null;
|
|
1211
|
+
if (parsed) if (parsed.type === "marker") {
|
|
1212
|
+
const coords = resolveMarkerToCoordinates(screenshot.markerMap, parsed.markerId);
|
|
1213
|
+
if (coords) pointTarget = {
|
|
1214
|
+
...coords,
|
|
1215
|
+
label: parsed.label
|
|
1216
|
+
};
|
|
1217
|
+
} else pointTarget = {
|
|
1218
|
+
...mapCoordinatesToViewport(parsed.x, parsed.y, screenshot),
|
|
1219
|
+
label: parsed.label
|
|
1220
|
+
};
|
|
731
1221
|
if (pointTarget) {
|
|
732
|
-
|
|
733
|
-
this.
|
|
734
|
-
this.pointerController.pointAt(mappedTarget);
|
|
1222
|
+
this.options.onPoint?.(pointTarget);
|
|
1223
|
+
this.pointerController.pointAt(pointTarget);
|
|
735
1224
|
}
|
|
736
1225
|
if (cleanResponse) await this.speak(cleanResponse, signal);
|
|
737
1226
|
if (signal?.aborted) return;
|
|
@@ -772,6 +1261,7 @@ var CursorBuddyClient = class {
|
|
|
772
1261
|
this.transcript = "";
|
|
773
1262
|
this.response = "";
|
|
774
1263
|
this.error = null;
|
|
1264
|
+
this.historyCommittedForTurn = false;
|
|
775
1265
|
this.pointerController.release();
|
|
776
1266
|
this.stateMachine.reset();
|
|
777
1267
|
this.notify();
|
|
@@ -811,11 +1301,34 @@ var CursorBuddyClient = class {
|
|
|
811
1301
|
};
|
|
812
1302
|
}
|
|
813
1303
|
abort() {
	// Runs before the reset steps below so an interrupted turn is still
	// recorded; commitPartialHistory() is a no-op when there is nothing to save.
	this.commitPartialHistory();
	// Signal cancellation through the current controller, then drop it.
	this.abortController?.abort();
	this.abortController = null;
	// Stop audio playback and reset the shared audio level to zero.
	this.audioPlayback.stop();
	$audioLevel.set(0);
}
|
|
1310
|
+
/**
|
|
1311
|
+
* Commit partial turn to history when interrupted.
|
|
1312
|
+
* Only commits if we have both transcript and response,
|
|
1313
|
+
* and haven't already committed for this turn.
|
|
1314
|
+
*/
|
|
1315
|
+
commitPartialHistory() {
|
|
1316
|
+
if (this.historyCommittedForTurn) return;
|
|
1317
|
+
if (!this.transcript || !this.response) return;
|
|
1318
|
+
const newHistory = [
|
|
1319
|
+
...$conversationHistory.get(),
|
|
1320
|
+
{
|
|
1321
|
+
role: "user",
|
|
1322
|
+
content: this.transcript
|
|
1323
|
+
},
|
|
1324
|
+
{
|
|
1325
|
+
role: "assistant",
|
|
1326
|
+
content: this.response
|
|
1327
|
+
}
|
|
1328
|
+
];
|
|
1329
|
+
$conversationHistory.set(newHistory);
|
|
1330
|
+
this.historyCommittedForTurn = true;
|
|
1331
|
+
}
|
|
819
1332
|
async transcribe(blob, signal) {
|
|
820
1333
|
const formData = new FormData();
|
|
821
1334
|
formData.append("audio", blob, "recording.wav");
|
|
@@ -840,7 +1353,8 @@ var CursorBuddyClient = class {
|
|
|
840
1353
|
height: screenshot.height
|
|
841
1354
|
},
|
|
842
1355
|
transcript,
|
|
843
|
-
history
|
|
1356
|
+
history,
|
|
1357
|
+
markerContext: screenshot.markerContext
|
|
844
1358
|
}),
|
|
845
1359
|
signal
|
|
846
1360
|
});
|
|
@@ -887,4 +1401,4 @@ var CursorBuddyClient = class {
|
|
|
887
1401
|
//#endregion
|
|
888
1402
|
export { $buddyScale as a, $buddyRotation as i, $audioLevel as n, $cursorPosition as o, $buddyPosition as r, $pointingTarget as s, CursorBuddyClient as t };
|
|
889
1403
|
|
|
890
|
-
//# sourceMappingURL=client-
|
|
1404
|
+
//# sourceMappingURL=client-DAa4L2fE.mjs.map
|