@absolutejs/voice 0.0.15 → 0.0.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/client/htmxBootstrap.d.ts +1 -0
- package/dist/client/htmxBootstrap.js +888 -0
- package/dist/index.js +100 -7
- package/dist/plugin.d.ts +12 -0
- package/dist/turnDetection.d.ts +3 -1
- package/dist/types.d.ts +3 -0
- package/package.json +2 -2
|
@@ -0,0 +1 @@
|
|
|
1
|
+
/**
 * Entry point exported by the HTMX bootstrap client module.
 * Takes no arguments and returns nothing.
 * NOTE(review): presumably wires up the voice demo roots found in the page — confirm against the implementation.
 */
export declare const initVoiceHTMX: () => void;
|
|
@@ -0,0 +1,888 @@
|
|
|
1
|
+
// src/client/actions.ts
|
|
2
|
+
/**
 * Converts an arbitrary error-like value into a non-empty display string.
 *
 * Resolution order: non-blank string, `Error#message`, a non-blank
 * `message` / `reason` / `description` field, a nested `error`, a nested
 * `cause`, the JSON form of the value, and finally "Unexpected error".
 *
 * @param value Anything thrown or sent by the server.
 * @returns A string that is always defined.
 */
var normalizeErrorMessage = (value) => {
  // Objects already inspected; stops `error`/`cause` cycles from recursing forever.
  const visited = new Set();
  const describe = (input) => {
    if (typeof input === "string" && input.trim()) {
      return input;
    }
    if (input instanceof Error && input.message.trim()) {
      return input.message;
    }
    if (input && typeof input === "object") {
      if (visited.has(input)) {
        return "Unexpected error";
      }
      visited.add(input);
      const record = input;
      for (const key of ["message", "reason", "description"]) {
        const candidate = record[key];
        if (typeof candidate === "string" && candidate.trim()) {
          return candidate;
        }
      }
      if ("error" in record) {
        return describe(record.error);
      }
      if ("cause" in record) {
        return describe(record.cause);
      }
      try {
        // JSON.stringify may yield undefined (e.g. toJSON returning undefined).
        const serialized = JSON.stringify(input);
        if (typeof serialized === "string") {
          return serialized;
        }
      } catch {}
    }
    return "Unexpected error";
  };
  return describe(value);
};
|
|
29
|
+
/**
 * Maps a validated server message onto the store action with the same `type`.
 *
 * @param message Server message; only `message.type` decides the mapping.
 * @returns The matching action, or `null` for types the store does not
 *          handle (for example "pong").
 */
var serverMessageToAction = (message) => {
  switch (message.type) {
    case "assistant":
      return { text: message.text, type: "assistant" };
    case "complete":
      return { sessionId: message.sessionId, type: "complete" };
    case "error":
      // The payload may be a string or a structured error; flatten it to text.
      return { message: normalizeErrorMessage(message.message), type: "error" };
    case "final":
      return { transcript: message.transcript, type: "final" };
    case "partial":
      return { transcript: message.transcript, type: "partial" };
    case "session":
      return {
        sessionId: message.sessionId,
        status: message.status,
        type: "session"
      };
    case "turn":
      return { turn: message.turn, type: "turn" };
    default:
      return null;
  }
};
|
|
71
|
+
|
|
72
|
+
// src/client/connection.ts
|
|
73
|
+
// WebSocket readyState values used by the connection code.
var WS_OPEN = 1;
var WS_CLOSED = 3;
// Close code treated as an intentional shutdown (never triggers a reconnect).
var WS_NORMAL_CLOSURE = 1000;
var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
// Milliseconds between keep-alive pings.
var DEFAULT_PING_INTERVAL = 30000;
// Milliseconds to wait before each reconnect attempt.
var RECONNECT_DELAY_MS = 500;
var noop = () => {};
// subscribe() stand-in: returns an unsubscribe function that does nothing.
var noopUnsubscribe = () => noop;
// Inert connection returned when no `window` is available.
var NOOP_CONNECTION = {
  close: noop,
  endTurn: noop,
  getReadyState: () => WS_CLOSED,
  getSessionId: () => "",
  send: noop,
  sendAudio: noop,
  subscribe: noopUnsubscribe
};
|
|
90
|
+
var createSessionId = () => crypto.randomUUID();
|
|
91
|
+
/**
 * Builds the WebSocket URL for a voice endpoint on the current page's host.
 *
 * @param path Endpoint path (appended directly after host and port).
 * @param sessionId Value stored in the `sessionId` query parameter.
 * @returns `ws:`/`wss:` URL string; `wss:` is used when the page is `https:`.
 */
var buildWsUrl = (path, sessionId) => {
  const { hostname, port, protocol } = window.location;
  const wsProtocol = protocol === "https:" ? "wss:" : "ws:";
  // location.port is empty for default ports, so only add ":port" when set.
  const portSuffix = port ? `:${port}` : "";
  const url = new URL(`${wsProtocol}//${hostname}${portSuffix}${path}`);
  url.searchParams.set("sessionId", sessionId);
  return url.toString();
};
|
|
99
|
+
/**
 * Checks that a parsed payload is an object whose `type` is one of the
 * server message kinds this client understands.
 *
 * @param value Parsed JSON of unknown shape.
 * @returns `true` only for a known message `type`; other fields are not validated.
 */
var isVoiceServerMessage = (value) => {
  if (!value || typeof value !== "object" || !("type" in value)) {
    return false;
  }
  switch (value.type) {
    case "assistant":
    case "complete":
    case "error":
    case "final":
    case "partial":
    case "pong":
    case "session":
    case "turn":
      return true;
    default:
      return false;
  }
};
|
|
117
|
+
/**
 * Decodes a WebSocket message event into a recognised server message.
 *
 * @param event Object with a `data` field (a WebSocket `MessageEvent`).
 * @returns The parsed message, or `null` when `data` is not a string, is
 *          not valid JSON, or has an unknown `type`.
 */
var parseServerMessage = (event) => {
  // Binary frames are not part of the JSON protocol handled here.
  if (typeof event.data !== "string") {
    return null;
  }
  try {
    const parsed = JSON.parse(event.data);
    return isVoiceServerMessage(parsed) ? parsed : null;
  } catch {
    return null;
  }
};
|
|
128
|
+
/**
 * Opens a WebSocket voice connection with keep-alive pings, message
 * queueing while the socket is not open, and bounded automatic reconnects.
 *
 * @param path Endpoint path passed to `buildWsUrl`.
 * @param options Optional settings: `reconnect` (default on),
 *        `maxReconnectAttempts`, `pingInterval` (ms), `sessionId`.
 * @returns Connection handle (`close`, `endTurn`, `getReadyState`,
 *          `getSessionId`, `send`, `sendAudio`, `subscribe`). Without a
 *          `window` the inert `NOOP_CONNECTION` is returned.
 */
var createVoiceConnection = (path, options = {}) => {
  if (typeof window === "undefined") {
    return NOOP_CONNECTION;
  }
  const listeners = new Set();
  const shouldReconnect = options.reconnect !== false;
  const maxReconnectAttempts = options.maxReconnectAttempts ?? DEFAULT_MAX_RECONNECT_ATTEMPTS;
  const pingInterval = options.pingInterval ?? DEFAULT_PING_INTERVAL;
  const state = {
    isConnected: false,
    pendingMessages: [],
    pingInterval: null,
    reconnectAttempts: 0,
    reconnectTimeout: null,
    sessionId: options.sessionId ?? createSessionId(),
    ws: null
  };
  // Set once close() is called; nothing may reconnect or queue afterwards.
  let closedByCaller = false;
  const clearTimers = () => {
    if (state.pingInterval) {
      clearInterval(state.pingInterval);
      state.pingInterval = null;
    }
    if (state.reconnectTimeout) {
      clearTimeout(state.reconnectTimeout);
      state.reconnectTimeout = null;
    }
  };
  // Sends everything queued while the socket was not open, in order.
  const flushPendingMessages = () => {
    if (state.ws?.readyState !== WS_OPEN) {
      return;
    }
    while (state.pendingMessages.length > 0) {
      const next = state.pendingMessages.shift();
      if (next !== undefined) {
        state.ws.send(next);
      }
    }
  };
  const scheduleReconnect = () => {
    state.reconnectAttempts += 1;
    state.reconnectTimeout = setTimeout(() => {
      if (state.reconnectAttempts > maxReconnectAttempts) {
        return;
      }
      connect();
    }, RECONNECT_DELAY_MS);
  };
  const connect = () => {
    if (closedByCaller) {
      return;
    }
    const ws = new WebSocket(buildWsUrl(path, state.sessionId));
    ws.binaryType = "arraybuffer";
    ws.onopen = () => {
      state.isConnected = true;
      state.reconnectAttempts = 0;
      flushPendingMessages();
      // Announce the open socket locally as an active session.
      listeners.forEach((listener) => listener({
        sessionId: state.sessionId,
        status: "active",
        type: "session"
      }));
      state.pingInterval = setInterval(() => {
        if (ws.readyState === WS_OPEN) {
          ws.send(JSON.stringify({ type: "ping" }));
        }
      }, pingInterval);
    };
    ws.onmessage = (event) => {
      const parsed = parseServerMessage(event);
      if (!parsed) {
        return;
      }
      // The server may assign or confirm the session id; keep it for reconnects.
      if (parsed.type === "session") {
        state.sessionId = parsed.sessionId;
      }
      listeners.forEach((listener) => listener(parsed));
    };
    ws.onclose = (event) => {
      // Ignore sockets that are no longer current (replaced, or released by
      // close()): they must not clear newer timers or trigger a reconnect.
      if (state.ws !== ws) {
        return;
      }
      state.isConnected = false;
      clearTimers();
      const reconnectable = shouldReconnect && !closedByCaller && event.code !== WS_NORMAL_CLOSURE && state.reconnectAttempts < maxReconnectAttempts;
      if (reconnectable) {
        scheduleReconnect();
      }
    };
    state.ws = ws;
  };
  const sendSerialized = (value) => {
    // After close() nothing will ever flush the queue, so drop the payload.
    if (closedByCaller) {
      return;
    }
    if (state.ws?.readyState === WS_OPEN) {
      state.ws.send(value);
      return;
    }
    state.pendingMessages.push(value);
  };
  const send = (message) => {
    sendSerialized(JSON.stringify(message));
  };
  const sendAudio = (audio) => {
    sendSerialized(audio);
  };
  const endTurn = () => {
    send({ type: "end_turn" });
  };
  const close = () => {
    closedByCaller = true;
    clearTimers();
    if (state.ws) {
      state.ws.close(WS_NORMAL_CLOSURE);
      state.ws = null;
    }
    state.isConnected = false;
    state.pendingMessages.length = 0;
    listeners.clear();
  };
  const subscribe = (callback) => {
    listeners.add(callback);
    return () => {
      listeners.delete(callback);
    };
  };
  connect();
  return {
    close,
    endTurn,
    getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
    getSessionId: () => state.sessionId,
    send,
    sendAudio,
    subscribe
  };
};
|
|
255
|
+
|
|
256
|
+
// src/client/store.ts
|
|
257
|
+
/**
 * Creates the empty voice-stream state: idle, disconnected, no session,
 * no transcript and no turns. Every call returns fresh arrays.
 */
var createInitialState = () => ({
  assistantTexts: [],
  error: null,
  isConnected: false,
  partial: "",
  sessionId: null,
  status: "idle",
  turns: []
});
|
|
266
|
+
/**
 * Creates a small immutable-snapshot store for voice stream state.
 *
 * `dispatch` replaces the snapshot according to the action type and then
 * notifies every subscriber, including for action types it does not handle.
 *
 * @returns `{ dispatch, getServerSnapshot, getSnapshot, subscribe }`;
 *          `subscribe` returns an unsubscribe function.
 */
var createVoiceStreamStore = () => {
  let state = createInitialState();
  const subscribers = new Set();
  const notify = () => {
    subscribers.forEach((subscriber) => subscriber());
  };
  const dispatch = (action) => {
    switch (action.type) {
      case "assistant":
        state = {
          ...state,
          assistantTexts: [...state.assistantTexts, action.text]
        };
        break;
      case "complete":
        state = {
          ...state,
          sessionId: action.sessionId,
          status: "completed"
        };
        break;
      case "connected":
        state = { ...state, isConnected: true };
        break;
      case "disconnected":
        state = { ...state, isConnected: false };
        break;
      case "error":
        state = { ...state, error: action.message };
        break;
      case "final":
        state = {
          ...state,
          partial: action.transcript.text,
          // Copy of the turns array (contents unchanged), as in the original bundle.
          turns: state.turns.map((turn) => turn)
        };
        break;
      case "partial":
        state = { ...state, partial: action.transcript.text };
        break;
      case "session":
        // A session update also clears any previous error.
        state = {
          ...state,
          error: null,
          isConnected: action.status === "active",
          sessionId: action.sessionId,
          status: action.status
        };
        break;
      case "turn":
        // A committed turn supersedes the in-progress partial transcript.
        state = {
          ...state,
          partial: "",
          turns: [...state.turns, action.turn]
        };
        break;
    }
    notify();
  };
  return {
    dispatch,
    getServerSnapshot: () => state,
    getSnapshot: () => state,
    subscribe: (subscriber) => {
      subscribers.add(subscriber);
      return () => {
        subscribers.delete(subscriber);
      };
    }
  };
};
|
|
349
|
+
|
|
350
|
+
// src/client/createVoiceStream.ts
|
|
351
|
+
/**
 * Couples a voice connection to a state store and exposes both as one
 * observable stream object.
 *
 * @param path Endpoint path for the underlying connection.
 * @param options Forwarded unchanged to `createVoiceConnection`.
 * @returns Stream with live getters (`error`, `isConnected`, `partial`,
 *          `sessionId`, `status`, `turns`, `assistantTexts`), snapshot
 *          accessors, `sendAudio`, `endTurn`, `subscribe` and `close`.
 */
var createVoiceStream = (path, options = {}) => {
  const connection = createVoiceConnection(path, options);
  const store = createVoiceStreamStore();
  const subscribers = new Set();
  const notify = () => {
    subscribers.forEach((subscriber) => subscriber());
  };
  // Translate each server message into a store action, then tell stream subscribers.
  const unsubscribeConnection = connection.subscribe((message) => {
    const action = serverMessageToAction(message);
    if (action) {
      store.dispatch(action);
      notify();
    }
  });
  return {
    close() {
      unsubscribeConnection();
      connection.close();
      store.dispatch({ type: "disconnected" });
      notify();
    },
    endTurn() {
      connection.endTurn();
    },
    get error() {
      return store.getSnapshot().error;
    },
    getServerSnapshot() {
      return store.getServerSnapshot();
    },
    getSnapshot() {
      return store.getSnapshot();
    },
    get isConnected() {
      return store.getSnapshot().isConnected;
    },
    get partial() {
      return store.getSnapshot().partial;
    },
    // Read from the connection, which has an id before any server message arrives.
    get sessionId() {
      return connection.getSessionId();
    },
    get status() {
      return store.getSnapshot().status;
    },
    get turns() {
      return store.getSnapshot().turns;
    },
    get assistantTexts() {
      return store.getSnapshot().assistantTexts;
    },
    sendAudio(audio) {
      connection.sendAudio(audio);
    },
    subscribe(subscriber) {
      subscribers.add(subscriber);
      return () => {
        subscribers.delete(subscriber);
      };
    }
  };
};
|
|
413
|
+
|
|
414
|
+
// src/client/htmx.ts
|
|
415
|
+
// Event triggered on the bound element when no `eventName` option is given.
var DEFAULT_EVENT_NAME = "voice-refresh";
// Query parameter that carries the session id when no `sessionQueryParam` option is given.
var DEFAULT_QUERY_PARAM = "sessionId";
|
|
417
|
+
/**
 * Resolves an element reference.
 *
 * @param input A CSS selector string, or an element (or null) passed through as-is.
 * @returns The first match of `document.querySelector` for strings, else `input`.
 */
var resolveElement = (input) => {
  if (typeof input !== "string") {
    return input;
  }
  return document.querySelector(input);
};
|
|
423
|
+
/**
 * Computes the `hx-get` route for an element with the session id applied.
 *
 * @param element Element whose current `hx-get` is the fallback route.
 * @param route Explicit route; when nullish the element's `hx-get` is used.
 * @param queryParam Name of the query parameter holding the session id.
 * @param sessionId Session id; when falsy the parameter is removed instead.
 * @returns Path + query + hash (origin dropped), or "" when no route exists.
 */
var buildRoute = (element, route, queryParam, sessionId) => {
  const baseRoute = route ?? element.getAttribute("hx-get") ?? "";
  if (!baseRoute) {
    return "";
  }
  // Resolve against the page origin so relative routes parse as URLs.
  const url = new URL(baseRoute, window.location.origin);
  if (sessionId) {
    url.searchParams.set(queryParam, sessionId);
  } else {
    url.searchParams.delete(queryParam);
  }
  return `${url.pathname}${url.search}${url.hash}`;
};
|
|
436
|
+
/**
 * Keeps an HTMX element in step with a voice stream. On every stream
 * notification (and once immediately) the element's `hx-get` is rewritten
 * with the current session id, then HTMX re-processes the element and a
 * refresh event is triggered on it.
 *
 * @param stream Voice stream exposing `sessionId` and `subscribe`.
 * @param options `element` (element or selector), plus optional `route`,
 *        `eventName` and `sessionQueryParam`.
 * @returns Function that stops the synchronisation; a no-op outside the
 *          browser or when the element cannot be resolved.
 */
var bindVoiceHTMX = (stream, options) => {
  if (typeof window === "undefined" || typeof document === "undefined") {
    return () => {};
  }
  const element = resolveElement(options.element);
  if (!element) {
    return () => {};
  }
  const eventName = options.eventName ?? DEFAULT_EVENT_NAME;
  const queryParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
  const sync = () => {
    const htmxWindow = window;
    const nextRoute = buildRoute(element, options.route, queryParam, stream.sessionId);
    if (nextRoute) {
      element.setAttribute("hx-get", nextRoute);
    }
    // HTMX is optional at runtime: both calls are skipped when it is absent.
    htmxWindow.htmx?.process?.(element);
    htmxWindow.htmx?.trigger?.(element, eventName);
  };
  const unsubscribe = stream.subscribe(sync);
  sync();
  return () => {
    unsubscribe();
  };
};
|
|
461
|
+
|
|
462
|
+
// src/client/microphone.ts
|
|
463
|
+
// Limits an audio sample to the range [-1, 1].
var clampSample = (value) => {
  const capped = Math.min(1, value);
  return Math.max(-1, capped);
};
|
|
464
|
+
/**
 * Converts float samples to signed 16-bit PCM.
 *
 * @param input Indexable sequence of float samples (a `Float32Array` in the visible caller).
 * @returns `Uint8Array` over the bytes of the `Int16Array` result
 *          (two bytes per sample, platform byte order).
 */
var floatTo16BitPCM = (input) => {
  const output = new Int16Array(input.length);
  for (let index = 0; index < input.length; index += 1) {
    const sample = clampSample(input[index] ?? 0);
    // The int16 range is asymmetric: -32768..32767.
    output[index] = sample < 0 ? sample * 32768 : sample * 32767;
  }
  return new Uint8Array(output.buffer);
};
|
|
472
|
+
/**
 * Resamples audio from `sourceRate` to `targetRate`.
 *
 * Downsampling averages each window of source samples. When the target rate
 * is higher than the source rate, each output sample repeats the nearest
 * earlier source sample instead of leaving zero-filled gaps.
 *
 * @param input Source samples (`Float32Array`).
 * @param sourceRate Sample rate of `input` in Hz.
 * @param targetRate Desired sample rate in Hz.
 * @returns `input` itself when the rates match, otherwise a new `Float32Array`.
 */
var downsampleBuffer = (input, sourceRate, targetRate) => {
  if (sourceRate === targetRate) {
    return input;
  }
  const ratio = sourceRate / targetRate;
  const length = Math.round(input.length / ratio);
  const output = new Float32Array(length);
  if (ratio < 1) {
    // Upsampling: window averaging would hit empty windows, so hold samples.
    const lastIndex = input.length - 1;
    for (let index = 0; index < output.length; index += 1) {
      output[index] = input[Math.min(lastIndex, Math.floor(index * ratio))] ?? 0;
    }
    return output;
  }
  let offsetResult = 0;
  let offsetBuffer = 0;
  while (offsetResult < output.length) {
    const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
    let accum = 0;
    let count = 0;
    for (let index = offsetBuffer; index < nextOffsetBuffer && index < input.length; index += 1) {
      accum += input[index] ?? 0;
      count += 1;
    }
    output[offsetResult] = count > 0 ? accum / count : 0;
    offsetResult += 1;
    offsetBuffer = nextOffsetBuffer;
  }
  return output;
};
|
|
495
|
+
/**
 * Creates a browser microphone capture that delivers 16-bit PCM chunks.
 *
 * @param options `onAudio(bytes)` receives each chunk; optional
 *        `channelCount` (default 1) and `sampleRateHz` (default 16000).
 * @returns `{ start, stop }`. `start` resolves once capture is running and
 *          rejects when `getUserMedia` or `AudioContext` is unavailable or
 *          the request fails; `stop` releases every acquired resource and
 *          is safe to call at any time.
 */
var createMicrophoneCapture = (options) => {
  let audioContext = null;
  let sourceNode = null;
  let processorNode = null;
  let mediaStream = null;
  const stop = () => {
    if (processorNode) {
      processorNode.onaudioprocess = null;
      processorNode.disconnect();
    }
    sourceNode?.disconnect();
    mediaStream?.getTracks().forEach((track) => track.stop());
    // close() returns a promise; swallow rejections (e.g. already closed).
    audioContext?.close()?.catch?.(() => {});
    audioContext = null;
    mediaStream = null;
    processorNode = null;
    sourceNode = null;
  };
  const start = async () => {
    if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
      throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
    }
    // `typeof` guard: a bare `AudioContext` reference would throw a
    // ReferenceError where the global is missing, hiding the error below.
    const AudioContextCtor = (typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined) ?? (typeof AudioContext !== "undefined" ? AudioContext : undefined);
    if (!AudioContextCtor) {
      throw new Error("Browser microphone capture requires AudioContext support.");
    }
    // Release a previous capture so a repeated start() cannot leak it.
    if (mediaStream || audioContext) {
      stop();
    }
    mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: options.channelCount ?? 1
      }
    });
    try {
      audioContext = new AudioContextCtor();
      sourceNode = audioContext.createMediaStreamSource(mediaStream);
      processorNode = audioContext.createScriptProcessor(4096, 1, 1);
      processorNode.onaudioprocess = (event) => {
        const channel = event.inputBuffer.getChannelData(0);
        const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
        options.onAudio(floatTo16BitPCM(downsampled));
      };
      sourceNode.connect(processorNode);
      processorNode.connect(audioContext.destination);
    } catch (error) {
      // The microphone is already open here; do not leave it live on failure.
      stop();
      throw error;
    }
  };
  return { start, stop };
};
|
|
536
|
+
|
|
537
|
+
// src/client/htmxBootstrap.ts
|
|
538
|
+
// Waveform geometry: number of level samples and the SVG drawing size.
var VOICE_WAVE_POINTS = 48;
var VOICE_WAVE_WIDTH = 320;
var VOICE_WAVE_HEIGHT = 88;
// Default labels and user-facing copy for the demo UI.
var DEFAULT_GUIDED_LABEL = "Guided test";
var DEFAULT_GENERAL_LABEL = "General recording";
var DEFAULT_IDLE_LEAD = "Pick a mode to begin the demo.";
var DEFAULT_GUIDED_LEAD = "I can walk you through a short guided voice test.";
var DEFAULT_GENERAL_LEAD = "I can capture one freeform recording and confirm that it landed.";
var DEFAULT_IDLE_PROMPT = "Choose a mode to begin. Guided test asks follow-up prompts. General recording just captures what you say.";
var DEFAULT_GENERAL_IDLE_PROMPT = "Click Start general recording to capture one freeform answer.";
var DEFAULT_GENERAL_LIVE_PROMPT = "Speak freely. When you pause, the recording will be captured.";
var DEFAULT_GENERAL_COMPLETE_PROMPT = "Recording saved. Start again if you want another capture.";
var DEFAULT_GUIDED_COMPLETE_PROMPT = "Guided test complete. Review the saved summary below.";
var DEFAULT_GUIDED_OVERFLOW_PROMPT = "All prompts are covered. You can stop the microphone or keep speaking for extra detail.";
var DEFAULT_MIC_IDLE = "Ready. Start guided test or general recording to begin.";
var DEFAULT_MIC_LIVE = "Live. Answer the prompt, then click Stop microphone when finished.";
// Prompts used for the guided mode when none are configured on the root element.
var DEFAULT_GUIDED_PROMPTS = [
  "Start with a quick introduction about who you are.",
  "Now describe what you are trying to do or test.",
  "Finish with any detail that feels blocked, risky, or unclear."
];
|
|
559
|
+
// Restricts `value` to the inclusive range [min, max].
var clamp = (value, min, max) => {
  const atLeastMin = Math.max(min, value);
  return Math.min(max, atLeastMin);
};
|
|
560
|
+
// Replacement entity for each character that is unsafe inside HTML text or attributes.
var HTML_ESCAPE_ENTITIES = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&#39;"
};
/**
 * Escapes a string for safe interpolation into HTML markup.
 *
 * @param value Text to escape.
 * @returns `value` with `&`, `<`, `>`, `"` and `'` replaced by HTML entities.
 */
var escapeHtml = (value) => value.replace(/[&<>"']/g, (character) => HTML_ESCAPE_ENTITIES[character]);
|
|
561
|
+
/**
 * Reads one field of an error-like record as display text.
 *
 * @param value Object to read from.
 * @param key Field name.
 * @returns The field when it is a non-blank string, otherwise `null`.
 */
var readErrorField = (value, key) => {
  const candidate = value[key];
  if (typeof candidate === "string" && candidate.trim()) {
    return candidate;
  }
  return null;
};
|
|
568
|
+
/**
 * Formats an arbitrary thrown value as a non-empty message for the UI.
 *
 * Resolution order: non-blank string, `Error#message`, a non-blank
 * `message` / `reason` / `description` field, a nested `error`, a nested
 * `cause`, the JSON form of the value, and finally "Unexpected error".
 *
 * @param error Anything caught or received.
 * @returns A string that is always defined.
 */
var formatErrorMessage = (error) => {
  // Objects already inspected; stops `error`/`cause` cycles from recursing forever.
  const visited = new Set();
  const describe = (input) => {
    if (typeof input === "string" && input.trim()) {
      return input;
    }
    if (input instanceof Error && input.message.trim()) {
      return input.message;
    }
    if (input && typeof input === "object") {
      if (visited.has(input)) {
        return "Unexpected error";
      }
      visited.add(input);
      const record = input;
      const direct = readErrorField(record, "message") ?? readErrorField(record, "reason") ?? readErrorField(record, "description");
      if (direct) {
        return direct;
      }
      if ("error" in record) {
        return describe(record.error);
      }
      if ("cause" in record) {
        return describe(record.cause);
      }
      try {
        // JSON.stringify may yield undefined (e.g. toJSON returning undefined).
        const serialized = JSON.stringify(input);
        if (typeof serialized === "string") {
          return serialized;
        }
      } catch {}
    }
    return "Unexpected error";
  };
  return describe(error);
};
|
|
593
|
+
// Builds a silent waveform: `count` levels, all zero.
var createInitialVoiceWaveLevels = (count = VOICE_WAVE_POINTS) => {
  const silent = Array.from({ length: count });
  return silent.fill(0);
};
|
|
594
|
+
/**
 * Appends a level to a fixed-size rolling window of waveform levels.
 *
 * @param levels Current levels, oldest first; not mutated.
 * @param nextLevel New level; clamped to [0, 1], non-finite values count as 0.
 * @param count Window size.
 * @returns New array of exactly `count` entries, left-padded with zeros
 *          (empty when `count` is not positive).
 */
var pushVoiceWaveLevel = (levels, nextLevel, count = VOICE_WAVE_POINTS) => {
  if (!(count > 0)) {
    return [];
  }
  // slice(-0) would copy the whole array, so a one-slot window keeps nothing.
  const keep = count - 1;
  const next = keep > 0 ? levels.slice(-keep) : [];
  next.push(Number.isFinite(nextLevel) ? Math.min(1, Math.max(0, nextLevel)) : 0);
  while (next.length < count) {
    next.unshift(0);
  }
  return next;
};
|
|
602
|
+
/**
 * Builds the SVG path data (`d` attribute) for the microphone waveform.
 *
 * @param levels Levels in [0, 1]; with fewer than two entries a silent
 *        default waveform is used instead.
 * @param width Drawing width.
 * @param height Drawing height.
 * @returns A flat centre line when the input is effectively silent (or not
 *          numeric), otherwise a chain of quadratic curves through one
 *          point per level.
 */
var createVoiceWavePath = (levels, width = VOICE_WAVE_WIDTH, height = VOICE_WAVE_HEIGHT) => {
  const samples = levels.length > 1 ? levels : createInitialVoiceWaveLevels(VOICE_WAVE_POINTS);
  const step = width / (samples.length - 1);
  const center = height / 2;
  const maxAmplitude = height * 0.34;
  const peakLevel = Math.max(...samples, 0);
  // Written as a negated ">" so a NaN peak also yields the flat line
  // instead of a path full of NaN coordinates.
  if (!(peakLevel > 0.015)) {
    return `M 0 ${center} L ${width} ${center}`;
  }
  const points = samples.map((level, index) => {
    // Two fixed sine terms give the line an organic wobble; the level scales it.
    const phase = index * 0.76;
    const wobble = Math.sin(phase) * 0.78 + Math.sin(phase * 0.41) * 0.22;
    const amplitude = level * maxAmplitude;
    return {
      x: step * index,
      // Keep an 8-unit margin from the top and bottom edges.
      y: clamp(center + wobble * amplitude, 8, height - 8)
    };
  });
  let path = `M ${points[0]?.x ?? 0} ${points[0]?.y ?? center}`;
  for (let index = 1; index < points.length; index += 1) {
    const previous = points[index - 1];
    const current = points[index];
    if (!previous || !current) {
      continue;
    }
    const controlX = (previous.x + current.x) / 2;
    path += ` Q ${controlX} ${previous.y} ${current.x} ${current.y}`;
  }
  return path;
};
|
|
634
|
+
/**
 * Estimates loudness of a 16-bit PCM chunk as a level in [0, 1].
 *
 * The level is the RMS of the samples (normalised by 32768) multiplied by a
 * fixed gain of 5.5 and clamped.
 *
 * @param audio PCM bytes as a `Uint8Array`, another typed-array/DataView
 *        over the same bytes, or an `ArrayBuffer`. A trailing odd byte is ignored.
 * @returns 0 for chunks shorter than one sample, otherwise the level.
 */
var getPcmLevel = (audio) => {
  let bytes;
  if (audio instanceof Uint8Array) {
    bytes = audio;
  } else if (ArrayBuffer.isView(audio)) {
    // View the same bytes; `new Uint8Array(view)` would convert element
    // values instead of reinterpreting the underlying memory.
    bytes = new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
  } else {
    bytes = new Uint8Array(audio);
  }
  const sampleCount = Math.floor(bytes.byteLength / 2);
  if (sampleCount === 0) {
    return 0;
  }
  // Int16Array requires a 2-byte aligned offset; copy when the view is unaligned.
  const samples = bytes.byteOffset % 2 === 0
    ? new Int16Array(bytes.buffer, bytes.byteOffset, sampleCount)
    : new Int16Array(bytes.slice(0, sampleCount * 2).buffer);
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  const rms = Math.sqrt(sumSquares / samples.length);
  return Math.min(1, Math.max(0, rms * 5.5));
};
|
|
651
|
+
/**
 * Parses the guided prompt list from a JSON attribute value.
 *
 * @param value JSON text expected to hold an array of strings; may be empty or undefined.
 * @returns The trimmed, non-empty string entries; `DEFAULT_GUIDED_PROMPTS`
 *          when the value is missing, invalid JSON, not an array, or
 *          contains no usable strings.
 */
var parsePromptList = (value) => {
  if (!value) {
    return DEFAULT_GUIDED_PROMPTS;
  }
  try {
    const parsed = JSON.parse(value);
    if (Array.isArray(parsed)) {
      const prompts = parsed
        .filter((entry) => typeof entry === "string")
        .map((entry) => entry.trim())
        .filter(Boolean);
      if (prompts.length > 0) {
        return prompts;
      }
    }
  } catch {}
  return DEFAULT_GUIDED_PROMPTS;
};
|
|
666
|
+
/**
 * Finds a required DOM element for the bootstrap.
 *
 * Lookup order: the configured `selector` searched in the whole document,
 * then `#name` searched inside `root`. A result only counts when it is an
 * instance of `ctor`.
 *
 * @param root Node used for the `#name` fallback lookup.
 * @param selector Optional CSS selector (from a data attribute).
 * @param ctor Constructor the element must be an instance of.
 * @param name Fallback element id; also used in the error message.
 * @returns The matching element.
 * @throws Error when neither lookup yields an instance of `ctor`.
 */
var requireElement = (root, selector, ctor, name) => {
  let value = null;
  if (selector) {
    try {
      value = document.querySelector(selector);
    } catch {
      // A malformed selector is treated as "not found" so the id fallback
      // and the descriptive error below still apply.
      value = null;
    }
  }
  if (value instanceof ctor) {
    return value;
  }
  const fallback = root.querySelector(`#${name}`);
  if (fallback instanceof ctor) {
    return fallback;
  }
  throw new Error(`Voice HTMX bootstrap could not find the required element "${name}".`);
};
|
|
677
|
+
/**
 * Chooses the lead (first assistant bubble) text for the chat list.
 *
 * @param input `{ mode, hasStarted, status, guidedPrompts, turnCount }`.
 * @returns Idle copy when no mode is selected, the mode introduction before
 *          the mode has started, the completion copy once `status` is
 *          "completed", the live copy for general mode, or the guided
 *          prompt at index `turnCount` (overflow copy past the last prompt).
 */
var resolveLeadMessage = (input) => {
  if (!input.mode) {
    return DEFAULT_IDLE_LEAD;
  }
  if (!input.hasStarted) {
    return input.mode === "guided" ? DEFAULT_GUIDED_LEAD : DEFAULT_GENERAL_LEAD;
  }
  if (input.status === "completed") {
    return input.mode === "guided" ? DEFAULT_GUIDED_COMPLETE_PROMPT : DEFAULT_GENERAL_COMPLETE_PROMPT;
  }
  if (input.mode === "general") {
    return DEFAULT_GENERAL_LIVE_PROMPT;
  }
  return input.guidedPrompts[input.turnCount] ?? DEFAULT_GUIDED_OVERFLOW_PROMPT;
};
|
|
692
|
+
/**
 * Chooses the text for the prompt status line.
 *
 * Unlike the lead message, completion is checked before the "not started"
 * state, and general mode switches to the completion copy as soon as one
 * turn exists.
 *
 * @param input `{ mode, hasStarted, status, guidedPrompts, turnCount }`.
 * @returns The prompt text for the current state.
 */
var resolvePromptMessage = (input) => {
  if (!input.mode) {
    return DEFAULT_IDLE_PROMPT;
  }
  if (input.status === "completed") {
    return input.mode === "guided" ? DEFAULT_GUIDED_COMPLETE_PROMPT : DEFAULT_GENERAL_COMPLETE_PROMPT;
  }
  if (!input.hasStarted) {
    return input.mode === "guided"
      ? `Click Start guided test to begin. First prompt: ${input.guidedPrompts[0] ?? "Answer the first prompt."}`
      : DEFAULT_GENERAL_IDLE_PROMPT;
  }
  if (input.mode === "general") {
    return input.turnCount === 0 ? DEFAULT_GENERAL_LIVE_PROMPT : DEFAULT_GENERAL_COMPLETE_PROMPT;
  }
  return input.guidedPrompts[input.turnCount] ?? DEFAULT_GUIDED_OVERFLOW_PROMPT;
};
|
|
707
|
+
/**
 * Wraps microphone capture for the demo UI, reporting a loudness level for
 * every audio chunk before forwarding it.
 *
 * @param onAudio Receives each PCM chunk.
 * @param onLevel Receives the chunk's level in [0, 1], and 0 on stop.
 * @returns `{ start, stop }`. `start` does nothing while a capture is
 *          active and rethrows capture errors; `stop` is always safe to call.
 */
var createDemoMicrophone = (onAudio, onLevel) => {
  let capture = null;
  return {
    start: async () => {
      if (capture) {
        return;
      }
      const nextCapture = createMicrophoneCapture({
        onAudio: (audio) => {
          onLevel(getPcmLevel(audio));
          onAudio(audio);
        },
        sampleRateHz: 16000
      });
      // Claim the slot before awaiting so concurrent start() calls are ignored.
      capture = nextCapture;
      try {
        await nextCapture.start();
      } catch (error) {
        // Only release the slot if it is still ours (stop + start may have replaced it).
        if (capture === nextCapture) {
          capture = null;
        }
        throw error;
      }
      // stop() ran while the permission prompt was pending: the capture only
      // became live now, so shut it down instead of leaving the microphone open.
      if (capture !== nextCapture) {
        nextCapture.stop();
      }
    },
    stop: () => {
      capture?.stop();
      capture = null;
      onLevel(0);
    }
  };
};
|
|
736
|
+
var initVoiceHTMXRoot = (root) => {
|
|
737
|
+
const guidedPath = root.dataset.voiceGuidedPath;
|
|
738
|
+
const generalPath = root.dataset.voiceGeneralPath;
|
|
739
|
+
if (!guidedPath || !generalPath) {
|
|
740
|
+
throw new Error("Voice HTMX bootstrap requires data-voice-guided-path and data-voice-general-path.");
|
|
741
|
+
}
|
|
742
|
+
const guidedPrompts = parsePromptList(root.dataset.voiceGuidedPrompts);
|
|
743
|
+
const guidedLabel = root.dataset.voiceGuidedLabel ?? DEFAULT_GUIDED_LABEL;
|
|
744
|
+
const generalLabel = root.dataset.voiceGeneralLabel ?? DEFAULT_GENERAL_LABEL;
|
|
745
|
+
const syncElement = requireElement(document, root.dataset.voiceSync, HTMLElement, "voice-htmx-sync");
|
|
746
|
+
const connectionMetric = requireElement(root, root.dataset.voiceConnection, HTMLElement, "metric-connection");
|
|
747
|
+
const errorStatus = requireElement(root, root.dataset.voiceError, HTMLElement, "status-error");
|
|
748
|
+
const microphoneStatus = requireElement(root, root.dataset.voiceMicrophone, HTMLElement, "status-mic");
|
|
749
|
+
const promptStatus = requireElement(root, root.dataset.voicePrompt, HTMLElement, "status-prompt");
|
|
750
|
+
const chatList = requireElement(root, root.dataset.voiceChat, HTMLElement, "chat-list");
|
|
751
|
+
const startGuidedButton = requireElement(root, root.dataset.voiceStartGuided, HTMLButtonElement, "start-guided");
|
|
752
|
+
const startGeneralButton = requireElement(root, root.dataset.voiceStartGeneral, HTMLButtonElement, "start-general");
|
|
753
|
+
const stopButton = requireElement(root, root.dataset.voiceStop, HTMLButtonElement, "stop-mic");
|
|
754
|
+
const voiceMonitor = requireElement(root, root.dataset.voiceMonitor, HTMLElement, "voice-monitor");
|
|
755
|
+
const voiceMonitorCopy = requireElement(root, root.dataset.voiceMonitorCopy, HTMLElement, "voice-monitor-copy");
|
|
756
|
+
const voiceWaveGlow = requireElement(root, root.dataset.voiceWaveGlow, SVGPathElement, "voice-wave-glow");
|
|
757
|
+
const voiceWavePath = requireElement(root, root.dataset.voiceWavePath, SVGPathElement, "voice-wave-path");
|
|
758
|
+
const guidedVoice = createVoiceStream(guidedPath);
|
|
759
|
+
const generalVoice = createVoiceStream(generalPath);
|
|
760
|
+
const stopGuidedBinding = bindVoiceHTMX(guidedVoice, { element: syncElement });
|
|
761
|
+
const stopGeneralBinding = bindVoiceHTMX(generalVoice, {
|
|
762
|
+
element: syncElement
|
|
763
|
+
});
|
|
764
|
+
let activeMode = null;
|
|
765
|
+
let hasStartedModes = {
|
|
766
|
+
general: false,
|
|
767
|
+
guided: false
|
|
768
|
+
};
|
|
769
|
+
let isCapturing = false;
|
|
770
|
+
let micError = null;
|
|
771
|
+
let waveLevels = createInitialVoiceWaveLevels();
|
|
772
|
+
const currentVoice = () => activeMode === "general" ? generalVoice : guidedVoice;
|
|
773
|
+
const renderWave = () => {
|
|
774
|
+
const path = createVoiceWavePath(waveLevels);
|
|
775
|
+
voiceWaveGlow.setAttribute("d", path);
|
|
776
|
+
voiceWavePath.setAttribute("d", path);
|
|
777
|
+
voiceMonitorCopy.innerHTML = `<span class="voice-live-dot"></span>${isCapturing ? "Microphone live" : "Microphone idle"}`;
|
|
778
|
+
voiceMonitorCopy.classList.toggle("is-live", isCapturing);
|
|
779
|
+
voiceMonitor.classList.toggle("is-live", isCapturing);
|
|
780
|
+
};
|
|
781
|
+
const render = () => {
|
|
782
|
+
const voice = currentVoice();
|
|
783
|
+
const hasStarted = (activeMode ? hasStartedModes[activeMode] : false) || voice.turns.length > 0;
|
|
784
|
+
const status = voice.status;
|
|
785
|
+
connectionMetric.textContent = voice.isConnected ? "Connected" : "Waiting";
|
|
786
|
+
errorStatus.textContent = micError || voice.error || "None";
|
|
787
|
+
microphoneStatus.textContent = isCapturing ? DEFAULT_MIC_LIVE : DEFAULT_MIC_IDLE;
|
|
788
|
+
promptStatus.textContent = resolvePromptMessage({
|
|
789
|
+
guidedPrompts,
|
|
790
|
+
hasStarted,
|
|
791
|
+
mode: activeMode,
|
|
792
|
+
status,
|
|
793
|
+
turnCount: voice.turns.length
|
|
794
|
+
});
|
|
795
|
+
startGuidedButton.hidden = isCapturing;
|
|
796
|
+
startGeneralButton.hidden = isCapturing;
|
|
797
|
+
stopButton.hidden = !isCapturing;
|
|
798
|
+
chatList.innerHTML = `<article class="voice-chat-message assistant">
|
|
799
|
+
<div class="voice-chat-role">${escapeHtml(activeMode === "general" ? generalLabel : activeMode === "guided" ? guidedLabel : "Voice demo")}</div>
|
|
800
|
+
<p class="voice-turn-text">${escapeHtml(resolveLeadMessage({
|
|
801
|
+
generalLabel,
|
|
802
|
+
guidedLabel,
|
|
803
|
+
guidedPrompts,
|
|
804
|
+
hasStarted,
|
|
805
|
+
mode: activeMode,
|
|
806
|
+
status,
|
|
807
|
+
turnCount: voice.turns.length
|
|
808
|
+
}))}</p>
|
|
809
|
+
</article>${voice.turns.map((turn) => `<div class="voice-chat-stack">
|
|
810
|
+
<article class="voice-chat-message user">
|
|
811
|
+
<div class="voice-chat-role">You</div>
|
|
812
|
+
<p class="voice-turn-text">${escapeHtml(turn.text)}</p>
|
|
813
|
+
</article>
|
|
814
|
+
${turn.assistantText ? `<article class="voice-chat-message assistant">
|
|
815
|
+
<div class="voice-chat-role">${escapeHtml(activeMode === "general" ? generalLabel : activeMode === "guided" ? guidedLabel : "Guide")}</div>
|
|
816
|
+
<p class="voice-turn-text">${escapeHtml(turn.assistantText)}</p>
|
|
817
|
+
</article>` : ""}
|
|
818
|
+
</div>`).join("")}${voice.partial ? `<article class="voice-chat-message user pending">
|
|
819
|
+
<div class="voice-chat-role">Speaking</div>
|
|
820
|
+
<p class="voice-turn-text">${escapeHtml(voice.partial)}</p>
|
|
821
|
+
</article>` : ""}`;
|
|
822
|
+
renderWave();
|
|
823
|
+
};
|
|
824
|
+
const microphone = createDemoMicrophone((audio) => currentVoice().sendAudio(audio), (level) => {
|
|
825
|
+
waveLevels = pushVoiceWaveLevel(waveLevels, level);
|
|
826
|
+
renderWave();
|
|
827
|
+
});
|
|
828
|
+
const stopMic = () => {
|
|
829
|
+
microphone.stop();
|
|
830
|
+
isCapturing = false;
|
|
831
|
+
micError = null;
|
|
832
|
+
waveLevels = createInitialVoiceWaveLevels();
|
|
833
|
+
render();
|
|
834
|
+
};
|
|
835
|
+
const startMode = async (mode) => {
|
|
836
|
+
activeMode = mode;
|
|
837
|
+
hasStartedModes = {
|
|
838
|
+
...hasStartedModes,
|
|
839
|
+
[mode]: true
|
|
840
|
+
};
|
|
841
|
+
try {
|
|
842
|
+
await microphone.start();
|
|
843
|
+
micError = null;
|
|
844
|
+
isCapturing = true;
|
|
845
|
+
render();
|
|
846
|
+
} catch (error) {
|
|
847
|
+
microphone.stop();
|
|
848
|
+
isCapturing = false;
|
|
849
|
+
waveLevels = createInitialVoiceWaveLevels();
|
|
850
|
+
micError = formatErrorMessage(error);
|
|
851
|
+
render();
|
|
852
|
+
}
|
|
853
|
+
};
|
|
854
|
+
guidedVoice.subscribe(render);
|
|
855
|
+
generalVoice.subscribe(render);
|
|
856
|
+
startGuidedButton.addEventListener("click", () => {
|
|
857
|
+
startMode("guided");
|
|
858
|
+
});
|
|
859
|
+
startGeneralButton.addEventListener("click", () => {
|
|
860
|
+
startMode("general");
|
|
861
|
+
});
|
|
862
|
+
stopButton.addEventListener("click", () => {
|
|
863
|
+
stopMic();
|
|
864
|
+
});
|
|
865
|
+
window.addEventListener("beforeunload", () => {
|
|
866
|
+
microphone.stop();
|
|
867
|
+
stopGuidedBinding();
|
|
868
|
+
stopGeneralBinding();
|
|
869
|
+
guidedVoice.close();
|
|
870
|
+
generalVoice.close();
|
|
871
|
+
});
|
|
872
|
+
render();
|
|
873
|
+
};
|
|
874
|
+
// Roots that have already been wired up. Held weakly so elements that are
// removed from the page can still be garbage-collected.
var initializedVoiceHTMXRoots = new WeakSet();

/**
 * Wires up every `[data-voice-htmx]` element currently in the document by
 * handing it to `initVoiceHTMXRoot`.
 *
 * Does nothing when `window` or `document` is unavailable (for example when
 * the module is evaluated outside a browser).
 *
 * Safe to call repeatedly: this module calls it once on load and also exports
 * it, so each root is initialized at most once. Without that guard a second
 * call would bind the click handlers again and open additional voice streams
 * for the same element. Elements added to the page later are picked up by the
 * next call.
 */
var initVoiceHTMX = () => {
  if (typeof window === "undefined" || typeof document === "undefined") {
    return;
  }
  const candidates = Array.from(document.querySelectorAll("[data-voice-htmx]"));
  for (const root of candidates) {
    if (!(root instanceof HTMLElement) || initializedVoiceHTMXRoots.has(root)) {
      continue;
    }
    initVoiceHTMXRoot(root);
    // Recorded only after a successful setup so a root whose setup threw can
    // be retried by a later call.
    initializedVoiceHTMXRoots.add(root);
  }
};

// Initialize whatever roots exist at the moment the module is evaluated.
initVoiceHTMX();
|
|
886
|
+
export {
|
|
887
|
+
initVoiceHTMX
|
|
888
|
+
};
|
package/dist/index.js
CHANGED
|
@@ -71,6 +71,7 @@ var __decorateElement = (array, flags, name, decorators, target, extra) => {
|
|
|
71
71
|
|
|
72
72
|
// src/plugin.ts
|
|
73
73
|
import { Elysia } from "elysia";
|
|
74
|
+
import { resolve } from "path";
|
|
74
75
|
|
|
75
76
|
// src/htmx.ts
|
|
76
77
|
var DEFAULT_HTMX_TARGETS = {
|
|
@@ -236,6 +237,29 @@ var toVoiceSessionSummary = (session) => ({
|
|
|
236
237
|
|
|
237
238
|
// src/turnDetection.ts
|
|
238
239
|
var DEFAULT_SILENCE_MS = 700;
|
|
240
|
+
// Default level (same 0..1 scale that measureAudioLevel returns) at or above
// which an incoming audio chunk is counted as speech by turn detection.
var DEFAULT_SPEECH_THRESHOLD = 0.015;

/**
 * Returns a byte view over an audio chunk without copying it.
 *
 * @param audio An ArrayBuffer, or an object exposing `buffer`, `byteOffset`
 *   and `byteLength` (typed arrays and DataView do).
 * @returns A Uint8Array covering exactly the bytes of `audio`.
 */
var toUint8Array = (audio) => {
  if (audio instanceof ArrayBuffer) {
    return new Uint8Array(audio);
  }
  return new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
};

/**
 * Computes the root-mean-square level of a chunk interpreted as 16-bit signed
 * samples, each normalized by 32768.
 *
 * Samples are read through a DataView rather than an Int16Array: an
 * Int16Array over the same buffer throws a RangeError whenever the chunk
 * starts at an odd byte offset, which can happen for views carved out of a
 * larger buffer. The DataView also pins the byte order to little-endian
 * instead of depending on the host.
 * NOTE(review): little-endian matches what the previous Int16Array decoding
 * produced on little-endian hosts; confirm the wire format is PCM16LE.
 *
 * @param audio Audio chunk accepted by `toUint8Array`.
 * @returns The RMS level, or 0 when the chunk holds no complete sample. A
 *   trailing odd byte is ignored.
 */
var measureAudioLevel = (audio) => {
  const bytes = toUint8Array(audio);
  const sampleCount = Math.floor(bytes.byteLength / 2);
  if (sampleCount === 0) {
    return 0;
  }
  const view = new DataView(bytes.buffer, bytes.byteOffset, bytes.byteLength);
  let sumSquares = 0;
  for (let index = 0; index < sampleCount; index += 1) {
    const normalized = view.getInt16(index * 2, true) / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.sqrt(sumSquares / sampleCount);
};
|
|
239
263
|
var normalizeText = (value) => value.trim().replace(/\s+/g, " ");
|
|
240
264
|
var mergeTranscriptTexts = (transcripts) => {
|
|
241
265
|
const merged = [];
|
|
@@ -293,11 +317,13 @@ var createVoiceSession = (options) => {
|
|
|
293
317
|
timeout: options.reconnect.timeout ?? DEFAULT_RECONNECT_TIMEOUT
|
|
294
318
|
};
|
|
295
319
|
const turnDetection = {
|
|
296
|
-
silenceMs: options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS
|
|
320
|
+
silenceMs: options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS,
|
|
321
|
+
speechThreshold: options.turnDetection.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD
|
|
297
322
|
};
|
|
298
323
|
let socket = options.socket;
|
|
299
324
|
let sttSession = null;
|
|
300
325
|
let silenceTimer = null;
|
|
326
|
+
let speechDetected = false;
|
|
301
327
|
const clearSilenceTimer = () => {
|
|
302
328
|
if (!silenceTimer) {
|
|
303
329
|
return;
|
|
@@ -339,7 +365,9 @@ var createVoiceSession = (options) => {
|
|
|
339
365
|
}
|
|
340
366
|
};
|
|
341
367
|
const scheduleSilenceCommit = () => {
|
|
342
|
-
|
|
368
|
+
if (silenceTimer) {
|
|
369
|
+
return;
|
|
370
|
+
}
|
|
343
371
|
silenceTimer = setTimeout(() => {
|
|
344
372
|
api.commitTurn("silence");
|
|
345
373
|
}, turnDetection.silenceMs);
|
|
@@ -370,7 +398,6 @@ var createVoiceSession = (options) => {
|
|
|
370
398
|
transcript,
|
|
371
399
|
type: "partial"
|
|
372
400
|
});
|
|
373
|
-
scheduleSilenceCommit();
|
|
374
401
|
};
|
|
375
402
|
const handleFinal = async (transcript) => {
|
|
376
403
|
await writeSession((session) => {
|
|
@@ -394,7 +421,6 @@ var createVoiceSession = (options) => {
|
|
|
394
421
|
transcript,
|
|
395
422
|
type: "final"
|
|
396
423
|
});
|
|
397
|
-
scheduleSilenceCommit();
|
|
398
424
|
};
|
|
399
425
|
const ensureAdapter = async () => {
|
|
400
426
|
if (sttSession) {
|
|
@@ -496,6 +522,7 @@ var createVoiceSession = (options) => {
|
|
|
496
522
|
currentSession.status = "active";
|
|
497
523
|
currentSession.turns = [...currentSession.turns, turn];
|
|
498
524
|
});
|
|
525
|
+
speechDetected = false;
|
|
499
526
|
logger.info("voice turn committed", {
|
|
500
527
|
reason,
|
|
501
528
|
sessionId: options.id,
|
|
@@ -529,6 +556,7 @@ var createVoiceSession = (options) => {
|
|
|
529
556
|
type: "complete"
|
|
530
557
|
});
|
|
531
558
|
await closeAdapter("complete");
|
|
559
|
+
speechDetected = false;
|
|
532
560
|
await options.route.onComplete({
|
|
533
561
|
api,
|
|
534
562
|
context: options.context,
|
|
@@ -599,6 +627,7 @@ var createVoiceSession = (options) => {
|
|
|
599
627
|
session.reconnect.lastDisconnectAt = Date.now();
|
|
600
628
|
session.status = "reconnecting";
|
|
601
629
|
});
|
|
630
|
+
speechDetected = false;
|
|
602
631
|
},
|
|
603
632
|
fail: async (error) => {
|
|
604
633
|
clearSilenceTimer();
|
|
@@ -613,6 +642,7 @@ var createVoiceSession = (options) => {
|
|
|
613
642
|
type: "error"
|
|
614
643
|
});
|
|
615
644
|
await closeAdapter("failed");
|
|
645
|
+
speechDetected = false;
|
|
616
646
|
await options.route.onError?.({
|
|
617
647
|
api,
|
|
618
648
|
context: options.context,
|
|
@@ -627,11 +657,22 @@ var createVoiceSession = (options) => {
|
|
|
627
657
|
return;
|
|
628
658
|
}
|
|
629
659
|
const adapter = await ensureAdapter();
|
|
660
|
+
const audioLevel = measureAudioLevel(audio);
|
|
630
661
|
await writeSession((currentSession) => {
|
|
631
662
|
currentSession.currentTurn.lastAudioAt = Date.now();
|
|
632
663
|
currentSession.lastActivityAt = Date.now();
|
|
633
664
|
currentSession.status = "active";
|
|
634
665
|
});
|
|
666
|
+
if (audioLevel >= turnDetection.speechThreshold) {
|
|
667
|
+
speechDetected = true;
|
|
668
|
+
clearSilenceTimer();
|
|
669
|
+
} else if (speechDetected) {
|
|
670
|
+
const currentSession = await readSession();
|
|
671
|
+
const hasTurnText = Boolean(buildTurnText(currentSession.currentTurn.transcripts, currentSession.currentTurn.partialText));
|
|
672
|
+
if (hasTurnText) {
|
|
673
|
+
scheduleSilenceCommit();
|
|
674
|
+
}
|
|
675
|
+
}
|
|
635
676
|
await adapter.send(audio);
|
|
636
677
|
},
|
|
637
678
|
snapshot: async () => readSession()
|
|
@@ -640,6 +681,51 @@ var createVoiceSession = (options) => {
|
|
|
640
681
|
};
|
|
641
682
|
|
|
642
683
|
// src/plugin.ts
|
|
684
|
+
// Paths probed, in order, for a prebuilt browser bundle of the HTMX bootstrap
// client: next to this module first, then under a sibling `dist` directory.
var HTMX_BOOTSTRAP_DIST_CANDIDATES = [
  resolve(import.meta.dir, "client", "htmxBootstrap.js"),
  resolve(import.meta.dir, "..", "dist", "client", "htmxBootstrap.js")
];

// TypeScript entry points used as a fallback when no prebuilt bundle exists;
// the first one found is bundled on demand.
var HTMX_BOOTSTRAP_SOURCE_CANDIDATES = [
  resolve(import.meta.dir, "client", "htmxBootstrap.ts"),
  resolve(import.meta.dir, "..", "src", "client", "htmxBootstrap.ts")
];

/**
 * Returns a promise for the JavaScript text of the HTMX bootstrap client.
 *
 * The first prebuilt candidate that exists is read as-is. Otherwise the first
 * existing source candidate is bundled with `Bun.build` (ESM, minified,
 * browser target) and the first output is returned.
 *
 * The in-flight or resolved promise is memoized so the file is read or built
 * once. A rejected promise is evicted from the cache, so a failed read or
 * build does not make every later call fail until the process restarts; the
 * next call simply tries again.
 *
 * Rejects with an Error when the build fails or when no candidate exists.
 */
var loadHTMXBootstrap = (() => {
  let cached = null;

  const readBootstrapSource = async () => {
    for (const candidate of HTMX_BOOTSTRAP_DIST_CANDIDATES) {
      const asset = Bun.file(candidate);
      if (await asset.exists()) {
        return await asset.text();
      }
    }
    for (const candidate of HTMX_BOOTSTRAP_SOURCE_CANDIDATES) {
      const asset = Bun.file(candidate);
      if (!await asset.exists()) {
        continue;
      }
      const build = await Bun.build({
        entrypoints: [candidate],
        format: "esm",
        minify: true,
        target: "browser"
      });
      if (!build.success || build.outputs.length === 0) {
        const log = build.logs.map((entry) => entry.message).join("\n");
        throw new Error(`Failed to build the voice HTMX bootstrap bundle.${log ? `\n${log}` : ""}`);
      }
      return await build.outputs[0].text();
    }
    throw new Error("Unable to locate the voice HTMX bootstrap client.");
  };

  return () => {
    if (cached) {
      return cached;
    }
    const pending = readBootstrapSource();
    cached = pending;
    // Drop a failed attempt so it can be retried. The identity check keeps a
    // stale failure from clearing a newer attempt.
    pending.catch(() => {
      if (cached === pending) {
        cached = null;
      }
    });
    return pending;
  };
})();
|
|
643
729
|
var isArrayBufferView = (value) => typeof value === "object" && value !== null && ArrayBuffer.isView(value);
|
|
644
730
|
var isVoiceClientMessage = (value) => {
|
|
645
731
|
if (!value || typeof value !== "object" || !("type" in value)) {
|
|
@@ -715,6 +801,7 @@ var voice = (config) => {
|
|
|
715
801
|
const onTurn = normalizeOnTurn(config.onTurn);
|
|
716
802
|
const htmxOptions = config.htmx && typeof config.htmx === "object" ? config.htmx : undefined;
|
|
717
803
|
const htmxRoute = htmxOptions?.route ?? `${config.path}/htmx/session`;
|
|
804
|
+
const htmxBootstrapRoute = htmxOptions?.bootstrapRoute ?? `${config.path}/htmx/bootstrap.js`;
|
|
718
805
|
const htmxRenderers = resolveVoiceHTMXRenderers(config.htmx && config.htmx !== true ? config.htmx : undefined);
|
|
719
806
|
const htmxTargets = resolveVoiceHTMXTargets(htmxOptions?.targets);
|
|
720
807
|
const htmxRoutes = () => {
|
|
@@ -738,7 +825,11 @@ var voice = (config) => {
|
|
|
738
825
|
}, htmxRenderers, htmxTargets), {
|
|
739
826
|
headers: { "Content-Type": "text/html; charset=utf-8" }
|
|
740
827
|
});
|
|
741
|
-
})
|
|
828
|
+
}).get(htmxBootstrapRoute, async () => new Response(await loadHTMXBootstrap(), {
|
|
829
|
+
headers: {
|
|
830
|
+
"Content-Type": "application/javascript; charset=utf-8"
|
|
831
|
+
}
|
|
832
|
+
}));
|
|
742
833
|
};
|
|
743
834
|
return new Elysia({ name: "absolutejs-voice" }).ws(config.path, {
|
|
744
835
|
close: async (ws, code, reason) => {
|
|
@@ -800,7 +891,8 @@ var voice = (config) => {
|
|
|
800
891
|
store: config.session,
|
|
801
892
|
stt: config.stt,
|
|
802
893
|
turnDetection: {
|
|
803
|
-
silenceMs: config.turnDetection?.silenceMs ?? 700
|
|
894
|
+
silenceMs: config.turnDetection?.silenceMs ?? 700,
|
|
895
|
+
speechThreshold: config.turnDetection?.speechThreshold ?? 0.015
|
|
804
896
|
}
|
|
805
897
|
});
|
|
806
898
|
if (!current) {
|
|
@@ -835,7 +927,8 @@ var voice = (config) => {
|
|
|
835
927
|
store: config.session,
|
|
836
928
|
stt: config.stt,
|
|
837
929
|
turnDetection: {
|
|
838
|
-
silenceMs: config.turnDetection?.silenceMs ?? 700
|
|
930
|
+
silenceMs: config.turnDetection?.silenceMs ?? 700,
|
|
931
|
+
speechThreshold: config.turnDetection?.speechThreshold ?? 0.015
|
|
839
932
|
}
|
|
840
933
|
});
|
|
841
934
|
runtime.activeSessions.set(sessionId, session);
|
package/dist/plugin.d.ts
CHANGED
|
@@ -47,6 +47,18 @@ export declare const voice: <TContext = unknown, TSession extends VoiceSessionRe
|
|
|
47
47
|
};
|
|
48
48
|
};
|
|
49
49
|
};
|
|
50
|
+
} & {
|
|
51
|
+
[x: string]: {
|
|
52
|
+
get: {
|
|
53
|
+
body: unknown;
|
|
54
|
+
params: {};
|
|
55
|
+
query: unknown;
|
|
56
|
+
headers: unknown;
|
|
57
|
+
response: {
|
|
58
|
+
200: Response;
|
|
59
|
+
};
|
|
60
|
+
};
|
|
61
|
+
};
|
|
50
62
|
}), {
|
|
51
63
|
derive: {};
|
|
52
64
|
resolve: {};
|
package/dist/turnDetection.d.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
-
import type { Transcript } from './types';
|
|
1
|
+
import type { AudioChunk, Transcript } from './types';
|
|
2
2
|
export declare const DEFAULT_SILENCE_MS = 700;
|
|
3
|
+
export declare const DEFAULT_SPEECH_THRESHOLD = 0.015;
|
|
4
|
+
export declare const measureAudioLevel: (audio: AudioChunk) => number;
|
|
3
5
|
export declare const buildTurnText: (transcripts: Transcript[], partialText: string) => string;
|
package/dist/types.d.ts
CHANGED
|
@@ -211,6 +211,7 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
|
|
|
211
211
|
reconnect?: VoiceReconnectConfig;
|
|
212
212
|
turnDetection?: {
|
|
213
213
|
silenceMs?: number;
|
|
214
|
+
speechThreshold?: number;
|
|
214
215
|
};
|
|
215
216
|
logger?: VoiceLogger;
|
|
216
217
|
htmx?: boolean | VoiceHTMXConfig<TSession, NoInfer<TResult>>;
|
|
@@ -224,6 +225,7 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
|
|
|
224
225
|
reconnect: Required<VoiceReconnectConfig>;
|
|
225
226
|
turnDetection: {
|
|
226
227
|
silenceMs: number;
|
|
228
|
+
speechThreshold: number;
|
|
227
229
|
};
|
|
228
230
|
route: VoiceNormalizedRouteConfig<TContext, TSession, TResult>;
|
|
229
231
|
logger?: VoiceLogger;
|
|
@@ -313,6 +315,7 @@ export type VoiceHTMXTargets = {
|
|
|
313
315
|
turns: string;
|
|
314
316
|
};
|
|
315
317
|
export type VoiceHTMXOptions<TSession extends VoiceSessionRecord = VoiceSessionRecord, TResult = unknown> = VoiceHTMXRenderConfig<TSession, TResult> & {
|
|
318
|
+
bootstrapRoute?: string;
|
|
316
319
|
route?: string;
|
|
317
320
|
targets?: Partial<VoiceHTMXTargets>;
|
|
318
321
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@absolutejs/voice",
|
|
3
|
-
"version": "0.0.15",
|
|
3
|
+
"version": "0.0.17",
|
|
4
4
|
"description": "Voice primitives and Elysia plugin for AbsoluteJS",
|
|
5
5
|
"repository": {
|
|
6
6
|
"type": "git",
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
"license": "CC BY-NC 4.0",
|
|
16
16
|
"author": "Alex Kahn",
|
|
17
17
|
"scripts": {
|
|
18
|
-
"build": "rm -rf dist && bun build ./src/index.ts ./src/client/index.ts ./src/react/index.ts ./src/vue/index.ts ./src/svelte/index.ts ./src/angular/index.ts --outdir dist --target bun --external elysia --external react --external vue --external @angular/core --external @absolutejs/absolute && tsc --emitDeclarationOnly --project tsconfig.json",
|
|
18
|
+
"build": "rm -rf dist && bun build ./src/index.ts ./src/client/index.ts ./src/react/index.ts ./src/vue/index.ts ./src/svelte/index.ts ./src/angular/index.ts --outdir dist --target bun --external elysia --external react --external vue --external @angular/core --external @absolutejs/absolute && bun build ./src/client/htmxBootstrap.ts --outdir dist/client --target browser --format esm && tsc --emitDeclarationOnly --project tsconfig.json",
|
|
19
19
|
"format": "prettier --write \"./**/*.{js,jsx,ts,tsx,json,md}\"",
|
|
20
20
|
"lint": "eslint ./src",
|
|
21
21
|
"release": "bun run format && bun run build && bun publish",
|