@absolutejs/voice 0.0.20 → 0.0.22-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +884 -4
- package/dist/angular/index.d.ts +1 -0
- package/dist/angular/index.js +759 -3
- package/dist/angular/voice-controller.service.d.ts +27 -0
- package/dist/angular/voice-stream.service.d.ts +6 -0
- package/dist/audioConditioning.d.ts +3 -0
- package/dist/client/actions.d.ts +48 -0
- package/dist/client/audioPlayer.d.ts +40 -0
- package/dist/client/connection.d.ts +5 -0
- package/dist/client/controller.d.ts +2 -0
- package/dist/client/duplex.d.ts +3 -0
- package/dist/client/htmxBootstrap.js +660 -167
- package/dist/client/index.d.ts +3 -0
- package/dist/client/index.js +991 -6
- package/dist/client/microphone.d.ts +4 -2
- package/dist/correction.d.ts +33 -0
- package/dist/fileStore.d.ts +27 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +3721 -298
- package/dist/ops.d.ts +100 -0
- package/dist/presets.d.ts +13 -0
- package/dist/react/index.d.ts +1 -0
- package/dist/react/index.js +728 -3
- package/dist/react/useVoiceController.d.ts +26 -0
- package/dist/react/useVoiceStream.d.ts +7 -0
- package/dist/routing.d.ts +3 -0
- package/dist/runtimeOps.d.ts +23 -0
- package/dist/store.d.ts +2 -2
- package/dist/svelte/index.d.ts +1 -0
- package/dist/svelte/index.js +691 -3
- package/dist/telephony/response.d.ts +7 -0
- package/dist/telephony/twilio.d.ts +116 -0
- package/dist/testing/benchmark.d.ts +93 -2
- package/dist/testing/corrected.d.ts +41 -0
- package/dist/testing/duplex.d.ts +59 -0
- package/dist/testing/fixtures.d.ts +18 -2
- package/dist/testing/index.d.ts +5 -0
- package/dist/testing/index.js +6247 -402
- package/dist/testing/review.d.ts +143 -0
- package/dist/testing/sessionBenchmark.d.ts +92 -2
- package/dist/testing/stt.d.ts +3 -1
- package/dist/testing/telephony.d.ts +70 -0
- package/dist/testing/tts.d.ts +73 -0
- package/dist/turnDetection.d.ts +5 -1
- package/dist/turnProfiles.d.ts +6 -0
- package/dist/types.d.ts +487 -10
- package/dist/vue/index.d.ts +1 -0
- package/dist/vue/index.js +750 -3
- package/dist/vue/useVoiceController.d.ts +30 -0
- package/dist/vue/useVoiceStream.d.ts +11 -0
- package/fixtures/README.md +9 -0
- package/fixtures/manifest.json +59 -1
- package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
- package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
- package/package.json +135 -1
|
@@ -1,3 +1,145 @@
|
|
|
1
|
+
// src/client/htmx.ts
|
|
2
|
+
// Event name bindVoiceHTMX passes to htmx.trigger when options.eventName is not set.
var DEFAULT_EVENT_NAME = "voice-refresh";
|
|
3
|
+
// Query-string key bindVoiceHTMX uses for the stream's session id when options.sessionQueryParam is not set.
var DEFAULT_QUERY_PARAM = "sessionId";
|
|
4
|
+
/**
 * Resolves the target of an HTMX binding.
 *
 * @param {string|object|null|undefined} input - A CSS selector, or an
 *   already-resolved element (anything that is not a string).
 * @returns {object|null|undefined} Non-string input is returned untouched;
 *   a string is looked up with `document.querySelector`, which yields `null`
 *   when nothing matches.
 */
var resolveElement = (input) => {
  return typeof input === "string" ? document.querySelector(input) : input;
};
|
|
10
|
+
/**
 * Builds the `hx-get` value for an element, adding or removing the session
 * query parameter.
 *
 * @param {object} element - Element whose current `hx-get` attribute is the
 *   fallback route.
 * @param {string|null|undefined} route - Explicit route; wins over the attribute.
 * @param {string} queryParam - Name of the query parameter carrying the session id.
 * @param {string|null|undefined} sessionId - Session id to set; a falsy value
 *   removes the parameter instead.
 * @returns {string} `""` when no route is available. Same-origin routes are
 *   returned as `pathname + search + hash`; routes on another origin are
 *   returned as full URLs.
 */
var buildRoute = (element, route, queryParam, sessionId) => {
  const baseRoute = route ?? element.getAttribute("hx-get") ?? "";
  if (!baseRoute) {
    return "";
  }
  const { href, origin } = window.location;
  // Resolve against the page URL, not the bare origin: a relative value such
  // as "panel" must keep the page's directory, exactly as the browser would
  // resolve the original attribute.
  const url = new URL(baseRoute, href ?? origin);
  if (sessionId) {
    url.searchParams.set(queryParam, sessionId);
  } else {
    url.searchParams.delete(queryParam);
  }
  if (url.origin !== origin) {
    // Dropping the host would silently retarget the request at this origin.
    return url.toString();
  }
  return `${url.pathname}${url.search}${url.hash}`;
};
|
|
23
|
+
/**
 * Keeps an element's `hx-get` route in step with a voice stream and asks
 * HTMX to refresh it on every stream update.
 *
 * @param {object} stream - Voice stream exposing `sessionId` and
 *   `subscribe(listener)`, which returns an unsubscribe function.
 * @param {object} options - Binding options: `element` (selector or element),
 *   optional `route`, `eventName` and `sessionQueryParam`.
 * @returns {() => void} Disposer that unsubscribes from the stream. A no-op
 *   is returned when there is no DOM or the element cannot be resolved.
 */
var bindVoiceHTMX = (stream, options) => {
  const hasDom = typeof window !== "undefined" && typeof document !== "undefined";
  if (!hasDom) {
    return () => {};
  }
  const element = resolveElement(options.element);
  if (!element) {
    return () => {};
  }
  const eventName = options.eventName ?? DEFAULT_EVENT_NAME;
  const queryParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
  const sync = () => {
    const nextRoute = buildRoute(element, options.route, queryParam, stream.sessionId);
    if (nextRoute) {
      element.setAttribute("hx-get", nextRoute);
    }
    // htmx is optional: when the global is absent both calls are skipped.
    const { htmx } = window;
    htmx?.process?.(element);
    htmx?.trigger?.(element, eventName);
  };
  const unsubscribe = stream.subscribe(sync);
  // Apply the current session immediately instead of waiting for an update.
  sync();
  return () => {
    unsubscribe();
  };
};
|
|
48
|
+
|
|
49
|
+
// src/client/microphone.ts
|
|
50
|
+
/**
 * Limits an audio sample to the closed range [-1, 1].
 *
 * @param {number} value - Sample value.
 * @returns {number} `value` clamped to [-1, 1]; `NaN` stays `NaN`.
 */
var clampSample = (value) => {
  const lowerBounded = Math.max(-1, value);
  return Math.min(1, lowerBounded);
};
|
|
51
|
+
/**
 * Converts float samples to signed 16-bit PCM bytes.
 *
 * Each sample is clamped with `clampSample`, then scaled by 32768 when
 * negative and 32767 otherwise, so both ends of the range map onto the
 * int16 limits. Fractions are truncated by the Int16Array conversion.
 *
 * @param {ArrayLike<number>} input - Float samples; missing entries count as 0.
 * @returns {Uint8Array} Byte view over the Int16 output buffer (two bytes per
 *   sample, in the platform's native byte order).
 */
var floatTo16BitPCM = (input) => {
  const output = Int16Array.from(input, (value) => {
    const sample = clampSample(value ?? 0);
    return sample < 0 ? sample * 32768 : sample * 32767;
  });
  return new Uint8Array(output.buffer);
};
|
|
59
|
+
/**
 * Computes a 0..1 loudness level for a chunk of signed 16-bit PCM audio.
 *
 * The level is the RMS of the samples (each normalised by 32768), multiplied
 * by 5.5 and clamped to [0, 1].
 *
 * @param {Uint8Array|ArrayBuffer|ArrayBufferView} audio - PCM bytes in the
 *   platform's native byte order. A trailing odd byte is ignored.
 * @returns {number} 0 when fewer than two bytes are supplied, otherwise the
 *   clamped level.
 */
var getPcmLevel = (audio) => {
  let bytes;
  if (audio instanceof Uint8Array) {
    bytes = audio;
  } else if (ArrayBuffer.isView(audio)) {
    // Reinterpret the underlying bytes; `new Uint8Array(view)` would copy
    // element VALUES (truncated to 8 bits) rather than the raw PCM bytes.
    bytes = new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
  } else {
    bytes = new Uint8Array(audio);
  }
  const sampleCount = Math.floor(bytes.byteLength / 2);
  if (sampleCount === 0) {
    return 0;
  }
  // Int16Array requires a 2-byte-aligned offset; copy unaligned views first
  // instead of throwing a RangeError.
  const aligned = bytes.byteOffset % 2 === 0 ? bytes : bytes.slice();
  const samples = new Int16Array(aligned.buffer, aligned.byteOffset, sampleCount);
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  const rms = Math.sqrt(sumSquares / sampleCount);
  return Math.min(1, Math.max(0, rms * 5.5));
};
|
|
75
|
+
/**
 * Resamples float audio from `sourceRate` to `targetRate` by averaging the
 * input samples that fall into each output sample's window.
 *
 * @param {Float32Array} input - Source samples.
 * @param {number} sourceRate - Sample rate of `input`, in Hz.
 * @param {number} targetRate - Desired sample rate, in Hz.
 * @returns {Float32Array} `input` itself when the rates match, otherwise a
 *   new array of `Math.round(input.length / ratio)` samples.
 */
var downsampleBuffer = (input, sourceRate, targetRate) => {
  if (sourceRate === targetRate) {
    return input;
  }
  const ratio = sourceRate / targetRate;
  const output = new Float32Array(Math.round(input.length / ratio));
  let offsetBuffer = 0;
  for (let offsetResult = 0; offsetResult < output.length; offsetResult += 1) {
    const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
    const windowEnd = Math.min(nextOffsetBuffer, input.length);
    let accum = 0;
    let count = 0;
    for (let index = offsetBuffer; index < windowEnd; index += 1) {
      accum += input[index] ?? 0;
      count += 1;
    }
    if (count > 0) {
      output[offsetResult] = accum / count;
    } else {
      // Empty window: only happens when targetRate > sourceRate. Repeat the
      // source sample this position maps to instead of emitting silence,
      // which would insert zero samples into the signal.
      const nearest = Math.min(Math.floor(offsetResult * ratio), input.length - 1);
      output[offsetResult] = input[nearest] ?? 0;
    }
    offsetBuffer = nextOffsetBuffer;
  }
  return output;
};
|
|
98
|
+
/**
 * Creates a browser microphone capture that delivers 16-bit PCM chunks.
 *
 * Audio is read through a ScriptProcessorNode (4096-frame buffer, first
 * channel only), resampled with `downsampleBuffer` to `options.sampleRateHz`
 * (default 16000), converted with `floatTo16BitPCM`, and handed to
 * `options.onAudio`. `options.onLevel`, when supplied, receives
 * `getPcmLevel(pcm)` for each chunk and `0` whenever `stop()` is called.
 *
 * @param {object} options - `onAudio(pcm)` (required), optional
 *   `onLevel(level)`, `channelCount` (default 1) and `sampleRateHz`.
 * @returns {{ start: () => Promise<void>, stop: () => void }}
 *   `start()` rejects when `getUserMedia` or `AudioContext` is unavailable,
 *   or when the permission request or audio graph setup fails. `start()` is a
 *   no-op while a capture is already running. `stop()` is safe to call at any
 *   time, including while `start()` is still pending.
 */
var createMicrophoneCapture = (options) => {
  let audioContext = null;
  let sourceNode = null;
  let processorNode = null;
  let mediaStream = null;
  // Bumped by every stop() so a start() that is still waiting on the
  // permission prompt can tell that it has been cancelled.
  let generation = 0;

  // Tears down whatever part of the capture graph currently exists.
  const release = () => {
    if (processorNode) {
      processorNode.onaudioprocess = null;
      processorNode.disconnect();
    }
    sourceNode?.disconnect();
    mediaStream?.getTracks().forEach((track) => track.stop());
    if (audioContext) {
      // close() is asynchronous; teardown is best-effort, so a rejection
      // is deliberately ignored rather than left as an unhandled rejection.
      Promise.resolve(audioContext.close()).catch(() => {});
    }
    audioContext = null;
    mediaStream = null;
    processorNode = null;
    sourceNode = null;
  };

  const start = async () => {
    if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
      throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
    }
    // Guard the bare global with typeof: referencing an undeclared
    // `AudioContext` would throw a ReferenceError before the check below.
    const AudioContextCtor =
      (typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined) ??
      (typeof AudioContext !== "undefined" ? AudioContext : undefined);
    if (!AudioContextCtor) {
      throw new Error("Browser microphone capture requires AudioContext support.");
    }
    if (mediaStream) {
      // Already capturing; starting again would leak the running stream.
      return;
    }
    const startGeneration = generation;
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: options.channelCount ?? 1
      }
    });
    if (startGeneration !== generation || mediaStream) {
      // stop() ran, or a concurrent start() won, while the request was
      // pending: release this stream so the microphone does not stay live.
      stream.getTracks().forEach((track) => track.stop());
      return;
    }
    mediaStream = stream;
    try {
      const context = new AudioContextCtor();
      audioContext = context;
      sourceNode = context.createMediaStreamSource(stream);
      processorNode = context.createScriptProcessor(4096, 1, 1);
      processorNode.onaudioprocess = (event) => {
        const channel = event.inputBuffer.getChannelData(0);
        const downsampled = downsampleBuffer(
          channel,
          context.sampleRate ?? 48000,
          options.sampleRateHz ?? 16000
        );
        const pcm = floatTo16BitPCM(downsampled);
        options.onLevel?.(getPcmLevel(pcm));
        options.onAudio(pcm);
      };
      sourceNode.connect(processorNode);
      processorNode.connect(context.destination);
    } catch (error) {
      // Do not keep the microphone open when the audio graph cannot be built.
      release();
      throw error;
    }
  };

  const stop = () => {
    generation += 1;
    release();
    options.onLevel?.(0);
  };

  return { start, stop };
};
|
|
142
|
+
|
|
1
143
|
// src/client/actions.ts
|
|
2
144
|
var normalizeErrorMessage = (value) => {
|
|
3
145
|
if (typeof value === "string" && value.trim()) {
|
|
@@ -28,6 +170,14 @@ var normalizeErrorMessage = (value) => {
|
|
|
28
170
|
};
|
|
29
171
|
var serverMessageToAction = (message) => {
|
|
30
172
|
switch (message.type) {
|
|
173
|
+
case "audio":
|
|
174
|
+
return {
|
|
175
|
+
chunk: Uint8Array.from(atob(message.chunkBase64), (char) => char.charCodeAt(0)),
|
|
176
|
+
format: message.format,
|
|
177
|
+
receivedAt: message.receivedAt,
|
|
178
|
+
turnId: message.turnId,
|
|
179
|
+
type: "audio"
|
|
180
|
+
};
|
|
31
181
|
case "assistant":
|
|
32
182
|
return {
|
|
33
183
|
text: message.text,
|
|
@@ -56,6 +206,7 @@ var serverMessageToAction = (message) => {
|
|
|
56
206
|
case "session":
|
|
57
207
|
return {
|
|
58
208
|
sessionId: message.sessionId,
|
|
209
|
+
scenarioId: message.scenarioId,
|
|
59
210
|
status: message.status,
|
|
60
211
|
type: "session"
|
|
61
212
|
};
|
|
@@ -76,24 +227,30 @@ var WS_NORMAL_CLOSURE = 1000;
|
|
|
76
227
|
var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
|
|
77
228
|
var DEFAULT_PING_INTERVAL = 30000;
|
|
78
229
|
var RECONNECT_DELAY_MS = 500;
|
|
230
|
+
var DEFAULT_SCENARIO_QUERY_PARAM = "scenarioId";
|
|
79
231
|
var noop = () => {};
|
|
80
232
|
var noopUnsubscribe = () => noop;
|
|
81
233
|
var NOOP_CONNECTION = {
|
|
234
|
+
start: () => {},
|
|
82
235
|
close: noop,
|
|
83
236
|
endTurn: noop,
|
|
84
237
|
getReadyState: () => WS_CLOSED,
|
|
238
|
+
getScenarioId: () => "",
|
|
85
239
|
getSessionId: () => "",
|
|
86
240
|
send: noop,
|
|
87
241
|
sendAudio: noop,
|
|
88
242
|
subscribe: noopUnsubscribe
|
|
89
243
|
};
|
|
90
244
|
var createSessionId = () => crypto.randomUUID();
|
|
91
|
-
var buildWsUrl = (path, sessionId) => {
|
|
245
|
+
var buildWsUrl = (path, sessionId, scenarioId) => {
|
|
92
246
|
const { hostname, port, protocol } = window.location;
|
|
93
247
|
const wsProtocol = protocol === "https:" ? "wss:" : "ws:";
|
|
94
248
|
const portSuffix = port ? `:${port}` : "";
|
|
95
249
|
const url = new URL(`${wsProtocol}//${hostname}${portSuffix}${path}`);
|
|
96
250
|
url.searchParams.set("sessionId", sessionId);
|
|
251
|
+
if (scenarioId) {
|
|
252
|
+
url.searchParams.set(DEFAULT_SCENARIO_QUERY_PARAM, scenarioId);
|
|
253
|
+
}
|
|
97
254
|
return url.toString();
|
|
98
255
|
};
|
|
99
256
|
var isVoiceServerMessage = (value) => {
|
|
@@ -101,6 +258,7 @@ var isVoiceServerMessage = (value) => {
|
|
|
101
258
|
return false;
|
|
102
259
|
}
|
|
103
260
|
switch (value.type) {
|
|
261
|
+
case "audio":
|
|
104
262
|
case "assistant":
|
|
105
263
|
case "complete":
|
|
106
264
|
case "error":
|
|
@@ -136,6 +294,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
136
294
|
const state = {
|
|
137
295
|
isConnected: false,
|
|
138
296
|
pendingMessages: [],
|
|
297
|
+
scenarioId: options.scenarioId ?? null,
|
|
139
298
|
pingInterval: null,
|
|
140
299
|
reconnectAttempts: 0,
|
|
141
300
|
reconnectTimeout: null,
|
|
@@ -173,13 +332,14 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
173
332
|
}, RECONNECT_DELAY_MS);
|
|
174
333
|
};
|
|
175
334
|
const connect = () => {
|
|
176
|
-
const ws = new WebSocket(buildWsUrl(path, state.sessionId));
|
|
335
|
+
const ws = new WebSocket(buildWsUrl(path, state.sessionId, state.scenarioId));
|
|
177
336
|
ws.binaryType = "arraybuffer";
|
|
178
337
|
ws.onopen = () => {
|
|
179
338
|
state.isConnected = true;
|
|
180
339
|
state.reconnectAttempts = 0;
|
|
181
340
|
flushPendingMessages();
|
|
182
341
|
listeners.forEach((listener) => listener({
|
|
342
|
+
scenarioId: state.scenarioId ?? undefined,
|
|
183
343
|
sessionId: state.sessionId,
|
|
184
344
|
status: "active",
|
|
185
345
|
type: "session"
|
|
@@ -197,6 +357,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
197
357
|
}
|
|
198
358
|
if (parsed.type === "session") {
|
|
199
359
|
state.sessionId = parsed.sessionId;
|
|
360
|
+
state.scenarioId = parsed.scenarioId ?? state.scenarioId;
|
|
200
361
|
}
|
|
201
362
|
listeners.forEach((listener) => listener(parsed));
|
|
202
363
|
};
|
|
@@ -220,6 +381,19 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
220
381
|
const send = (message) => {
|
|
221
382
|
sendSerialized(JSON.stringify(message));
|
|
222
383
|
};
|
|
384
|
+
const start = (input = {}) => {
|
|
385
|
+
if (input.sessionId) {
|
|
386
|
+
state.sessionId = input.sessionId;
|
|
387
|
+
}
|
|
388
|
+
if (input.scenarioId) {
|
|
389
|
+
state.scenarioId = input.scenarioId;
|
|
390
|
+
}
|
|
391
|
+
send({
|
|
392
|
+
type: "start",
|
|
393
|
+
sessionId: state.sessionId,
|
|
394
|
+
scenarioId: state.scenarioId ?? undefined
|
|
395
|
+
});
|
|
396
|
+
};
|
|
223
397
|
const sendAudio = (audio) => {
|
|
224
398
|
sendSerialized(audio);
|
|
225
399
|
};
|
|
@@ -243,9 +417,11 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
243
417
|
};
|
|
244
418
|
connect();
|
|
245
419
|
return {
|
|
420
|
+
start,
|
|
246
421
|
close,
|
|
247
422
|
endTurn,
|
|
248
423
|
getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
|
|
424
|
+
getScenarioId: () => state.scenarioId ?? "",
|
|
249
425
|
getSessionId: () => state.sessionId,
|
|
250
426
|
send,
|
|
251
427
|
sendAudio,
|
|
@@ -255,9 +431,11 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
255
431
|
|
|
256
432
|
// src/client/store.ts
|
|
257
433
|
var createInitialState = () => ({
|
|
434
|
+
assistantAudio: [],
|
|
258
435
|
assistantTexts: [],
|
|
259
436
|
error: null,
|
|
260
437
|
isConnected: false,
|
|
438
|
+
scenarioId: null,
|
|
261
439
|
partial: "",
|
|
262
440
|
sessionId: null,
|
|
263
441
|
status: "idle",
|
|
@@ -271,6 +449,20 @@ var createVoiceStreamStore = () => {
|
|
|
271
449
|
};
|
|
272
450
|
const dispatch = (action) => {
|
|
273
451
|
switch (action.type) {
|
|
452
|
+
case "audio":
|
|
453
|
+
state = {
|
|
454
|
+
...state,
|
|
455
|
+
assistantAudio: [
|
|
456
|
+
...state.assistantAudio,
|
|
457
|
+
{
|
|
458
|
+
chunk: action.chunk,
|
|
459
|
+
format: action.format,
|
|
460
|
+
receivedAt: action.receivedAt,
|
|
461
|
+
turnId: action.turnId
|
|
462
|
+
}
|
|
463
|
+
]
|
|
464
|
+
};
|
|
465
|
+
break;
|
|
274
466
|
case "assistant":
|
|
275
467
|
state = {
|
|
276
468
|
...state,
|
|
@@ -319,6 +511,7 @@ var createVoiceStreamStore = () => {
|
|
|
319
511
|
state = {
|
|
320
512
|
...state,
|
|
321
513
|
error: null,
|
|
514
|
+
scenarioId: action.scenarioId ?? state.scenarioId,
|
|
322
515
|
isConnected: action.status === "active",
|
|
323
516
|
sessionId: action.sessionId,
|
|
324
517
|
status: action.status
|
|
@@ -352,6 +545,12 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
352
545
|
const connection = createVoiceConnection(path, options);
|
|
353
546
|
const store = createVoiceStreamStore();
|
|
354
547
|
const subscribers = new Set;
|
|
548
|
+
const start = (input) => Promise.resolve().then(() => {
|
|
549
|
+
if (!input?.sessionId && !input?.scenarioId) {
|
|
550
|
+
return;
|
|
551
|
+
}
|
|
552
|
+
connection.start(input);
|
|
553
|
+
});
|
|
355
554
|
const notify = () => {
|
|
356
555
|
subscribers.forEach((subscriber) => subscriber());
|
|
357
556
|
};
|
|
@@ -384,6 +583,10 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
384
583
|
get isConnected() {
|
|
385
584
|
return store.getSnapshot().isConnected;
|
|
386
585
|
},
|
|
586
|
+
get scenarioId() {
|
|
587
|
+
return store.getSnapshot().scenarioId;
|
|
588
|
+
},
|
|
589
|
+
start,
|
|
387
590
|
get partial() {
|
|
388
591
|
return store.getSnapshot().partial;
|
|
389
592
|
},
|
|
@@ -399,6 +602,9 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
399
602
|
get assistantTexts() {
|
|
400
603
|
return store.getSnapshot().assistantTexts;
|
|
401
604
|
},
|
|
605
|
+
get assistantAudio() {
|
|
606
|
+
return store.getSnapshot().assistantAudio;
|
|
607
|
+
},
|
|
402
608
|
sendAudio(audio) {
|
|
403
609
|
connection.sendAudio(audio);
|
|
404
610
|
},
|
|
@@ -411,127 +617,449 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
411
617
|
};
|
|
412
618
|
};
|
|
413
619
|
|
|
414
|
-
// src/
|
|
415
|
-
var
|
|
416
|
-
var
|
|
417
|
-
var
|
|
418
|
-
|
|
419
|
-
|
|
620
|
+
// src/audioConditioning.ts
|
|
621
|
+
// Fallback values used by resolveAudioConditioningConfig when the matching
// config field (targetLevel, maxGain, noiseGateThreshold, noiseGateAttenuation)
// is null or undefined.
var DEFAULT_TARGET_LEVEL = 0.08;
|
|
622
|
+
var DEFAULT_MAX_GAIN = 3;
|
|
623
|
+
var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
|
|
624
|
+
var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;
|
|
625
|
+
/**
 * Expands a partial audio-conditioning config into a complete one.
 *
 * @param {object|null|undefined} config - Optional overrides: `enabled`,
 *   `maxGain`, `noiseGateAttenuation`, `noiseGateThreshold`, `targetLevel`.
 * @returns {object|undefined} `undefined` when `config` is missing or has
 *   `enabled === false`; otherwise an object with `enabled: true` and every
 *   field filled from `config`, or from the DEFAULT_* constants when the
 *   field is null or undefined.
 */
var resolveAudioConditioningConfig = (config) => {
  const isDisabled = !config || config.enabled === false;
  if (isDisabled) {
    return;
  }
  return {
    enabled: true,
    maxGain: config.maxGain ?? DEFAULT_MAX_GAIN,
    noiseGateAttenuation: config.noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION,
    noiseGateThreshold: config.noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD,
    targetLevel: config.targetLevel ?? DEFAULT_TARGET_LEVEL
  };
};
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
}
|
|
432
|
-
|
|
637
|
+
|
|
638
|
+
// src/turnProfiles.ts
|
|
639
|
+
var TURN_PROFILE_DEFAULTS = {
|
|
640
|
+
balanced: {
|
|
641
|
+
qualityProfile: "general",
|
|
642
|
+
silenceMs: 1400,
|
|
643
|
+
speechThreshold: 0.012,
|
|
644
|
+
transcriptStabilityMs: 1000
|
|
645
|
+
},
|
|
646
|
+
fast: {
|
|
647
|
+
qualityProfile: "general",
|
|
648
|
+
silenceMs: 700,
|
|
649
|
+
speechThreshold: 0.015,
|
|
650
|
+
transcriptStabilityMs: 450
|
|
651
|
+
},
|
|
652
|
+
"long-form": {
|
|
653
|
+
qualityProfile: "general",
|
|
654
|
+
silenceMs: 2200,
|
|
655
|
+
speechThreshold: 0.01,
|
|
656
|
+
transcriptStabilityMs: 1500
|
|
433
657
|
}
|
|
434
|
-
return `${url.pathname}${url.search}${url.hash}`;
|
|
435
658
|
};
|
|
436
|
-
var
|
|
437
|
-
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
659
|
+
var QUALITY_PROFILE_DEFAULTS = {
|
|
660
|
+
general: {},
|
|
661
|
+
"accent-heavy": {
|
|
662
|
+
silenceMs: 1200,
|
|
663
|
+
speechThreshold: 0.01,
|
|
664
|
+
transcriptStabilityMs: 1200
|
|
665
|
+
},
|
|
666
|
+
"noisy-room": {
|
|
667
|
+
silenceMs: 2000,
|
|
668
|
+
speechThreshold: 0.02,
|
|
669
|
+
transcriptStabilityMs: 1600
|
|
670
|
+
},
|
|
671
|
+
"short-command": {
|
|
672
|
+
silenceMs: 500,
|
|
673
|
+
speechThreshold: 0.016,
|
|
674
|
+
transcriptStabilityMs: 420
|
|
443
675
|
}
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
unsubscribe();
|
|
676
|
+
};
|
|
677
|
+
// Key into TURN_PROFILE_DEFAULTS used by resolveTurnDetectionConfig when config.profile is not set.
var DEFAULT_TURN_PROFILE = "fast";
|
|
678
|
+
// Key into QUALITY_PROFILE_DEFAULTS used by resolveTurnDetectionConfig when config.qualityProfile is not set.
var DEFAULT_QUALITY_PROFILE = "general";
|
|
679
|
+
/**
 * Resolves turn-detection settings from explicit overrides, the quality
 * profile, and the turn profile, in that order of precedence.
 *
 * @param {object|null|undefined} config - Optional `profile`,
 *   `qualityProfile`, `silenceMs`, `speechThreshold`, `transcriptStabilityMs`.
 * @returns {object} `{ profile, qualityProfile, silenceMs, speechThreshold,
 *   transcriptStabilityMs }`. Profile names that are not own keys of the
 *   lookup tables are replaced by DEFAULT_TURN_PROFILE and
 *   DEFAULT_QUALITY_PROFILE, so the returned names always match the values
 *   that were applied.
 */
var resolveTurnDetectionConfig = (config) => {
  // Own-property check: a plain `table[key]` lookup would also match
  // inherited members such as "constructor" and yield undefined thresholds.
  const isKnown = (table, key) =>
    typeof key === "string" && Object.prototype.hasOwnProperty.call(table, key);

  const requestedProfile = config?.profile ?? DEFAULT_TURN_PROFILE;
  const requestedQuality = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
  const profile = isKnown(TURN_PROFILE_DEFAULTS, requestedProfile)
    ? requestedProfile
    : DEFAULT_TURN_PROFILE;
  const qualityProfile = isKnown(QUALITY_PROFILE_DEFAULTS, requestedQuality)
    ? requestedQuality
    : DEFAULT_QUALITY_PROFILE;

  const preset = TURN_PROFILE_DEFAULTS[profile];
  const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
  return {
    profile,
    qualityProfile,
    silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
    speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
    transcriptStabilityMs:
      config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
  };
};
|
|
461
692
|
|
|
462
|
-
// src/
|
|
463
|
-
var
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
}
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
693
|
+
// src/presets.ts
|
|
694
|
+
var PRESET_INPUTS = {
|
|
695
|
+
chat: {
|
|
696
|
+
audioConditioning: {
|
|
697
|
+
enabled: true,
|
|
698
|
+
maxGain: 2.5,
|
|
699
|
+
noiseGateAttenuation: 0,
|
|
700
|
+
noiseGateThreshold: 0.004,
|
|
701
|
+
targetLevel: 0.08
|
|
702
|
+
},
|
|
703
|
+
capture: {
|
|
704
|
+
channelCount: 1,
|
|
705
|
+
sampleRateHz: 16000
|
|
706
|
+
},
|
|
707
|
+
connection: {
|
|
708
|
+
maxReconnectAttempts: 10,
|
|
709
|
+
pingInterval: 30000,
|
|
710
|
+
reconnect: true
|
|
711
|
+
},
|
|
712
|
+
sttLifecycle: "continuous",
|
|
713
|
+
turnDetection: {
|
|
714
|
+
qualityProfile: "short-command",
|
|
715
|
+
profile: "balanced"
|
|
716
|
+
}
|
|
717
|
+
},
|
|
718
|
+
default: {
|
|
719
|
+
capture: {
|
|
720
|
+
channelCount: 1,
|
|
721
|
+
sampleRateHz: 16000
|
|
722
|
+
},
|
|
723
|
+
connection: {
|
|
724
|
+
maxReconnectAttempts: 10,
|
|
725
|
+
pingInterval: 30000,
|
|
726
|
+
reconnect: true
|
|
727
|
+
},
|
|
728
|
+
sttLifecycle: "continuous",
|
|
729
|
+
turnDetection: {
|
|
730
|
+
qualityProfile: "general",
|
|
731
|
+
profile: "fast"
|
|
732
|
+
}
|
|
733
|
+
},
|
|
734
|
+
dictation: {
|
|
735
|
+
audioConditioning: {
|
|
736
|
+
enabled: true,
|
|
737
|
+
maxGain: 2.25,
|
|
738
|
+
noiseGateAttenuation: 0.05,
|
|
739
|
+
noiseGateThreshold: 0.003,
|
|
740
|
+
targetLevel: 0.08
|
|
741
|
+
},
|
|
742
|
+
capture: {
|
|
743
|
+
channelCount: 1,
|
|
744
|
+
sampleRateHz: 16000
|
|
745
|
+
},
|
|
746
|
+
connection: {
|
|
747
|
+
maxReconnectAttempts: 12,
|
|
748
|
+
pingInterval: 30000,
|
|
749
|
+
reconnect: true
|
|
750
|
+
},
|
|
751
|
+
sttLifecycle: "continuous",
|
|
752
|
+
turnDetection: {
|
|
753
|
+
qualityProfile: "accent-heavy",
|
|
754
|
+
profile: "long-form"
|
|
755
|
+
}
|
|
756
|
+
},
|
|
757
|
+
"guided-intake": {
|
|
758
|
+
audioConditioning: {
|
|
759
|
+
enabled: true,
|
|
760
|
+
maxGain: 2.5,
|
|
761
|
+
noiseGateAttenuation: 0,
|
|
762
|
+
noiseGateThreshold: 0.004,
|
|
763
|
+
targetLevel: 0.08
|
|
764
|
+
},
|
|
765
|
+
capture: {
|
|
766
|
+
channelCount: 1,
|
|
767
|
+
sampleRateHz: 16000
|
|
768
|
+
},
|
|
769
|
+
connection: {
|
|
770
|
+
maxReconnectAttempts: 12,
|
|
771
|
+
pingInterval: 30000,
|
|
772
|
+
reconnect: true
|
|
773
|
+
},
|
|
774
|
+
sttLifecycle: "turn-scoped",
|
|
775
|
+
turnDetection: {
|
|
776
|
+
qualityProfile: "accent-heavy",
|
|
777
|
+
profile: "long-form"
|
|
778
|
+
}
|
|
779
|
+
},
|
|
780
|
+
"noisy-room": {
|
|
781
|
+
audioConditioning: {
|
|
782
|
+
enabled: true,
|
|
783
|
+
maxGain: 3,
|
|
784
|
+
noiseGateAttenuation: 0.12,
|
|
785
|
+
noiseGateThreshold: 0.006,
|
|
786
|
+
targetLevel: 0.085
|
|
787
|
+
},
|
|
788
|
+
capture: {
|
|
789
|
+
channelCount: 1,
|
|
790
|
+
sampleRateHz: 16000
|
|
791
|
+
},
|
|
792
|
+
connection: {
|
|
793
|
+
maxReconnectAttempts: 14,
|
|
794
|
+
pingInterval: 45000,
|
|
795
|
+
reconnect: true
|
|
796
|
+
},
|
|
797
|
+
sttLifecycle: "continuous",
|
|
798
|
+
turnDetection: {
|
|
799
|
+
qualityProfile: "noisy-room",
|
|
800
|
+
profile: "long-form",
|
|
801
|
+
silenceMs: 2100,
|
|
802
|
+
speechThreshold: 0.02,
|
|
803
|
+
transcriptStabilityMs: 1650
|
|
804
|
+
}
|
|
805
|
+
},
|
|
806
|
+
"pstn-balanced": {
|
|
807
|
+
audioConditioning: {
|
|
808
|
+
enabled: true,
|
|
809
|
+
maxGain: 2.8,
|
|
810
|
+
noiseGateAttenuation: 0.07,
|
|
811
|
+
noiseGateThreshold: 0.005,
|
|
812
|
+
targetLevel: 0.08
|
|
813
|
+
},
|
|
814
|
+
capture: {
|
|
815
|
+
channelCount: 1,
|
|
816
|
+
sampleRateHz: 16000
|
|
817
|
+
},
|
|
818
|
+
connection: {
|
|
819
|
+
maxReconnectAttempts: 14,
|
|
820
|
+
pingInterval: 45000,
|
|
821
|
+
reconnect: true
|
|
822
|
+
},
|
|
823
|
+
sttLifecycle: "continuous",
|
|
824
|
+
turnDetection: {
|
|
825
|
+
qualityProfile: "noisy-room",
|
|
826
|
+
profile: "long-form",
|
|
827
|
+
silenceMs: 660,
|
|
828
|
+
speechThreshold: 0.012,
|
|
829
|
+
transcriptStabilityMs: 300
|
|
830
|
+
}
|
|
831
|
+
},
|
|
832
|
+
"pstn-fast": {
|
|
833
|
+
audioConditioning: {
|
|
834
|
+
enabled: true,
|
|
835
|
+
maxGain: 2.75,
|
|
836
|
+
noiseGateAttenuation: 0.06,
|
|
837
|
+
noiseGateThreshold: 0.005,
|
|
838
|
+
targetLevel: 0.08
|
|
839
|
+
},
|
|
840
|
+
capture: {
|
|
841
|
+
channelCount: 1,
|
|
842
|
+
sampleRateHz: 16000
|
|
843
|
+
},
|
|
844
|
+
connection: {
|
|
845
|
+
maxReconnectAttempts: 14,
|
|
846
|
+
pingInterval: 45000,
|
|
847
|
+
reconnect: true
|
|
848
|
+
},
|
|
849
|
+
sttLifecycle: "continuous",
|
|
850
|
+
turnDetection: {
|
|
851
|
+
qualityProfile: "noisy-room",
|
|
852
|
+
profile: "long-form",
|
|
853
|
+
silenceMs: 620,
|
|
854
|
+
speechThreshold: 0.012,
|
|
855
|
+
transcriptStabilityMs: 280
|
|
856
|
+
}
|
|
857
|
+
},
|
|
858
|
+
reliability: {
|
|
859
|
+
audioConditioning: {
|
|
860
|
+
enabled: true,
|
|
861
|
+
maxGain: 2.9,
|
|
862
|
+
noiseGateAttenuation: 0.08,
|
|
863
|
+
noiseGateThreshold: 0.005,
|
|
864
|
+
targetLevel: 0.08
|
|
865
|
+
},
|
|
866
|
+
capture: {
|
|
867
|
+
channelCount: 1,
|
|
868
|
+
sampleRateHz: 16000
|
|
869
|
+
},
|
|
870
|
+
connection: {
|
|
871
|
+
maxReconnectAttempts: 14,
|
|
872
|
+
pingInterval: 45000,
|
|
873
|
+
reconnect: true
|
|
874
|
+
},
|
|
875
|
+
sttLifecycle: "continuous",
|
|
876
|
+
turnDetection: {
|
|
877
|
+
qualityProfile: "noisy-room",
|
|
878
|
+
profile: "long-form"
|
|
488
879
|
}
|
|
489
|
-
output[offsetResult] = count > 0 ? accum / count : 0;
|
|
490
|
-
offsetResult += 1;
|
|
491
|
-
offsetBuffer = nextOffsetBuffer;
|
|
492
880
|
}
|
|
493
|
-
return output;
|
|
494
881
|
};
|
|
495
|
-
var
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
882
|
+
/**
 * Resolves a named runtime preset from PRESET_INPUTS into a complete
 * configuration.
 *
 * @param {string} [name="default"] - Preset name. A name that is not an own
 *   key of PRESET_INPUTS (including `null`) resolves to the "default" preset.
 * @returns {object} `{ audioConditioning, capture, connection, name,
 *   sttLifecycle, turnDetection }`, where `name` is the preset actually
 *   applied, `capture` defaults to one channel at 16000 Hz, `connection` is
 *   a shallow copy, and `sttLifecycle` defaults to "continuous".
 */
var resolveVoiceRuntimePreset = (name = "default") => {
  // Own-property check so inherited keys ("constructor", "toString", ...)
  // and typos do not produce a crash or a half-empty preset.
  const isKnown =
    typeof name === "string" && Object.prototype.hasOwnProperty.call(PRESET_INPUTS, name);
  const resolvedName = isKnown ? name : "default";
  const preset = PRESET_INPUTS[resolvedName];
  return {
    audioConditioning: resolveAudioConditioningConfig(preset.audioConditioning),
    capture: {
      channelCount: preset.capture?.channelCount ?? 1,
      sampleRateHz: preset.capture?.sampleRateHz ?? 16000
    },
    connection: {
      ...preset.connection
    },
    name: resolvedName,
    sttLifecycle: preset.sttLifecycle ?? "continuous",
    turnDetection: resolveTurnDetectionConfig(preset.turnDetection)
  };
};
|
|
898
|
+
|
|
899
|
+
// src/client/controller.ts
|
|
900
|
+
/**
 * Builds the controller's initial snapshot from a voice stream.
 *
 * The stream's array fields are shallow-copied, so the snapshot owns its
 * arrays while sharing their items. The recording fields start as
 * "not recording, no error".
 *
 * @param {object} stream - Voice stream exposing `assistantAudio`,
 *   `assistantTexts`, `error`, `isConnected`, `partial`, `sessionId`,
 *   `scenarioId`, `status` and `turns`.
 * @returns {object} Controller state object.
 */
var createInitialState2 = (stream) => {
  const snapshot = {
    assistantAudio: [...stream.assistantAudio],
    assistantTexts: [...stream.assistantTexts],
    error: stream.error,
    isConnected: stream.isConnected,
    isRecording: false,
    partial: stream.partial,
    recordingError: null,
    sessionId: stream.sessionId,
    scenarioId: stream.scenarioId,
    status: stream.status,
    turns: [...stream.turns]
  };
  return snapshot;
};
|
|
913
|
+
/**
 * Creates a voice controller: a voice stream plus microphone recording,
 * exposed as a single subscribable snapshot.
 *
 * @param {string} path - Path handed to `createVoiceStream`.
 * @param {object} [options] - Optional settings:
 *   - `preset`: name passed to `resolveVoiceRuntimePreset`.
 *   - `connection`: overrides merged over the preset's connection options.
 *   - `capture`: `{ channelCount, sampleRateHz, onLevel }`; the first two
 *     override the preset's capture settings.
 *   - `autoStopOnComplete`: unless `false`, recording stops when the stream
 *     status becomes "completed".
 * @returns {object} Controller with `startRecording`, `stopRecording`,
 *   `toggleRecording`, `sendAudio`, `endTurn`, `bindHTMX`, `close`,
 *   `subscribe`, `getSnapshot` / `getServerSnapshot`, and getters mirroring
 *   the snapshot fields.
 */
var createVoiceController = (path, options = {}) => {
  const preset = resolveVoiceRuntimePreset(options.preset);
  const stream = createVoiceStream(path, {
    ...preset.connection,
    ...options.connection
  });
  let capture = null;
  // In-flight start attempt, shared by overlapping startRecording() calls so
  // the microphone is only started once.
  let pendingStart = null;
  let state = createInitialState2(stream);
  const subscribers = new Set();

  const notify = () => {
    for (const subscriber of subscribers) {
      subscriber();
    }
  };

  // Stops and forgets the current capture and abandons any pending start.
  const releaseCapture = () => {
    capture?.stop();
    capture = null;
    pendingStart = null;
  };

  const sync = () => {
    state = {
      ...state,
      assistantAudio: [...stream.assistantAudio],
      assistantTexts: [...stream.assistantTexts],
      error: stream.error,
      isConnected: stream.isConnected,
      partial: stream.partial,
      sessionId: stream.sessionId,
      scenarioId: stream.scenarioId,
      status: stream.status,
      turns: [...stream.turns]
    };
    if (options.autoStopOnComplete !== false && state.status === "completed" && state.isRecording) {
      releaseCapture();
      state = {
        ...state,
        isRecording: false
      };
    }
    notify();
  };
  const unsubscribeStream = stream.subscribe(sync);
  sync();

  const ensureCapture = () => {
    if (capture) {
      return capture;
    }
    capture = createMicrophoneCapture({
      channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
      onLevel: options.capture?.onLevel,
      onAudio: (audio) => stream.sendAudio(audio),
      sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
    });
    return capture;
  };

  const stopRecording = () => {
    releaseCapture();
    state = {
      ...state,
      isRecording: false
    };
    notify();
  };

  // One start attempt: clears the previous error, starts the microphone and
  // flips isRecording once the capture is live.
  const beginRecording = async () => {
    let activeCapture = null;
    try {
      state = {
        ...state,
        recordingError: null
      };
      notify();
      activeCapture = ensureCapture();
      await activeCapture.start();
    } catch (error) {
      // Release anything the failed start may have acquired.
      activeCapture?.stop();
      if (capture === activeCapture) {
        capture = null;
      }
      state = {
        ...state,
        isRecording: false,
        recordingError: error instanceof Error ? error.message : String(error)
      };
      notify();
      throw error;
    }
    if (capture !== activeCapture) {
      // stopRecording(), close() or auto-stop ran while the microphone was
      // still starting: shut it down instead of reporting a recording that
      // nothing could stop any more.
      activeCapture.stop();
      return;
    }
    state = {
      ...state,
      isRecording: true
    };
    notify();
  };

  const startRecording = async () => {
    if (state.isRecording) {
      return;
    }
    if (!pendingStart) {
      const attempt = beginRecording().finally(() => {
        if (pendingStart === attempt) {
          pendingStart = null;
        }
      });
      pendingStart = attempt;
    }
    await pendingStart;
  };

  const close = () => {
    unsubscribeStream();
    stopRecording();
    stream.close();
  };

  return {
    bindHTMX(bindingOptions) {
      return bindVoiceHTMX(stream, bindingOptions);
    },
    close,
    endTurn: () => stream.endTurn(),
    get error() {
      return state.error;
    },
    getServerSnapshot: () => state,
    getSnapshot: () => state,
    get isConnected() {
      return state.isConnected;
    },
    get isRecording() {
      return state.isRecording;
    },
    get partial() {
      return state.partial;
    },
    get recordingError() {
      return state.recordingError;
    },
    sendAudio: (audio) => stream.sendAudio(audio),
    get sessionId() {
      return state.sessionId;
    },
    get scenarioId() {
      return state.scenarioId;
    },
    startRecording,
    get status() {
      return state.status;
    },
    stopRecording,
    subscribe: (subscriber) => {
      subscribers.add(subscriber);
      return () => {
        subscribers.delete(subscriber);
      };
    },
    toggleRecording: async () => {
      if (state.isRecording) {
        stopRecording();
        return;
      }
      await startRecording();
    },
    get turns() {
      return state.turns;
    },
    get assistantTexts() {
      return state.assistantTexts;
    },
    get assistantAudio() {
      return state.assistantAudio;
    }
  };
};
|
|
536
1064
|
|
|
537
1065
|
// src/client/htmxBootstrap.ts
|
|
@@ -540,10 +1068,10 @@ var VOICE_WAVE_WIDTH = 320;
|
|
|
540
1068
|
var VOICE_WAVE_HEIGHT = 88;
|
|
541
1069
|
var DEFAULT_GUIDED_LABEL = "Guided test";
|
|
542
1070
|
var DEFAULT_GENERAL_LABEL = "General recording";
|
|
543
|
-
var DEFAULT_IDLE_LEAD = "Pick a
|
|
1071
|
+
var DEFAULT_IDLE_LEAD = "Pick a scenario to begin the demo.";
|
|
544
1072
|
var DEFAULT_GUIDED_LEAD = "I can walk you through a short guided voice test.";
|
|
545
1073
|
var DEFAULT_GENERAL_LEAD = "I can capture one freeform recording and confirm that it landed.";
|
|
546
|
-
var DEFAULT_IDLE_PROMPT = "Choose a
|
|
1074
|
+
var DEFAULT_IDLE_PROMPT = "Choose a scenario to begin. Guided test asks follow-up prompts. General recording just captures what you say.";
|
|
547
1075
|
var DEFAULT_GENERAL_IDLE_PROMPT = "Click Start general recording to capture one freeform answer.";
|
|
548
1076
|
var DEFAULT_GENERAL_LIVE_PROMPT = "Speak freely. When you pause, the recording will be captured.";
|
|
549
1077
|
var DEFAULT_GENERAL_COMPLETE_PROMPT = "Recording saved. Start again if you want another capture.";
|
|
@@ -631,23 +1159,6 @@ var createVoiceWavePath = (levels, width = VOICE_WAVE_WIDTH, height = VOICE_WAVE
|
|
|
631
1159
|
}
|
|
632
1160
|
return path;
|
|
633
1161
|
};
|
|
634
|
-
var getPcmLevel = (audio) => {
|
|
635
|
-
const bytes = audio instanceof Uint8Array ? audio : new Uint8Array(audio);
|
|
636
|
-
if (bytes.byteLength < 2) {
|
|
637
|
-
return 0;
|
|
638
|
-
}
|
|
639
|
-
const samples = new Int16Array(bytes.buffer, bytes.byteOffset, Math.floor(bytes.byteLength / 2));
|
|
640
|
-
if (samples.length === 0) {
|
|
641
|
-
return 0;
|
|
642
|
-
}
|
|
643
|
-
let sumSquares = 0;
|
|
644
|
-
for (const sample of samples) {
|
|
645
|
-
const normalized = sample / 32768;
|
|
646
|
-
sumSquares += normalized * normalized;
|
|
647
|
-
}
|
|
648
|
-
const rms = Math.sqrt(sumSquares / samples.length);
|
|
649
|
-
return clamp(rms * 5.5, 0, 1);
|
|
650
|
-
};
|
|
651
1162
|
var parsePromptList = (value) => {
|
|
652
1163
|
if (!value) {
|
|
653
1164
|
return DEFAULT_GUIDED_PROMPTS;
|
|
@@ -704,35 +1215,6 @@ var resolvePromptMessage = (input) => {
|
|
|
704
1215
|
}
|
|
705
1216
|
return input.guidedPrompts[input.turnCount] ?? DEFAULT_GUIDED_OVERFLOW_PROMPT;
|
|
706
1217
|
};
|
|
707
|
-
var createDemoMicrophone = (onAudio, onLevel) => {
|
|
708
|
-
let capture = null;
|
|
709
|
-
return {
|
|
710
|
-
start: async () => {
|
|
711
|
-
if (capture) {
|
|
712
|
-
return;
|
|
713
|
-
}
|
|
714
|
-
const nextCapture = createMicrophoneCapture({
|
|
715
|
-
onAudio: (audio) => {
|
|
716
|
-
onLevel(getPcmLevel(audio));
|
|
717
|
-
onAudio(audio);
|
|
718
|
-
},
|
|
719
|
-
sampleRateHz: 16000
|
|
720
|
-
});
|
|
721
|
-
capture = nextCapture;
|
|
722
|
-
try {
|
|
723
|
-
await capture.start();
|
|
724
|
-
} catch (error) {
|
|
725
|
-
capture = null;
|
|
726
|
-
throw error;
|
|
727
|
-
}
|
|
728
|
-
},
|
|
729
|
-
stop: () => {
|
|
730
|
-
capture?.stop();
|
|
731
|
-
capture = null;
|
|
732
|
-
onLevel(0);
|
|
733
|
-
}
|
|
734
|
-
};
|
|
735
|
-
};
|
|
736
1218
|
var initVoiceHTMXRoot = (root) => {
|
|
737
1219
|
const guidedPath = root.dataset.voiceGuidedPath;
|
|
738
1220
|
const generalPath = root.dataset.voiceGeneralPath;
|
|
@@ -755,12 +1237,26 @@ var initVoiceHTMXRoot = (root) => {
|
|
|
755
1237
|
const voiceMonitorCopy = requireElement(root, root.dataset.voiceMonitorCopy, HTMLElement, "voice-monitor-copy");
|
|
756
1238
|
const voiceWaveGlow = requireElement(root, root.dataset.voiceWaveGlow, SVGPathElement, "voice-wave-glow");
|
|
757
1239
|
const voiceWavePath = requireElement(root, root.dataset.voiceWavePath, SVGPathElement, "voice-wave-path");
|
|
758
|
-
const guidedVoice =
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
1240
|
+
const guidedVoice = createVoiceController(guidedPath, {
|
|
1241
|
+
capture: {
|
|
1242
|
+
onLevel: (level) => {
|
|
1243
|
+
waveLevels = pushVoiceWaveLevel(waveLevels, level);
|
|
1244
|
+
renderWave();
|
|
1245
|
+
}
|
|
1246
|
+
},
|
|
1247
|
+
preset: "guided-intake"
|
|
1248
|
+
});
|
|
1249
|
+
const generalVoice = createVoiceController(generalPath, {
|
|
1250
|
+
capture: {
|
|
1251
|
+
onLevel: (level) => {
|
|
1252
|
+
waveLevels = pushVoiceWaveLevel(waveLevels, level);
|
|
1253
|
+
renderWave();
|
|
1254
|
+
}
|
|
1255
|
+
},
|
|
1256
|
+
preset: "dictation"
|
|
763
1257
|
});
|
|
1258
|
+
const stopGuidedBinding = guidedVoice.bindHTMX({ element: syncElement });
|
|
1259
|
+
const stopGeneralBinding = generalVoice.bindHTMX({ element: syncElement });
|
|
764
1260
|
let activeMode = null;
|
|
765
1261
|
let hasStartedModes = {
|
|
766
1262
|
general: false,
|
|
@@ -821,12 +1317,8 @@ var initVoiceHTMXRoot = (root) => {
|
|
|
821
1317
|
</article>` : ""}`;
|
|
822
1318
|
renderWave();
|
|
823
1319
|
};
|
|
824
|
-
const microphone = createDemoMicrophone((audio) => currentVoice().sendAudio(audio), (level) => {
|
|
825
|
-
waveLevels = pushVoiceWaveLevel(waveLevels, level);
|
|
826
|
-
renderWave();
|
|
827
|
-
});
|
|
828
1320
|
const stopMic = () => {
|
|
829
|
-
|
|
1321
|
+
currentVoice().stopRecording();
|
|
830
1322
|
isCapturing = false;
|
|
831
1323
|
micError = null;
|
|
832
1324
|
waveLevels = createInitialVoiceWaveLevels();
|
|
@@ -839,12 +1331,12 @@ var initVoiceHTMXRoot = (root) => {
|
|
|
839
1331
|
[mode]: true
|
|
840
1332
|
};
|
|
841
1333
|
try {
|
|
842
|
-
await
|
|
1334
|
+
await currentVoice().startRecording();
|
|
843
1335
|
micError = null;
|
|
844
1336
|
isCapturing = true;
|
|
845
1337
|
render();
|
|
846
1338
|
} catch (error) {
|
|
847
|
-
|
|
1339
|
+
currentVoice().stopRecording();
|
|
848
1340
|
isCapturing = false;
|
|
849
1341
|
waveLevels = createInitialVoiceWaveLevels();
|
|
850
1342
|
micError = formatErrorMessage(error);
|
|
@@ -863,7 +1355,8 @@ var initVoiceHTMXRoot = (root) => {
|
|
|
863
1355
|
stopMic();
|
|
864
1356
|
});
|
|
865
1357
|
window.addEventListener("beforeunload", () => {
|
|
866
|
-
|
|
1358
|
+
guidedVoice.stopRecording();
|
|
1359
|
+
generalVoice.stopRecording();
|
|
867
1360
|
stopGuidedBinding();
|
|
868
1361
|
stopGeneralBinding();
|
|
869
1362
|
guidedVoice.close();
|