@absolutejs/voice 0.0.20 → 0.0.22-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +884 -4
- package/dist/angular/index.d.ts +1 -0
- package/dist/angular/index.js +759 -3
- package/dist/angular/voice-controller.service.d.ts +27 -0
- package/dist/angular/voice-stream.service.d.ts +6 -0
- package/dist/audioConditioning.d.ts +3 -0
- package/dist/client/actions.d.ts +48 -0
- package/dist/client/audioPlayer.d.ts +40 -0
- package/dist/client/connection.d.ts +5 -0
- package/dist/client/controller.d.ts +2 -0
- package/dist/client/duplex.d.ts +3 -0
- package/dist/client/htmxBootstrap.js +660 -167
- package/dist/client/index.d.ts +3 -0
- package/dist/client/index.js +991 -6
- package/dist/client/microphone.d.ts +4 -2
- package/dist/correction.d.ts +33 -0
- package/dist/fileStore.d.ts +27 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +3721 -298
- package/dist/ops.d.ts +100 -0
- package/dist/presets.d.ts +13 -0
- package/dist/react/index.d.ts +1 -0
- package/dist/react/index.js +728 -3
- package/dist/react/useVoiceController.d.ts +26 -0
- package/dist/react/useVoiceStream.d.ts +7 -0
- package/dist/routing.d.ts +3 -0
- package/dist/runtimeOps.d.ts +23 -0
- package/dist/store.d.ts +2 -2
- package/dist/svelte/index.d.ts +1 -0
- package/dist/svelte/index.js +691 -3
- package/dist/telephony/response.d.ts +7 -0
- package/dist/telephony/twilio.d.ts +116 -0
- package/dist/testing/benchmark.d.ts +93 -2
- package/dist/testing/corrected.d.ts +41 -0
- package/dist/testing/duplex.d.ts +59 -0
- package/dist/testing/fixtures.d.ts +18 -2
- package/dist/testing/index.d.ts +5 -0
- package/dist/testing/index.js +6247 -402
- package/dist/testing/review.d.ts +143 -0
- package/dist/testing/sessionBenchmark.d.ts +92 -2
- package/dist/testing/stt.d.ts +3 -1
- package/dist/testing/telephony.d.ts +70 -0
- package/dist/testing/tts.d.ts +73 -0
- package/dist/turnDetection.d.ts +5 -1
- package/dist/turnProfiles.d.ts +6 -0
- package/dist/types.d.ts +487 -10
- package/dist/vue/index.d.ts +1 -0
- package/dist/vue/index.js +750 -3
- package/dist/vue/useVoiceController.d.ts +30 -0
- package/dist/vue/useVoiceStream.d.ts +11 -0
- package/fixtures/README.md +9 -0
- package/fixtures/manifest.json +59 -1
- package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
- package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
- package/package.json +135 -1
package/dist/react/index.js
CHANGED
|
@@ -102,6 +102,14 @@ var normalizeErrorMessage = (value) => {
|
|
|
102
102
|
};
|
|
103
103
|
var serverMessageToAction = (message) => {
|
|
104
104
|
switch (message.type) {
|
|
105
|
+
case "audio":
|
|
106
|
+
return {
|
|
107
|
+
chunk: Uint8Array.from(atob(message.chunkBase64), (char) => char.charCodeAt(0)),
|
|
108
|
+
format: message.format,
|
|
109
|
+
receivedAt: message.receivedAt,
|
|
110
|
+
turnId: message.turnId,
|
|
111
|
+
type: "audio"
|
|
112
|
+
};
|
|
105
113
|
case "assistant":
|
|
106
114
|
return {
|
|
107
115
|
text: message.text,
|
|
@@ -130,6 +138,7 @@ var serverMessageToAction = (message) => {
|
|
|
130
138
|
case "session":
|
|
131
139
|
return {
|
|
132
140
|
sessionId: message.sessionId,
|
|
141
|
+
scenarioId: message.scenarioId,
|
|
133
142
|
status: message.status,
|
|
134
143
|
type: "session"
|
|
135
144
|
};
|
|
@@ -150,24 +159,30 @@ var WS_NORMAL_CLOSURE = 1000;
|
|
|
150
159
|
var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
|
|
151
160
|
var DEFAULT_PING_INTERVAL = 30000;
|
|
152
161
|
var RECONNECT_DELAY_MS = 500;
|
|
162
|
+
var DEFAULT_SCENARIO_QUERY_PARAM = "scenarioId";
|
|
153
163
|
var noop = () => {};
|
|
154
164
|
var noopUnsubscribe = () => noop;
|
|
155
165
|
var NOOP_CONNECTION = {
|
|
166
|
+
start: () => {},
|
|
156
167
|
close: noop,
|
|
157
168
|
endTurn: noop,
|
|
158
169
|
getReadyState: () => WS_CLOSED,
|
|
170
|
+
getScenarioId: () => "",
|
|
159
171
|
getSessionId: () => "",
|
|
160
172
|
send: noop,
|
|
161
173
|
sendAudio: noop,
|
|
162
174
|
subscribe: noopUnsubscribe
|
|
163
175
|
};
|
|
164
176
|
var createSessionId = () => crypto.randomUUID();
|
|
165
|
-
var buildWsUrl = (path, sessionId) => {
|
|
177
|
+
var buildWsUrl = (path, sessionId, scenarioId) => {
|
|
166
178
|
const { hostname, port, protocol } = window.location;
|
|
167
179
|
const wsProtocol = protocol === "https:" ? "wss:" : "ws:";
|
|
168
180
|
const portSuffix = port ? `:${port}` : "";
|
|
169
181
|
const url = new URL(`${wsProtocol}//${hostname}${portSuffix}${path}`);
|
|
170
182
|
url.searchParams.set("sessionId", sessionId);
|
|
183
|
+
if (scenarioId) {
|
|
184
|
+
url.searchParams.set(DEFAULT_SCENARIO_QUERY_PARAM, scenarioId);
|
|
185
|
+
}
|
|
171
186
|
return url.toString();
|
|
172
187
|
};
|
|
173
188
|
var isVoiceServerMessage = (value) => {
|
|
@@ -175,6 +190,7 @@ var isVoiceServerMessage = (value) => {
|
|
|
175
190
|
return false;
|
|
176
191
|
}
|
|
177
192
|
switch (value.type) {
|
|
193
|
+
case "audio":
|
|
178
194
|
case "assistant":
|
|
179
195
|
case "complete":
|
|
180
196
|
case "error":
|
|
@@ -210,6 +226,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
210
226
|
const state = {
|
|
211
227
|
isConnected: false,
|
|
212
228
|
pendingMessages: [],
|
|
229
|
+
scenarioId: options.scenarioId ?? null,
|
|
213
230
|
pingInterval: null,
|
|
214
231
|
reconnectAttempts: 0,
|
|
215
232
|
reconnectTimeout: null,
|
|
@@ -247,13 +264,14 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
247
264
|
}, RECONNECT_DELAY_MS);
|
|
248
265
|
};
|
|
249
266
|
const connect = () => {
|
|
250
|
-
const ws = new WebSocket(buildWsUrl(path, state.sessionId));
|
|
267
|
+
const ws = new WebSocket(buildWsUrl(path, state.sessionId, state.scenarioId));
|
|
251
268
|
ws.binaryType = "arraybuffer";
|
|
252
269
|
ws.onopen = () => {
|
|
253
270
|
state.isConnected = true;
|
|
254
271
|
state.reconnectAttempts = 0;
|
|
255
272
|
flushPendingMessages();
|
|
256
273
|
listeners.forEach((listener) => listener({
|
|
274
|
+
scenarioId: state.scenarioId ?? undefined,
|
|
257
275
|
sessionId: state.sessionId,
|
|
258
276
|
status: "active",
|
|
259
277
|
type: "session"
|
|
@@ -271,6 +289,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
271
289
|
}
|
|
272
290
|
if (parsed.type === "session") {
|
|
273
291
|
state.sessionId = parsed.sessionId;
|
|
292
|
+
state.scenarioId = parsed.scenarioId ?? state.scenarioId;
|
|
274
293
|
}
|
|
275
294
|
listeners.forEach((listener) => listener(parsed));
|
|
276
295
|
};
|
|
@@ -294,6 +313,19 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
294
313
|
const send = (message) => {
|
|
295
314
|
sendSerialized(JSON.stringify(message));
|
|
296
315
|
};
|
|
316
|
+
const start = (input = {}) => {
|
|
317
|
+
if (input.sessionId) {
|
|
318
|
+
state.sessionId = input.sessionId;
|
|
319
|
+
}
|
|
320
|
+
if (input.scenarioId) {
|
|
321
|
+
state.scenarioId = input.scenarioId;
|
|
322
|
+
}
|
|
323
|
+
send({
|
|
324
|
+
type: "start",
|
|
325
|
+
sessionId: state.sessionId,
|
|
326
|
+
scenarioId: state.scenarioId ?? undefined
|
|
327
|
+
});
|
|
328
|
+
};
|
|
297
329
|
const sendAudio = (audio) => {
|
|
298
330
|
sendSerialized(audio);
|
|
299
331
|
};
|
|
@@ -317,9 +349,11 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
317
349
|
};
|
|
318
350
|
connect();
|
|
319
351
|
return {
|
|
352
|
+
start,
|
|
320
353
|
close,
|
|
321
354
|
endTurn,
|
|
322
355
|
getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
|
|
356
|
+
getScenarioId: () => state.scenarioId ?? "",
|
|
323
357
|
getSessionId: () => state.sessionId,
|
|
324
358
|
send,
|
|
325
359
|
sendAudio,
|
|
@@ -329,9 +363,11 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
329
363
|
|
|
330
364
|
// src/client/store.ts
|
|
331
365
|
var createInitialState = () => ({
|
|
366
|
+
assistantAudio: [],
|
|
332
367
|
assistantTexts: [],
|
|
333
368
|
error: null,
|
|
334
369
|
isConnected: false,
|
|
370
|
+
scenarioId: null,
|
|
335
371
|
partial: "",
|
|
336
372
|
sessionId: null,
|
|
337
373
|
status: "idle",
|
|
@@ -345,6 +381,20 @@ var createVoiceStreamStore = () => {
|
|
|
345
381
|
};
|
|
346
382
|
const dispatch = (action) => {
|
|
347
383
|
switch (action.type) {
|
|
384
|
+
case "audio":
|
|
385
|
+
state = {
|
|
386
|
+
...state,
|
|
387
|
+
assistantAudio: [
|
|
388
|
+
...state.assistantAudio,
|
|
389
|
+
{
|
|
390
|
+
chunk: action.chunk,
|
|
391
|
+
format: action.format,
|
|
392
|
+
receivedAt: action.receivedAt,
|
|
393
|
+
turnId: action.turnId
|
|
394
|
+
}
|
|
395
|
+
]
|
|
396
|
+
};
|
|
397
|
+
break;
|
|
348
398
|
case "assistant":
|
|
349
399
|
state = {
|
|
350
400
|
...state,
|
|
@@ -393,6 +443,7 @@ var createVoiceStreamStore = () => {
|
|
|
393
443
|
state = {
|
|
394
444
|
...state,
|
|
395
445
|
error: null,
|
|
446
|
+
scenarioId: action.scenarioId ?? state.scenarioId,
|
|
396
447
|
isConnected: action.status === "active",
|
|
397
448
|
sessionId: action.sessionId,
|
|
398
449
|
status: action.status
|
|
@@ -426,6 +477,12 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
426
477
|
const connection = createVoiceConnection(path, options);
|
|
427
478
|
const store = createVoiceStreamStore();
|
|
428
479
|
const subscribers = new Set;
|
|
480
|
+
const start = (input) => Promise.resolve().then(() => {
|
|
481
|
+
if (!input?.sessionId && !input?.scenarioId) {
|
|
482
|
+
return;
|
|
483
|
+
}
|
|
484
|
+
connection.start(input);
|
|
485
|
+
});
|
|
429
486
|
const notify = () => {
|
|
430
487
|
subscribers.forEach((subscriber) => subscriber());
|
|
431
488
|
};
|
|
@@ -458,6 +515,10 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
458
515
|
get isConnected() {
|
|
459
516
|
return store.getSnapshot().isConnected;
|
|
460
517
|
},
|
|
518
|
+
get scenarioId() {
|
|
519
|
+
return store.getSnapshot().scenarioId;
|
|
520
|
+
},
|
|
521
|
+
start,
|
|
461
522
|
get partial() {
|
|
462
523
|
return store.getSnapshot().partial;
|
|
463
524
|
},
|
|
@@ -473,6 +534,9 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
473
534
|
get assistantTexts() {
|
|
474
535
|
return store.getSnapshot().assistantTexts;
|
|
475
536
|
},
|
|
537
|
+
get assistantAudio() {
|
|
538
|
+
return store.getSnapshot().assistantAudio;
|
|
539
|
+
},
|
|
476
540
|
sendAudio(audio) {
|
|
477
541
|
connection.sendAudio(audio);
|
|
478
542
|
},
|
|
@@ -487,6 +551,7 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
487
551
|
|
|
488
552
|
// src/react/useVoiceStream.tsx
|
|
489
553
|
var EMPTY_SNAPSHOT = {
|
|
554
|
+
assistantAudio: [],
|
|
490
555
|
assistantTexts: [],
|
|
491
556
|
error: null,
|
|
492
557
|
isConnected: false,
|
|
@@ -510,6 +575,666 @@ var useVoiceStream = (path, options = {}) => {
|
|
|
510
575
|
sendAudio: (audio) => stream.sendAudio(audio)
|
|
511
576
|
};
|
|
512
577
|
};
|
|
578
|
+
// src/react/useVoiceController.tsx
|
|
579
|
+
import { useEffect as useEffect2, useRef as useRef2, useSyncExternalStore as useSyncExternalStore2 } from "react";
|
|
580
|
+
|
|
581
|
+
// src/client/htmx.ts
|
|
582
|
+
var DEFAULT_EVENT_NAME = "voice-refresh";
|
|
583
|
+
var DEFAULT_QUERY_PARAM = "sessionId";
|
|
584
|
+
/**
 * Resolves an element reference that may be supplied as a CSS selector.
 *
 * @param {*} input - A selector string, or any other value (typically an element).
 * @returns {*} The first `document.querySelector` match when `input` is a string;
 *   otherwise `input` itself, unchanged.
 */
var resolveElement = (input) => typeof input === "string" ? document.querySelector(input) : input;
|
|
590
|
+
/**
 * Builds the relative route (path + query + hash) an element should fetch,
 * with the session id synchronised into the query string.
 *
 * @param {*} element - Element whose `hx-get` attribute is the fallback route.
 * @param {string | null | undefined} route - Explicit route; takes precedence over `hx-get`.
 * @param {string} queryParam - Name of the query parameter that carries the session id.
 * @param {string | null | undefined} sessionId - Session id to set; a falsy value removes the parameter.
 * @returns {string} The relative route, or "" when no base route is available.
 */
var buildRoute = (element, route, queryParam, sessionId) => {
  const target = route ?? element.getAttribute("hx-get") ?? "";
  if (!target) {
    return "";
  }
  // Resolve against the page origin so relative routes parse as full URLs.
  const resolved = new URL(target, window.location.origin);
  if (!sessionId) {
    resolved.searchParams.delete(queryParam);
  } else {
    resolved.searchParams.set(queryParam, sessionId);
  }
  // Only the origin-relative part is returned.
  return [resolved.pathname, resolved.search, resolved.hash].join("");
};
|
|
603
|
+
/**
 * Keeps an HTMX-driven element in sync with a voice stream.
 *
 * On every stream notification (and once immediately) the element's `hx-get`
 * route is rebuilt with the stream's current session id, then `htmx.process`
 * and `htmx.trigger` are invoked on the element when `window.htmx` provides them.
 *
 * @param {*} stream - Object exposing `sessionId` and `subscribe(listener)`.
 * @param {*} options - Reads `element`, `route`, `eventName` and `sessionQueryParam`.
 * @returns {() => void} Cleanup that unsubscribes from the stream. It is a no-op
 *   when there is no DOM or the element cannot be resolved.
 */
var bindVoiceHTMX = (stream, options) => {
  const hasDom = typeof window !== "undefined" && typeof document !== "undefined";
  if (!hasDom) {
    return () => {};
  }
  const target = resolveElement(options.element);
  if (!target) {
    return () => {};
  }
  const refreshEvent = options.eventName ?? DEFAULT_EVENT_NAME;
  const sessionParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
  const refresh = () => {
    const route = buildRoute(target, options.route, sessionParam, stream.sessionId);
    if (route) {
      target.setAttribute("hx-get", route);
    }
    // htmx is optional: both calls are skipped when it is not loaded.
    window.htmx?.process?.(target);
    window.htmx?.trigger?.(target, refreshEvent);
  };
  const stopListening = stream.subscribe(refresh);
  refresh();
  return () => {
    stopListening();
  };
};
|
|
628
|
+
|
|
629
|
+
// src/client/microphone.ts
|
|
630
|
+
/** Limits a sample value to the closed range [-1, 1]. */
var clampSample = (value) => Math.min(1, Math.max(-1, value));
|
|
631
|
+
/**
 * Converts float samples to signed 16-bit PCM.
 *
 * Each sample is clamped with `clampSample`, then scaled by 32768 when negative
 * and by 32767 otherwise, so both ends of the range map onto the Int16 limits.
 *
 * @param {ArrayLike<number>} input - Float samples.
 * @returns {Uint8Array} Byte view over the Int16 sample buffer (two bytes per sample).
 */
var floatTo16BitPCM = (input) => {
  const pcm = new Int16Array(input.length);
  for (let i = 0; i < input.length; i += 1) {
    const clamped = clampSample(input[i] ?? 0);
    const scale = clamped < 0 ? 32768 : 32767;
    pcm[i] = clamped * scale;
  }
  return new Uint8Array(pcm.buffer);
};
|
|
639
|
+
/**
 * Computes a display level in [0, 1] for a chunk of 16-bit PCM audio.
 *
 * The level is the RMS of the samples (normalised by 32768), multiplied by 5.5
 * and clamped to [0, 1]. A trailing odd byte is ignored.
 *
 * @param {Uint8Array | ArrayBuffer} audio - Raw PCM bytes, read as Int16 in platform byte order.
 * @returns {number} Level between 0 and 1; 0 when the chunk holds no complete sample.
 */
var getPcmLevel = (audio) => {
  const bytes = audio instanceof Uint8Array ? audio : new Uint8Array(audio);
  const sampleCount = Math.floor(bytes.byteLength / 2);
  if (sampleCount === 0) {
    return 0;
  }
  // An Int16Array view must start on a 2-byte boundary; a Uint8Array sliced at an
  // odd offset would make the constructor throw a RangeError, so copy the bytes
  // into a fresh (aligned) buffer in that case.
  const samples = bytes.byteOffset % 2 === 0
    ? new Int16Array(bytes.buffer, bytes.byteOffset, sampleCount)
    : new Int16Array(bytes.slice(0, sampleCount * 2).buffer);
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.min(1, Math.max(0, Math.sqrt(sumSquares / samples.length) * 5.5));
};
|
|
655
|
+
/**
 * Resamples a float buffer to a lower rate by averaging consecutive input windows.
 *
 * Output sample `i` is the mean of the input samples from the previous window
 * end up to `round((i + 1) * sourceRate / targetRate)`; a window that contains
 * no input samples yields 0.
 *
 * @param {Float32Array} input - Source samples.
 * @param {number} sourceRate - Sample rate of `input`.
 * @param {number} targetRate - Desired sample rate.
 * @returns {Float32Array} `input` itself when the rates are equal, otherwise a new buffer
 *   of `round(input.length / (sourceRate / targetRate))` samples.
 */
var downsampleBuffer = (input, sourceRate, targetRate) => {
  if (sourceRate === targetRate) {
    return input;
  }
  const step = sourceRate / targetRate;
  const resampled = new Float32Array(Math.round(input.length / step));
  let windowStart = 0;
  for (let outIndex = 0; outIndex < resampled.length; outIndex += 1) {
    const windowEnd = Math.round((outIndex + 1) * step);
    let total = 0;
    let taken = 0;
    let cursor = windowStart;
    while (cursor < windowEnd && cursor < input.length) {
      total += input[cursor] ?? 0;
      taken += 1;
      cursor += 1;
    }
    resampled[outIndex] = taken > 0 ? total / taken : 0;
    windowStart = windowEnd;
  }
  return resampled;
};
|
|
678
|
+
/**
 * Creates a browser microphone capture that delivers 16-bit PCM chunks.
 *
 * `start()` requests the microphone, routes it through a ScriptProcessorNode
 * (4096-frame buffer, one input and one output channel) and, for every buffer,
 * downsamples channel 0 to `options.sampleRateHz` (default 16000), converts it
 * to 16-bit PCM, reports its level via `options.onLevel` and hands the bytes to
 * `options.onAudio`. `stop()` tears everything down and reports a level of 0.
 *
 * @param {*} options - Reads `channelCount` (default 1), `sampleRateHz`
 *   (default 16000), `onAudio(pcm)` and the optional `onLevel(level)`.
 * @returns {{ start: () => Promise<void>, stop: () => void }} Capture handle.
 *   `start()` rejects with an Error when `getUserMedia` or an AudioContext
 *   constructor is unavailable, or with whatever error setup raised.
 */
var createMicrophoneCapture = (options) => {
  let audioContext = null;
  let sourceNode = null;
  let processorNode = null;
  let mediaStream = null;
  // Releases every audio resource acquired so far; safe to call when nothing is held.
  const release = () => {
    processorNode?.disconnect();
    sourceNode?.disconnect();
    mediaStream?.getTracks().forEach((track) => track.stop());
    audioContext?.close();
    audioContext = null;
    mediaStream = null;
    processorNode = null;
    sourceNode = null;
  };
  const start = async () => {
    if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
      throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
    }
    // Look the constructor up as a property: a bare `AudioContext` identifier
    // throws a ReferenceError where the global is missing, which would make the
    // explicit support check below unreachable.
    const AudioContextCtor = globalThis.AudioContext ?? globalThis.webkitAudioContext;
    if (!AudioContextCtor) {
      throw new Error("Browser microphone capture requires AudioContext support.");
    }
    try {
      mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: {
          channelCount: options.channelCount ?? 1
        }
      });
      audioContext = new AudioContextCtor();
      sourceNode = audioContext.createMediaStreamSource(mediaStream);
      processorNode = audioContext.createScriptProcessor(4096, 1, 1);
      processorNode.onaudioprocess = (event) => {
        const channel = event.inputBuffer.getChannelData(0);
        // Falls back to 48000 if a buffer arrives after the context was released.
        const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
        const pcm = floatTo16BitPCM(downsampled);
        options.onLevel?.(getPcmLevel(pcm));
        options.onAudio(pcm);
      };
      sourceNode.connect(processorNode);
      processorNode.connect(audioContext.destination);
    } catch (error) {
      // Setup failed part-way: make sure an already granted microphone stream
      // (and any half-built audio graph) does not stay open.
      release();
      throw error;
    }
  };
  const stop = () => {
    release();
    options.onLevel?.(0);
  };
  return { start, stop };
};
|
|
722
|
+
|
|
723
|
+
// src/audioConditioning.ts
|
|
724
|
+
var DEFAULT_TARGET_LEVEL = 0.08;
|
|
725
|
+
var DEFAULT_MAX_GAIN = 3;
|
|
726
|
+
var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
|
|
727
|
+
var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;
|
|
728
|
+
/**
 * Interprets raw audio bytes as signed 16-bit samples (platform byte order).
 *
 * A trailing odd byte is ignored. The result normally shares memory with the
 * input; it is a copy only when the input view starts at an odd byte offset.
 *
 * @param {ArrayBuffer | ArrayBufferView} audio - Raw PCM bytes.
 * @returns {Int16Array} The samples contained in `audio`.
 */
var toInt16Array = (audio) => {
  const sampleCount = Math.floor(audio.byteLength / 2);
  if (audio instanceof ArrayBuffer) {
    return new Int16Array(audio, 0, sampleCount);
  }
  if (audio.byteOffset % 2 === 0) {
    return new Int16Array(audio.buffer, audio.byteOffset, sampleCount);
  }
  // Int16Array views must start on a 2-byte boundary; constructing one at an odd
  // offset throws a RangeError, so copy the relevant bytes into an aligned buffer.
  return new Int16Array(audio.buffer.slice(audio.byteOffset, audio.byteOffset + sampleCount * 2));
};
|
|
734
|
+
/**
 * Root-mean-square level of 16-bit samples, with each sample normalised by 32768.
 *
 * @param {Int16Array} samples - Signed 16-bit samples.
 * @returns {number} RMS of the normalised samples; 0 for an empty input.
 */
var computeRms = (samples) => {
  const total = samples.length;
  if (total === 0) {
    return 0;
  }
  let energy = 0;
  for (let i = 0; i < total; i += 1) {
    const unit = samples[i] / 32768;
    energy += unit * unit;
  }
  return Math.sqrt(energy / total);
};
|
|
745
|
+
/**
 * Expands a partial audio-conditioning config into a complete one.
 *
 * @param {*} config - Optional settings; fields that are null or undefined fall
 *   back to the module-level `DEFAULT_*` constants.
 * @returns {object | undefined} The resolved config with `enabled: true`, or
 *   `undefined` when `config` is missing or has `enabled === false`.
 */
var resolveAudioConditioningConfig = (config) => {
  const isActive = Boolean(config) && config.enabled !== false;
  if (!isActive) {
    return undefined;
  }
  const { maxGain, noiseGateAttenuation, noiseGateThreshold, targetLevel } = config;
  return {
    enabled: true,
    maxGain: maxGain ?? DEFAULT_MAX_GAIN,
    noiseGateAttenuation: noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION,
    noiseGateThreshold: noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD,
    targetLevel: targetLevel ?? DEFAULT_TARGET_LEVEL
  };
};
|
|
757
|
+
/**
 * Applies noise gating and gain normalisation to one chunk of 16-bit PCM.
 *
 * The chunk's RMS decides the gate: below `noiseGateThreshold` the gate factor
 * is `noiseGateAttenuation`, otherwise 1. The gain aims for `targetLevel`, is
 * capped at `maxGain` and floored at 0.25, then multiplied by the gate factor.
 * Scaled samples are rounded and clipped to the Int16 range.
 *
 * @param {ArrayBuffer | Uint8Array} audio - Raw PCM bytes.
 * @param {*} config - Resolved conditioning config; a falsy value disables conditioning.
 * @returns {ArrayBuffer | Uint8Array} `audio` itself when conditioning is disabled or
 *   the chunk holds no samples; otherwise a new Uint8Array with the conditioned PCM.
 */
var conditionAudioChunk = (audio, config) => {
  if (!config) {
    return audio;
  }
  const samples = toInt16Array(audio);
  if (samples.length === 0) {
    return audio;
  }
  const level = computeRms(samples);
  const isGated = level < config.noiseGateThreshold;
  const gate = isGated ? config.noiseGateAttenuation : 1;
  // The floor keeps the division below finite for silent or fully gated chunks.
  const referenceLevel = Math.max(level * gate, 0.000001);
  const desiredGain = Math.min(config.maxGain, config.targetLevel / referenceLevel);
  const scale = Math.max(0.25, desiredGain) * gate;
  const conditioned = new Int16Array(samples.length);
  samples.forEach((sample, position) => {
    const scaled = Math.round(sample * scale);
    conditioned[position] = Math.max(-32768, Math.min(32767, scaled));
  });
  return new Uint8Array(conditioned.buffer);
};
|
|
777
|
+
|
|
778
|
+
// src/turnProfiles.ts
|
|
779
|
+
var TURN_PROFILE_DEFAULTS = {
|
|
780
|
+
balanced: {
|
|
781
|
+
qualityProfile: "general",
|
|
782
|
+
silenceMs: 1400,
|
|
783
|
+
speechThreshold: 0.012,
|
|
784
|
+
transcriptStabilityMs: 1000
|
|
785
|
+
},
|
|
786
|
+
fast: {
|
|
787
|
+
qualityProfile: "general",
|
|
788
|
+
silenceMs: 700,
|
|
789
|
+
speechThreshold: 0.015,
|
|
790
|
+
transcriptStabilityMs: 450
|
|
791
|
+
},
|
|
792
|
+
"long-form": {
|
|
793
|
+
qualityProfile: "general",
|
|
794
|
+
silenceMs: 2200,
|
|
795
|
+
speechThreshold: 0.01,
|
|
796
|
+
transcriptStabilityMs: 1500
|
|
797
|
+
}
|
|
798
|
+
};
|
|
799
|
+
var QUALITY_PROFILE_DEFAULTS = {
|
|
800
|
+
general: {},
|
|
801
|
+
"accent-heavy": {
|
|
802
|
+
silenceMs: 1200,
|
|
803
|
+
speechThreshold: 0.01,
|
|
804
|
+
transcriptStabilityMs: 1200
|
|
805
|
+
},
|
|
806
|
+
"noisy-room": {
|
|
807
|
+
silenceMs: 2000,
|
|
808
|
+
speechThreshold: 0.02,
|
|
809
|
+
transcriptStabilityMs: 1600
|
|
810
|
+
},
|
|
811
|
+
"short-command": {
|
|
812
|
+
silenceMs: 500,
|
|
813
|
+
speechThreshold: 0.016,
|
|
814
|
+
transcriptStabilityMs: 420
|
|
815
|
+
}
|
|
816
|
+
};
|
|
817
|
+
var DEFAULT_TURN_PROFILE = "fast";
|
|
818
|
+
var DEFAULT_QUALITY_PROFILE = "general";
|
|
819
|
+
/**
 * Resolves turn-detection settings from a partial config.
 *
 * Each tunable (`silenceMs`, `speechThreshold`, `transcriptStabilityMs`) is taken
 * from the first source that defines it: the explicit config value, then the
 * quality profile's overrides, then the turn profile's defaults.
 *
 * @param {*} config - Optional settings; `profile` and `qualityProfile` default to
 *   `DEFAULT_TURN_PROFILE` and `DEFAULT_QUALITY_PROFILE`.
 * @returns {object} The profile names plus the three resolved tunables.
 */
var resolveTurnDetectionConfig = (config) => {
  const profile = config?.profile ?? DEFAULT_TURN_PROFILE;
  const qualityProfile = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
  const base = TURN_PROFILE_DEFAULTS[profile];
  const tuning = QUALITY_PROFILE_DEFAULTS[qualityProfile];
  // Precedence: explicit value, then quality-profile override, then turn-profile default.
  const pick = (key) => config?.[key] ?? tuning[key] ?? base[key];
  return {
    profile,
    qualityProfile,
    silenceMs: pick("silenceMs"),
    speechThreshold: pick("speechThreshold"),
    transcriptStabilityMs: pick("transcriptStabilityMs")
  };
};
|
|
832
|
+
|
|
833
|
+
// src/presets.ts
|
|
834
|
+
var PRESET_INPUTS = {
|
|
835
|
+
chat: {
|
|
836
|
+
audioConditioning: {
|
|
837
|
+
enabled: true,
|
|
838
|
+
maxGain: 2.5,
|
|
839
|
+
noiseGateAttenuation: 0,
|
|
840
|
+
noiseGateThreshold: 0.004,
|
|
841
|
+
targetLevel: 0.08
|
|
842
|
+
},
|
|
843
|
+
capture: {
|
|
844
|
+
channelCount: 1,
|
|
845
|
+
sampleRateHz: 16000
|
|
846
|
+
},
|
|
847
|
+
connection: {
|
|
848
|
+
maxReconnectAttempts: 10,
|
|
849
|
+
pingInterval: 30000,
|
|
850
|
+
reconnect: true
|
|
851
|
+
},
|
|
852
|
+
sttLifecycle: "continuous",
|
|
853
|
+
turnDetection: {
|
|
854
|
+
qualityProfile: "short-command",
|
|
855
|
+
profile: "balanced"
|
|
856
|
+
}
|
|
857
|
+
},
|
|
858
|
+
default: {
|
|
859
|
+
capture: {
|
|
860
|
+
channelCount: 1,
|
|
861
|
+
sampleRateHz: 16000
|
|
862
|
+
},
|
|
863
|
+
connection: {
|
|
864
|
+
maxReconnectAttempts: 10,
|
|
865
|
+
pingInterval: 30000,
|
|
866
|
+
reconnect: true
|
|
867
|
+
},
|
|
868
|
+
sttLifecycle: "continuous",
|
|
869
|
+
turnDetection: {
|
|
870
|
+
qualityProfile: "general",
|
|
871
|
+
profile: "fast"
|
|
872
|
+
}
|
|
873
|
+
},
|
|
874
|
+
dictation: {
|
|
875
|
+
audioConditioning: {
|
|
876
|
+
enabled: true,
|
|
877
|
+
maxGain: 2.25,
|
|
878
|
+
noiseGateAttenuation: 0.05,
|
|
879
|
+
noiseGateThreshold: 0.003,
|
|
880
|
+
targetLevel: 0.08
|
|
881
|
+
},
|
|
882
|
+
capture: {
|
|
883
|
+
channelCount: 1,
|
|
884
|
+
sampleRateHz: 16000
|
|
885
|
+
},
|
|
886
|
+
connection: {
|
|
887
|
+
maxReconnectAttempts: 12,
|
|
888
|
+
pingInterval: 30000,
|
|
889
|
+
reconnect: true
|
|
890
|
+
},
|
|
891
|
+
sttLifecycle: "continuous",
|
|
892
|
+
turnDetection: {
|
|
893
|
+
qualityProfile: "accent-heavy",
|
|
894
|
+
profile: "long-form"
|
|
895
|
+
}
|
|
896
|
+
},
|
|
897
|
+
"guided-intake": {
|
|
898
|
+
audioConditioning: {
|
|
899
|
+
enabled: true,
|
|
900
|
+
maxGain: 2.5,
|
|
901
|
+
noiseGateAttenuation: 0,
|
|
902
|
+
noiseGateThreshold: 0.004,
|
|
903
|
+
targetLevel: 0.08
|
|
904
|
+
},
|
|
905
|
+
capture: {
|
|
906
|
+
channelCount: 1,
|
|
907
|
+
sampleRateHz: 16000
|
|
908
|
+
},
|
|
909
|
+
connection: {
|
|
910
|
+
maxReconnectAttempts: 12,
|
|
911
|
+
pingInterval: 30000,
|
|
912
|
+
reconnect: true
|
|
913
|
+
},
|
|
914
|
+
sttLifecycle: "turn-scoped",
|
|
915
|
+
turnDetection: {
|
|
916
|
+
qualityProfile: "accent-heavy",
|
|
917
|
+
profile: "long-form"
|
|
918
|
+
}
|
|
919
|
+
},
|
|
920
|
+
"noisy-room": {
|
|
921
|
+
audioConditioning: {
|
|
922
|
+
enabled: true,
|
|
923
|
+
maxGain: 3,
|
|
924
|
+
noiseGateAttenuation: 0.12,
|
|
925
|
+
noiseGateThreshold: 0.006,
|
|
926
|
+
targetLevel: 0.085
|
|
927
|
+
},
|
|
928
|
+
capture: {
|
|
929
|
+
channelCount: 1,
|
|
930
|
+
sampleRateHz: 16000
|
|
931
|
+
},
|
|
932
|
+
connection: {
|
|
933
|
+
maxReconnectAttempts: 14,
|
|
934
|
+
pingInterval: 45000,
|
|
935
|
+
reconnect: true
|
|
936
|
+
},
|
|
937
|
+
sttLifecycle: "continuous",
|
|
938
|
+
turnDetection: {
|
|
939
|
+
qualityProfile: "noisy-room",
|
|
940
|
+
profile: "long-form",
|
|
941
|
+
silenceMs: 2100,
|
|
942
|
+
speechThreshold: 0.02,
|
|
943
|
+
transcriptStabilityMs: 1650
|
|
944
|
+
}
|
|
945
|
+
},
|
|
946
|
+
"pstn-balanced": {
|
|
947
|
+
audioConditioning: {
|
|
948
|
+
enabled: true,
|
|
949
|
+
maxGain: 2.8,
|
|
950
|
+
noiseGateAttenuation: 0.07,
|
|
951
|
+
noiseGateThreshold: 0.005,
|
|
952
|
+
targetLevel: 0.08
|
|
953
|
+
},
|
|
954
|
+
capture: {
|
|
955
|
+
channelCount: 1,
|
|
956
|
+
sampleRateHz: 16000
|
|
957
|
+
},
|
|
958
|
+
connection: {
|
|
959
|
+
maxReconnectAttempts: 14,
|
|
960
|
+
pingInterval: 45000,
|
|
961
|
+
reconnect: true
|
|
962
|
+
},
|
|
963
|
+
sttLifecycle: "continuous",
|
|
964
|
+
turnDetection: {
|
|
965
|
+
qualityProfile: "noisy-room",
|
|
966
|
+
profile: "long-form",
|
|
967
|
+
silenceMs: 660,
|
|
968
|
+
speechThreshold: 0.012,
|
|
969
|
+
transcriptStabilityMs: 300
|
|
970
|
+
}
|
|
971
|
+
},
|
|
972
|
+
"pstn-fast": {
|
|
973
|
+
audioConditioning: {
|
|
974
|
+
enabled: true,
|
|
975
|
+
maxGain: 2.75,
|
|
976
|
+
noiseGateAttenuation: 0.06,
|
|
977
|
+
noiseGateThreshold: 0.005,
|
|
978
|
+
targetLevel: 0.08
|
|
979
|
+
},
|
|
980
|
+
capture: {
|
|
981
|
+
channelCount: 1,
|
|
982
|
+
sampleRateHz: 16000
|
|
983
|
+
},
|
|
984
|
+
connection: {
|
|
985
|
+
maxReconnectAttempts: 14,
|
|
986
|
+
pingInterval: 45000,
|
|
987
|
+
reconnect: true
|
|
988
|
+
},
|
|
989
|
+
sttLifecycle: "continuous",
|
|
990
|
+
turnDetection: {
|
|
991
|
+
qualityProfile: "noisy-room",
|
|
992
|
+
profile: "long-form",
|
|
993
|
+
silenceMs: 620,
|
|
994
|
+
speechThreshold: 0.012,
|
|
995
|
+
transcriptStabilityMs: 280
|
|
996
|
+
}
|
|
997
|
+
},
|
|
998
|
+
reliability: {
|
|
999
|
+
audioConditioning: {
|
|
1000
|
+
enabled: true,
|
|
1001
|
+
maxGain: 2.9,
|
|
1002
|
+
noiseGateAttenuation: 0.08,
|
|
1003
|
+
noiseGateThreshold: 0.005,
|
|
1004
|
+
targetLevel: 0.08
|
|
1005
|
+
},
|
|
1006
|
+
capture: {
|
|
1007
|
+
channelCount: 1,
|
|
1008
|
+
sampleRateHz: 16000
|
|
1009
|
+
},
|
|
1010
|
+
connection: {
|
|
1011
|
+
maxReconnectAttempts: 14,
|
|
1012
|
+
pingInterval: 45000,
|
|
1013
|
+
reconnect: true
|
|
1014
|
+
},
|
|
1015
|
+
sttLifecycle: "continuous",
|
|
1016
|
+
turnDetection: {
|
|
1017
|
+
qualityProfile: "noisy-room",
|
|
1018
|
+
profile: "long-form"
|
|
1019
|
+
}
|
|
1020
|
+
}
|
|
1021
|
+
};
|
|
1022
|
+
/**
 * Resolves a named runtime preset from `PRESET_INPUTS` into a complete configuration.
 *
 * @param {string} [name="default"] - Key of the preset in `PRESET_INPUTS`.
 * @returns {object} `{ audioConditioning, capture, connection, name, sttLifecycle, turnDetection }`
 *   where capture defaults to one channel at 16000 Hz, `sttLifecycle` defaults to
 *   "continuous", `connection` is a copy of the preset's connection settings, and the
 *   audio-conditioning and turn-detection parts are run through their resolvers.
 * @throws {TypeError} When `name` is not one of the presets defined in `PRESET_INPUTS`.
 */
var resolveVoiceRuntimePreset = (name = "default") => {
  // Own-property check: an inherited key such as "toString" must not be mistaken
  // for a preset, and an unknown name should fail with an actionable message
  // instead of an opaque "cannot read properties of undefined" TypeError.
  if (!Object.prototype.hasOwnProperty.call(PRESET_INPUTS, name)) {
    throw new TypeError(`Unknown voice runtime preset "${String(name)}". Expected one of: ${Object.keys(PRESET_INPUTS).join(", ")}.`);
  }
  const preset = PRESET_INPUTS[name];
  return {
    audioConditioning: resolveAudioConditioningConfig(preset.audioConditioning),
    capture: {
      channelCount: preset.capture?.channelCount ?? 1,
      sampleRateHz: preset.capture?.sampleRateHz ?? 16000
    },
    connection: {
      ...preset.connection
    },
    name,
    sttLifecycle: preset.sttLifecycle ?? "continuous",
    turnDetection: resolveTurnDetectionConfig(preset.turnDetection)
  };
};
|
|
1038
|
+
|
|
1039
|
+
// src/client/controller.ts
|
|
1040
|
+
/**
 * Builds the controller's first state snapshot from the stream's current values.
 *
 * The list fields are shallow copies of the stream's lists; recording starts
 * out inactive with no recording error.
 *
 * @param {*} stream - Voice stream exposing the mirrored fields.
 * @returns {object} Initial controller state.
 */
var createInitialState2 = (stream) => {
  const recordingDefaults = { isRecording: false, recordingError: null };
  return {
    assistantAudio: [...stream.assistantAudio],
    assistantTexts: [...stream.assistantTexts],
    error: stream.error,
    isConnected: stream.isConnected,
    isRecording: recordingDefaults.isRecording,
    partial: stream.partial,
    recordingError: recordingDefaults.recordingError,
    sessionId: stream.sessionId,
    scenarioId: stream.scenarioId,
    status: stream.status,
    turns: [...stream.turns]
  };
};
|
|
1053
|
+
/**
 * Creates a voice controller: a voice stream plus microphone capture, exposed
 * through a single subscribable state snapshot.
 *
 * The stream is created with the resolved preset's connection settings overridden
 * by `options.connection`. The controller mirrors the stream's fields into its own
 * state on every stream notification and adds `isRecording` / `recordingError`.
 * Unless `options.autoStopOnComplete` is `false`, recording is stopped when the
 * stream status becomes "completed".
 *
 * @param {string} path - Passed through to `createVoiceStream`.
 * @param {*} [options={}] - Reads `preset`, `connection`, `capture`
 *   (`channelCount`, `sampleRateHz`, `onLevel`) and `autoStopOnComplete`.
 * @returns {object} Controller with state getters, `getSnapshot` / `getServerSnapshot`,
 *   `subscribe`, `startRecording`, `stopRecording`, `toggleRecording`, `sendAudio`,
 *   `endTurn`, `bindHTMX` and `close`.
 */
var createVoiceController = (path, options = {}) => {
  const preset = resolveVoiceRuntimePreset(options.preset);
  // Explicit connection options win over the preset's connection defaults.
  const stream = createVoiceStream(path, {
    ...preset.connection,
    ...options.connection
  });
  let capture = null;
  // Promise of the start attempt that is still waiting for the microphone, if any.
  let pendingStart = null;
  let state = createInitialState2(stream);
  const subscribers = new Set();
  const notify = () => {
    for (const subscriber of subscribers) {
      subscriber();
    }
  };
  // Mirrors the stream into controller state and applies auto-stop on completion.
  const sync = () => {
    state = {
      ...state,
      assistantAudio: [...stream.assistantAudio],
      assistantTexts: [...stream.assistantTexts],
      error: stream.error,
      isConnected: stream.isConnected,
      partial: stream.partial,
      sessionId: stream.sessionId,
      scenarioId: stream.scenarioId,
      status: stream.status,
      turns: [...stream.turns]
    };
    if (options.autoStopOnComplete !== false && state.status === "completed" && state.isRecording) {
      capture?.stop();
      capture = null;
      state = {
        ...state,
        isRecording: false
      };
    }
    notify();
  };
  const unsubscribeStream = stream.subscribe(sync);
  sync();
  // Lazily creates the microphone capture; explicit capture options override the preset.
  const ensureCapture = () => {
    if (capture) {
      return capture;
    }
    capture = createMicrophoneCapture({
      channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
      onLevel: options.capture?.onLevel,
      onAudio: (audio) => stream.sendAudio(audio),
      sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
    });
    return capture;
  };
  const stopRecording = () => {
    capture?.stop();
    capture = null;
    // Abandon any start that is still pending; it detects this and cleans up after itself.
    pendingStart = null;
    state = {
      ...state,
      isRecording: false
    };
    notify();
  };
  // Runs one start attempt against one capture instance.
  const runStartAttempt = async () => {
    let activeCapture = null;
    try {
      state = {
        ...state,
        recordingError: null
      };
      notify();
      activeCapture = ensureCapture();
      await activeCapture.start();
      if (capture !== activeCapture) {
        // stopRecording()/close() ran while the microphone was still being
        // acquired: release it instead of reporting a recording nobody can stop.
        activeCapture.stop();
        return;
      }
      state = {
        ...state,
        isRecording: true
      };
      notify();
    } catch (error) {
      const superseded = activeCapture !== null && capture !== activeCapture;
      if (!superseded) {
        capture = null;
        state = {
          ...state,
          isRecording: false,
          recordingError: error instanceof Error ? error.message : String(error)
        };
        notify();
      }
      throw error;
    }
  };
  const startRecording = async () => {
    if (state.isRecording) {
      return;
    }
    if (pendingStart) {
      // A start is already in flight: share it rather than calling capture.start()
      // a second time, which would open (and leak) a second microphone stream.
      return pendingStart;
    }
    const attempt = runStartAttempt();
    pendingStart = attempt;
    try {
      await attempt;
    } finally {
      if (pendingStart === attempt) {
        pendingStart = null;
      }
    }
  };
  const close = () => {
    unsubscribeStream();
    stopRecording();
    stream.close();
  };
  return {
    bindHTMX(bindingOptions) {
      return bindVoiceHTMX(stream, bindingOptions);
    },
    close,
    endTurn: () => stream.endTurn(),
    get error() {
      return state.error;
    },
    getServerSnapshot: () => state,
    getSnapshot: () => state,
    get isConnected() {
      return state.isConnected;
    },
    get isRecording() {
      return state.isRecording;
    },
    get partial() {
      return state.partial;
    },
    get recordingError() {
      return state.recordingError;
    },
    sendAudio: (audio) => stream.sendAudio(audio),
    get sessionId() {
      return state.sessionId;
    },
    get scenarioId() {
      return state.scenarioId;
    },
    startRecording,
    get status() {
      return state.status;
    },
    stopRecording,
    subscribe: (subscriber) => {
      subscribers.add(subscriber);
      return () => {
        subscribers.delete(subscriber);
      };
    },
    toggleRecording: async () => {
      if (state.isRecording) {
        stopRecording();
        return;
      }
      await startRecording();
    },
    get turns() {
      return state.turns;
    },
    get assistantTexts() {
      return state.assistantTexts;
    },
    get assistantAudio() {
      return state.assistantAudio;
    }
  };
};
|
|
1204
|
+
|
|
1205
|
+
// src/react/useVoiceController.tsx
|
|
1206
|
+
// Fallback snapshot returned by useVoiceController when the store yields no snapshot.
// It carries `scenarioId` so its shape matches the snapshots the controller produces.
var EMPTY_SNAPSHOT2 = {
  assistantAudio: [],
  assistantTexts: [],
  error: null,
  isConnected: false,
  isRecording: false,
  partial: "",
  recordingError: null,
  scenarioId: null,
  sessionId: "",
  status: "idle",
  turns: []
};
|
|
1218
|
+
/**
 * React hook that owns one voice controller for the lifetime of the component.
 *
 * The controller is created on the first render only (later changes to `path`
 * or `options` do not recreate it) and is closed by the effect cleanup.
 *
 * @param {string} path - Passed to `createVoiceController` on first render.
 * @param {*} [options={}] - Passed to `createVoiceController` on first render.
 * @returns {object} The current controller snapshot (or `EMPTY_SNAPSHOT2` when the
 *   store yields none) merged with the controller's action functions.
 */
var useVoiceController = (path, options = {}) => {
  const controllerRef = useRef2(null);
  if (!controllerRef.current) {
    controllerRef.current = createVoiceController(path, options);
  }
  const controller = controllerRef.current;
  useEffect2(() => {
    return () => controller.close();
  }, [controller]);
  const storeSnapshot = useSyncExternalStore2(controller.subscribe, controller.getSnapshot, controller.getServerSnapshot);
  const snapshot = storeSnapshot ?? EMPTY_SNAPSHOT2;
  const actions = {
    bindHTMX: controller.bindHTMX,
    close: () => controller.close(),
    endTurn: () => controller.endTurn(),
    sendAudio: (audio) => controller.sendAudio(audio),
    startRecording: () => controller.startRecording(),
    stopRecording: () => controller.stopRecording(),
    toggleRecording: () => controller.toggleRecording()
  };
  return {
    ...snapshot,
    ...actions
  };
};
|
|
513
1237
|
export {
|
|
514
|
-
useVoiceStream
|
|
1238
|
+
useVoiceStream,
|
|
1239
|
+
useVoiceController
|
|
515
1240
|
};
|