@absolutejs/voice 0.0.20 → 0.0.22-beta.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +884 -4
- package/dist/angular/index.d.ts +1 -0
- package/dist/angular/index.js +759 -3
- package/dist/angular/voice-controller.service.d.ts +27 -0
- package/dist/angular/voice-stream.service.d.ts +6 -0
- package/dist/audioConditioning.d.ts +3 -0
- package/dist/client/actions.d.ts +48 -0
- package/dist/client/audioPlayer.d.ts +40 -0
- package/dist/client/connection.d.ts +5 -0
- package/dist/client/controller.d.ts +2 -0
- package/dist/client/duplex.d.ts +3 -0
- package/dist/client/htmxBootstrap.js +660 -167
- package/dist/client/index.d.ts +3 -0
- package/dist/client/index.js +991 -6
- package/dist/client/microphone.d.ts +4 -2
- package/dist/correction.d.ts +33 -0
- package/dist/fileStore.d.ts +27 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.js +3721 -298
- package/dist/ops.d.ts +100 -0
- package/dist/presets.d.ts +13 -0
- package/dist/react/index.d.ts +1 -0
- package/dist/react/index.js +728 -3
- package/dist/react/useVoiceController.d.ts +26 -0
- package/dist/react/useVoiceStream.d.ts +7 -0
- package/dist/routing.d.ts +3 -0
- package/dist/runtimeOps.d.ts +23 -0
- package/dist/store.d.ts +2 -2
- package/dist/svelte/index.d.ts +1 -0
- package/dist/svelte/index.js +691 -3
- package/dist/telephony/response.d.ts +7 -0
- package/dist/telephony/twilio.d.ts +116 -0
- package/dist/testing/benchmark.d.ts +93 -2
- package/dist/testing/corrected.d.ts +41 -0
- package/dist/testing/duplex.d.ts +59 -0
- package/dist/testing/fixtures.d.ts +18 -2
- package/dist/testing/index.d.ts +5 -0
- package/dist/testing/index.js +6247 -402
- package/dist/testing/review.d.ts +143 -0
- package/dist/testing/sessionBenchmark.d.ts +92 -2
- package/dist/testing/stt.d.ts +3 -1
- package/dist/testing/telephony.d.ts +70 -0
- package/dist/testing/tts.d.ts +73 -0
- package/dist/turnDetection.d.ts +5 -1
- package/dist/turnProfiles.d.ts +6 -0
- package/dist/types.d.ts +487 -10
- package/dist/vue/index.d.ts +1 -0
- package/dist/vue/index.js +750 -3
- package/dist/vue/useVoiceController.d.ts +30 -0
- package/dist/vue/useVoiceStream.d.ts +11 -0
- package/fixtures/README.md +9 -0
- package/fixtures/manifest.json +59 -1
- package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
- package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
- package/package.json +135 -1
package/dist/svelte/index.js
CHANGED
|
@@ -99,6 +99,14 @@ var normalizeErrorMessage = (value) => {
|
|
|
99
99
|
};
|
|
100
100
|
var serverMessageToAction = (message) => {
|
|
101
101
|
switch (message.type) {
|
|
102
|
+
case "audio":
|
|
103
|
+
return {
|
|
104
|
+
chunk: Uint8Array.from(atob(message.chunkBase64), (char) => char.charCodeAt(0)),
|
|
105
|
+
format: message.format,
|
|
106
|
+
receivedAt: message.receivedAt,
|
|
107
|
+
turnId: message.turnId,
|
|
108
|
+
type: "audio"
|
|
109
|
+
};
|
|
102
110
|
case "assistant":
|
|
103
111
|
return {
|
|
104
112
|
text: message.text,
|
|
@@ -127,6 +135,7 @@ var serverMessageToAction = (message) => {
|
|
|
127
135
|
case "session":
|
|
128
136
|
return {
|
|
129
137
|
sessionId: message.sessionId,
|
|
138
|
+
scenarioId: message.scenarioId,
|
|
130
139
|
status: message.status,
|
|
131
140
|
type: "session"
|
|
132
141
|
};
|
|
@@ -147,24 +156,30 @@ var WS_NORMAL_CLOSURE = 1000;
|
|
|
147
156
|
var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
|
|
148
157
|
var DEFAULT_PING_INTERVAL = 30000;
|
|
149
158
|
var RECONNECT_DELAY_MS = 500;
|
|
159
|
+
var DEFAULT_SCENARIO_QUERY_PARAM = "scenarioId";
|
|
150
160
|
var noop = () => {};
|
|
151
161
|
var noopUnsubscribe = () => noop;
|
|
152
162
|
var NOOP_CONNECTION = {
|
|
163
|
+
start: () => {},
|
|
153
164
|
close: noop,
|
|
154
165
|
endTurn: noop,
|
|
155
166
|
getReadyState: () => WS_CLOSED,
|
|
167
|
+
getScenarioId: () => "",
|
|
156
168
|
getSessionId: () => "",
|
|
157
169
|
send: noop,
|
|
158
170
|
sendAudio: noop,
|
|
159
171
|
subscribe: noopUnsubscribe
|
|
160
172
|
};
|
|
161
173
|
var createSessionId = () => crypto.randomUUID();
|
|
162
|
-
var buildWsUrl = (path, sessionId) => {
|
|
174
|
+
/**
 * Builds the WebSocket URL for a voice session from the current page location.
 *
 * The scheme is `wss:` when the page is served over `https:`, otherwise `ws:`.
 * The session id is always sent as the `sessionId` query parameter; the
 * scenario id is only added (under DEFAULT_SCENARIO_QUERY_PARAM) when truthy.
 *
 * @param {string} path - Path of the voice endpoint, appended to the page host.
 * @param {string} sessionId - Session identifier to send with the handshake.
 * @param {string | null | undefined} scenarioId - Optional scenario identifier.
 * @returns {string} The serialized WebSocket URL.
 */
var buildWsUrl = (path, sessionId, scenarioId) => {
  const loc = window.location;
  const scheme = loc.protocol === "https:" ? "wss:" : "ws:";
  const authority = loc.port ? `${loc.hostname}:${loc.port}` : loc.hostname;
  const target = new URL(`${scheme}//${authority}${path}`);
  target.searchParams.set("sessionId", sessionId);
  if (scenarioId) {
    target.searchParams.set(DEFAULT_SCENARIO_QUERY_PARAM, scenarioId);
  }
  return target.toString();
};
|
|
170
185
|
var isVoiceServerMessage = (value) => {
|
|
@@ -172,6 +187,7 @@ var isVoiceServerMessage = (value) => {
|
|
|
172
187
|
return false;
|
|
173
188
|
}
|
|
174
189
|
switch (value.type) {
|
|
190
|
+
case "audio":
|
|
175
191
|
case "assistant":
|
|
176
192
|
case "complete":
|
|
177
193
|
case "error":
|
|
@@ -207,6 +223,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
207
223
|
const state = {
|
|
208
224
|
isConnected: false,
|
|
209
225
|
pendingMessages: [],
|
|
226
|
+
scenarioId: options.scenarioId ?? null,
|
|
210
227
|
pingInterval: null,
|
|
211
228
|
reconnectAttempts: 0,
|
|
212
229
|
reconnectTimeout: null,
|
|
@@ -244,13 +261,14 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
244
261
|
}, RECONNECT_DELAY_MS);
|
|
245
262
|
};
|
|
246
263
|
const connect = () => {
|
|
247
|
-
const ws = new WebSocket(buildWsUrl(path, state.sessionId));
|
|
264
|
+
const ws = new WebSocket(buildWsUrl(path, state.sessionId, state.scenarioId));
|
|
248
265
|
ws.binaryType = "arraybuffer";
|
|
249
266
|
ws.onopen = () => {
|
|
250
267
|
state.isConnected = true;
|
|
251
268
|
state.reconnectAttempts = 0;
|
|
252
269
|
flushPendingMessages();
|
|
253
270
|
listeners.forEach((listener) => listener({
|
|
271
|
+
scenarioId: state.scenarioId ?? undefined,
|
|
254
272
|
sessionId: state.sessionId,
|
|
255
273
|
status: "active",
|
|
256
274
|
type: "session"
|
|
@@ -268,6 +286,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
268
286
|
}
|
|
269
287
|
if (parsed.type === "session") {
|
|
270
288
|
state.sessionId = parsed.sessionId;
|
|
289
|
+
state.scenarioId = parsed.scenarioId ?? state.scenarioId;
|
|
271
290
|
}
|
|
272
291
|
listeners.forEach((listener) => listener(parsed));
|
|
273
292
|
};
|
|
@@ -291,6 +310,19 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
291
310
|
const send = (message) => {
|
|
292
311
|
sendSerialized(JSON.stringify(message));
|
|
293
312
|
};
|
|
313
|
+
const start = (input = {}) => {
|
|
314
|
+
if (input.sessionId) {
|
|
315
|
+
state.sessionId = input.sessionId;
|
|
316
|
+
}
|
|
317
|
+
if (input.scenarioId) {
|
|
318
|
+
state.scenarioId = input.scenarioId;
|
|
319
|
+
}
|
|
320
|
+
send({
|
|
321
|
+
type: "start",
|
|
322
|
+
sessionId: state.sessionId,
|
|
323
|
+
scenarioId: state.scenarioId ?? undefined
|
|
324
|
+
});
|
|
325
|
+
};
|
|
294
326
|
const sendAudio = (audio) => {
|
|
295
327
|
sendSerialized(audio);
|
|
296
328
|
};
|
|
@@ -314,9 +346,11 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
314
346
|
};
|
|
315
347
|
connect();
|
|
316
348
|
return {
|
|
349
|
+
start,
|
|
317
350
|
close,
|
|
318
351
|
endTurn,
|
|
319
352
|
getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
|
|
353
|
+
getScenarioId: () => state.scenarioId ?? "",
|
|
320
354
|
getSessionId: () => state.sessionId,
|
|
321
355
|
send,
|
|
322
356
|
sendAudio,
|
|
@@ -326,9 +360,11 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
326
360
|
|
|
327
361
|
// src/client/store.ts
|
|
328
362
|
var createInitialState = () => ({
|
|
363
|
+
assistantAudio: [],
|
|
329
364
|
assistantTexts: [],
|
|
330
365
|
error: null,
|
|
331
366
|
isConnected: false,
|
|
367
|
+
scenarioId: null,
|
|
332
368
|
partial: "",
|
|
333
369
|
sessionId: null,
|
|
334
370
|
status: "idle",
|
|
@@ -342,6 +378,20 @@ var createVoiceStreamStore = () => {
|
|
|
342
378
|
};
|
|
343
379
|
const dispatch = (action) => {
|
|
344
380
|
switch (action.type) {
|
|
381
|
+
case "audio":
|
|
382
|
+
state = {
|
|
383
|
+
...state,
|
|
384
|
+
assistantAudio: [
|
|
385
|
+
...state.assistantAudio,
|
|
386
|
+
{
|
|
387
|
+
chunk: action.chunk,
|
|
388
|
+
format: action.format,
|
|
389
|
+
receivedAt: action.receivedAt,
|
|
390
|
+
turnId: action.turnId
|
|
391
|
+
}
|
|
392
|
+
]
|
|
393
|
+
};
|
|
394
|
+
break;
|
|
345
395
|
case "assistant":
|
|
346
396
|
state = {
|
|
347
397
|
...state,
|
|
@@ -390,6 +440,7 @@ var createVoiceStreamStore = () => {
|
|
|
390
440
|
state = {
|
|
391
441
|
...state,
|
|
392
442
|
error: null,
|
|
443
|
+
scenarioId: action.scenarioId ?? state.scenarioId,
|
|
393
444
|
isConnected: action.status === "active",
|
|
394
445
|
sessionId: action.sessionId,
|
|
395
446
|
status: action.status
|
|
@@ -423,6 +474,12 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
423
474
|
const connection = createVoiceConnection(path, options);
|
|
424
475
|
const store = createVoiceStreamStore();
|
|
425
476
|
const subscribers = new Set;
|
|
477
|
+
const start = (input) => Promise.resolve().then(() => {
|
|
478
|
+
if (!input?.sessionId && !input?.scenarioId) {
|
|
479
|
+
return;
|
|
480
|
+
}
|
|
481
|
+
connection.start(input);
|
|
482
|
+
});
|
|
426
483
|
const notify = () => {
|
|
427
484
|
subscribers.forEach((subscriber) => subscriber());
|
|
428
485
|
};
|
|
@@ -455,6 +512,10 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
455
512
|
get isConnected() {
|
|
456
513
|
return store.getSnapshot().isConnected;
|
|
457
514
|
},
|
|
515
|
+
get scenarioId() {
|
|
516
|
+
return store.getSnapshot().scenarioId;
|
|
517
|
+
},
|
|
518
|
+
start,
|
|
458
519
|
get partial() {
|
|
459
520
|
return store.getSnapshot().partial;
|
|
460
521
|
},
|
|
@@ -470,6 +531,9 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
470
531
|
get assistantTexts() {
|
|
471
532
|
return store.getSnapshot().assistantTexts;
|
|
472
533
|
},
|
|
534
|
+
get assistantAudio() {
|
|
535
|
+
return store.getSnapshot().assistantAudio;
|
|
536
|
+
},
|
|
473
537
|
sendAudio(audio) {
|
|
474
538
|
connection.sendAudio(audio);
|
|
475
539
|
},
|
|
@@ -484,6 +548,630 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
484
548
|
|
|
485
549
|
// src/svelte/createVoiceStream.ts
|
|
486
550
|
// Svelte entry point for the voice stream: forwards `path` and `options`
// (defaulting to an empty object) unchanged to the shared client createVoiceStream.
var createVoiceStream2 = (path, options = {}) => createVoiceStream(path, options);
|
|
551
|
+
// src/client/htmx.ts
|
|
552
|
+
var DEFAULT_EVENT_NAME = "voice-refresh";
|
|
553
|
+
var DEFAULT_QUERY_PARAM = "sessionId";
|
|
554
|
+
/**
 * Resolves an element reference.
 *
 * @param {*} input - A CSS selector string, or an already-resolved value.
 * @returns {*} For a string, the result of `document.querySelector(input)`;
 *   any other value is returned untouched.
 */
var resolveElement = (input) =>
  typeof input === "string" ? document.querySelector(input) : input;
|
|
560
|
+
/**
 * Computes the `hx-get` route for an element, with the session id applied.
 *
 * The base route is `route` when given, otherwise the element's current
 * `hx-get` attribute. The query parameter named `queryParam` is set to
 * `sessionId` when it is truthy and removed otherwise.
 *
 * @param {{ getAttribute(name: string): string | null }} element - Element whose
 *   `hx-get` attribute is used when `route` is nullish.
 * @param {string | null | undefined} route - Explicit route overriding the attribute.
 * @param {string} queryParam - Name of the query parameter carrying the session id.
 * @param {string | null | undefined} sessionId - Current session id, if any.
 * @returns {string} `""` when there is no base route; a path-relative route
 *   (`pathname + search + hash`) for same-origin routes; the full absolute URL
 *   for routes on another origin.
 */
var buildRoute = (element, route, queryParam, sessionId) => {
  const baseRoute = route ?? element.getAttribute("hx-get") ?? "";
  if (!baseRoute) {
    return "";
  }
  const pageOrigin = window.location.origin;
  const url = new URL(baseRoute, pageOrigin);
  if (sessionId) {
    url.searchParams.set(queryParam, sessionId);
  } else {
    url.searchParams.delete(queryParam);
  }
  if (url.origin !== pageOrigin) {
    // Dropping the origin here would silently redirect a cross-origin route
    // to the same path on the current host, so keep the absolute URL.
    return url.toString();
  }
  return `${url.pathname}${url.search}${url.hash}`;
};
|
|
573
|
+
/**
 * Keeps an HTMX-driven element in step with a voice stream.
 *
 * On every stream notification (and once immediately) the element's `hx-get`
 * attribute is rewritten with the stream's current session id, then
 * `window.htmx.process` and `window.htmx.trigger` are invoked when present.
 *
 * @param {{ sessionId: *, subscribe(listener: () => void): () => void }} stream
 *   Voice stream to observe.
 * @param {{ element: *, eventName?: string, route?: string, sessionQueryParam?: string }} options
 *   Binding options; `element` may be a selector string or an element.
 * @returns {() => void} Function that removes the stream subscription. A no-op
 *   is returned when there is no DOM or the element cannot be resolved.
 */
var bindVoiceHTMX = (stream, options) => {
  const hasDom = typeof window !== "undefined" && typeof document !== "undefined";
  const target = hasDom ? resolveElement(options.element) : null;
  if (!target) {
    return () => {};
  }
  const refreshEvent = options.eventName ?? DEFAULT_EVENT_NAME;
  const sessionParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
  const refresh = () => {
    const route = buildRoute(target, options.route, sessionParam, stream.sessionId);
    if (route) {
      target.setAttribute("hx-get", route);
    }
    // htmx is optional at runtime; both calls are skipped when it is absent.
    window.htmx?.process?.(target);
    window.htmx?.trigger?.(target, refreshEvent);
  };
  const detach = stream.subscribe(refresh);
  refresh();
  return () => {
    detach();
  };
};
|
|
598
|
+
|
|
599
|
+
// src/client/microphone.ts
|
|
600
|
+
/**
 * Limits a sample to the range [-1, 1].
 *
 * @param {number} value - Sample value.
 * @returns {number} `value` clamped to the inclusive range -1..1.
 */
var clampSample = (value) => {
  const floorApplied = Math.max(-1, value);
  return Math.min(1, floorApplied);
};
|
|
601
|
+
/**
 * Converts float samples to 16-bit signed PCM bytes.
 *
 * Each sample is clamped with clampSample, then scaled by 32768 when negative
 * and by 32767 otherwise before being stored in an Int16Array.
 *
 * @param {ArrayLike<number>} input - Float samples; missing entries count as 0.
 * @returns {Uint8Array} Byte view over the Int16Array holding the converted samples.
 */
var floatTo16BitPCM = (input) => {
  const pcm = new Int16Array(input.length);
  let cursor = 0;
  while (cursor < input.length) {
    const sample = clampSample(input[cursor] ?? 0);
    const scale = sample < 0 ? 32768 : 32767;
    pcm[cursor] = sample * scale;
    cursor += 1;
  }
  return new Uint8Array(pcm.buffer);
};
|
|
609
|
+
/**
 * Computes a 0..1 loudness level for a chunk of 16-bit PCM audio.
 *
 * The bytes are read as Int16 samples (platform byte order, as with any
 * Int16Array view), the RMS of the samples normalized by 32768 is multiplied
 * by 5.5, and the result is clamped to [0, 1]. A trailing odd byte is ignored.
 *
 * @param {Uint8Array | ArrayBuffer} audio - PCM bytes. Any non-Uint8Array value
 *   is passed to `new Uint8Array(audio)`.
 * @returns {number} Level in [0, 1]; 0 when fewer than two bytes are present.
 */
var getPcmLevel = (audio) => {
  const bytes = audio instanceof Uint8Array ? audio : new Uint8Array(audio);
  if (bytes.byteLength < 2) {
    return 0;
  }
  const sampleCount = Math.floor(bytes.byteLength / 2);
  // An Int16Array view requires a 2-byte-aligned offset; a Uint8Array that
  // starts at an odd offset (e.g. a subarray) must be copied first, otherwise
  // the constructor throws a RangeError. The copy goes through a plain
  // Uint8Array so that subclasses whose slice() returns a view still copy.
  const samples = bytes.byteOffset % 2 === 0
    ? new Int16Array(bytes.buffer, bytes.byteOffset, sampleCount)
    : new Int16Array(new Uint8Array(bytes.buffer, bytes.byteOffset, sampleCount * 2).slice().buffer);
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.min(1, Math.max(0, Math.sqrt(sumSquares / samples.length) * 5.5));
};
|
|
625
|
+
/**
 * Resamples float audio from `sourceRate` to `targetRate` by averaging.
 *
 * Each output sample is the mean of the input samples that fall into its
 * window. When the source rate is lower than the target rate some windows
 * contain no input sample; those slots repeat the previous output value
 * instead of being written as 0, which would inject silence gaps.
 *
 * @param {Float32Array} input - Source samples.
 * @param {number} sourceRate - Sample rate of `input` in Hz.
 * @param {number} targetRate - Desired sample rate in Hz.
 * @returns {Float32Array} `input` itself (not a copy) when the rates are
 *   equal, otherwise a new array of `Math.round(input.length / ratio)` samples.
 */
var downsampleBuffer = (input, sourceRate, targetRate) => {
  if (sourceRate === targetRate) {
    return input;
  }
  const ratio = sourceRate / targetRate;
  const output = new Float32Array(Math.round(input.length / ratio));
  let windowStart = 0;
  for (let outIndex = 0; outIndex < output.length; outIndex += 1) {
    const windowEnd = Math.round((outIndex + 1) * ratio);
    let accum = 0;
    let count = 0;
    for (let index = windowStart; index < windowEnd && index < input.length; index += 1) {
      accum += input[index] ?? 0;
      count += 1;
    }
    if (count > 0) {
      output[outIndex] = accum / count;
    } else {
      // Empty window (ratio < 1): hold the last value rather than emit 0.
      output[outIndex] = outIndex > 0 ? output[outIndex - 1] : input[0] ?? 0;
    }
    windowStart = windowEnd;
  }
  return output;
};
|
|
648
|
+
/**
 * Creates a browser microphone capture that emits 16-bit PCM chunks.
 *
 * `start()` requests the microphone, wires it through a ScriptProcessor node
 * (buffer size 4096, mono) and, for every audio callback, resamples channel 0
 * to `options.sampleRateHz` (default 16000), converts it to 16-bit PCM, and
 * calls `options.onLevel` (when provided) and `options.onAudio`.
 * `stop()` tears everything down and reports a level of 0.
 *
 * @param {{
 *   channelCount?: number,
 *   sampleRateHz?: number,
 *   onAudio: (pcm: Uint8Array) => void,
 *   onLevel?: (level: number) => void
 * }} options - Capture settings and callbacks.
 * @returns {{ start: () => Promise<void>, stop: () => void }} Capture handle.
 *   `start` rejects when `getUserMedia` or `AudioContext` is unavailable, when
 *   the microphone request fails, or when the audio graph cannot be built.
 */
var createMicrophoneCapture = (options) => {
  let audioContext = null;
  let sourceNode = null;
  let processorNode = null;
  let mediaStream = null;
  // Releases every resource held by the capture without touching onLevel.
  const release = () => {
    if (processorNode) {
      // Detach the handler so no further chunks are emitted after teardown.
      processorNode.onaudioprocess = null;
      processorNode.disconnect();
    }
    sourceNode?.disconnect();
    mediaStream?.getTracks().forEach((track) => track.stop());
    // close() returns a promise; teardown is best-effort, so a rejection
    // must not surface as an unhandled rejection.
    audioContext?.close()?.catch?.(() => {});
    audioContext = null;
    mediaStream = null;
    processorNode = null;
    sourceNode = null;
  };
  const start = async () => {
    if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
      throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
    }
    // `typeof` guard: referencing an undeclared global AudioContext would throw
    // a ReferenceError instead of reaching the descriptive error below.
    const AudioContextCtor = (typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined) ?? (typeof AudioContext !== "undefined" ? AudioContext : undefined);
    if (!AudioContextCtor) {
      throw new Error("Browser microphone capture requires AudioContext support.");
    }
    // A repeated start() must not orphan the previous stream and context.
    release();
    mediaStream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: options.channelCount ?? 1
      }
    });
    try {
      audioContext = new AudioContextCtor();
      sourceNode = audioContext.createMediaStreamSource(mediaStream);
      processorNode = audioContext.createScriptProcessor(4096, 1, 1);
      processorNode.onaudioprocess = (event) => {
        const channel = event.inputBuffer.getChannelData(0);
        const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
        const pcm = floatTo16BitPCM(downsampled);
        options.onLevel?.(getPcmLevel(pcm));
        options.onAudio(pcm);
      };
      sourceNode.connect(processorNode);
      processorNode.connect(audioContext.destination);
    } catch (error) {
      // The microphone is already open at this point; stop it before failing
      // so the tracks do not stay live with nothing able to stop them.
      release();
      throw error;
    }
  };
  const stop = () => {
    release();
    options.onLevel?.(0);
  };
  return { start, stop };
};
|
|
692
|
+
|
|
693
|
+
// src/audioConditioning.ts
|
|
694
|
+
var DEFAULT_TARGET_LEVEL = 0.08;
|
|
695
|
+
var DEFAULT_MAX_GAIN = 3;
|
|
696
|
+
var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
|
|
697
|
+
var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;
|
|
698
|
+
/**
 * Exposes PCM bytes as 16-bit signed samples.
 *
 * @param {ArrayBuffer | Uint8Array} audio - Raw PCM bytes; a trailing odd byte
 *   is ignored.
 * @returns {Int16Array} A view over the same memory when the data starts on a
 *   2-byte boundary; otherwise a view over a copy, because an Int16Array
 *   cannot be created at an odd byte offset (the constructor throws RangeError).
 */
var toInt16Array = (audio) => {
  if (audio instanceof ArrayBuffer) {
    return new Int16Array(audio, 0, Math.floor(audio.byteLength / 2));
  }
  const sampleCount = Math.floor(audio.byteLength / 2);
  if (audio.byteOffset % 2 !== 0) {
    // Copy through a plain Uint8Array so the result always owns an aligned buffer.
    const copy = new Uint8Array(audio.buffer, audio.byteOffset, sampleCount * 2).slice();
    return new Int16Array(copy.buffer);
  }
  return new Int16Array(audio.buffer, audio.byteOffset, sampleCount);
};
|
|
704
|
+
/**
 * Root-mean-square of 16-bit samples, normalized by 32768.
 *
 * @param {ArrayLike<number>} samples - Sample values.
 * @returns {number} The RMS of `sample / 32768`, or 0 for an empty input.
 */
var computeRms = (samples) => {
  const total = samples.length;
  if (total === 0) {
    return 0;
  }
  let energy = 0;
  for (let position = 0; position < total; position += 1) {
    const unit = samples[position] / 32768;
    energy += unit * unit;
  }
  return Math.sqrt(energy / total);
};
|
|
715
|
+
/**
 * Fills in defaults for an audio-conditioning configuration.
 *
 * @param {{
 *   enabled?: boolean,
 *   maxGain?: number,
 *   noiseGateAttenuation?: number,
 *   noiseGateThreshold?: number,
 *   targetLevel?: number
 * } | null | undefined} config - User-supplied settings.
 * @returns {object | undefined} `undefined` when `config` is falsy or has
 *   `enabled === false`; otherwise a config with `enabled: true` and every
 *   nullish field replaced by its module default.
 */
var resolveAudioConditioningConfig = (config) => {
  const isDisabled = !config || config.enabled === false;
  if (isDisabled) {
    return undefined;
  }
  const maxGain = config.maxGain ?? DEFAULT_MAX_GAIN;
  const noiseGateAttenuation = config.noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION;
  const noiseGateThreshold = config.noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD;
  const targetLevel = config.targetLevel ?? DEFAULT_TARGET_LEVEL;
  return {
    enabled: true,
    maxGain,
    noiseGateAttenuation,
    noiseGateThreshold,
    targetLevel
  };
};
|
|
727
|
+
/**
 * Applies noise gating and gain normalization to a chunk of 16-bit PCM audio.
 *
 * The chunk's RMS decides whether the noise gate applies (RMS below
 * `noiseGateThreshold` selects `noiseGateAttenuation`, otherwise 1). The gain
 * is `targetLevel / max(rms * gate, 0.000001)`, capped at `maxGain` and
 * floored at 0.25, then multiplied by the gate factor. Every sample is scaled,
 * rounded, and clamped to the Int16 range.
 *
 * @param {ArrayBuffer | Uint8Array} audio - PCM bytes to condition.
 * @param {object | undefined} config - Resolved conditioning config; when
 *   falsy the input is returned unchanged.
 * @returns {ArrayBuffer | Uint8Array} The original `audio` when there is no
 *   config or no complete sample; otherwise a new Uint8Array of conditioned PCM.
 */
var conditionAudioChunk = (audio, config) => {
  if (!config) {
    return audio;
  }
  const samples = toInt16Array(audio);
  if (samples.length === 0) {
    return audio;
  }
  const rms = computeRms(samples);
  const conditioned = new Int16Array(samples.length);
  const gate = rms < config.noiseGateThreshold ? config.noiseGateAttenuation : 1;
  // Floor the level so a silent chunk does not divide by zero.
  const level = Math.max(rms * gate, 0.000001);
  const cappedGain = Math.min(config.maxGain, config.targetLevel / level);
  const factor = Math.max(0.25, cappedGain) * gate;
  let position = 0;
  for (const sample of samples) {
    const scaled = Math.round(sample * factor);
    conditioned[position] = Math.max(-32768, Math.min(32767, scaled));
    position += 1;
  }
  return new Uint8Array(conditioned.buffer);
};
|
|
747
|
+
|
|
748
|
+
// src/turnProfiles.ts
|
|
749
|
+
var TURN_PROFILE_DEFAULTS = {
|
|
750
|
+
balanced: {
|
|
751
|
+
qualityProfile: "general",
|
|
752
|
+
silenceMs: 1400,
|
|
753
|
+
speechThreshold: 0.012,
|
|
754
|
+
transcriptStabilityMs: 1000
|
|
755
|
+
},
|
|
756
|
+
fast: {
|
|
757
|
+
qualityProfile: "general",
|
|
758
|
+
silenceMs: 700,
|
|
759
|
+
speechThreshold: 0.015,
|
|
760
|
+
transcriptStabilityMs: 450
|
|
761
|
+
},
|
|
762
|
+
"long-form": {
|
|
763
|
+
qualityProfile: "general",
|
|
764
|
+
silenceMs: 2200,
|
|
765
|
+
speechThreshold: 0.01,
|
|
766
|
+
transcriptStabilityMs: 1500
|
|
767
|
+
}
|
|
768
|
+
};
|
|
769
|
+
var QUALITY_PROFILE_DEFAULTS = {
|
|
770
|
+
general: {},
|
|
771
|
+
"accent-heavy": {
|
|
772
|
+
silenceMs: 1200,
|
|
773
|
+
speechThreshold: 0.01,
|
|
774
|
+
transcriptStabilityMs: 1200
|
|
775
|
+
},
|
|
776
|
+
"noisy-room": {
|
|
777
|
+
silenceMs: 2000,
|
|
778
|
+
speechThreshold: 0.02,
|
|
779
|
+
transcriptStabilityMs: 1600
|
|
780
|
+
},
|
|
781
|
+
"short-command": {
|
|
782
|
+
silenceMs: 500,
|
|
783
|
+
speechThreshold: 0.016,
|
|
784
|
+
transcriptStabilityMs: 420
|
|
785
|
+
}
|
|
786
|
+
};
|
|
787
|
+
var DEFAULT_TURN_PROFILE = "fast";
|
|
788
|
+
var DEFAULT_QUALITY_PROFILE = "general";
|
|
789
|
+
/**
 * Resolves turn-detection settings from a profile, a quality profile, and
 * explicit overrides.
 *
 * For each of `silenceMs`, `speechThreshold`, and `transcriptStabilityMs` the
 * first non-nullish value wins, in this order: the explicit value on `config`,
 * the quality-profile default, the turn-profile default.
 *
 * @param {{
 *   profile?: string,
 *   qualityProfile?: string,
 *   silenceMs?: number,
 *   speechThreshold?: number,
 *   transcriptStabilityMs?: number
 * } | null | undefined} config - Requested settings.
 * @returns {object} The chosen profile names plus the three resolved values.
 */
var resolveTurnDetectionConfig = (config) => {
  const profile = config?.profile ?? DEFAULT_TURN_PROFILE;
  const qualityProfile = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
  const turnDefaults = TURN_PROFILE_DEFAULTS[profile];
  const qualityDefaults = QUALITY_PROFILE_DEFAULTS[qualityProfile];
  const pick = (key) => config?.[key] ?? qualityDefaults[key] ?? turnDefaults[key];
  return {
    profile,
    qualityProfile,
    silenceMs: pick("silenceMs"),
    speechThreshold: pick("speechThreshold"),
    transcriptStabilityMs: pick("transcriptStabilityMs")
  };
};
|
|
802
|
+
|
|
803
|
+
// src/presets.ts
|
|
804
|
+
var PRESET_INPUTS = {
|
|
805
|
+
chat: {
|
|
806
|
+
audioConditioning: {
|
|
807
|
+
enabled: true,
|
|
808
|
+
maxGain: 2.5,
|
|
809
|
+
noiseGateAttenuation: 0,
|
|
810
|
+
noiseGateThreshold: 0.004,
|
|
811
|
+
targetLevel: 0.08
|
|
812
|
+
},
|
|
813
|
+
capture: {
|
|
814
|
+
channelCount: 1,
|
|
815
|
+
sampleRateHz: 16000
|
|
816
|
+
},
|
|
817
|
+
connection: {
|
|
818
|
+
maxReconnectAttempts: 10,
|
|
819
|
+
pingInterval: 30000,
|
|
820
|
+
reconnect: true
|
|
821
|
+
},
|
|
822
|
+
sttLifecycle: "continuous",
|
|
823
|
+
turnDetection: {
|
|
824
|
+
qualityProfile: "short-command",
|
|
825
|
+
profile: "balanced"
|
|
826
|
+
}
|
|
827
|
+
},
|
|
828
|
+
default: {
|
|
829
|
+
capture: {
|
|
830
|
+
channelCount: 1,
|
|
831
|
+
sampleRateHz: 16000
|
|
832
|
+
},
|
|
833
|
+
connection: {
|
|
834
|
+
maxReconnectAttempts: 10,
|
|
835
|
+
pingInterval: 30000,
|
|
836
|
+
reconnect: true
|
|
837
|
+
},
|
|
838
|
+
sttLifecycle: "continuous",
|
|
839
|
+
turnDetection: {
|
|
840
|
+
qualityProfile: "general",
|
|
841
|
+
profile: "fast"
|
|
842
|
+
}
|
|
843
|
+
},
|
|
844
|
+
dictation: {
|
|
845
|
+
audioConditioning: {
|
|
846
|
+
enabled: true,
|
|
847
|
+
maxGain: 2.25,
|
|
848
|
+
noiseGateAttenuation: 0.05,
|
|
849
|
+
noiseGateThreshold: 0.003,
|
|
850
|
+
targetLevel: 0.08
|
|
851
|
+
},
|
|
852
|
+
capture: {
|
|
853
|
+
channelCount: 1,
|
|
854
|
+
sampleRateHz: 16000
|
|
855
|
+
},
|
|
856
|
+
connection: {
|
|
857
|
+
maxReconnectAttempts: 12,
|
|
858
|
+
pingInterval: 30000,
|
|
859
|
+
reconnect: true
|
|
860
|
+
},
|
|
861
|
+
sttLifecycle: "continuous",
|
|
862
|
+
turnDetection: {
|
|
863
|
+
qualityProfile: "accent-heavy",
|
|
864
|
+
profile: "long-form"
|
|
865
|
+
}
|
|
866
|
+
},
|
|
867
|
+
"guided-intake": {
|
|
868
|
+
audioConditioning: {
|
|
869
|
+
enabled: true,
|
|
870
|
+
maxGain: 2.5,
|
|
871
|
+
noiseGateAttenuation: 0,
|
|
872
|
+
noiseGateThreshold: 0.004,
|
|
873
|
+
targetLevel: 0.08
|
|
874
|
+
},
|
|
875
|
+
capture: {
|
|
876
|
+
channelCount: 1,
|
|
877
|
+
sampleRateHz: 16000
|
|
878
|
+
},
|
|
879
|
+
connection: {
|
|
880
|
+
maxReconnectAttempts: 12,
|
|
881
|
+
pingInterval: 30000,
|
|
882
|
+
reconnect: true
|
|
883
|
+
},
|
|
884
|
+
sttLifecycle: "turn-scoped",
|
|
885
|
+
turnDetection: {
|
|
886
|
+
qualityProfile: "accent-heavy",
|
|
887
|
+
profile: "long-form"
|
|
888
|
+
}
|
|
889
|
+
},
|
|
890
|
+
"noisy-room": {
|
|
891
|
+
audioConditioning: {
|
|
892
|
+
enabled: true,
|
|
893
|
+
maxGain: 3,
|
|
894
|
+
noiseGateAttenuation: 0.12,
|
|
895
|
+
noiseGateThreshold: 0.006,
|
|
896
|
+
targetLevel: 0.085
|
|
897
|
+
},
|
|
898
|
+
capture: {
|
|
899
|
+
channelCount: 1,
|
|
900
|
+
sampleRateHz: 16000
|
|
901
|
+
},
|
|
902
|
+
connection: {
|
|
903
|
+
maxReconnectAttempts: 14,
|
|
904
|
+
pingInterval: 45000,
|
|
905
|
+
reconnect: true
|
|
906
|
+
},
|
|
907
|
+
sttLifecycle: "continuous",
|
|
908
|
+
turnDetection: {
|
|
909
|
+
qualityProfile: "noisy-room",
|
|
910
|
+
profile: "long-form",
|
|
911
|
+
silenceMs: 2100,
|
|
912
|
+
speechThreshold: 0.02,
|
|
913
|
+
transcriptStabilityMs: 1650
|
|
914
|
+
}
|
|
915
|
+
},
|
|
916
|
+
"pstn-balanced": {
|
|
917
|
+
audioConditioning: {
|
|
918
|
+
enabled: true,
|
|
919
|
+
maxGain: 2.8,
|
|
920
|
+
noiseGateAttenuation: 0.07,
|
|
921
|
+
noiseGateThreshold: 0.005,
|
|
922
|
+
targetLevel: 0.08
|
|
923
|
+
},
|
|
924
|
+
capture: {
|
|
925
|
+
channelCount: 1,
|
|
926
|
+
sampleRateHz: 16000
|
|
927
|
+
},
|
|
928
|
+
connection: {
|
|
929
|
+
maxReconnectAttempts: 14,
|
|
930
|
+
pingInterval: 45000,
|
|
931
|
+
reconnect: true
|
|
932
|
+
},
|
|
933
|
+
sttLifecycle: "continuous",
|
|
934
|
+
turnDetection: {
|
|
935
|
+
qualityProfile: "noisy-room",
|
|
936
|
+
profile: "long-form",
|
|
937
|
+
silenceMs: 660,
|
|
938
|
+
speechThreshold: 0.012,
|
|
939
|
+
transcriptStabilityMs: 300
|
|
940
|
+
}
|
|
941
|
+
},
|
|
942
|
+
"pstn-fast": {
|
|
943
|
+
audioConditioning: {
|
|
944
|
+
enabled: true,
|
|
945
|
+
maxGain: 2.75,
|
|
946
|
+
noiseGateAttenuation: 0.06,
|
|
947
|
+
noiseGateThreshold: 0.005,
|
|
948
|
+
targetLevel: 0.08
|
|
949
|
+
},
|
|
950
|
+
capture: {
|
|
951
|
+
channelCount: 1,
|
|
952
|
+
sampleRateHz: 16000
|
|
953
|
+
},
|
|
954
|
+
connection: {
|
|
955
|
+
maxReconnectAttempts: 14,
|
|
956
|
+
pingInterval: 45000,
|
|
957
|
+
reconnect: true
|
|
958
|
+
},
|
|
959
|
+
sttLifecycle: "continuous",
|
|
960
|
+
turnDetection: {
|
|
961
|
+
qualityProfile: "noisy-room",
|
|
962
|
+
profile: "long-form",
|
|
963
|
+
silenceMs: 620,
|
|
964
|
+
speechThreshold: 0.012,
|
|
965
|
+
transcriptStabilityMs: 280
|
|
966
|
+
}
|
|
967
|
+
},
|
|
968
|
+
reliability: {
|
|
969
|
+
audioConditioning: {
|
|
970
|
+
enabled: true,
|
|
971
|
+
maxGain: 2.9,
|
|
972
|
+
noiseGateAttenuation: 0.08,
|
|
973
|
+
noiseGateThreshold: 0.005,
|
|
974
|
+
targetLevel: 0.08
|
|
975
|
+
},
|
|
976
|
+
capture: {
|
|
977
|
+
channelCount: 1,
|
|
978
|
+
sampleRateHz: 16000
|
|
979
|
+
},
|
|
980
|
+
connection: {
|
|
981
|
+
maxReconnectAttempts: 14,
|
|
982
|
+
pingInterval: 45000,
|
|
983
|
+
reconnect: true
|
|
984
|
+
},
|
|
985
|
+
sttLifecycle: "continuous",
|
|
986
|
+
turnDetection: {
|
|
987
|
+
qualityProfile: "noisy-room",
|
|
988
|
+
profile: "long-form"
|
|
989
|
+
}
|
|
990
|
+
}
|
|
991
|
+
};
|
|
992
|
+
/**
 * Expands a named runtime preset into a fully resolved configuration.
 *
 * @param {string} [name="default"] - Key into PRESET_INPUTS.
 * @returns {object} Resolved preset: `audioConditioning` (via
 *   resolveAudioConditioningConfig), `capture` (channel count default 1,
 *   sample rate default 16000), a shallow copy of `connection`, the preset
 *   `name`, `sttLifecycle` (default "continuous"), and `turnDetection` (via
 *   resolveTurnDetectionConfig).
 */
var resolveVoiceRuntimePreset = (name = "default") => {
  const input = PRESET_INPUTS[name];
  const audioConditioning = resolveAudioConditioningConfig(input.audioConditioning);
  const capture = {
    channelCount: input.capture?.channelCount ?? 1,
    sampleRateHz: input.capture?.sampleRateHz ?? 16000
  };
  const connection = { ...input.connection };
  const sttLifecycle = input.sttLifecycle ?? "continuous";
  const turnDetection = resolveTurnDetectionConfig(input.turnDetection);
  return {
    audioConditioning,
    capture,
    connection,
    name,
    sttLifecycle,
    turnDetection
  };
};
|
|
1008
|
+
|
|
1009
|
+
// src/client/controller.ts
|
|
1010
|
+
/**
 * Builds the controller's initial state from a voice stream.
 *
 * Array fields are shallow-copied; recording starts out inactive with no
 * recording error.
 *
 * @param {object} stream - Voice stream exposing the mirrored fields.
 * @returns {object} Initial controller state.
 */
var createInitialState2 = (stream) => {
  const assistantAudio = [...stream.assistantAudio];
  const assistantTexts = [...stream.assistantTexts];
  const { error, isConnected, partial, sessionId, scenarioId, status } = stream;
  const turns = [...stream.turns];
  return {
    assistantAudio,
    assistantTexts,
    error,
    isConnected,
    isRecording: false,
    partial,
    recordingError: null,
    sessionId,
    scenarioId,
    status,
    turns
  };
};
|
|
1023
|
+
/**
 * Creates a voice controller that pairs a voice stream with microphone capture.
 *
 * The controller mirrors the stream's state into its own snapshot, adds
 * `isRecording` / `recordingError`, and forwards captured PCM chunks to the
 * stream. Unless `options.autoStopOnComplete` is `false`, recording is stopped
 * when the stream status becomes "completed".
 *
 * @param {string} path - Voice endpoint path passed to createVoiceStream.
 * @param {{
 *   preset?: string,
 *   connection?: object,
 *   capture?: { channelCount?: number, sampleRateHz?: number, onLevel?: (level: number) => void },
 *   autoStopOnComplete?: boolean
 * }} [options={}] - Preset name plus connection and capture overrides.
 * @returns {object} Controller exposing state getters, `subscribe`,
 *   `getSnapshot` / `getServerSnapshot`, `startRecording`, `stopRecording`,
 *   `toggleRecording`, `sendAudio`, `endTurn`, `bindHTMX`, and `close`.
 *   `startRecording` rejects with the capture error after recording it in
 *   `recordingError`.
 */
var createVoiceController = (path, options = {}) => {
  const preset = resolveVoiceRuntimePreset(options.preset);
  const stream = createVoiceStream(path, {
    ...preset.connection,
    ...options.connection
  });
  let capture = null;
  // Promise of the start currently in flight, so overlapping calls share it.
  let pendingStart = null;
  let state = createInitialState2(stream);
  const subscribers = new Set();
  const notify = () => {
    for (const subscriber of subscribers) {
      subscriber();
    }
  };
  const patchState = (patch) => {
    state = { ...state, ...patch };
  };
  const releaseCapture = () => {
    capture?.stop();
    capture = null;
  };
  const sync = () => {
    patchState({
      assistantAudio: [...stream.assistantAudio],
      assistantTexts: [...stream.assistantTexts],
      error: stream.error,
      isConnected: stream.isConnected,
      partial: stream.partial,
      sessionId: stream.sessionId,
      scenarioId: stream.scenarioId,
      status: stream.status,
      turns: [...stream.turns]
    });
    if (options.autoStopOnComplete !== false && state.status === "completed" && state.isRecording) {
      releaseCapture();
      patchState({ isRecording: false });
    }
    notify();
  };
  const unsubscribeStream = stream.subscribe(sync);
  sync();
  const ensureCapture = () => {
    if (capture) {
      return capture;
    }
    capture = createMicrophoneCapture({
      channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
      onLevel: options.capture?.onLevel,
      onAudio: (audio) => stream.sendAudio(audio),
      sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
    });
    return capture;
  };
  const stopRecording = () => {
    releaseCapture();
    patchState({ isRecording: false });
    notify();
  };
  const startRecording = async () => {
    if (state.isRecording) {
      return;
    }
    if (pendingStart) {
      // A start is already waiting (e.g. on the permission prompt). Starting
      // the capture again would open a second microphone stream and leak the
      // first, so share the in-flight attempt instead.
      return pendingStart;
    }
    pendingStart = (async () => {
      let activeCapture = null;
      try {
        patchState({ recordingError: null });
        notify();
        activeCapture = ensureCapture();
        await activeCapture.start();
        if (capture !== activeCapture) {
          // stopRecording()/close() ran while the start was pending and had
          // nothing to release yet; stop the now-live capture so the
          // microphone is not left open without a handle.
          activeCapture.stop();
          return;
        }
        patchState({ isRecording: true });
        notify();
      } catch (error) {
        if (activeCapture === null || capture === activeCapture) {
          capture = null;
        }
        patchState({
          isRecording: false,
          recordingError: error instanceof Error ? error.message : String(error)
        });
        notify();
        throw error;
      } finally {
        pendingStart = null;
      }
    })();
    return pendingStart;
  };
  const close = () => {
    unsubscribeStream();
    stopRecording();
    stream.close();
  };
  return {
    bindHTMX(bindingOptions) {
      return bindVoiceHTMX(stream, bindingOptions);
    },
    close,
    endTurn: () => stream.endTurn(),
    get error() {
      return state.error;
    },
    getServerSnapshot: () => state,
    getSnapshot: () => state,
    get isConnected() {
      return state.isConnected;
    },
    get isRecording() {
      return state.isRecording;
    },
    get partial() {
      return state.partial;
    },
    get recordingError() {
      return state.recordingError;
    },
    sendAudio: (audio) => stream.sendAudio(audio),
    get sessionId() {
      return state.sessionId;
    },
    get scenarioId() {
      return state.scenarioId;
    },
    startRecording,
    get status() {
      return state.status;
    },
    stopRecording,
    subscribe: (subscriber) => {
      subscribers.add(subscriber);
      return () => {
        subscribers.delete(subscriber);
      };
    },
    toggleRecording: async () => {
      if (state.isRecording) {
        stopRecording();
        return;
      }
      await startRecording();
    },
    get turns() {
      return state.turns;
    },
    get assistantTexts() {
      return state.assistantTexts;
    },
    get assistantAudio() {
      return state.assistantAudio;
    }
  };
};
|
|
487
1174
|
export {
|
|
488
|
-
createVoiceStream2 as createVoiceStream
|
|
1175
|
+
createVoiceStream2 as createVoiceStream,
|
|
1176
|
+
createVoiceController
|
|
489
1177
|
};
|