@absolutejs/voice 0.0.19 → 0.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +387 -4
- package/dist/angular/index.d.ts +1 -0
- package/dist/angular/index.js +669 -3
- package/dist/angular/voice-controller.service.d.ts +21 -0
- package/dist/audioConditioning.d.ts +3 -0
- package/dist/client/actions.d.ts +7 -0
- package/dist/client/connection.d.ts +5 -0
- package/dist/client/controller.d.ts +2 -0
- package/dist/client/htmxBootstrap.js +576 -167
- package/dist/client/index.d.ts +1 -0
- package/dist/client/index.js +486 -3
- package/dist/client/microphone.d.ts +4 -2
- package/dist/correction.d.ts +16 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +1314 -283
- package/dist/presets.d.ts +13 -0
- package/dist/react/index.d.ts +1 -0
- package/dist/react/index.js +642 -3
- package/dist/react/useVoiceController.d.ts +20 -0
- package/dist/react/useVoiceStream.d.ts +1 -0
- package/dist/store.d.ts +2 -2
- package/dist/svelte/index.d.ts +1 -0
- package/dist/svelte/index.js +607 -3
- package/dist/testing/benchmark.d.ts +36 -0
- package/dist/testing/fixtures.d.ts +1 -0
- package/dist/testing/index.d.ts +2 -0
- package/dist/testing/index.js +1975 -4
- package/dist/testing/resilience.d.ts +20 -0
- package/dist/testing/sessionBenchmark.d.ts +126 -0
- package/dist/testing/stt.d.ts +1 -0
- package/dist/turnDetection.d.ts +5 -1
- package/dist/turnProfiles.d.ts +6 -0
- package/dist/types.d.ts +198 -8
- package/dist/vue/index.d.ts +1 -0
- package/dist/vue/index.js +660 -3
- package/dist/vue/useVoiceController.d.ts +19 -0
- package/fixtures/README.md +24 -0
- package/fixtures/manifest.json +127 -0
- package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
- package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
- package/fixtures/pcm/multiturn-three-mixed.pcm +0 -0
- package/fixtures/pcm/multiturn-two-clean.pcm +0 -0
- package/fixtures/pcm/stella-bulgaria-bulgarian20.pcm +0 -0
- package/fixtures/pcm/stella-jamaica-jamaican-creole-english1.pcm +0 -0
- package/fixtures/pcm/stella-liberia-liberian-pidgin-english2.pcm +0 -0
- package/fixtures/pcm/stella-sierra-leone-krio5.pcm +0 -0
- package/package.json +25 -1
package/dist/react/index.js
CHANGED
|
@@ -130,6 +130,7 @@ var serverMessageToAction = (message) => {
|
|
|
130
130
|
case "session":
|
|
131
131
|
return {
|
|
132
132
|
sessionId: message.sessionId,
|
|
133
|
+
scenarioId: message.scenarioId,
|
|
133
134
|
status: message.status,
|
|
134
135
|
type: "session"
|
|
135
136
|
};
|
|
@@ -150,24 +151,30 @@ var WS_NORMAL_CLOSURE = 1000;
|
|
|
150
151
|
var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
|
|
151
152
|
var DEFAULT_PING_INTERVAL = 30000;
|
|
152
153
|
var RECONNECT_DELAY_MS = 500;
|
|
154
|
+
var DEFAULT_SCENARIO_QUERY_PARAM = "scenarioId";
|
|
153
155
|
var noop = () => {};
|
|
154
156
|
var noopUnsubscribe = () => noop;
|
|
155
157
|
/**
 * Inert connection object exposing the same method names as a real voice
 * connection. Every action is a no-op, the ready state is always WS_CLOSED,
 * both id getters return an empty string, and `subscribe` hands back a no-op
 * unsubscribe function.
 */
var NOOP_CONNECTION = {
  start: () => {},
  close: noop,
  endTurn: noop,
  getReadyState: () => WS_CLOSED,
  getScenarioId: () => "",
  getSessionId: () => "",
  send: noop,
  sendAudio: noop,
  subscribe: noopUnsubscribe
};
|
|
164
168
|
/**
 * Generates a random session identifier (UUID v4 string).
 *
 * `crypto.randomUUID()` is only exposed in secure contexts, but this client
 * also supports plain `http:` pages (see the `ws:` branch of `buildWsUrl`), so
 * fall back to building a version-4 UUID from `crypto.getRandomValues`, which
 * is available in insecure contexts as well.
 *
 * @returns {string} A lowercase, hyphenated UUID v4.
 * @throws {Error} When no Web Crypto implementation is available at all.
 */
var createSessionId = () => {
  const cryptoApi = globalThis.crypto;
  if (typeof cryptoApi?.randomUUID === "function") {
    return cryptoApi.randomUUID();
  }
  if (typeof cryptoApi?.getRandomValues !== "function") {
    throw new Error("Voice session ids require Web Crypto (crypto.getRandomValues).");
  }
  const bytes = cryptoApi.getRandomValues(new Uint8Array(16));
  // RFC 4122: version 4 in the high nibble of byte 6, variant 10xx in byte 8.
  bytes[6] = bytes[6] & 15 | 64;
  bytes[8] = bytes[8] & 63 | 128;
  const hex = Array.from(bytes, (byte) => byte.toString(16).padStart(2, "0"));
  return [
    hex.slice(0, 4).join(""),
    hex.slice(4, 6).join(""),
    hex.slice(6, 8).join(""),
    hex.slice(8, 10).join(""),
    hex.slice(10, 16).join("")
  ].join("-");
};
|
|
165
|
-
var buildWsUrl = (path, sessionId) => {
|
|
169
|
+
/**
 * Builds the WebSocket URL for a voice session on the current page's host.
 *
 * The scheme mirrors the page: `https:` pages get `wss:`, everything else
 * gets `ws:`. The session id is always added as the `sessionId` query
 * parameter; the scenario id is added only when it is truthy.
 *
 * @param {string} path - Server path of the voice endpoint (appended after host[:port]).
 * @param {string} sessionId - Session identifier to send as a query parameter.
 * @param {string} [scenarioId] - Optional scenario identifier.
 * @returns {string} The serialized WebSocket URL.
 */
var buildWsUrl = (path, sessionId, scenarioId) => {
  const { hostname, port, protocol } = window.location;
  const scheme = protocol === "https:" ? "wss:" : "ws:";
  const authority = port ? `${hostname}:${port}` : hostname;
  const target = new URL(`${scheme}//${authority}${path}`);
  const query = target.searchParams;
  query.set("sessionId", sessionId);
  if (!scenarioId) {
    return target.toString();
  }
  query.set(DEFAULT_SCENARIO_QUERY_PARAM, scenarioId);
  return target.toString();
};
|
|
173
180
|
var isVoiceServerMessage = (value) => {
|
|
@@ -210,6 +217,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
210
217
|
const state = {
|
|
211
218
|
isConnected: false,
|
|
212
219
|
pendingMessages: [],
|
|
220
|
+
scenarioId: options.scenarioId ?? null,
|
|
213
221
|
pingInterval: null,
|
|
214
222
|
reconnectAttempts: 0,
|
|
215
223
|
reconnectTimeout: null,
|
|
@@ -247,13 +255,14 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
247
255
|
}, RECONNECT_DELAY_MS);
|
|
248
256
|
};
|
|
249
257
|
const connect = () => {
|
|
250
|
-
const ws = new WebSocket(buildWsUrl(path, state.sessionId));
|
|
258
|
+
const ws = new WebSocket(buildWsUrl(path, state.sessionId, state.scenarioId));
|
|
251
259
|
ws.binaryType = "arraybuffer";
|
|
252
260
|
ws.onopen = () => {
|
|
253
261
|
state.isConnected = true;
|
|
254
262
|
state.reconnectAttempts = 0;
|
|
255
263
|
flushPendingMessages();
|
|
256
264
|
listeners.forEach((listener) => listener({
|
|
265
|
+
scenarioId: state.scenarioId ?? undefined,
|
|
257
266
|
sessionId: state.sessionId,
|
|
258
267
|
status: "active",
|
|
259
268
|
type: "session"
|
|
@@ -271,6 +280,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
271
280
|
}
|
|
272
281
|
if (parsed.type === "session") {
|
|
273
282
|
state.sessionId = parsed.sessionId;
|
|
283
|
+
state.scenarioId = parsed.scenarioId ?? state.scenarioId;
|
|
274
284
|
}
|
|
275
285
|
listeners.forEach((listener) => listener(parsed));
|
|
276
286
|
};
|
|
@@ -294,6 +304,19 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
294
304
|
const send = (message) => {
|
|
295
305
|
sendSerialized(JSON.stringify(message));
|
|
296
306
|
};
|
|
307
|
+
const start = (input = {}) => {
|
|
308
|
+
if (input.sessionId) {
|
|
309
|
+
state.sessionId = input.sessionId;
|
|
310
|
+
}
|
|
311
|
+
if (input.scenarioId) {
|
|
312
|
+
state.scenarioId = input.scenarioId;
|
|
313
|
+
}
|
|
314
|
+
send({
|
|
315
|
+
type: "start",
|
|
316
|
+
sessionId: state.sessionId,
|
|
317
|
+
scenarioId: state.scenarioId ?? undefined
|
|
318
|
+
});
|
|
319
|
+
};
|
|
297
320
|
const sendAudio = (audio) => {
|
|
298
321
|
sendSerialized(audio);
|
|
299
322
|
};
|
|
@@ -317,9 +340,11 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
317
340
|
};
|
|
318
341
|
connect();
|
|
319
342
|
return {
|
|
343
|
+
start,
|
|
320
344
|
close,
|
|
321
345
|
endTurn,
|
|
322
346
|
getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
|
|
347
|
+
getScenarioId: () => state.scenarioId ?? "",
|
|
323
348
|
getSessionId: () => state.sessionId,
|
|
324
349
|
send,
|
|
325
350
|
sendAudio,
|
|
@@ -332,6 +357,7 @@ var createInitialState = () => ({
|
|
|
332
357
|
assistantTexts: [],
|
|
333
358
|
error: null,
|
|
334
359
|
isConnected: false,
|
|
360
|
+
scenarioId: null,
|
|
335
361
|
partial: "",
|
|
336
362
|
sessionId: null,
|
|
337
363
|
status: "idle",
|
|
@@ -393,6 +419,7 @@ var createVoiceStreamStore = () => {
|
|
|
393
419
|
state = {
|
|
394
420
|
...state,
|
|
395
421
|
error: null,
|
|
422
|
+
scenarioId: action.scenarioId ?? state.scenarioId,
|
|
396
423
|
isConnected: action.status === "active",
|
|
397
424
|
sessionId: action.sessionId,
|
|
398
425
|
status: action.status
|
|
@@ -426,6 +453,12 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
426
453
|
const connection = createVoiceConnection(path, options);
|
|
427
454
|
const store = createVoiceStreamStore();
|
|
428
455
|
const subscribers = new Set;
|
|
456
|
+
const start = (input) => Promise.resolve().then(() => {
|
|
457
|
+
if (!input?.sessionId && !input?.scenarioId) {
|
|
458
|
+
return;
|
|
459
|
+
}
|
|
460
|
+
connection.start(input);
|
|
461
|
+
});
|
|
429
462
|
const notify = () => {
|
|
430
463
|
subscribers.forEach((subscriber) => subscriber());
|
|
431
464
|
};
|
|
@@ -458,6 +491,10 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
458
491
|
get isConnected() {
|
|
459
492
|
return store.getSnapshot().isConnected;
|
|
460
493
|
},
|
|
494
|
+
get scenarioId() {
|
|
495
|
+
return store.getSnapshot().scenarioId;
|
|
496
|
+
},
|
|
497
|
+
start,
|
|
461
498
|
get partial() {
|
|
462
499
|
return store.getSnapshot().partial;
|
|
463
500
|
},
|
|
@@ -510,6 +547,608 @@ var useVoiceStream = (path, options = {}) => {
|
|
|
510
547
|
sendAudio: (audio) => stream.sendAudio(audio)
|
|
511
548
|
};
|
|
512
549
|
};
|
|
550
|
+
// src/react/useVoiceController.tsx
|
|
551
|
+
import { useEffect as useEffect2, useRef as useRef2, useSyncExternalStore as useSyncExternalStore2 } from "react";
|
|
552
|
+
|
|
553
|
+
// src/client/htmx.ts
|
|
554
|
+
// Event name passed to htmx.trigger when the binding options do not supply `eventName`.
var DEFAULT_EVENT_NAME = "voice-refresh";
// Query parameter that carries the session id when `sessionQueryParam` is not supplied.
var DEFAULT_QUERY_PARAM = "sessionId";
|
|
556
|
+
/**
 * Resolves a binding target to an element.
 *
 * @param {string | Element | null | undefined} input - A CSS selector, or an
 *   already-resolved value which is returned untouched.
 * @returns {Element | null | undefined} The first match of the selector, or
 *   `input` itself when it is not a string.
 */
var resolveElement = (input) => typeof input === "string" ? document.querySelector(input) : input;
|
|
562
|
+
/**
 * Computes the `hx-get` route for an element with the session id applied.
 *
 * The base route is the explicit `route` argument, falling back to the
 * element's current `hx-get` attribute. The route is parsed relative to the
 * page origin; only its path, query and fragment are returned.
 *
 * @param {Element} element - Element whose `hx-get` attribute is the fallback route.
 * @param {string | null | undefined} route - Explicit route override.
 * @param {string} queryParam - Name of the query parameter carrying the session id.
 * @param {string | null | undefined} sessionId - Session id; when falsy the
 *   parameter is removed instead of set.
 * @returns {string} The rewritten route, or "" when no base route is available.
 */
var buildRoute = (element, route, queryParam, sessionId) => {
  const target = route ?? element.getAttribute("hx-get") ?? "";
  if (!target) {
    return "";
  }
  const resolved = new URL(target, window.location.origin);
  const query = resolved.searchParams;
  if (!sessionId) {
    query.delete(queryParam);
  } else {
    query.set(queryParam, sessionId);
  }
  return [resolved.pathname, resolved.search, resolved.hash].join("");
};
|
|
575
|
+
/**
 * Keeps an htmx-driven element in step with a voice stream.
 *
 * On every stream notification (and once immediately) the element's `hx-get`
 * is rewritten to carry the stream's current session id, then htmx is asked
 * to re-process the element and the refresh event is triggered on it. Calls
 * into `window.htmx` are skipped when htmx, or the specific method, is absent.
 *
 * @param {{ sessionId: unknown, subscribe: (listener: () => void) => () => void }} stream
 *   Stream to observe.
 * @param {{ element: string | Element, route?: string, eventName?: string, sessionQueryParam?: string }} options
 *   Binding target and optional overrides.
 * @returns {() => void} Disposer that unsubscribes from the stream. It is a
 *   no-op when there is no DOM or the element could not be resolved.
 */
var bindVoiceHTMX = (stream, options) => {
  const hasDom = typeof window !== "undefined" && typeof document !== "undefined";
  const element = hasDom ? resolveElement(options.element) : null;
  if (!element) {
    return () => {};
  }
  const eventName = options.eventName ?? DEFAULT_EVENT_NAME;
  const queryParam = options.sessionQueryParam ?? DEFAULT_QUERY_PARAM;
  const refresh = () => {
    const host = window;
    const route = buildRoute(element, options.route, queryParam, stream.sessionId);
    // An empty route means there is nothing to rewrite; htmx is still poked below.
    if (route) {
      element.setAttribute("hx-get", route);
    }
    host.htmx?.process?.(element);
    host.htmx?.trigger?.(element, eventName);
  };
  const detach = stream.subscribe(refresh);
  refresh();
  return () => {
    detach();
  };
};
|
|
600
|
+
|
|
601
|
+
// src/client/microphone.ts
|
|
602
|
+
/**
 * Limits a float audio sample to the closed range [-1, 1].
 *
 * @param {number} value - Sample value.
 * @returns {number} The sample, clamped; NaN is returned unchanged.
 */
var clampSample = (value) => Math.min(1, Math.max(-1, value));
|
|
603
|
+
/**
 * Converts float samples into signed 16-bit PCM bytes.
 *
 * Each sample is clamped to [-1, 1] and scaled asymmetrically (negative
 * values by 32768, non-negative by 32767) so both ends of the int16 range are
 * reachable without overflow. Missing entries are treated as 0.
 *
 * @param {ArrayLike<number>} input - Float samples.
 * @returns {Uint8Array} Byte view over the freshly allocated int16 samples
 *   (two bytes per sample, platform byte order).
 */
var floatTo16BitPCM = (input) => {
  const pcm = Int16Array.from({ length: input.length }, (_, position) => {
    const clamped = clampSample(input[position] ?? 0);
    const scale = clamped < 0 ? 32768 : 32767;
    return clamped * scale;
  });
  return new Uint8Array(pcm.buffer);
};
|
|
611
|
+
/**
 * Estimates a 0..1 loudness level for a chunk of signed 16-bit PCM.
 *
 * The level is the RMS of the samples (normalized by 32768), multiplied by
 * 5.5 and clamped to [0, 1]. A trailing odd byte is ignored.
 *
 * @param {Uint8Array | ArrayBufferView | ArrayBuffer} audio - Raw PCM bytes.
 * @returns {number} Level in [0, 1]; 0 when fewer than two bytes are supplied.
 */
var getPcmLevel = (audio) => {
  let bytes;
  if (audio instanceof Uint8Array) {
    bytes = audio;
  } else if (ArrayBuffer.isView(audio)) {
    // Reinterpret the view's bytes. `new Uint8Array(view)` would convert
    // element values instead and destroy the PCM data.
    bytes = new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
  } else {
    bytes = new Uint8Array(audio);
  }
  const sampleCount = Math.floor(bytes.byteLength / 2);
  if (sampleCount === 0) {
    return 0;
  }
  // Int16Array views must start on a 2-byte boundary. A chunk sliced at an
  // odd offset would throw a RangeError, so copy it to a fresh buffer first.
  const aligned = bytes.byteOffset % 2 === 0 ? bytes : new Uint8Array(bytes.subarray(0, sampleCount * 2));
  const samples = new Int16Array(aligned.buffer, aligned.byteOffset, sampleCount);
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.min(1, Math.max(0, Math.sqrt(sumSquares / samples.length) * 5.5));
};
|
|
627
|
+
/**
 * Resamples float audio from `sourceRate` to `targetRate` by averaging the
 * source samples that fall into each output slot.
 *
 * When an output slot covers no source samples — which happens for every
 * other slot when `targetRate` is higher than `sourceRate`, and occasionally
 * for the final slot because of rounding — the nearest source sample is used.
 * Previously those slots were written as 0, injecting hard zeros into the
 * signal.
 *
 * @param {Float32Array} input - Source samples.
 * @param {number} sourceRate - Sample rate of `input` in Hz.
 * @param {number} targetRate - Desired sample rate in Hz.
 * @returns {Float32Array} `input` itself when the rates match, otherwise a new
 *   buffer of `Math.round(input.length / (sourceRate / targetRate))` samples.
 */
var downsampleBuffer = (input, sourceRate, targetRate) => {
  if (sourceRate === targetRate) {
    return input;
  }
  const ratio = sourceRate / targetRate;
  const length = Math.round(input.length / ratio);
  const output = new Float32Array(length);
  const lastIndex = input.length - 1;
  let offsetBuffer = 0;
  for (let offsetResult = 0; offsetResult < output.length; offsetResult += 1) {
    const nextOffsetBuffer = Math.round((offsetResult + 1) * ratio);
    const windowEnd = Math.min(nextOffsetBuffer, input.length);
    let accum = 0;
    let count = 0;
    for (let index = offsetBuffer; index < windowEnd; index += 1) {
      accum += input[index] ?? 0;
      count += 1;
    }
    if (count > 0) {
      output[offsetResult] = accum / count;
    } else {
      // Empty window: hold the nearest available sample instead of emitting 0.
      const nearest = Math.min(offsetBuffer, lastIndex);
      output[offsetResult] = nearest >= 0 ? input[nearest] ?? 0 : 0;
    }
    offsetBuffer = nextOffsetBuffer;
  }
  return output;
};
|
|
650
|
+
/**
 * Creates a browser microphone capture that delivers 16-bit PCM chunks.
 *
 * `start()` requests the microphone, wires it through a ScriptProcessorNode
 * (buffer size 4096, mono) and, for every processed buffer, resamples channel
 * 0 to `options.sampleRateHz` (default 16000), converts it to PCM bytes and
 * calls `options.onLevel` (if given) followed by `options.onAudio`.
 * `stop()` tears the graph down, stops the media tracks and reports level 0.
 *
 * Lifecycle guarantees added here:
 * - a missing AudioContext produces the descriptive Error instead of a
 *   ReferenceError from touching an undeclared global;
 * - if building the audio graph fails after the microphone was granted, the
 *   media stream is released before the error propagates;
 * - `stop()` (or another `start()`) while `getUserMedia` is still pending
 *   cancels that request's result, so the late stream is stopped immediately;
 * - calling `start()` on an already running capture releases the previous
 *   graph first instead of orphaning it.
 *
 * @param {{ onAudio: (pcm: Uint8Array) => void, onLevel?: (level: number) => void,
 *   channelCount?: number, sampleRateHz?: number }} options
 * @returns {{ start: () => Promise<void>, stop: () => void }}
 */
var createMicrophoneCapture = (options) => {
  let audioContext = null;
  let sourceNode = null;
  let processorNode = null;
  let mediaStream = null;
  // Bumped by every start()/stop(); lets a pending start() detect it was superseded.
  let generation = 0;

  // Releases every acquired resource. Safe to call when nothing is held.
  const release = () => {
    processorNode?.disconnect();
    sourceNode?.disconnect();
    mediaStream?.getTracks().forEach((track) => track.stop());
    // close() returns a promise that rejects when the context is already
    // closed; that outcome is harmless during teardown, so it is ignored.
    audioContext?.close()?.catch?.(() => {});
    audioContext = null;
    mediaStream = null;
    processorNode = null;
    sourceNode = null;
  };

  const start = async () => {
    if (typeof navigator === "undefined" || !navigator.mediaDevices?.getUserMedia) {
      throw new Error("Browser microphone capture requires navigator.mediaDevices.getUserMedia.");
    }
    const windowCtor = typeof window !== "undefined" ? window.AudioContext ?? window.webkitAudioContext : undefined;
    // Read through globalThis: a bare `AudioContext` reference throws when undeclared.
    const AudioContextCtor = windowCtor ?? globalThis.AudioContext ?? globalThis.webkitAudioContext;
    if (!AudioContextCtor) {
      throw new Error("Browser microphone capture requires AudioContext support.");
    }
    if (mediaStream || audioContext) {
      release();
    }
    generation += 1;
    const session = generation;
    const stream = await navigator.mediaDevices.getUserMedia({
      audio: {
        channelCount: options.channelCount ?? 1
      }
    });
    if (session !== generation) {
      // stop() or a newer start() ran while the permission prompt was open.
      stream.getTracks().forEach((track) => track.stop());
      return;
    }
    mediaStream = stream;
    try {
      audioContext = new AudioContextCtor();
      sourceNode = audioContext.createMediaStreamSource(mediaStream);
      processorNode = audioContext.createScriptProcessor(4096, 1, 1);
      processorNode.onaudioprocess = (event) => {
        const channel = event.inputBuffer.getChannelData(0);
        const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
        const pcm = floatTo16BitPCM(downsampled);
        options.onLevel?.(getPcmLevel(pcm));
        options.onAudio(pcm);
      };
      sourceNode.connect(processorNode);
      // The processor is routed to the destination so onaudioprocess fires.
      processorNode.connect(audioContext.destination);
    } catch (error) {
      release();
      throw error;
    }
  };

  const stop = () => {
    generation += 1;
    release();
    options.onLevel?.(0);
  };

  return { start, stop };
};
|
|
694
|
+
|
|
695
|
+
// src/audioConditioning.ts
|
|
696
|
+
// Fallbacks applied by resolveAudioConditioningConfig when a field is not supplied.
// Levels are compared against RMS values normalized to 0..1 (sample / 32768).
var DEFAULT_TARGET_LEVEL = 0.08;
// Upper bound for the gain computed by conditionAudioChunk.
var DEFAULT_MAX_GAIN = 3;
// Chunks whose RMS is below this value are treated as gated.
var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
// Multiplier applied to gated chunks.
var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;
|
|
700
|
+
/**
 * Exposes raw PCM bytes as signed 16-bit samples. A trailing odd byte is
 * dropped.
 *
 * For an ArrayBuffer, or a view that starts on an even byte offset, the
 * result is a view over the caller's memory (no copy). A view that starts on
 * an odd byte offset cannot back an Int16Array (the constructor throws a
 * RangeError), so its bytes are copied into a fresh, aligned buffer.
 *
 * @param {ArrayBuffer | ArrayBufferView} audio - PCM bytes.
 * @returns {Int16Array} Samples in platform byte order.
 */
var toInt16Array = (audio) => {
  if (audio instanceof ArrayBuffer) {
    return new Int16Array(audio, 0, Math.floor(audio.byteLength / 2));
  }
  const sampleCount = Math.floor(audio.byteLength / 2);
  if (audio.byteOffset % 2 === 0) {
    return new Int16Array(audio.buffer, audio.byteOffset, sampleCount);
  }
  const aligned = new Uint8Array(sampleCount * 2);
  aligned.set(new Uint8Array(audio.buffer, audio.byteOffset, sampleCount * 2));
  return new Int16Array(aligned.buffer);
};
|
|
706
|
+
/**
 * Root-mean-square of int16 samples on a 0..1 scale.
 *
 * @param {ArrayLike<number>} samples - Samples; each is divided by 32768 before squaring.
 * @returns {number} The RMS, or 0 for an empty input.
 */
var computeRms = (samples) => {
  const total = samples.length;
  if (total === 0) {
    return 0;
  }
  let energy = 0;
  for (let position = 0; position < total; position += 1) {
    const unit = samples[position] / 32768;
    energy += unit * unit;
  }
  return Math.sqrt(energy / total);
};
|
|
717
|
+
/**
 * Normalizes user-supplied audio conditioning options.
 *
 * @param {{ enabled?: boolean, maxGain?: number, noiseGateAttenuation?: number,
 *   noiseGateThreshold?: number, targetLevel?: number } | null | undefined} config
 * @returns {{ enabled: true, maxGain: number, noiseGateAttenuation: number,
 *   noiseGateThreshold: number, targetLevel: number } | undefined}
 *   A fully populated config, or `undefined` when conditioning is absent or
 *   explicitly disabled. Only null/undefined fields fall back to defaults, so
 *   an explicit 0 is kept.
 */
var resolveAudioConditioningConfig = (config) => {
  const isOff = !config || config.enabled === false;
  if (isOff) {
    return undefined;
  }
  const { maxGain, noiseGateAttenuation, noiseGateThreshold, targetLevel } = config;
  return {
    enabled: true,
    maxGain: maxGain ?? DEFAULT_MAX_GAIN,
    noiseGateAttenuation: noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION,
    noiseGateThreshold: noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD,
    targetLevel: targetLevel ?? DEFAULT_TARGET_LEVEL
  };
};
|
|
729
|
+
/**
 * Applies noise gating and level normalization to one chunk of 16-bit PCM.
 *
 * The chunk's RMS decides whether it is gated (RMS below
 * `noiseGateThreshold`). A gain that would bring the (possibly attenuated)
 * level up to `targetLevel` is computed, capped at `maxGain`, floored at
 * 0.25, and finally multiplied by the gate factor. Every sample is scaled,
 * rounded and clamped to the int16 range.
 *
 * @param {ArrayBuffer | ArrayBufferView} audio - PCM bytes.
 * @param {{ maxGain: number, noiseGateAttenuation: number, noiseGateThreshold: number,
 *   targetLevel: number } | undefined} config - Resolved conditioning config.
 * @returns {ArrayBuffer | ArrayBufferView | Uint8Array} `audio` unchanged when
 *   there is no config or no complete sample; otherwise a new Uint8Array with
 *   the conditioned samples.
 */
var conditionAudioChunk = (audio, config) => {
  if (!config) {
    return audio;
  }
  const samples = toInt16Array(audio);
  if (samples.length === 0) {
    return audio;
  }
  const level = computeRms(samples);
  const gate = level < config.noiseGateThreshold ? config.noiseGateAttenuation : 1;
  // Floor the reference level so a silent chunk cannot divide by zero.
  const reference = Math.max(level * gate, 0.000001);
  const boost = Math.min(config.maxGain, config.targetLevel / reference);
  const scale = Math.max(0.25, boost) * gate;
  const conditioned = Int16Array.from(samples, (sample) => {
    const scaled = Math.round(sample * scale);
    return Math.max(-32768, Math.min(32767, scaled));
  });
  return new Uint8Array(conditioned.buffer);
};
|
|
749
|
+
|
|
750
|
+
// src/turnProfiles.ts
|
|
751
|
+
// Base turn-detection timings per turn profile. resolveTurnDetectionConfig
// uses these as the last fallback, after explicit config values and
// quality-profile overrides.
var TURN_PROFILE_DEFAULTS = {
  balanced: {
    qualityProfile: "general",
    silenceMs: 1400,
    speechThreshold: 0.012,
    transcriptStabilityMs: 1000
  },
  fast: {
    qualityProfile: "general",
    silenceMs: 700,
    speechThreshold: 0.015,
    transcriptStabilityMs: 450
  },
  "long-form": {
    qualityProfile: "general",
    silenceMs: 2200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1500
  }
};
// Per-quality-profile overrides. A value present here takes precedence over
// the turn profile's value; "general" overrides nothing.
var QUALITY_PROFILE_DEFAULTS = {
  general: {},
  "accent-heavy": {
    silenceMs: 1200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1200
  },
  "noisy-room": {
    silenceMs: 2000,
    speechThreshold: 0.02,
    transcriptStabilityMs: 1600
  },
  "short-command": {
    silenceMs: 500,
    speechThreshold: 0.016,
    transcriptStabilityMs: 420
  }
};
// Profile names used when the config does not name one.
var DEFAULT_TURN_PROFILE = "fast";
var DEFAULT_QUALITY_PROFILE = "general";
|
|
791
|
+
/**
 * Resolves a complete turn-detection config from partial input.
 *
 * Each timing value is taken from, in order: the explicit `config` field, the
 * quality profile's override, the turn profile's base value.
 *
 * A profile or quality-profile name that is not an own key of its table
 * (a typo, or an inherited name such as "toString") now falls back to the
 * default name instead of throwing a TypeError or yielding undefined timings.
 * The returned `profile` / `qualityProfile` are the names actually applied.
 *
 * @param {{ profile?: string, qualityProfile?: string, silenceMs?: number,
 *   speechThreshold?: number, transcriptStabilityMs?: number } | null | undefined} config
 * @returns {{ profile: string, qualityProfile: string, silenceMs: number,
 *   speechThreshold: number, transcriptStabilityMs: number }}
 */
var resolveTurnDetectionConfig = (config) => {
  const isKnown = (table, key) => Object.prototype.hasOwnProperty.call(table, key);
  const requestedProfile = config?.profile ?? DEFAULT_TURN_PROFILE;
  const requestedQuality = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
  const profile = isKnown(TURN_PROFILE_DEFAULTS, requestedProfile) ? requestedProfile : DEFAULT_TURN_PROFILE;
  const qualityProfile = isKnown(QUALITY_PROFILE_DEFAULTS, requestedQuality) ? requestedQuality : DEFAULT_QUALITY_PROFILE;
  const preset = TURN_PROFILE_DEFAULTS[profile];
  const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
  return {
    profile,
    qualityProfile,
    silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
    speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
    transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
  };
};
|
|
804
|
+
|
|
805
|
+
// src/presets.ts
|
|
806
|
+
// Raw runtime presets keyed by preset name. Each entry is un-resolved input:
// resolveVoiceRuntimePreset feeds `audioConditioning` and `turnDetection`
// through their resolvers and copies the remaining sections.
var PRESET_INPUTS = {
  chat: {
    audioConditioning: {
      enabled: true,
      maxGain: 2.5,
      noiseGateAttenuation: 0,
      noiseGateThreshold: 0.004,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 10,
      pingInterval: 30000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "short-command",
      profile: "balanced"
    }
  },
  // The only preset without an audioConditioning section, so conditioning
  // resolves to undefined for it.
  default: {
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 10,
      pingInterval: 30000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "general",
      profile: "fast"
    }
  },
  dictation: {
    audioConditioning: {
      enabled: true,
      maxGain: 2.25,
      noiseGateAttenuation: 0.05,
      noiseGateThreshold: 0.003,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 12,
      pingInterval: 30000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "accent-heavy",
      profile: "long-form"
    }
  },
  // The only preset whose sttLifecycle is "turn-scoped".
  "guided-intake": {
    audioConditioning: {
      enabled: true,
      maxGain: 2.5,
      noiseGateAttenuation: 0,
      noiseGateThreshold: 0.004,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 12,
      pingInterval: 30000,
      reconnect: true
    },
    sttLifecycle: "turn-scoped",
    turnDetection: {
      qualityProfile: "accent-heavy",
      profile: "long-form"
    }
  },
  // Carries explicit turn-detection timings, which take precedence over both
  // the quality-profile and turn-profile tables.
  "noisy-room": {
    audioConditioning: {
      enabled: true,
      maxGain: 3,
      noiseGateAttenuation: 0.12,
      noiseGateThreshold: 0.006,
      targetLevel: 0.085
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 14,
      pingInterval: 45000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "noisy-room",
      profile: "long-form",
      silenceMs: 2100,
      speechThreshold: 0.02,
      transcriptStabilityMs: 1650
    }
  },
  reliability: {
    audioConditioning: {
      enabled: true,
      maxGain: 2.9,
      noiseGateAttenuation: 0.08,
      noiseGateThreshold: 0.005,
      targetLevel: 0.08
    },
    capture: {
      channelCount: 1,
      sampleRateHz: 16000
    },
    connection: {
      maxReconnectAttempts: 14,
      pingInterval: 45000,
      reconnect: true
    },
    sttLifecycle: "continuous",
    turnDetection: {
      qualityProfile: "noisy-room",
      profile: "long-form"
    }
  }
};
|
|
942
|
+
/**
 * Resolves a named runtime preset into a fully populated configuration.
 *
 * A name that is not an own key of PRESET_INPUTS (a typo, `null`, or an
 * inherited name such as "constructor") now resolves to the "default" preset
 * instead of throwing a TypeError while reading from `undefined`. The
 * returned `name` is the preset that was actually applied.
 *
 * @param {string} [name="default"] - Preset name.
 * @returns {{ audioConditioning: object | undefined,
 *   capture: { channelCount: number, sampleRateHz: number },
 *   connection: object, name: string, sttLifecycle: string, turnDetection: object }}
 *   `connection` is a shallow copy, so callers may mutate it freely.
 */
var resolveVoiceRuntimePreset = (name = "default") => {
  const isKnown = typeof name === "string" && Object.prototype.hasOwnProperty.call(PRESET_INPUTS, name);
  const resolvedName = isKnown ? name : "default";
  const preset = PRESET_INPUTS[resolvedName];
  return {
    audioConditioning: resolveAudioConditioningConfig(preset.audioConditioning),
    capture: {
      channelCount: preset.capture?.channelCount ?? 1,
      sampleRateHz: preset.capture?.sampleRateHz ?? 16000
    },
    connection: {
      ...preset.connection
    },
    name: resolvedName,
    sttLifecycle: preset.sttLifecycle ?? "continuous",
    turnDetection: resolveTurnDetectionConfig(preset.turnDetection)
  };
};
|
|
958
|
+
|
|
959
|
+
// src/client/controller.ts
|
|
960
|
+
/**
 * Builds the controller's first snapshot from the stream's current values.
 *
 * `assistantTexts` and `turns` are shallow-copied so the snapshot does not
 * alias the stream's arrays. Recording starts out off with no recording error.
 *
 * @param {object} stream - Voice stream exposing the mirrored fields.
 * @returns {object} A new controller state object.
 */
var createInitialState2 = (stream) => {
  const assistantTexts = Array.from(stream.assistantTexts);
  const { error, isConnected, partial, sessionId, scenarioId, status } = stream;
  const turns = Array.from(stream.turns);
  return {
    assistantTexts,
    error,
    isConnected,
    isRecording: false,
    partial,
    recordingError: null,
    sessionId,
    scenarioId,
    status,
    turns
  };
};
|
|
972
|
+
/**
 * Creates a voice controller: a voice stream plus microphone recording state,
 * exposed as a subscribable store.
 *
 * The stream is opened with the preset's connection settings overridden by
 * `options.connection`. Stream updates are mirrored into the controller
 * snapshot; when the stream reports status "completed" while recording, the
 * capture is stopped unless `options.autoStopOnComplete === false`.
 *
 * Recording lifecycle fixes in this version:
 * - overlapping `startRecording()` calls share one in-flight start instead of
 *   each calling `capture.start()` (which opened the microphone twice);
 * - a failed start stops the capture before discarding it, so partially
 *   acquired resources are released;
 * - if `stopRecording()`/`close()` runs while a start is pending, the capture
 *   is stopped again once the start settles and the controller stays in the
 *   not-recording state (previously the mic stayed live with no handle).
 *
 * @param {string} path - Voice endpoint path passed to createVoiceStream.
 * @param {{ preset?: string, connection?: object, autoStopOnComplete?: boolean,
 *   capture?: { channelCount?: number, sampleRateHz?: number, onLevel?: (level: number) => void } }} [options]
 * @returns {object} Controller with state getters, `subscribe`/`getSnapshot`,
 *   recording controls, `bindHTMX`, `sendAudio`, `endTurn` and `close`.
 */
var createVoiceController = (path, options = {}) => {
  const preset = resolveVoiceRuntimePreset(options.preset);
  const stream = createVoiceStream(path, {
    ...preset.connection,
    ...options.connection
  });
  let capture = null;
  // Promise of the start currently in flight, or null.
  let pendingStart = null;
  let state = createInitialState2(stream);
  const subscribers = new Set();
  const notify = () => {
    for (const subscriber of subscribers) {
      subscriber();
    }
  };
  // Detaches the current capture before stopping it, so a pending start can
  // tell (capture !== its own instance) that it was cancelled.
  const releaseCapture = () => {
    const active = capture;
    capture = null;
    active?.stop();
  };
  const sync = () => {
    state = {
      ...state,
      assistantTexts: [...stream.assistantTexts],
      error: stream.error,
      isConnected: stream.isConnected,
      partial: stream.partial,
      sessionId: stream.sessionId,
      scenarioId: stream.scenarioId,
      status: stream.status,
      turns: [...stream.turns]
    };
    if (options.autoStopOnComplete !== false && state.status === "completed" && state.isRecording) {
      releaseCapture();
      state = {
        ...state,
        isRecording: false
      };
    }
    notify();
  };
  const unsubscribeStream = stream.subscribe(sync);
  sync();
  const ensureCapture = () => {
    if (capture) {
      return capture;
    }
    capture = createMicrophoneCapture({
      channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
      onLevel: options.capture?.onLevel,
      onAudio: (audio) => stream.sendAudio(audio),
      sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
    });
    return capture;
  };
  const stopRecording = () => {
    releaseCapture();
    state = {
      ...state,
      isRecording: false
    };
    notify();
  };
  const runStart = async () => {
    const active = ensureCapture();
    try {
      state = {
        ...state,
        recordingError: null
      };
      notify();
      await active.start();
      if (capture !== active) {
        // Cancelled while waiting (stopRecording/close): release whatever the
        // late start acquired and stay in the not-recording state.
        active.stop();
        return;
      }
      state = {
        ...state,
        isRecording: true
      };
      notify();
    } catch (error) {
      if (capture === active) {
        capture = null;
      }
      // Free anything acquired before the failure.
      active.stop();
      state = {
        ...state,
        isRecording: false,
        recordingError: error instanceof Error ? error.message : String(error)
      };
      notify();
      throw error;
    }
  };
  const startRecording = () => {
    if (state.isRecording) {
      return Promise.resolve();
    }
    if (pendingStart) {
      return pendingStart;
    }
    pendingStart = runStart().finally(() => {
      pendingStart = null;
    });
    return pendingStart;
  };
  const close = () => {
    unsubscribeStream();
    stopRecording();
    stream.close();
  };
  return {
    bindHTMX(bindingOptions) {
      return bindVoiceHTMX(stream, bindingOptions);
    },
    close,
    endTurn: () => stream.endTurn(),
    get error() {
      return state.error;
    },
    getServerSnapshot: () => state,
    getSnapshot: () => state,
    get isConnected() {
      return state.isConnected;
    },
    get isRecording() {
      return state.isRecording;
    },
    get partial() {
      return state.partial;
    },
    get recordingError() {
      return state.recordingError;
    },
    sendAudio: (audio) => stream.sendAudio(audio),
    get sessionId() {
      return state.sessionId;
    },
    get scenarioId() {
      return state.scenarioId;
    },
    startRecording,
    get status() {
      return state.status;
    },
    stopRecording,
    subscribe: (subscriber) => {
      subscribers.add(subscriber);
      return () => {
        subscribers.delete(subscriber);
      };
    },
    toggleRecording: async () => {
      if (state.isRecording) {
        stopRecording();
        return;
      }
      await startRecording();
    },
    get turns() {
      return state.turns;
    },
    get assistantTexts() {
      return state.assistantTexts;
    }
  };
};
|
|
1119
|
+
|
|
1120
|
+
// src/react/useVoiceController.tsx
|
|
1121
|
+
// Fallback snapshot used by useVoiceController when the store yields a
// nullish snapshot. It carries the same keys as a controller snapshot;
// `scenarioId` was missing and is now included (null, matching the stream's
// initial state) so consumers always see a consistent shape.
var EMPTY_SNAPSHOT2 = {
  assistantTexts: [],
  error: null,
  isConnected: false,
  isRecording: false,
  partial: "",
  recordingError: null,
  scenarioId: null,
  sessionId: "",
  status: "idle",
  turns: []
};
|
|
1132
|
+
/**
 * React hook exposing a voice controller as component state.
 *
 * The controller is created once per component instance (on first render,
 * with the `path`/`options` seen then) and kept in a ref. Its snapshot is read
 * through useSyncExternalStore and spread into the result together with
 * action wrappers that delegate to the controller.
 *
 * NOTE(review): the effect cleanup closes the controller but the ref keeps
 * pointing at it, so if React re-runs the effect for the same instance the
 * closed controller is reused — confirm whether that case needs handling.
 *
 * @param {string} path - Voice endpoint path.
 * @param {object} [options] - Options forwarded to createVoiceController.
 * @returns {object} Current snapshot fields plus `bindHTMX`, `close`,
 *   `endTurn`, `sendAudio`, `startRecording`, `stopRecording`, `toggleRecording`.
 */
var useVoiceController = (path, options = {}) => {
  const holder = useRef2(null);
  if (!holder.current) {
    holder.current = createVoiceController(path, options);
  }
  const controller = holder.current;
  useEffect2(() => {
    // Cleanup only: close the controller when the effect is torn down.
    return () => controller.close();
  }, [controller]);
  const current = useSyncExternalStore2(controller.subscribe, controller.getSnapshot, controller.getServerSnapshot);
  const snapshot = current ?? EMPTY_SNAPSHOT2;
  const actions = {
    bindHTMX: controller.bindHTMX,
    close: () => controller.close(),
    endTurn: () => controller.endTurn(),
    sendAudio: (audio) => controller.sendAudio(audio),
    startRecording: () => controller.startRecording(),
    stopRecording: () => controller.stopRecording(),
    toggleRecording: () => controller.toggleRecording()
  };
  return { ...snapshot, ...actions };
};
|
|
513
1151
|
export {
|
|
514
|
-
useVoiceStream
|
|
1152
|
+
useVoiceStream,
|
|
1153
|
+
useVoiceController
|
|
515
1154
|
};
|