@absolutejs/voice 0.0.19 → 0.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +387 -4
- package/dist/angular/index.d.ts +1 -0
- package/dist/angular/index.js +669 -3
- package/dist/angular/voice-controller.service.d.ts +21 -0
- package/dist/audioConditioning.d.ts +3 -0
- package/dist/client/actions.d.ts +7 -0
- package/dist/client/connection.d.ts +5 -0
- package/dist/client/controller.d.ts +2 -0
- package/dist/client/htmxBootstrap.js +576 -167
- package/dist/client/index.d.ts +1 -0
- package/dist/client/index.js +486 -3
- package/dist/client/microphone.d.ts +4 -2
- package/dist/correction.d.ts +16 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.js +1314 -283
- package/dist/presets.d.ts +13 -0
- package/dist/react/index.d.ts +1 -0
- package/dist/react/index.js +642 -3
- package/dist/react/useVoiceController.d.ts +20 -0
- package/dist/react/useVoiceStream.d.ts +1 -0
- package/dist/store.d.ts +2 -2
- package/dist/svelte/index.d.ts +1 -0
- package/dist/svelte/index.js +607 -3
- package/dist/testing/benchmark.d.ts +36 -0
- package/dist/testing/fixtures.d.ts +1 -0
- package/dist/testing/index.d.ts +2 -0
- package/dist/testing/index.js +1975 -4
- package/dist/testing/resilience.d.ts +20 -0
- package/dist/testing/sessionBenchmark.d.ts +126 -0
- package/dist/testing/stt.d.ts +1 -0
- package/dist/turnDetection.d.ts +5 -1
- package/dist/turnProfiles.d.ts +6 -0
- package/dist/types.d.ts +198 -8
- package/dist/vue/index.d.ts +1 -0
- package/dist/vue/index.js +660 -3
- package/dist/vue/useVoiceController.d.ts +19 -0
- package/fixtures/README.md +24 -0
- package/fixtures/manifest.json +127 -0
- package/fixtures/pcm/dialogue-three-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-three-mixed.pcm +0 -0
- package/fixtures/pcm/dialogue-two-clean.pcm +0 -0
- package/fixtures/pcm/dialogue-two-noisy.pcm +0 -0
- package/fixtures/pcm/multiturn-three-mixed.pcm +0 -0
- package/fixtures/pcm/multiturn-two-clean.pcm +0 -0
- package/fixtures/pcm/stella-bulgaria-bulgarian20.pcm +0 -0
- package/fixtures/pcm/stella-jamaica-jamaican-creole-english1.pcm +0 -0
- package/fixtures/pcm/stella-liberia-liberian-pidgin-english2.pcm +0 -0
- package/fixtures/pcm/stella-sierra-leone-krio5.pcm +0 -0
- package/package.json +25 -1
package/dist/client/index.d.ts
CHANGED
package/dist/client/index.js
CHANGED
|
@@ -76,24 +76,30 @@ var WS_NORMAL_CLOSURE = 1000;
|
|
|
76
76
|
// Fallback reconnect budget.
// NOTE(review): presumably applied when options.maxReconnectAttempts is unset — confirm in createVoiceConnection.
var DEFAULT_MAX_RECONNECT_ATTEMPTS = 10;
// Fallback keep-alive ping interval.
// NOTE(review): value looks like milliseconds (30 s) — confirm where the ping timer is created.
var DEFAULT_PING_INTERVAL = 30000;
// Delay handed to the reconnect timer in createVoiceConnection.
var RECONNECT_DELAY_MS = 500;
// Name of the query-string parameter buildWsUrl uses to carry the scenario id.
var DEFAULT_SCENARIO_QUERY_PARAM = "scenarioId";
// Shared do-nothing callback used for the inert members of NOOP_CONNECTION.
var noop = () => {};
// Subscribe stand-in: ignores the listener and returns the shared no-op as its unsubscribe function.
var noopUnsubscribe = () => noop;
|
|
81
82
|
/**
 * Inert stand-in for a voice connection: every action is a no-op, the ready
 * state is always "closed", and both ids are reported as empty strings.
 * NOTE(review): presumably handed out where no real WebSocket can be opened
 * (e.g. outside the browser) — confirm at the call site.
 */
var NOOP_CONNECTION = {
  // Actions: all share the single module-level no-op.
  start: noop,
  close: noop,
  endTurn: noop,
  // Queries: fixed answers describing a connection that never opened.
  getReadyState: () => WS_CLOSED,
  getScenarioId: () => "",
  getSessionId: () => "",
  // Outbound traffic is silently dropped.
  send: noop,
  sendAudio: noop,
  // Subscribing registers nothing and yields a no-op unsubscribe.
  subscribe: noopUnsubscribe
};
|
|
90
93
|
var createSessionId = () => crypto.randomUUID();
|
|
91
|
-
var buildWsUrl = (path, sessionId) => {
|
|
94
|
+
/**
 * Builds the absolute WebSocket URL for a voice endpoint served from the
 * current page's host.
 *
 * The scheme follows the page (`https:` -> `wss:`, anything else -> `ws:`),
 * the port is appended only when the page has an explicit one, and the
 * session id is always sent as the `sessionId` query parameter. The scenario
 * id is added only when it is truthy.
 *
 * @param path - Endpoint path appended directly after the host (and port).
 * @param sessionId - Value for the `sessionId` query parameter.
 * @param scenarioId - Optional scenario id; omitted from the URL when falsy.
 * @returns The serialized URL string.
 */
var buildWsUrl = (path, sessionId, scenarioId) => {
  const { hostname: host, port: pagePort, protocol: pageProtocol } = window.location;
  const scheme = pageProtocol === "https:" ? "wss:" : "ws:";
  const authority = pagePort ? `${host}:${pagePort}` : host;
  const target = new URL(`${scheme}//${authority}${path}`);
  const query = target.searchParams;
  query.set("sessionId", sessionId);
  if (scenarioId) {
    query.set(DEFAULT_SCENARIO_QUERY_PARAM, scenarioId);
  }
  return target.toString();
};
|
|
99
105
|
var isVoiceServerMessage = (value) => {
|
|
@@ -136,6 +142,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
136
142
|
const state = {
|
|
137
143
|
isConnected: false,
|
|
138
144
|
pendingMessages: [],
|
|
145
|
+
scenarioId: options.scenarioId ?? null,
|
|
139
146
|
pingInterval: null,
|
|
140
147
|
reconnectAttempts: 0,
|
|
141
148
|
reconnectTimeout: null,
|
|
@@ -173,13 +180,14 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
173
180
|
}, RECONNECT_DELAY_MS);
|
|
174
181
|
};
|
|
175
182
|
const connect = () => {
|
|
176
|
-
const ws = new WebSocket(buildWsUrl(path, state.sessionId));
|
|
183
|
+
const ws = new WebSocket(buildWsUrl(path, state.sessionId, state.scenarioId));
|
|
177
184
|
ws.binaryType = "arraybuffer";
|
|
178
185
|
ws.onopen = () => {
|
|
179
186
|
state.isConnected = true;
|
|
180
187
|
state.reconnectAttempts = 0;
|
|
181
188
|
flushPendingMessages();
|
|
182
189
|
listeners.forEach((listener) => listener({
|
|
190
|
+
scenarioId: state.scenarioId ?? undefined,
|
|
183
191
|
sessionId: state.sessionId,
|
|
184
192
|
status: "active",
|
|
185
193
|
type: "session"
|
|
@@ -197,6 +205,7 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
197
205
|
}
|
|
198
206
|
if (parsed.type === "session") {
|
|
199
207
|
state.sessionId = parsed.sessionId;
|
|
208
|
+
state.scenarioId = parsed.scenarioId ?? state.scenarioId;
|
|
200
209
|
}
|
|
201
210
|
listeners.forEach((listener) => listener(parsed));
|
|
202
211
|
};
|
|
@@ -220,6 +229,19 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
220
229
|
const send = (message) => {
|
|
221
230
|
sendSerialized(JSON.stringify(message));
|
|
222
231
|
};
|
|
232
|
+
const start = (input = {}) => {
|
|
233
|
+
if (input.sessionId) {
|
|
234
|
+
state.sessionId = input.sessionId;
|
|
235
|
+
}
|
|
236
|
+
if (input.scenarioId) {
|
|
237
|
+
state.scenarioId = input.scenarioId;
|
|
238
|
+
}
|
|
239
|
+
send({
|
|
240
|
+
type: "start",
|
|
241
|
+
sessionId: state.sessionId,
|
|
242
|
+
scenarioId: state.scenarioId ?? undefined
|
|
243
|
+
});
|
|
244
|
+
};
|
|
223
245
|
const sendAudio = (audio) => {
|
|
224
246
|
sendSerialized(audio);
|
|
225
247
|
};
|
|
@@ -243,9 +265,11 @@ var createVoiceConnection = (path, options = {}) => {
|
|
|
243
265
|
};
|
|
244
266
|
connect();
|
|
245
267
|
return {
|
|
268
|
+
start,
|
|
246
269
|
close,
|
|
247
270
|
endTurn,
|
|
248
271
|
getReadyState: () => state.ws?.readyState ?? WS_CLOSED,
|
|
272
|
+
getScenarioId: () => state.scenarioId ?? "",
|
|
249
273
|
getSessionId: () => state.sessionId,
|
|
250
274
|
send,
|
|
251
275
|
sendAudio,
|
|
@@ -310,6 +334,7 @@ var serverMessageToAction = (message) => {
|
|
|
310
334
|
case "session":
|
|
311
335
|
return {
|
|
312
336
|
sessionId: message.sessionId,
|
|
337
|
+
scenarioId: message.scenarioId,
|
|
313
338
|
status: message.status,
|
|
314
339
|
type: "session"
|
|
315
340
|
};
|
|
@@ -328,6 +353,7 @@ var createInitialState = () => ({
|
|
|
328
353
|
assistantTexts: [],
|
|
329
354
|
error: null,
|
|
330
355
|
isConnected: false,
|
|
356
|
+
scenarioId: null,
|
|
331
357
|
partial: "",
|
|
332
358
|
sessionId: null,
|
|
333
359
|
status: "idle",
|
|
@@ -389,6 +415,7 @@ var createVoiceStreamStore = () => {
|
|
|
389
415
|
state = {
|
|
390
416
|
...state,
|
|
391
417
|
error: null,
|
|
418
|
+
scenarioId: action.scenarioId ?? state.scenarioId,
|
|
392
419
|
isConnected: action.status === "active",
|
|
393
420
|
sessionId: action.sessionId,
|
|
394
421
|
status: action.status
|
|
@@ -422,6 +449,12 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
422
449
|
const connection = createVoiceConnection(path, options);
|
|
423
450
|
const store = createVoiceStreamStore();
|
|
424
451
|
const subscribers = new Set;
|
|
452
|
+
const start = (input) => Promise.resolve().then(() => {
|
|
453
|
+
if (!input?.sessionId && !input?.scenarioId) {
|
|
454
|
+
return;
|
|
455
|
+
}
|
|
456
|
+
connection.start(input);
|
|
457
|
+
});
|
|
425
458
|
const notify = () => {
|
|
426
459
|
subscribers.forEach((subscriber) => subscriber());
|
|
427
460
|
};
|
|
@@ -454,6 +487,10 @@ var createVoiceStream = (path, options = {}) => {
|
|
|
454
487
|
get isConnected() {
|
|
455
488
|
return store.getSnapshot().isConnected;
|
|
456
489
|
},
|
|
490
|
+
get scenarioId() {
|
|
491
|
+
return store.getSnapshot().scenarioId;
|
|
492
|
+
},
|
|
493
|
+
start,
|
|
457
494
|
get partial() {
|
|
458
495
|
return store.getSnapshot().partial;
|
|
459
496
|
},
|
|
@@ -527,6 +564,7 @@ var bindVoiceHTMX = (stream, options) => {
|
|
|
527
564
|
unsubscribe();
|
|
528
565
|
};
|
|
529
566
|
};
|
|
567
|
+
|
|
530
568
|
// src/client/microphone.ts
|
|
531
569
|
var clampSample = (value) => Math.max(-1, Math.min(1, value));
|
|
532
570
|
var floatTo16BitPCM = (input) => {
|
|
@@ -537,6 +575,22 @@ var floatTo16BitPCM = (input) => {
|
|
|
537
575
|
}
|
|
538
576
|
return new Uint8Array(output.buffer);
|
|
539
577
|
};
|
|
578
|
+
/**
 * Computes a 0..1 loudness level for a chunk of 16-bit PCM audio.
 *
 * The chunk is read as signed 16-bit samples (platform byte order), the RMS
 * of the samples normalized by 32768 is multiplied by 5.5 and the result is
 * clamped to [0, 1]. A trailing odd byte is ignored; chunks shorter than one
 * sample yield 0.
 *
 * @param audio - A `Uint8Array`, or anything accepted by `new Uint8Array(...)`
 *   such as an `ArrayBuffer`.
 * @returns The clamped level in the range [0, 1].
 */
var getPcmLevel = (audio) => {
  const bytes = audio instanceof Uint8Array ? audio : new Uint8Array(audio);
  const sampleCount = Math.floor(bytes.byteLength / 2);
  if (sampleCount === 0) {
    return 0;
  }
  // An Int16Array view must start on a 2-byte boundary. A Uint8Array that is
  // a sub-view at an odd offset would make the constructor throw RangeError,
  // so copy those bytes into a fresh (offset 0) buffer first.
  const aligned = bytes.byteOffset % 2 === 0 ? bytes : bytes.slice();
  const samples = new Int16Array(aligned.buffer, aligned.byteOffset, sampleCount);
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.min(1, Math.max(0, Math.sqrt(sumSquares / samples.length) * 5.5));
};
|
|
540
594
|
var downsampleBuffer = (input, sourceRate, targetRate) => {
|
|
541
595
|
if (sourceRate === targetRate) {
|
|
542
596
|
return input;
|
|
@@ -584,7 +638,9 @@ var createMicrophoneCapture = (options) => {
|
|
|
584
638
|
processorNode.onaudioprocess = (event) => {
|
|
585
639
|
const channel = event.inputBuffer.getChannelData(0);
|
|
586
640
|
const downsampled = downsampleBuffer(channel, audioContext?.sampleRate ?? 48000, options.sampleRateHz ?? 16000);
|
|
587
|
-
|
|
641
|
+
const pcm = floatTo16BitPCM(downsampled);
|
|
642
|
+
options.onLevel?.(getPcmLevel(pcm));
|
|
643
|
+
options.onAudio(pcm);
|
|
588
644
|
};
|
|
589
645
|
sourceNode.connect(processorNode);
|
|
590
646
|
processorNode.connect(audioContext.destination);
|
|
@@ -594,6 +650,7 @@ var createMicrophoneCapture = (options) => {
|
|
|
594
650
|
sourceNode?.disconnect();
|
|
595
651
|
mediaStream?.getTracks().forEach((track) => track.stop());
|
|
596
652
|
audioContext?.close();
|
|
653
|
+
options.onLevel?.(0);
|
|
597
654
|
audioContext = null;
|
|
598
655
|
mediaStream = null;
|
|
599
656
|
processorNode = null;
|
|
@@ -601,8 +658,434 @@ var createMicrophoneCapture = (options) => {
|
|
|
601
658
|
};
|
|
602
659
|
return { start, stop };
|
|
603
660
|
};
|
|
661
|
+
|
|
662
|
+
// src/audioConditioning.ts
|
|
663
|
+
// Defaults applied by resolveAudioConditioningConfig when the matching field is null/undefined.
// Levels are on the same scale as computeRms (samples normalized by 32768).

// Level the gain stage aims for.
var DEFAULT_TARGET_LEVEL = 0.08;
// Upper bound on the computed gain.
var DEFAULT_MAX_GAIN = 3;
// Chunks whose RMS is below this are treated as gated by conditionAudioChunk.
var DEFAULT_NOISE_GATE_THRESHOLD = 0.006;
// Extra factor applied to gated chunks.
var DEFAULT_NOISE_GATE_ATTENUATION = 0.15;
|
|
667
|
+
/**
 * Exposes a chunk of raw PCM bytes as signed 16-bit samples (platform byte
 * order). A trailing odd byte is ignored.
 *
 * For an `ArrayBuffer`, or a view that starts on an even byte offset, the
 * result is a view over the same memory. A view that starts on an odd offset
 * cannot back an `Int16Array` (the constructor throws RangeError), so its
 * bytes are copied into a fresh buffer and the result does not alias the
 * input in that case.
 *
 * @param audio - An `ArrayBuffer` or a byte view exposing `buffer`,
 *   `byteOffset` and `byteLength`.
 * @returns An `Int16Array` holding `floor(byteLength / 2)` samples.
 */
var toInt16Array = (audio) => {
  const sampleCount = Math.floor(audio.byteLength / 2);
  if (audio instanceof ArrayBuffer) {
    return new Int16Array(audio, 0, sampleCount);
  }
  if (audio.byteOffset % 2 !== 0) {
    // Misaligned sub-view: realign by copying exactly the bytes we will read.
    const realigned = new Uint8Array(audio.buffer, audio.byteOffset, sampleCount * 2).slice();
    return new Int16Array(realigned.buffer, 0, sampleCount);
  }
  return new Int16Array(audio.buffer, audio.byteOffset, sampleCount);
};
|
|
673
|
+
/**
 * Root-mean-square of a sample array after dividing each sample by 32768.
 *
 * @param samples - Indexable numeric samples (an `Int16Array` in this module).
 * @returns The RMS value, or 0 for an empty array.
 */
var computeRms = (samples) => {
  const count = samples.length;
  if (count === 0) {
    return 0;
  }
  let energy = 0;
  for (let position = 0; position < count; position += 1) {
    const unit = samples[position] / 32768;
    energy += unit * unit;
  }
  return Math.sqrt(energy / count);
};
|
|
684
|
+
/**
 * Normalizes an optional audio-conditioning config into a fully populated one.
 *
 * @param config - Partial config; may be missing entirely.
 * @returns `undefined` when `config` is falsy or `config.enabled === false`.
 *   Otherwise an object with `enabled: true` and every tuning field taken from
 *   `config`, falling back to the module default when the field is
 *   null/undefined.
 */
var resolveAudioConditioningConfig = (config) => {
  if (config && config.enabled !== false) {
    const { maxGain, noiseGateAttenuation, noiseGateThreshold, targetLevel } = config;
    return {
      enabled: true,
      maxGain: maxGain ?? DEFAULT_MAX_GAIN,
      noiseGateAttenuation: noiseGateAttenuation ?? DEFAULT_NOISE_GATE_ATTENUATION,
      noiseGateThreshold: noiseGateThreshold ?? DEFAULT_NOISE_GATE_THRESHOLD,
      targetLevel: targetLevel ?? DEFAULT_TARGET_LEVEL
    };
  }
  return undefined;
};
|
|
696
|
+
/**
 * Applies noise gating and level normalization to one chunk of 16-bit PCM.
 *
 * The chunk's RMS decides whether it is gated (RMS below
 * `noiseGateThreshold`). The gain is `targetLevel / level`, capped at
 * `maxGain` and floored at 0.25; gated chunks are additionally multiplied by
 * `noiseGateAttenuation`. Every sample is scaled, rounded and clamped to the
 * signed 16-bit range.
 *
 * @param audio - Raw PCM bytes (`ArrayBuffer` or byte view).
 * @param config - Resolved conditioning config; when falsy the input is
 *   returned untouched.
 * @returns The original `audio` when there is no config or no complete
 *   sample, otherwise a new `Uint8Array` over the conditioned samples.
 */
var conditionAudioChunk = (audio, config) => {
  if (!config) {
    return audio;
  }
  const samples = toInt16Array(audio);
  if (samples.length === 0) {
    return audio;
  }
  const level = computeRms(samples);
  const gateFactor = level < config.noiseGateThreshold ? config.noiseGateAttenuation : 1;
  // The tiny floor keeps the division finite for silent or fully gated chunks.
  const referenceLevel = Math.max(level * gateFactor, 0.000001);
  const boundedGain = Math.max(0.25, Math.min(config.maxGain, config.targetLevel / referenceLevel));
  const scale = boundedGain * gateFactor;
  const conditioned = Int16Array.from(samples, (sample) => {
    const scaled = Math.round(sample * scale);
    return Math.max(-32768, Math.min(32767, scaled));
  });
  return new Uint8Array(conditioned.buffer);
};
|
|
716
|
+
|
|
717
|
+
// src/turnProfiles.ts
|
|
718
|
+
/**
 * Baseline turn-detection settings for each pacing profile.
 * Each entry also carries a `qualityProfile` field; resolveTurnDetectionConfig
 * does not read it (the quality profile comes from the caller or the module
 * default).
 */
var TURN_PROFILE_DEFAULTS = {
  balanced: {
    qualityProfile: "general",
    silenceMs: 1400,
    speechThreshold: 0.012,
    transcriptStabilityMs: 1000
  },
  fast: {
    qualityProfile: "general",
    silenceMs: 700,
    speechThreshold: 0.015,
    transcriptStabilityMs: 450
  },
  "long-form": {
    qualityProfile: "general",
    silenceMs: 2200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1500
  }
};
/**
 * Per-quality-profile overrides. A value present here wins over the pacing
 * profile's baseline; `general` overrides nothing.
 */
var QUALITY_PROFILE_DEFAULTS = {
  general: {},
  "accent-heavy": {
    silenceMs: 1200,
    speechThreshold: 0.01,
    transcriptStabilityMs: 1200
  },
  "noisy-room": {
    silenceMs: 2000,
    speechThreshold: 0.02,
    transcriptStabilityMs: 1600
  },
  "short-command": {
    silenceMs: 500,
    speechThreshold: 0.016,
    transcriptStabilityMs: 420
  }
};
// Names used when the caller does not pick a profile.
var DEFAULT_TURN_PROFILE = "fast";
var DEFAULT_QUALITY_PROFILE = "general";
/**
 * Resolves a partial turn-detection config into concrete values.
 *
 * Precedence for each of `silenceMs`, `speechThreshold` and
 * `transcriptStabilityMs`: explicit value in `config`, then the quality
 * profile override, then the pacing profile baseline.
 *
 * @param config - Optional partial config (`profile`, `qualityProfile` and
 *   any explicit overrides).
 * @returns The resolved `{ profile, qualityProfile, silenceMs,
 *   speechThreshold, transcriptStabilityMs }`.
 * @throws TypeError when `profile` or `qualityProfile` is not one of the
 *   known names. Previously an unknown name failed with an opaque
 *   "cannot read properties of undefined" TypeError, and a name inherited
 *   from Object.prototype (e.g. "constructor") silently produced undefined
 *   thresholds.
 */
var resolveTurnDetectionConfig = (config) => {
  const profile = config?.profile ?? DEFAULT_TURN_PROFILE;
  const qualityProfile = config?.qualityProfile ?? DEFAULT_QUALITY_PROFILE;
  // Own-property checks so prototype members are never mistaken for profiles.
  if (!Object.prototype.hasOwnProperty.call(TURN_PROFILE_DEFAULTS, profile)) {
    throw new TypeError(`Unknown turn detection profile "${String(profile)}". Expected one of: ${Object.keys(TURN_PROFILE_DEFAULTS).join(", ")}`);
  }
  if (!Object.prototype.hasOwnProperty.call(QUALITY_PROFILE_DEFAULTS, qualityProfile)) {
    throw new TypeError(`Unknown turn detection quality profile "${String(qualityProfile)}". Expected one of: ${Object.keys(QUALITY_PROFILE_DEFAULTS).join(", ")}`);
  }
  const preset = TURN_PROFILE_DEFAULTS[profile];
  const quality = QUALITY_PROFILE_DEFAULTS[qualityProfile];
  return {
    profile,
    qualityProfile,
    silenceMs: config?.silenceMs ?? quality.silenceMs ?? preset.silenceMs,
    speechThreshold: config?.speechThreshold ?? quality.speechThreshold ?? preset.speechThreshold,
    transcriptStabilityMs: config?.transcriptStabilityMs ?? quality.transcriptStabilityMs ?? preset.transcriptStabilityMs
  };
};
|
|
771
|
+
|
|
772
|
+
// src/presets.ts
|
|
773
|
+
/**
 * Raw (unresolved) settings for every named runtime preset. Each preset may
 * provide `audioConditioning`, `capture`, `connection`, `sttLifecycle` and
 * `turnDetection`; resolveVoiceRuntimePreset fills in the rest.
 *
 * The repeated capture / connection shapes are produced by small factories
 * so each preset still owns its own object instances.
 */
var PRESET_INPUTS = (() => {
  // Every preset captures mono audio at 16 kHz.
  const monoCapture = () => ({
    channelCount: 1,
    sampleRateHz: 16000
  });
  // All presets reconnect; only the attempt budget and ping interval vary.
  const reconnecting = (maxReconnectAttempts, pingInterval) => ({
    maxReconnectAttempts,
    pingInterval,
    reconnect: true
  });
  return {
    chat: {
      audioConditioning: {
        enabled: true,
        maxGain: 2.5,
        noiseGateAttenuation: 0,
        noiseGateThreshold: 0.004,
        targetLevel: 0.08
      },
      capture: monoCapture(),
      connection: reconnecting(10, 30000),
      sttLifecycle: "continuous",
      turnDetection: {
        qualityProfile: "short-command",
        profile: "balanced"
      }
    },
    // The only preset without audio conditioning.
    default: {
      capture: monoCapture(),
      connection: reconnecting(10, 30000),
      sttLifecycle: "continuous",
      turnDetection: {
        qualityProfile: "general",
        profile: "fast"
      }
    },
    dictation: {
      audioConditioning: {
        enabled: true,
        maxGain: 2.25,
        noiseGateAttenuation: 0.05,
        noiseGateThreshold: 0.003,
        targetLevel: 0.08
      },
      capture: monoCapture(),
      connection: reconnecting(12, 30000),
      sttLifecycle: "continuous",
      turnDetection: {
        qualityProfile: "accent-heavy",
        profile: "long-form"
      }
    },
    // The only preset with a turn-scoped STT lifecycle.
    "guided-intake": {
      audioConditioning: {
        enabled: true,
        maxGain: 2.5,
        noiseGateAttenuation: 0,
        noiseGateThreshold: 0.004,
        targetLevel: 0.08
      },
      capture: monoCapture(),
      connection: reconnecting(12, 30000),
      sttLifecycle: "turn-scoped",
      turnDetection: {
        qualityProfile: "accent-heavy",
        profile: "long-form"
      }
    },
    // Carries explicit turn-detection numbers on top of its profiles.
    "noisy-room": {
      audioConditioning: {
        enabled: true,
        maxGain: 3,
        noiseGateAttenuation: 0.12,
        noiseGateThreshold: 0.006,
        targetLevel: 0.085
      },
      capture: monoCapture(),
      connection: reconnecting(14, 45000),
      sttLifecycle: "continuous",
      turnDetection: {
        qualityProfile: "noisy-room",
        profile: "long-form",
        silenceMs: 2100,
        speechThreshold: 0.02,
        transcriptStabilityMs: 1650
      }
    },
    reliability: {
      audioConditioning: {
        enabled: true,
        maxGain: 2.9,
        noiseGateAttenuation: 0.08,
        noiseGateThreshold: 0.005,
        targetLevel: 0.08
      },
      capture: monoCapture(),
      connection: reconnecting(14, 45000),
      sttLifecycle: "continuous",
      turnDetection: {
        qualityProfile: "noisy-room",
        profile: "long-form"
      }
    }
  };
})();
|
|
909
|
+
/**
 * Resolves a named runtime preset into a fully populated configuration.
 *
 * @param name - Preset name; defaults to "default" when omitted.
 * @returns An object with the resolved `audioConditioning` (undefined when
 *   the preset has none), `capture` (channel count / sample rate with
 *   fallbacks of 1 and 16000), a shallow copy of the preset's `connection`
 *   settings, the preset `name`, `sttLifecycle` (fallback "continuous") and
 *   the resolved `turnDetection`.
 * @throws TypeError when `name` is not a known preset. Previously an unknown
 *   name failed with an opaque "cannot read properties of undefined"
 *   TypeError, and a name inherited from Object.prototype (e.g.
 *   "constructor") was silently accepted as an empty preset.
 */
var resolveVoiceRuntimePreset = (name = "default") => {
  // Own-property check so prototype members are never treated as presets.
  if (!Object.prototype.hasOwnProperty.call(PRESET_INPUTS, name)) {
    throw new TypeError(`Unknown voice runtime preset "${String(name)}". Expected one of: ${Object.keys(PRESET_INPUTS).join(", ")}`);
  }
  const preset = PRESET_INPUTS[name];
  return {
    audioConditioning: resolveAudioConditioningConfig(preset.audioConditioning),
    capture: {
      channelCount: preset.capture?.channelCount ?? 1,
      sampleRateHz: preset.capture?.sampleRateHz ?? 16000
    },
    connection: {
      ...preset.connection
    },
    name,
    sttLifecycle: preset.sttLifecycle ?? "continuous",
    turnDetection: resolveTurnDetectionConfig(preset.turnDetection)
  };
};
|
|
925
|
+
|
|
926
|
+
// src/client/controller.ts
|
|
927
|
+
/**
 * Builds the controller's first snapshot from the current stream values.
 *
 * The `assistantTexts` and `turns` arrays are shallow-copied so the snapshot
 * does not alias the stream's arrays; recording starts out inactive with no
 * recording error.
 *
 * @param stream - Object exposing the voice stream's current fields.
 * @returns The initial controller state.
 */
var createInitialState2 = (stream) => {
  const {
    assistantTexts,
    error,
    isConnected,
    partial,
    sessionId,
    scenarioId,
    status,
    turns
  } = stream;
  return {
    assistantTexts: Array.from(assistantTexts),
    error,
    isConnected,
    isRecording: false,
    partial,
    recordingError: null,
    sessionId,
    scenarioId,
    status,
    turns: Array.from(turns)
  };
};
|
|
939
|
+
/**
 * Creates a controller that couples a voice stream with microphone capture
 * and exposes one subscribable snapshot of both.
 *
 * Options read here: `preset` (runtime preset name), `connection` (merged
 * over the preset's connection settings), `capture` (`channelCount`,
 * `sampleRateHz`, `onLevel`) and `autoStopOnComplete` (recording is stopped
 * when the stream status becomes "completed" unless this is `false`).
 *
 * Recording start is asynchronous. Calls made while a start is still in
 * flight share that start instead of starting the microphone a second time,
 * and a `stopRecording()` / `close()` issued during a start is honoured as
 * soon as the start settles, so the microphone is never left running without
 * an owner.
 *
 * @param path - Endpoint path passed to createVoiceStream.
 * @param options - Controller options (see above).
 * @returns The controller: live getters for the snapshot fields, recording
 *   controls, `subscribe`, `bindHTMX` and `close`.
 */
var createVoiceController = (path, options = {}) => {
  const preset = resolveVoiceRuntimePreset(options.preset);
  const stream = createVoiceStream(path, {
    ...preset.connection,
    ...options.connection
  });
  let capture = null;
  // Promise of the start currently in flight, or null when none is pending.
  let pendingStart = null;
  // Latest intent expressed by the caller; consulted when a start settles.
  let wantsRecording = false;
  let state = createInitialState2(stream);
  const subscribers = new Set;
  const notify = () => {
    for (const subscriber of subscribers) {
      subscriber();
    }
  };
  const releaseCapture = () => {
    capture?.stop();
    capture = null;
  };
  const sync = () => {
    state = {
      ...state,
      assistantTexts: [...stream.assistantTexts],
      error: stream.error,
      isConnected: stream.isConnected,
      partial: stream.partial,
      sessionId: stream.sessionId,
      scenarioId: stream.scenarioId,
      status: stream.status,
      turns: [...stream.turns]
    };
    if (options.autoStopOnComplete !== false && state.status === "completed" && state.isRecording) {
      wantsRecording = false;
      releaseCapture();
      state = {
        ...state,
        isRecording: false
      };
    }
    notify();
  };
  const unsubscribeStream = stream.subscribe(sync);
  sync();
  const ensureCapture = () => {
    if (capture) {
      return capture;
    }
    capture = createMicrophoneCapture({
      channelCount: options.capture?.channelCount ?? preset.capture.channelCount,
      onLevel: options.capture?.onLevel,
      onAudio: (audio) => stream.sendAudio(audio),
      sampleRateHz: options.capture?.sampleRateHz ?? preset.capture.sampleRateHz
    });
    return capture;
  };
  const stopRecording = () => {
    wantsRecording = false;
    // While a start is in flight the capture is released by that start once
    // it settles; tearing it down mid-start would let the start complete
    // afterwards and leave the microphone live with nothing able to stop it.
    if (!pendingStart) {
      releaseCapture();
    }
    state = {
      ...state,
      isRecording: false
    };
    notify();
  };
  // Performs one actual microphone start; only ever run one at a time.
  const beginRecording = async () => {
    try {
      state = {
        ...state,
        recordingError: null
      };
      notify();
      await ensureCapture().start();
    } catch (error) {
      capture = null;
      wantsRecording = false;
      state = {
        ...state,
        isRecording: false,
        recordingError: error instanceof Error ? error.message : String(error)
      };
      notify();
      throw error;
    }
    if (!wantsRecording) {
      // A stop (or close) arrived while the start was pending.
      releaseCapture();
      return;
    }
    state = {
      ...state,
      isRecording: true
    };
    notify();
  };
  const startRecording = async () => {
    wantsRecording = true;
    if (state.isRecording) {
      return;
    }
    if (!pendingStart) {
      pendingStart = beginRecording().finally(() => {
        pendingStart = null;
      });
    }
    await pendingStart;
  };
  const close = () => {
    unsubscribeStream();
    stopRecording();
    stream.close();
  };
  return {
    bindHTMX(bindingOptions) {
      return bindVoiceHTMX(stream, bindingOptions);
    },
    close,
    endTurn: () => stream.endTurn(),
    get error() {
      return state.error;
    },
    getServerSnapshot: () => state,
    getSnapshot: () => state,
    get isConnected() {
      return state.isConnected;
    },
    get isRecording() {
      return state.isRecording;
    },
    get partial() {
      return state.partial;
    },
    get recordingError() {
      return state.recordingError;
    },
    sendAudio: (audio) => stream.sendAudio(audio),
    get sessionId() {
      return state.sessionId;
    },
    get scenarioId() {
      return state.scenarioId;
    },
    startRecording,
    get status() {
      return state.status;
    },
    stopRecording,
    subscribe: (subscriber) => {
      subscribers.add(subscriber);
      return () => {
        subscribers.delete(subscriber);
      };
    },
    toggleRecording: async () => {
      if (state.isRecording) {
        stopRecording();
        return;
      }
      await startRecording();
    },
    get turns() {
      return state.turns;
    },
    get assistantTexts() {
      return state.assistantTexts;
    }
  };
};
|
|
604
1086
|
export {
|
|
605
1087
|
createVoiceStream,
|
|
1088
|
+
createVoiceController,
|
|
606
1089
|
createVoiceConnection,
|
|
607
1090
|
createMicrophoneCapture,
|
|
608
1091
|
bindVoiceHTMX
|
|
@@ -1,7 +1,9 @@
|
|
|
1
|
+
import type { VoiceCaptureOptions } from '../types';
|
|
1
2
|
type MicrophoneCaptureOptions = {
|
|
2
|
-
channelCount?:
|
|
3
|
+
channelCount?: VoiceCaptureOptions['channelCount'];
|
|
4
|
+
onLevel?: VoiceCaptureOptions['onLevel'];
|
|
3
5
|
onAudio: (audio: Uint8Array) => void;
|
|
4
|
-
sampleRateHz?:
|
|
6
|
+
sampleRateHz?: VoiceCaptureOptions['sampleRateHz'];
|
|
5
7
|
};
|
|
6
8
|
type MicrophoneCapture = {
|
|
7
9
|
start: () => Promise<void>;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { VoicePhraseHint, VoiceTurnCorrectionHandler } from './types';
|
|
2
|
+
/**
 * One phrase-hint match reported inside a correction result.
 * NOTE(review): the implementation is not part of this declaration file;
 * field meanings below are inferred from names and should be confirmed.
 */
export type VoicePhraseHintCorrectionMatch = {
    /** Presumably the alias text that matched in the input — confirm. */
    alias: string;
    /** The phrase hint associated with the match. */
    hint: VoicePhraseHint;
};
/** Value returned by `applyPhraseHintCorrections`. */
export type VoicePhraseHintCorrectionResult = {
    /** Presumably true when `text` differs from the input — confirm. */
    changed: boolean;
    /** Matches reported for this run. */
    matches: VoicePhraseHintCorrectionMatch[];
    /** Resulting text (presumably the corrected transcript — confirm). */
    text: string;
};
/** Optional settings accepted by `createPhraseHintCorrectionHandler`. */
export type VoicePhraseHintCorrectionOptions = {
    /** NOTE(review): looks like a label attached to produced corrections — confirm. */
    provider?: string;
    /** NOTE(review): looks like a free-form reason attached to produced corrections — confirm. */
    reason?: string;
};
/** Takes a text and a list of phrase hints and returns a `VoicePhraseHintCorrectionResult`. */
export declare const applyPhraseHintCorrections: (text: string, phraseHints: VoicePhraseHint[]) => VoicePhraseHintCorrectionResult;
/** Builds a `VoiceTurnCorrectionHandler` from optional `VoicePhraseHintCorrectionOptions`. */
export declare const createPhraseHintCorrectionHandler: (options?: VoicePhraseHintCorrectionOptions) => VoiceTurnCorrectionHandler;
|
package/dist/index.d.ts
CHANGED
|
@@ -2,4 +2,8 @@ export { voice } from './plugin';
|
|
|
2
2
|
export { createVoiceMemoryStore } from './memoryStore';
|
|
3
3
|
export { createVoiceSession } from './session';
|
|
4
4
|
export { createId, createVoiceSessionRecord } from './store';
|
|
5
|
+
export { applyPhraseHintCorrections, createPhraseHintCorrectionHandler } from './correction';
|
|
6
|
+
export { conditionAudioChunk, resolveAudioConditioningConfig } from './audioConditioning';
|
|
7
|
+
export { resolveVoiceRuntimePreset } from './presets';
|
|
8
|
+
export { resolveTurnDetectionConfig, TURN_PROFILE_DEFAULTS } from './turnProfiles';
|
|
5
9
|
export * from './types';
|