@absolutejs/voice 0.0.15 → 0.0.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +48 -6
- package/dist/turnDetection.d.ts +3 -1
- package/dist/types.d.ts +2 -0
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -236,6 +236,29 @@ var toVoiceSessionSummary = (session) => ({
|
|
|
236
236
|
|
|
237
237
|
// src/turnDetection.ts
|
|
238
238
|
// Fallback for `turnDetection.silenceMs`: the delay, in milliseconds, of the timer that commits the current turn with reason "silence".
var DEFAULT_SILENCE_MS = 700;
|
|
239
|
+
// Fallback for `turnDetection.speechThreshold`: an audio chunk whose measureAudioLevel() result is at or above this value is treated as speech.
var DEFAULT_SPEECH_THRESHOLD = 0.015;
|
|
240
|
+
/**
 * Returns a byte view over an audio chunk without copying it.
 *
 * @param audio An `ArrayBuffer`, or any view exposing `buffer`, `byteOffset`
 *   and `byteLength` (typed array, DataView, ...).
 * @returns A `Uint8Array` covering exactly the bytes of `audio`.
 */
var toUint8Array = (audio) => {
  if (audio instanceof ArrayBuffer) {
    return new Uint8Array(audio);
  }
  // Respect the view's own window so bytes outside it are never exposed.
  return new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
};
/**
 * Computes the root-mean-square level of an audio chunk.
 *
 * The bytes are read as signed 16-bit little-endian samples, each scaled by
 * 1 / 32768. A trailing odd byte is ignored.
 *
 * @param audio An `ArrayBuffer` or buffer view holding the samples.
 * @returns The RMS of the scaled samples, or 0 when the chunk holds fewer
 *   than one full sample.
 */
var measureAudioLevel = (audio) => {
  const bytes = toUint8Array(audio);
  const sampleCount = Math.floor(bytes.byteLength / 2);
  if (sampleCount === 0) {
    return 0;
  }
  // A DataView has no alignment requirement. Constructing an Int16Array here
  // would throw a RangeError whenever the chunk's byteOffset is odd, which
  // happens for slices of a larger buffer.
  const view = new DataView(bytes.buffer, bytes.byteOffset, sampleCount * 2);
  let sumSquares = 0;
  for (let index = 0; index < sampleCount; index += 1) {
    const normalized = view.getInt16(index * 2, true) / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.sqrt(sumSquares / sampleCount);
};
|
|
239
262
|
// Strips leading/trailing whitespace from `value` and collapses every interior whitespace run to a single space.
var normalizeText = (value) => {
  const words = value.trim().split(/\s+/);
  return words.join(" ");
};
|
|
240
263
|
var mergeTranscriptTexts = (transcripts) => {
|
|
241
264
|
const merged = [];
|
|
@@ -293,11 +316,13 @@ var createVoiceSession = (options) => {
|
|
|
293
316
|
timeout: options.reconnect.timeout ?? DEFAULT_RECONNECT_TIMEOUT
|
|
294
317
|
};
|
|
295
318
|
const turnDetection = {
|
|
296
|
-
silenceMs: options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS
|
|
319
|
+
silenceMs: options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS,
|
|
320
|
+
speechThreshold: options.turnDetection.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD
|
|
297
321
|
};
|
|
298
322
|
let socket = options.socket;
|
|
299
323
|
let sttSession = null;
|
|
300
324
|
let silenceTimer = null;
|
|
325
|
+
let speechDetected = false;
|
|
301
326
|
const clearSilenceTimer = () => {
|
|
302
327
|
if (!silenceTimer) {
|
|
303
328
|
return;
|
|
@@ -339,7 +364,9 @@ var createVoiceSession = (options) => {
|
|
|
339
364
|
}
|
|
340
365
|
};
|
|
341
366
|
const scheduleSilenceCommit = () => {
|
|
342
|
-
|
|
367
|
+
if (silenceTimer) {
|
|
368
|
+
return;
|
|
369
|
+
}
|
|
343
370
|
silenceTimer = setTimeout(() => {
|
|
344
371
|
api.commitTurn("silence");
|
|
345
372
|
}, turnDetection.silenceMs);
|
|
@@ -370,7 +397,6 @@ var createVoiceSession = (options) => {
|
|
|
370
397
|
transcript,
|
|
371
398
|
type: "partial"
|
|
372
399
|
});
|
|
373
|
-
scheduleSilenceCommit();
|
|
374
400
|
};
|
|
375
401
|
const handleFinal = async (transcript) => {
|
|
376
402
|
await writeSession((session) => {
|
|
@@ -394,7 +420,6 @@ var createVoiceSession = (options) => {
|
|
|
394
420
|
transcript,
|
|
395
421
|
type: "final"
|
|
396
422
|
});
|
|
397
|
-
scheduleSilenceCommit();
|
|
398
423
|
};
|
|
399
424
|
const ensureAdapter = async () => {
|
|
400
425
|
if (sttSession) {
|
|
@@ -496,6 +521,7 @@ var createVoiceSession = (options) => {
|
|
|
496
521
|
currentSession.status = "active";
|
|
497
522
|
currentSession.turns = [...currentSession.turns, turn];
|
|
498
523
|
});
|
|
524
|
+
speechDetected = false;
|
|
499
525
|
logger.info("voice turn committed", {
|
|
500
526
|
reason,
|
|
501
527
|
sessionId: options.id,
|
|
@@ -529,6 +555,7 @@ var createVoiceSession = (options) => {
|
|
|
529
555
|
type: "complete"
|
|
530
556
|
});
|
|
531
557
|
await closeAdapter("complete");
|
|
558
|
+
speechDetected = false;
|
|
532
559
|
await options.route.onComplete({
|
|
533
560
|
api,
|
|
534
561
|
context: options.context,
|
|
@@ -599,6 +626,7 @@ var createVoiceSession = (options) => {
|
|
|
599
626
|
session.reconnect.lastDisconnectAt = Date.now();
|
|
600
627
|
session.status = "reconnecting";
|
|
601
628
|
});
|
|
629
|
+
speechDetected = false;
|
|
602
630
|
},
|
|
603
631
|
fail: async (error) => {
|
|
604
632
|
clearSilenceTimer();
|
|
@@ -613,6 +641,7 @@ var createVoiceSession = (options) => {
|
|
|
613
641
|
type: "error"
|
|
614
642
|
});
|
|
615
643
|
await closeAdapter("failed");
|
|
644
|
+
speechDetected = false;
|
|
616
645
|
await options.route.onError?.({
|
|
617
646
|
api,
|
|
618
647
|
context: options.context,
|
|
@@ -627,11 +656,22 @@ var createVoiceSession = (options) => {
|
|
|
627
656
|
return;
|
|
628
657
|
}
|
|
629
658
|
const adapter = await ensureAdapter();
|
|
659
|
+
const audioLevel = measureAudioLevel(audio);
|
|
630
660
|
await writeSession((currentSession) => {
|
|
631
661
|
currentSession.currentTurn.lastAudioAt = Date.now();
|
|
632
662
|
currentSession.lastActivityAt = Date.now();
|
|
633
663
|
currentSession.status = "active";
|
|
634
664
|
});
|
|
665
|
+
if (audioLevel >= turnDetection.speechThreshold) {
|
|
666
|
+
speechDetected = true;
|
|
667
|
+
clearSilenceTimer();
|
|
668
|
+
} else if (speechDetected) {
|
|
669
|
+
const currentSession = await readSession();
|
|
670
|
+
const hasTurnText = Boolean(buildTurnText(currentSession.currentTurn.transcripts, currentSession.currentTurn.partialText));
|
|
671
|
+
if (hasTurnText) {
|
|
672
|
+
scheduleSilenceCommit();
|
|
673
|
+
}
|
|
674
|
+
}
|
|
635
675
|
await adapter.send(audio);
|
|
636
676
|
},
|
|
637
677
|
snapshot: async () => readSession()
|
|
@@ -800,7 +840,8 @@ var voice = (config) => {
|
|
|
800
840
|
store: config.session,
|
|
801
841
|
stt: config.stt,
|
|
802
842
|
turnDetection: {
|
|
803
|
-
silenceMs: config.turnDetection?.silenceMs ?? 700
|
|
843
|
+
silenceMs: config.turnDetection?.silenceMs ?? 700,
|
|
844
|
+
speechThreshold: config.turnDetection?.speechThreshold ?? 0.015
|
|
804
845
|
}
|
|
805
846
|
});
|
|
806
847
|
if (!current) {
|
|
@@ -835,7 +876,8 @@ var voice = (config) => {
|
|
|
835
876
|
store: config.session,
|
|
836
877
|
stt: config.stt,
|
|
837
878
|
turnDetection: {
|
|
838
|
-
silenceMs: config.turnDetection?.silenceMs ?? 700
|
|
879
|
+
silenceMs: config.turnDetection?.silenceMs ?? 700,
|
|
880
|
+
speechThreshold: config.turnDetection?.speechThreshold ?? 0.015
|
|
839
881
|
}
|
|
840
882
|
});
|
|
841
883
|
runtime.activeSessions.set(sessionId, session);
|
package/dist/turnDetection.d.ts
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
-
import type { Transcript } from './types';
|
|
1
|
+
import type { AudioChunk, Transcript } from './types';
|
|
2
2
|
/** Value used for `turnDetection.silenceMs` (milliseconds) when the caller supplies none. */
export declare const DEFAULT_SILENCE_MS = 700;
|
|
3
|
+
/** Value used for `turnDetection.speechThreshold` when the caller supplies none. */
export declare const DEFAULT_SPEECH_THRESHOLD = 0.015;
|
|
4
|
+
/**
 * Returns the root-mean-square level of an audio chunk, reading its bytes as
 * signed 16-bit samples scaled by 1 / 32768. Returns 0 when the chunk holds
 * fewer than two bytes.
 */
export declare const measureAudioLevel: (audio: AudioChunk) => number;
|
|
3
5
|
/**
 * Produces a turn's text from its transcripts and the current partial text.
 * NOTE(review): the implementation is not visible here; confirm how the two
 * inputs are combined. An empty result is treated by the session as "no turn text".
 */
export declare const buildTurnText: (transcripts: Transcript[], partialText: string) => string;
|
package/dist/types.d.ts
CHANGED
|
@@ -211,6 +211,7 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
|
|
|
211
211
|
reconnect?: VoiceReconnectConfig;
|
|
212
212
|
turnDetection?: {
|
|
213
213
|
silenceMs?: number;
|
|
214
|
+
speechThreshold?: number;
|
|
214
215
|
};
|
|
215
216
|
logger?: VoiceLogger;
|
|
216
217
|
htmx?: boolean | VoiceHTMXConfig<TSession, NoInfer<TResult>>;
|
|
@@ -224,6 +225,7 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
|
|
|
224
225
|
reconnect: Required<VoiceReconnectConfig>;
|
|
225
226
|
turnDetection: {
|
|
226
227
|
silenceMs: number;
|
|
228
|
+
speechThreshold: number;
|
|
227
229
|
};
|
|
228
230
|
route: VoiceNormalizedRouteConfig<TContext, TSession, TResult>;
|
|
229
231
|
logger?: VoiceLogger;
|