@absolutejs/voice 0.0.15 → 0.0.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -236,6 +236,29 @@ var toVoiceSessionSummary = (session) => ({
236
236
 
237
237
  // src/turnDetection.ts
238
238
  var DEFAULT_SILENCE_MS = 700;
239
+ var DEFAULT_SPEECH_THRESHOLD = 0.015;
240
/**
 * Views an audio chunk as raw bytes without copying.
 *
 * @param audio An `ArrayBuffer`, or an object exposing `buffer`,
 *   `byteOffset` and `byteLength` (e.g. a typed array or Node `Buffer`).
 * @returns A `Uint8Array` over exactly the bytes the chunk covers.
 */
var toUint8Array = (audio) => {
  if (audio instanceof ArrayBuffer) {
    return new Uint8Array(audio);
  }
  // Respect the view's own window into its backing buffer.
  return new Uint8Array(audio.buffer, audio.byteOffset, audio.byteLength);
};
/**
 * Computes the root-mean-square level of an audio chunk.
 *
 * The bytes are read as signed 16-bit samples in host byte order and each
 * sample is divided by 32768 before squaring, so the result lies in [0, 1].
 * A trailing odd byte is ignored.
 *
 * @param audio The chunk to measure (see `toUint8Array` for accepted shapes).
 * @returns The RMS level, or 0 when the chunk holds no complete sample.
 */
var measureAudioLevel = (audio) => {
  const bytes = toUint8Array(audio);
  const bytesPerSample = Int16Array.BYTES_PER_ELEMENT;
  const sampleCount = Math.floor(bytes.byteLength / bytesPerSample);
  if (sampleCount === 0) {
    return 0;
  }
  // An Int16Array view requires a byteOffset that is a multiple of 2 and
  // throws RangeError otherwise. Views carved out of a larger buffer can start
  // at an odd offset, so fall back to copying the sample bytes into a fresh
  // (offset 0) buffer in that case; the aligned path stays zero-copy.
  const isAligned = bytes.byteOffset % bytesPerSample === 0;
  const samples = isAligned
    ? new Int16Array(bytes.buffer, bytes.byteOffset, sampleCount)
    : new Int16Array(bytes.slice(0, sampleCount * bytesPerSample).buffer);
  let sumSquares = 0;
  for (const sample of samples) {
    const normalized = sample / 32768;
    sumSquares += normalized * normalized;
  }
  return Math.sqrt(sumSquares / samples.length);
};
239
262
  var normalizeText = (value) => value.trim().replace(/\s+/g, " ");
240
263
  var mergeTranscriptTexts = (transcripts) => {
241
264
  const merged = [];
@@ -293,11 +316,13 @@ var createVoiceSession = (options) => {
293
316
  timeout: options.reconnect.timeout ?? DEFAULT_RECONNECT_TIMEOUT
294
317
  };
295
318
  const turnDetection = {
296
- silenceMs: options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS
319
+ silenceMs: options.turnDetection.silenceMs ?? DEFAULT_SILENCE_MS,
320
+ speechThreshold: options.turnDetection.speechThreshold ?? DEFAULT_SPEECH_THRESHOLD
297
321
  };
298
322
  let socket = options.socket;
299
323
  let sttSession = null;
300
324
  let silenceTimer = null;
325
+ let speechDetected = false;
301
326
  const clearSilenceTimer = () => {
302
327
  if (!silenceTimer) {
303
328
  return;
@@ -339,7 +364,9 @@ var createVoiceSession = (options) => {
339
364
  }
340
365
  };
341
366
  const scheduleSilenceCommit = () => {
342
- clearSilenceTimer();
367
+ if (silenceTimer) {
368
+ return;
369
+ }
343
370
  silenceTimer = setTimeout(() => {
344
371
  api.commitTurn("silence");
345
372
  }, turnDetection.silenceMs);
@@ -370,7 +397,6 @@ var createVoiceSession = (options) => {
370
397
  transcript,
371
398
  type: "partial"
372
399
  });
373
- scheduleSilenceCommit();
374
400
  };
375
401
  const handleFinal = async (transcript) => {
376
402
  await writeSession((session) => {
@@ -394,7 +420,6 @@ var createVoiceSession = (options) => {
394
420
  transcript,
395
421
  type: "final"
396
422
  });
397
- scheduleSilenceCommit();
398
423
  };
399
424
  const ensureAdapter = async () => {
400
425
  if (sttSession) {
@@ -496,6 +521,7 @@ var createVoiceSession = (options) => {
496
521
  currentSession.status = "active";
497
522
  currentSession.turns = [...currentSession.turns, turn];
498
523
  });
524
+ speechDetected = false;
499
525
  logger.info("voice turn committed", {
500
526
  reason,
501
527
  sessionId: options.id,
@@ -529,6 +555,7 @@ var createVoiceSession = (options) => {
529
555
  type: "complete"
530
556
  });
531
557
  await closeAdapter("complete");
558
+ speechDetected = false;
532
559
  await options.route.onComplete({
533
560
  api,
534
561
  context: options.context,
@@ -599,6 +626,7 @@ var createVoiceSession = (options) => {
599
626
  session.reconnect.lastDisconnectAt = Date.now();
600
627
  session.status = "reconnecting";
601
628
  });
629
+ speechDetected = false;
602
630
  },
603
631
  fail: async (error) => {
604
632
  clearSilenceTimer();
@@ -613,6 +641,7 @@ var createVoiceSession = (options) => {
613
641
  type: "error"
614
642
  });
615
643
  await closeAdapter("failed");
644
+ speechDetected = false;
616
645
  await options.route.onError?.({
617
646
  api,
618
647
  context: options.context,
@@ -627,11 +656,22 @@ var createVoiceSession = (options) => {
627
656
  return;
628
657
  }
629
658
  const adapter = await ensureAdapter();
659
+ const audioLevel = measureAudioLevel(audio);
630
660
  await writeSession((currentSession) => {
631
661
  currentSession.currentTurn.lastAudioAt = Date.now();
632
662
  currentSession.lastActivityAt = Date.now();
633
663
  currentSession.status = "active";
634
664
  });
665
+ if (audioLevel >= turnDetection.speechThreshold) {
666
+ speechDetected = true;
667
+ clearSilenceTimer();
668
+ } else if (speechDetected) {
669
+ const currentSession = await readSession();
670
+ const hasTurnText = Boolean(buildTurnText(currentSession.currentTurn.transcripts, currentSession.currentTurn.partialText));
671
+ if (hasTurnText) {
672
+ scheduleSilenceCommit();
673
+ }
674
+ }
635
675
  await adapter.send(audio);
636
676
  },
637
677
  snapshot: async () => readSession()
@@ -800,7 +840,8 @@ var voice = (config) => {
800
840
  store: config.session,
801
841
  stt: config.stt,
802
842
  turnDetection: {
803
- silenceMs: config.turnDetection?.silenceMs ?? 700
843
+ silenceMs: config.turnDetection?.silenceMs ?? 700,
844
+ speechThreshold: config.turnDetection?.speechThreshold ?? 0.015
804
845
  }
805
846
  });
806
847
  if (!current) {
@@ -835,7 +876,8 @@ var voice = (config) => {
835
876
  store: config.session,
836
877
  stt: config.stt,
837
878
  turnDetection: {
838
- silenceMs: config.turnDetection?.silenceMs ?? 700
879
+ silenceMs: config.turnDetection?.silenceMs ?? 700,
880
+ speechThreshold: config.turnDetection?.speechThreshold ?? 0.015
839
881
  }
840
882
  });
841
883
  runtime.activeSessions.set(sessionId, session);
@@ -1,3 +1,5 @@
1
- import type { Transcript } from './types';
1
+ import type { AudioChunk, Transcript } from './types';
2
2
  export declare const DEFAULT_SILENCE_MS = 700;
3
+ export declare const DEFAULT_SPEECH_THRESHOLD = 0.015;
4
+ export declare const measureAudioLevel: (audio: AudioChunk) => number;
3
5
  export declare const buildTurnText: (transcripts: Transcript[], partialText: string) => string;
package/dist/types.d.ts CHANGED
@@ -211,6 +211,7 @@ export type VoicePluginConfig<TContext = unknown, TSession extends VoiceSessionR
211
211
  reconnect?: VoiceReconnectConfig;
212
212
  turnDetection?: {
213
213
  silenceMs?: number;
214
+ speechThreshold?: number;
214
215
  };
215
216
  logger?: VoiceLogger;
216
217
  htmx?: boolean | VoiceHTMXConfig<TSession, NoInfer<TResult>>;
@@ -224,6 +225,7 @@ export type CreateVoiceSessionOptions<TContext = unknown, TSession extends Voice
224
225
  reconnect: Required<VoiceReconnectConfig>;
225
226
  turnDetection: {
226
227
  silenceMs: number;
228
+ speechThreshold: number;
227
229
  };
228
230
  route: VoiceNormalizedRouteConfig<TContext, TSession, TResult>;
229
231
  logger?: VoiceLogger;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.15",
3
+ "version": "0.0.16",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",