@absolutejs/voice 0.0.22-beta.597 → 0.0.22-beta.598

This diff shows the differences between the contents of two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions exactly as they appear in their respective public registries.
@@ -1,10 +1,20 @@
1
- import type { Transcript } from "./types";
1
+ import type { AudioFormat, Transcript } from "./types";
2
2
  export type VoiceSemanticTurnInput = {
3
3
  audioLevel?: number;
4
4
  lastFinalTranscript?: Transcript;
5
5
  partialText: string;
6
6
  silenceMs: number;
7
7
  transcripts: Transcript[];
8
+ /**
9
+ * The current turn's buffered user audio (PCM chunks, oldest→newest) and its
10
+ * format. Lets an AUDIO-based end-of-turn detector (e.g. a smart-turn / Whisper
11
+ * EOT model) judge completion from prosody — pitch, pace, trailing intonation —
12
+ * which a transcript-only judge fundamentally cannot see. Undefined when no
13
+ * audio was buffered for the turn (the runtime only stores chunks above the
14
+ * speech threshold).
15
+ */
16
+ turnAudio?: ReadonlyArray<Uint8Array>;
17
+ turnAudioFormat?: AudioFormat;
8
18
  };
9
19
  export type VoiceSemanticTurnVerdict = {
10
20
  confidence?: number;
package/dist/index.js CHANGED
@@ -4137,6 +4137,17 @@ var createVoiceSession = (options) => {
4137
4137
  pruneTurnAudio();
4138
4138
  return currentTurnAudio.map((audio) => audio.chunk);
4139
4139
  };
4140
+ const turnAudioInputFormat = recordingConfig?.userInputFormat ?? options.realtimeInputFormat ?? DEFAULT_REALTIME_FORMAT;
4141
+ const getTurnAudioForDetector = () => {
4142
+ if (!options.semanticTurnDetector || currentTurnAudio.length === 0) {
4143
+ return { turnAudio: undefined, turnAudioFormat: undefined };
4144
+ }
4145
+ const turnAudio = currentTurnAudio.map((audio) => {
4146
+ const c = audio.chunk;
4147
+ return c instanceof ArrayBuffer ? new Uint8Array(c) : new Uint8Array(c.buffer, c.byteOffset, c.byteLength);
4148
+ });
4149
+ return { turnAudio, turnAudioFormat: turnAudioInputFormat };
4150
+ };
4140
4151
  const clearSilenceTimer = () => {
4141
4152
  if (!silenceTimer) {
4142
4153
  return;
@@ -4476,7 +4487,8 @@ var createVoiceSession = (options) => {
4476
4487
  lastFinalTranscript: transcripts.at(-1),
4477
4488
  partialText,
4478
4489
  silenceMs,
4479
- transcripts
4490
+ transcripts,
4491
+ ...getTurnAudioForDetector()
4480
4492
  }));
4481
4493
  endOfTurn = verdict.endOfTurn;
4482
4494
  } catch {
@@ -5261,7 +5273,8 @@ var createVoiceSession = (options) => {
5261
5273
  lastFinalTranscript: transcript,
5262
5274
  partialText: session.currentTurn.partialText,
5263
5275
  silenceMs: session.currentTurn.silenceStartedAt !== undefined ? Date.now() - session.currentTurn.silenceStartedAt : 0,
5264
- transcripts: session.currentTurn.transcripts
5276
+ transcripts: session.currentTurn.transcripts,
5277
+ ...getTurnAudioForDetector()
5265
5278
  }));
5266
5279
  if (verdict.endOfTurn) {
5267
5280
  clearSilenceTimer();
@@ -6364,6 +6364,17 @@ var createVoiceSession = (options) => {
6364
6364
  pruneTurnAudio();
6365
6365
  return currentTurnAudio.map((audio) => audio.chunk);
6366
6366
  };
6367
+ const turnAudioInputFormat = recordingConfig?.userInputFormat ?? options.realtimeInputFormat ?? DEFAULT_REALTIME_FORMAT;
6368
+ const getTurnAudioForDetector = () => {
6369
+ if (!options.semanticTurnDetector || currentTurnAudio.length === 0) {
6370
+ return { turnAudio: undefined, turnAudioFormat: undefined };
6371
+ }
6372
+ const turnAudio = currentTurnAudio.map((audio) => {
6373
+ const c = audio.chunk;
6374
+ return c instanceof ArrayBuffer ? new Uint8Array(c) : new Uint8Array(c.buffer, c.byteOffset, c.byteLength);
6375
+ });
6376
+ return { turnAudio, turnAudioFormat: turnAudioInputFormat };
6377
+ };
6367
6378
  const clearSilenceTimer = () => {
6368
6379
  if (!silenceTimer) {
6369
6380
  return;
@@ -6703,7 +6714,8 @@ var createVoiceSession = (options) => {
6703
6714
  lastFinalTranscript: transcripts.at(-1),
6704
6715
  partialText,
6705
6716
  silenceMs,
6706
- transcripts
6717
+ transcripts,
6718
+ ...getTurnAudioForDetector()
6707
6719
  }));
6708
6720
  endOfTurn = verdict.endOfTurn;
6709
6721
  } catch {
@@ -7488,7 +7500,8 @@ var createVoiceSession = (options) => {
7488
7500
  lastFinalTranscript: transcript,
7489
7501
  partialText: session.currentTurn.partialText,
7490
7502
  silenceMs: session.currentTurn.silenceStartedAt !== undefined ? Date.now() - session.currentTurn.silenceStartedAt : 0,
7491
- transcripts: session.currentTurn.transcripts
7503
+ transcripts: session.currentTurn.transcripts,
7504
+ ...getTurnAudioForDetector()
7492
7505
  }));
7493
7506
  if (verdict.endOfTurn) {
7494
7507
  clearSilenceTimer();
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.597",
3
+ "version": "0.0.22-beta.598",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",