@absolutejs/voice 0.0.22-beta.611 → 0.0.22-beta.613

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects those versions as they appear in their respective public registries.
@@ -15,6 +15,14 @@ export type VoiceBargeInVerdict = {
15
15
  shouldCancel: boolean;
16
16
  /** Diagnostic label, surfaced on the barge_in / barge_in_suppressed trace. */
17
17
  reason?: string;
18
+ /**
19
+ * The acoustic measurements the decision used, surfaced on the trace for
20
+ * tuning the thresholds against real audio. Omitted when no audio was judged.
21
+ */
22
+ metrics?: {
23
+ voicedMs: number;
24
+ rms: number;
25
+ };
18
26
  };
19
27
  export type VoiceBargeInDetector = {
20
28
  evaluate: (input: VoiceBargeInInput) => Promise<VoiceBargeInVerdict> | VoiceBargeInVerdict;
package/dist/index.js CHANGED
@@ -5287,12 +5287,17 @@ var createVoiceSession = (options) => {
5287
5287
  partialText: triggeringText,
5288
5288
  wordCount,
5289
5289
  ...getTurnAudioForDetector()
5290
- })) : { reason: undefined, shouldCancel: !isBackchannelByText };
5290
+ })) : {
5291
+ metrics: undefined,
5292
+ reason: undefined,
5293
+ shouldCancel: !isBackchannelByText
5294
+ };
5291
5295
  const reason = verdict.reason ?? (verdict.shouldCancel ? "stt_partial" : "backchannel");
5292
5296
  if (verdict.shouldCancel) {
5293
5297
  backchannelSuppressedAt = null;
5294
5298
  appendTurnLatencyStage({
5295
5299
  metadata: {
5300
+ ...verdict.metrics,
5296
5301
  partial: triggeringText.slice(0, 200),
5297
5302
  source: reason,
5298
5303
  wordCount
@@ -5305,6 +5310,7 @@ var createVoiceSession = (options) => {
5305
5310
  backchannelSuppressedAt = Date.now();
5306
5311
  appendTurnLatencyStage({
5307
5312
  metadata: {
5313
+ ...verdict.metrics,
5308
5314
  partial: triggeringText.slice(0, 200),
5309
5315
  reason,
5310
5316
  wordCount
@@ -42001,26 +42007,29 @@ var createRegexSemanticTurnDetector = (options) => {
42001
42007
  };
42002
42008
  };
42003
42009
  // src/core/bargeInDetector.ts
42010
+ var VOICED_FLOOR = 0.02;
42004
42011
  var measureTurnAudio = (chunks, format) => {
42005
42012
  const channels = format.channels ?? 1;
42006
42013
  const sampleRate = format.sampleRateHz ?? 16000;
42007
42014
  let sumSquares = 0;
42008
- let sampleCount = 0;
42015
+ let voicedSamples = 0;
42009
42016
  for (const chunk of chunks) {
42010
42017
  const usableBytes = chunk.byteLength - chunk.byteLength % 2;
42011
42018
  const view = new DataView(chunk.buffer, chunk.byteOffset, usableBytes);
42012
42019
  for (let offset = 0;offset < usableBytes; offset += 2) {
42013
42020
  const sample = view.getInt16(offset, true) / 32768;
42014
- sumSquares += sample * sample;
42015
- sampleCount += 1;
42021
+ if (Math.abs(sample) >= VOICED_FLOOR) {
42022
+ sumSquares += sample * sample;
42023
+ voicedSamples += 1;
42024
+ }
42016
42025
  }
42017
42026
  }
42018
- if (sampleCount === 0) {
42027
+ if (voicedSamples === 0) {
42019
42028
  return { durationMs: 0, rms: 0 };
42020
42029
  }
42021
42030
  return {
42022
- durationMs: sampleCount / channels / sampleRate * 1000,
42023
- rms: Math.sqrt(sumSquares / sampleCount)
42031
+ durationMs: voicedSamples / channels / sampleRate * 1000,
42032
+ rms: Math.sqrt(sumSquares / voicedSamples)
42024
42033
  };
42025
42034
  };
42026
42035
  var createAcousticBargeInDetector = (options = {}) => {
@@ -42034,19 +42043,20 @@ var createAcousticBargeInDetector = (options = {}) => {
42034
42043
  return input.isBackchannelByText ? { reason: "text_backchannel", shouldCancel: false } : { reason: "text_only", shouldCancel: true };
42035
42044
  }
42036
42045
  const { durationMs, rms } = measureTurnAudio(turnAudio, turnAudioFormat);
42046
+ const metrics = { rms, voicedMs: Math.round(durationMs) };
42037
42047
  if (durationMs >= sustainedMs) {
42038
- return { reason: "acoustic_sustained", shouldCancel: true };
42048
+ return { metrics, reason: "acoustic_sustained", shouldCancel: true };
42039
42049
  }
42040
42050
  if (input.isBackchannelByText) {
42041
- return { reason: "acoustic_backchannel", shouldCancel: false };
42051
+ return { metrics, reason: "acoustic_backchannel", shouldCancel: false };
42042
42052
  }
42043
42053
  if (rms >= emphaticRms) {
42044
- return { reason: "acoustic_emphatic", shouldCancel: true };
42054
+ return { metrics, reason: "acoustic_emphatic", shouldCancel: true };
42045
42055
  }
42046
42056
  if (rms <= noiseFloorRms) {
42047
- return { reason: "acoustic_noise_floor", shouldCancel: false };
42057
+ return { metrics, reason: "acoustic_noise_floor", shouldCancel: false };
42048
42058
  }
42049
- return { reason: "acoustic_ambiguous", shouldCancel: true };
42059
+ return { metrics, reason: "acoustic_ambiguous", shouldCancel: true };
42050
42060
  }
42051
42061
  };
42052
42062
  };
@@ -7514,12 +7514,17 @@ var createVoiceSession = (options) => {
7514
7514
  partialText: triggeringText,
7515
7515
  wordCount,
7516
7516
  ...getTurnAudioForDetector()
7517
- })) : { reason: undefined, shouldCancel: !isBackchannelByText };
7517
+ })) : {
7518
+ metrics: undefined,
7519
+ reason: undefined,
7520
+ shouldCancel: !isBackchannelByText
7521
+ };
7518
7522
  const reason = verdict.reason ?? (verdict.shouldCancel ? "stt_partial" : "backchannel");
7519
7523
  if (verdict.shouldCancel) {
7520
7524
  backchannelSuppressedAt = null;
7521
7525
  appendTurnLatencyStage({
7522
7526
  metadata: {
7527
+ ...verdict.metrics,
7523
7528
  partial: triggeringText.slice(0, 200),
7524
7529
  source: reason,
7525
7530
  wordCount
@@ -7532,6 +7537,7 @@ var createVoiceSession = (options) => {
7532
7537
  backchannelSuppressedAt = Date.now();
7533
7538
  appendTurnLatencyStage({
7534
7539
  metadata: {
7540
+ ...verdict.metrics,
7535
7541
  partial: triggeringText.slice(0, 200),
7536
7542
  reason,
7537
7543
  wordCount
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@absolutejs/voice",
3
- "version": "0.0.22-beta.611",
3
+ "version": "0.0.22-beta.613",
4
4
  "description": "Voice primitives and Elysia plugin for AbsoluteJS",
5
5
  "repository": {
6
6
  "type": "git",