npm - @absolutejs/voice - Versions diffs - 0.0.22-beta.611 → 0.0.22-beta.613 - Mend

@absolutejs/voice 0.0.22-beta.611 → 0.0.22-beta.613

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

package/dist/core/bargeInDetector.d.ts +8 -0
package/dist/index.js +22 -12
package/dist/testing/index.js +7 -1
package/package.json +1 -1

package/dist/core/bargeInDetector.d.ts CHANGED

@@ -15,6 +15,14 @@ export type VoiceBargeInVerdict = {
     shouldCancel: boolean;
     /** Diagnostic label, surfaced on the barge_in / barge_in_suppressed trace. */
     reason?: string;
+    /**
+     * The acoustic measurements the decision used, surfaced on the trace for
+     * tuning the thresholds against real audio. Omitted when no audio was judged.
+     */
+    metrics?: {
+        voicedMs: number;
+        rms: number;
+    };
 };
 export type VoiceBargeInDetector = {
     evaluate: (input: VoiceBargeInInput) => Promise<VoiceBargeInVerdict> | VoiceBargeInVerdict;

package/dist/index.js CHANGED

@@ -5287,12 +5287,17 @@ var createVoiceSession = (options) => {
             partialText: triggeringText,
             wordCount,
             ...getTurnAudioForDetector()
-          })) : { reason: undefined, shouldCancel: !isBackchannelByText };
+          })) : {
+            metrics: undefined,
+            reason: undefined,
+            shouldCancel: !isBackchannelByText
+          };
           const reason = verdict.reason ?? (verdict.shouldCancel ? "stt_partial" : "backchannel");
           if (verdict.shouldCancel) {
             backchannelSuppressedAt = null;
             appendTurnLatencyStage({
               metadata: {
+                ...verdict.metrics,
                 partial: triggeringText.slice(0, 200),
                 source: reason,
                 wordCount
@@ -5305,6 +5310,7 @@ var createVoiceSession = (options) => {
             backchannelSuppressedAt = Date.now();
             appendTurnLatencyStage({
               metadata: {
+                ...verdict.metrics,
                 partial: triggeringText.slice(0, 200),
                 reason,
                 wordCount
@@ -42001,26 +42007,29 @@ var createRegexSemanticTurnDetector = (options) => {
   };
 };
 // src/core/bargeInDetector.ts
+var VOICED_FLOOR = 0.02;
 var measureTurnAudio = (chunks, format) => {
   const channels = format.channels ?? 1;
   const sampleRate = format.sampleRateHz ?? 16000;
   let sumSquares = 0;
-  let sampleCount = 0;
+  let voicedSamples = 0;
   for (const chunk of chunks) {
     const usableBytes = chunk.byteLength - chunk.byteLength % 2;
     const view = new DataView(chunk.buffer, chunk.byteOffset, usableBytes);
     for (let offset = 0;offset < usableBytes; offset += 2) {
       const sample = view.getInt16(offset, true) / 32768;
-      sumSquares += sample * sample;
-      sampleCount += 1;
+      if (Math.abs(sample) >= VOICED_FLOOR) {
+        sumSquares += sample * sample;
+        voicedSamples += 1;
+      }
     }
   }
-  if (sampleCount === 0) {
+  if (voicedSamples === 0) {
     return { durationMs: 0, rms: 0 };
   }
   return {
-    durationMs: sampleCount / channels / sampleRate * 1000,
-    rms: Math.sqrt(sumSquares / sampleCount)
+    durationMs: voicedSamples / channels / sampleRate * 1000,
+    rms: Math.sqrt(sumSquares / voicedSamples)
   };
 };
 var createAcousticBargeInDetector = (options = {}) => {
@@ -42034,19 +42043,20 @@ var createAcousticBargeInDetector = (options = {}) => {
         return input.isBackchannelByText ? { reason: "text_backchannel", shouldCancel: false } : { reason: "text_only", shouldCancel: true };
       }
       const { durationMs, rms } = measureTurnAudio(turnAudio, turnAudioFormat);
+      const metrics = { rms, voicedMs: Math.round(durationMs) };
       if (durationMs >= sustainedMs) {
-        return { reason: "acoustic_sustained", shouldCancel: true };
+        return { metrics, reason: "acoustic_sustained", shouldCancel: true };
       }
       if (input.isBackchannelByText) {
-        return { reason: "acoustic_backchannel", shouldCancel: false };
+        return { metrics, reason: "acoustic_backchannel", shouldCancel: false };
       }
       if (rms >= emphaticRms) {
-        return { reason: "acoustic_emphatic", shouldCancel: true };
+        return { metrics, reason: "acoustic_emphatic", shouldCancel: true };
       }
       if (rms <= noiseFloorRms) {
-        return { reason: "acoustic_noise_floor", shouldCancel: false };
+        return { metrics, reason: "acoustic_noise_floor", shouldCancel: false };
       }
-      return { reason: "acoustic_ambiguous", shouldCancel: true };
+      return { metrics, reason: "acoustic_ambiguous", shouldCancel: true };
     }
   };
 };

package/dist/testing/index.js CHANGED

@@ -7514,12 +7514,17 @@ var createVoiceSession = (options) => {
             partialText: triggeringText,
             wordCount,
             ...getTurnAudioForDetector()
-          })) : { reason: undefined, shouldCancel: !isBackchannelByText };
+          })) : {
+            metrics: undefined,
+            reason: undefined,
+            shouldCancel: !isBackchannelByText
+          };
           const reason = verdict.reason ?? (verdict.shouldCancel ? "stt_partial" : "backchannel");
           if (verdict.shouldCancel) {
             backchannelSuppressedAt = null;
             appendTurnLatencyStage({
               metadata: {
+                ...verdict.metrics,
                 partial: triggeringText.slice(0, 200),
                 source: reason,
                 wordCount
@@ -7532,6 +7537,7 @@ var createVoiceSession = (options) => {
             backchannelSuppressedAt = Date.now();
             appendTurnLatencyStage({
               metadata: {
+                ...verdict.metrics,
                 partial: triggeringText.slice(0, 200),
                 reason,
                 wordCount

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@absolutejs/voice",
-  "version": "0.0.22-beta.611",
+  "version": "0.0.22-beta.613",
   "description": "Voice primitives and Elysia plugin for AbsoluteJS",
   "repository": {
     "type": "git",