npm - @fonoster/autopilot - Versions diffs - 0.8.49 → 0.8.50 - Mend

@fonoster/autopilot 0.8.49 → 0.8.50

This diff compares the contents of two publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (5):

package/dist/envs.js +1 -1
package/dist/vad/createVad.js +4 -5
package/dist/vadv5/SileroVadModel.js +4 -1
package/dist/vadv5/createVad.js +7 -5
package/package.json +5 -5

package/dist/envs.js CHANGED Viewed

@@ -39,7 +39,7 @@ exports.KNOWLEDGE_BASE_ENABLED = e.KNOWLEDGE_BASE_ENABLED === "true";
 exports.NODE_ENV = e.NODE_ENV || "production";
 exports.UNSTRUCTURED_API_KEY = e.UNSTRUCTURED_API_KEY;
 exports.UNSTRUCTURED_API_URL = e.UNSTRUCTURED_API_URL ?? "https://api.unstructuredapp.io/general/v0/general";
-exports.SILERO_VAD_VERSION = e.SILERO_VAD_VERSION ?? "v4";
+exports.SILERO_VAD_VERSION = e.SILERO_VAD_VERSION ?? "v5";
 exports.CONVERSATION_PROVIDER = e.CONVERSATION_PROVIDER
     ? e.CONVERSATION_PROVIDER
     : types_1.ConversationProvider.FILE;

package/dist/vad/createVad.js CHANGED Viewed

@@ -59,7 +59,6 @@ const ort = __importStar(require("onnxruntime-node"));
 const chunkToFloat32Array_1 = require("./chunkToFloat32Array");
 const SileroVadModel_1 = require("./SileroVadModel");
 const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
-// Updated constants from v5
 const FULL_FRAME_SIZE = 1600; // Equivalent to 100ms @ 16kHz
 const FRAME_SIZE = 480; // Use last 30ms from the full frame for VAD processing
 async function createVad(params) {
@@ -78,7 +77,6 @@ async function createVad(params) {
         logger.silly("State reset -- audioBuffer cleared");
     };
     return async function process(chunk, callback) {
-        // Append new samples to the audio buffer
         const float32Array = (0, chunkToFloat32Array_1.chunkToFloat32Array)(chunk);
         audioBuffer.push(...float32Array);
         // Process full frames from the buffer
@@ -96,11 +94,11 @@ async function createVad(params) {
                 framesSinceStateChange,
                 pendingSamples: audioBuffer.length
             });
-            // Increment the frames counter
             framesSinceStateChange++;
             if (isSpeechActive) {
                 // If currently in speech, check if the score has dropped below the deactivation threshold
-                if (rawScore < deactivationThreshold && framesSinceStateChange >= debounceFrames) {
+                if (rawScore < deactivationThreshold &&
+                    framesSinceStateChange >= debounceFrames) {
                     isSpeechActive = false;
                     callback("SPEECH_END");
                     silero.resetState(); // Reset VAD state after speech ends
@@ -110,7 +108,8 @@ async function createVad(params) {
             }
             else {
                 // If not currently in speech, check if the score exceeds the activation threshold
-                if (rawScore > activationThreshold && framesSinceStateChange >= debounceFrames) {
+                if (rawScore > activationThreshold &&
+                    framesSinceStateChange >= debounceFrames) {
                     isSpeechActive = true;
                     framesSinceStateChange = 0;
                     callback("SPEECH_START");

package/dist/vadv5/SileroVadModel.js CHANGED Viewed

@@ -54,7 +54,10 @@ class SileroVadModel {
         this._state = getNewState(this.ort);
     }
     async process(audioFrame) {
-        const inputTensor = new this.ort.Tensor("float32", audioFrame, [1, audioFrame.length]);
+        const inputTensor = new this.ort.Tensor("float32", audioFrame, [
+            1,
+            audioFrame.length
+        ]);
         const feeds = {
             input: inputTensor,
             state: this._state,

package/dist/vadv5/createVad.js CHANGED Viewed

@@ -59,8 +59,8 @@ const SileroVadModel_1 = require("./SileroVadModel");
 const logger_1 = require("@fonoster/logger");
 const path_1 = require("path");
 const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
-const FULL_FRAME_SIZE = 1024; // 100ms @ 16kHz
-const BUFFER_SIZE = 512; // 30ms @ 16kHz
+const FULL_FRAME_SIZE = 1024; // 64ms @ 16kHz
+const BUFFER_SIZE = 512; // 32ms @ 16kHz
 async function createVad(params) {
     const { pathToModel, activationThreshold, deactivationThreshold, debounceFrames } = params;
     const effectivePath = pathToModel || (0, path_1.join)(__dirname, "..", "..", "silero_vad_v5.onnx");
@@ -88,7 +88,7 @@ async function createVad(params) {
             // Use the last BUFFER_SIZE samples from the full frame.
             const frame = fullFrame.slice(fullFrame.length - BUFFER_SIZE);
             const result = await silero.process(new Float32Array(frame));
-            const rawScore = result.isSpeech; // rawScore is assumed to be between 0 and 1
+            const rawScore = result.isSpeech;
             logger.silly("Frame processing", {
                 rawScore,
                 isSpeechActive,
@@ -98,7 +98,8 @@ async function createVad(params) {
             framesSinceStateChange++;
             if (isSpeechActive) {
                 // If already in speech, check if the score has dropped below deactivationThreshold
-                if (rawScore < deactivationThreshold && framesSinceStateChange >= debounceFrames) {
+                if (rawScore < deactivationThreshold &&
+                    framesSinceStateChange >= debounceFrames) {
                     callback("SPEECH_END");
                     resetState();
                     logger.silly("Speech end detected", { rawScore });
@@ -107,7 +108,8 @@ async function createVad(params) {
             }
             else {
                 // If currently not speaking, check if the score is above activationThreshold
-                if (rawScore > activationThreshold && framesSinceStateChange >= debounceFrames) {
+                if (rawScore > activationThreshold &&
+                    framesSinceStateChange >= debounceFrames) {
                     isSpeechActive = true;
                     framesSinceStateChange = 0;
                     callback("SPEECH_START");

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@fonoster/autopilot",
-  "version": "0.8.49",
+  "version": "0.8.50",
   "description": "Voice AI for the Fonoster platform",
   "author": "Pedro Sanders <psanders@fonoster.com>",
   "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -33,11 +33,11 @@
   },
   "dependencies": {
     "@aws-sdk/client-s3": "^3.712.0",
-    "@fonoster/common": "^0.8.49",
+    "@fonoster/common": "^0.8.50",
     "@fonoster/logger": "^0.8.47",
-    "@fonoster/sdk": "^0.8.49",
+    "@fonoster/sdk": "^0.8.50",
     "@fonoster/types": "^0.8.47",
-    "@fonoster/voice": "^0.8.49",
+    "@fonoster/voice": "^0.8.50",
     "@langchain/community": "^0.3.19",
     "@langchain/core": "^0.3.23",
     "@langchain/groq": "^0.1.2",
@@ -55,5 +55,5 @@
   "devDependencies": {
     "typescript": "^5.5.4"
   },
-  "gitHead": "d4aa82f7926c0f451d2f580c59ab812c74a7f579"
+  "gitHead": "d0e373668e8e77295e3847c99a346c4aa1c8d3d7"
 }