@fonoster/autopilot 0.8.49 → 0.8.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/envs.js CHANGED
@@ -39,7 +39,7 @@ exports.KNOWLEDGE_BASE_ENABLED = e.KNOWLEDGE_BASE_ENABLED === "true";
39
39
  exports.NODE_ENV = e.NODE_ENV || "production";
40
40
  exports.UNSTRUCTURED_API_KEY = e.UNSTRUCTURED_API_KEY;
41
41
  exports.UNSTRUCTURED_API_URL = e.UNSTRUCTURED_API_URL ?? "https://api.unstructuredapp.io/general/v0/general";
42
- exports.SILERO_VAD_VERSION = e.SILERO_VAD_VERSION ?? "v4";
42
+ exports.SILERO_VAD_VERSION = e.SILERO_VAD_VERSION ?? "v5";
43
43
  exports.CONVERSATION_PROVIDER = e.CONVERSATION_PROVIDER
44
44
  ? e.CONVERSATION_PROVIDER
45
45
  : types_1.ConversationProvider.FILE;
@@ -59,7 +59,6 @@ const ort = __importStar(require("onnxruntime-node"));
59
59
  const chunkToFloat32Array_1 = require("./chunkToFloat32Array");
60
60
  const SileroVadModel_1 = require("./SileroVadModel");
61
61
  const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
62
- // Updated constants from v5
63
62
  const FULL_FRAME_SIZE = 1600; // Equivalent to 100ms @ 16kHz
64
63
  const FRAME_SIZE = 480; // Use last 30ms from the full frame for VAD processing
65
64
  async function createVad(params) {
@@ -78,7 +77,6 @@ async function createVad(params) {
78
77
  logger.silly("State reset -- audioBuffer cleared");
79
78
  };
80
79
  return async function process(chunk, callback) {
81
- // Append new samples to the audio buffer
82
80
  const float32Array = (0, chunkToFloat32Array_1.chunkToFloat32Array)(chunk);
83
81
  audioBuffer.push(...float32Array);
84
82
  // Process full frames from the buffer
@@ -96,11 +94,11 @@ async function createVad(params) {
96
94
  framesSinceStateChange,
97
95
  pendingSamples: audioBuffer.length
98
96
  });
99
- // Increment the frames counter
100
97
  framesSinceStateChange++;
101
98
  if (isSpeechActive) {
102
99
  // If currently in speech, check if the score has dropped below the deactivation threshold
103
- if (rawScore < deactivationThreshold && framesSinceStateChange >= debounceFrames) {
100
+ if (rawScore < deactivationThreshold &&
101
+ framesSinceStateChange >= debounceFrames) {
104
102
  isSpeechActive = false;
105
103
  callback("SPEECH_END");
106
104
  silero.resetState(); // Reset VAD state after speech ends
@@ -110,7 +108,8 @@ async function createVad(params) {
110
108
  }
111
109
  else {
112
110
  // If not currently in speech, check if the score exceeds the activation threshold
113
- if (rawScore > activationThreshold && framesSinceStateChange >= debounceFrames) {
111
+ if (rawScore > activationThreshold &&
112
+ framesSinceStateChange >= debounceFrames) {
114
113
  isSpeechActive = true;
115
114
  framesSinceStateChange = 0;
116
115
  callback("SPEECH_START");
@@ -54,7 +54,10 @@ class SileroVadModel {
54
54
  this._state = getNewState(this.ort);
55
55
  }
56
56
  async process(audioFrame) {
57
- const inputTensor = new this.ort.Tensor("float32", audioFrame, [1, audioFrame.length]);
57
+ const inputTensor = new this.ort.Tensor("float32", audioFrame, [
58
+ 1,
59
+ audioFrame.length
60
+ ]);
58
61
  const feeds = {
59
62
  input: inputTensor,
60
63
  state: this._state,
@@ -59,8 +59,8 @@ const SileroVadModel_1 = require("./SileroVadModel");
59
59
  const logger_1 = require("@fonoster/logger");
60
60
  const path_1 = require("path");
61
61
  const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
62
- const FULL_FRAME_SIZE = 1024; // 100ms @ 16kHz
63
- const BUFFER_SIZE = 512; // 30ms @ 16kHz
62
+ const FULL_FRAME_SIZE = 1024; // 64ms @ 16kHz
63
+ const BUFFER_SIZE = 512; // 32ms @ 16kHz
64
64
  async function createVad(params) {
65
65
  const { pathToModel, activationThreshold, deactivationThreshold, debounceFrames } = params;
66
66
  const effectivePath = pathToModel || (0, path_1.join)(__dirname, "..", "..", "silero_vad_v5.onnx");
@@ -88,7 +88,7 @@ async function createVad(params) {
88
88
  // Use the last BUFFER_SIZE samples from the full frame.
89
89
  const frame = fullFrame.slice(fullFrame.length - BUFFER_SIZE);
90
90
  const result = await silero.process(new Float32Array(frame));
91
- const rawScore = result.isSpeech; // rawScore is assumed to be between 0 and 1
91
+ const rawScore = result.isSpeech;
92
92
  logger.silly("Frame processing", {
93
93
  rawScore,
94
94
  isSpeechActive,
@@ -98,7 +98,8 @@ async function createVad(params) {
98
98
  framesSinceStateChange++;
99
99
  if (isSpeechActive) {
100
100
  // If already in speech, check if the score has dropped below deactivationThreshold
101
- if (rawScore < deactivationThreshold && framesSinceStateChange >= debounceFrames) {
101
+ if (rawScore < deactivationThreshold &&
102
+ framesSinceStateChange >= debounceFrames) {
102
103
  callback("SPEECH_END");
103
104
  resetState();
104
105
  logger.silly("Speech end detected", { rawScore });
@@ -107,7 +108,8 @@ async function createVad(params) {
107
108
  }
108
109
  else {
109
110
  // If currently not speaking, check if the score is above activationThreshold
110
- if (rawScore > activationThreshold && framesSinceStateChange >= debounceFrames) {
111
+ if (rawScore > activationThreshold &&
112
+ framesSinceStateChange >= debounceFrames) {
111
113
  isSpeechActive = true;
112
114
  framesSinceStateChange = 0;
113
115
  callback("SPEECH_START");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fonoster/autopilot",
3
- "version": "0.8.49",
3
+ "version": "0.8.50",
4
4
  "description": "Voice AI for the Fonoster platform",
5
5
  "author": "Pedro Sanders <psanders@fonoster.com>",
6
6
  "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -33,11 +33,11 @@
33
33
  },
34
34
  "dependencies": {
35
35
  "@aws-sdk/client-s3": "^3.712.0",
36
- "@fonoster/common": "^0.8.49",
36
+ "@fonoster/common": "^0.8.50",
37
37
  "@fonoster/logger": "^0.8.47",
38
- "@fonoster/sdk": "^0.8.49",
38
+ "@fonoster/sdk": "^0.8.50",
39
39
  "@fonoster/types": "^0.8.47",
40
- "@fonoster/voice": "^0.8.49",
40
+ "@fonoster/voice": "^0.8.50",
41
41
  "@langchain/community": "^0.3.19",
42
42
  "@langchain/core": "^0.3.23",
43
43
  "@langchain/groq": "^0.1.2",
@@ -55,5 +55,5 @@
55
55
  "devDependencies": {
56
56
  "typescript": "^5.5.4"
57
57
  },
58
- "gitHead": "d4aa82f7926c0f451d2f580c59ab812c74a7f579"
58
+ "gitHead": "d0e373668e8e77295e3847c99a346c4aa1c8d3d7"
59
59
  }