@fonoster/autopilot 0.8.49 → 0.8.51
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/envs.js +1 -1
- package/dist/vad/createVad.js +4 -5
- package/dist/vadv5/SileroVadModel.js +4 -1
- package/dist/vadv5/createVad.js +7 -5
- package/package.json +7 -7
package/dist/envs.js
CHANGED

@@ -39,7 +39,7 @@ exports.KNOWLEDGE_BASE_ENABLED = e.KNOWLEDGE_BASE_ENABLED === "true";
 exports.NODE_ENV = e.NODE_ENV || "production";
 exports.UNSTRUCTURED_API_KEY = e.UNSTRUCTURED_API_KEY;
 exports.UNSTRUCTURED_API_URL = e.UNSTRUCTURED_API_URL ?? "https://api.unstructuredapp.io/general/v0/general";
-exports.SILERO_VAD_VERSION = e.SILERO_VAD_VERSION ?? "
+exports.SILERO_VAD_VERSION = e.SILERO_VAD_VERSION ?? "v5";
 exports.CONVERSATION_PROVIDER = e.CONVERSATION_PROVIDER
     ? e.CONVERSATION_PROVIDER
     : types_1.ConversationProvider.FILE;
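The change sets the default Silero VAD version to "v5". The sketch below is a hypothetical illustration of how a consumer might branch on this flag to pick between the two bundled implementations; the loadVad() helper and the selection logic are assumptions, since the diff does not show where SILERO_VAD_VERSION is read.

```ts
// Hypothetical sketch of branching on the SILERO_VAD_VERSION flag above to
// pick between the two bundled VAD implementations. The loadVad() helper is
// an assumption; this diff only shows the env var and the two module
// directories (dist/vad and dist/vadv5).
import { SILERO_VAD_VERSION } from "./envs";

async function loadVad() {
  if (SILERO_VAD_VERSION === "v5") {
    // Silero v5 model: 512-sample windows carved from 1024-sample frames
    return import("./vadv5/createVad");
  }
  // Older model: 480-sample windows carved from 1600-sample (100 ms) frames
  return import("./vad/createVad");
}
```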
package/dist/vad/createVad.js
CHANGED

@@ -59,7 +59,6 @@ const ort = __importStar(require("onnxruntime-node"));
 const chunkToFloat32Array_1 = require("./chunkToFloat32Array");
 const SileroVadModel_1 = require("./SileroVadModel");
 const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
-// Updated constants from v5
 const FULL_FRAME_SIZE = 1600; // Equivalent to 100ms @ 16kHz
 const FRAME_SIZE = 480; // Use last 30ms from the full frame for VAD processing
 async function createVad(params) {
@@ -78,7 +77,6 @@ async function createVad(params) {
         logger.silly("State reset -- audioBuffer cleared");
     };
     return async function process(chunk, callback) {
-        // Append new samples to the audio buffer
         const float32Array = (0, chunkToFloat32Array_1.chunkToFloat32Array)(chunk);
         audioBuffer.push(...float32Array);
         // Process full frames from the buffer
@@ -96,11 +94,11 @@ async function createVad(params) {
                 framesSinceStateChange,
                 pendingSamples: audioBuffer.length
             });
-            // Increment the frames counter
             framesSinceStateChange++;
             if (isSpeechActive) {
                 // If currently in speech, check if the score has dropped below the deactivation threshold
-                if (rawScore < deactivationThreshold &&
+                if (rawScore < deactivationThreshold &&
+                    framesSinceStateChange >= debounceFrames) {
                     isSpeechActive = false;
                     callback("SPEECH_END");
                     silero.resetState(); // Reset VAD state after speech ends
@@ -110,7 +108,8 @@ async function createVad(params) {
             }
             else {
                 // If not currently in speech, check if the score exceeds the activation threshold
-                if (rawScore > activationThreshold &&
+                if (rawScore > activationThreshold &&
+                    framesSinceStateChange >= debounceFrames) {
                     isSpeechActive = true;
                     framesSinceStateChange = 0;
                     callback("SPEECH_START");
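Aside from three deleted comments, the substantive edits only re-wrap the two threshold checks. The underlying behavior is a hysteresis state machine with a debounce window: SPEECH_START fires when the score rises above activationThreshold, SPEECH_END when it falls below deactivationThreshold, and neither transition can occur until debounceFrames frames have elapsed since the last state change. A condensed sketch of that logic follows, assuming the parameter names from createVad(); the model call and audio buffering are omitted.

```ts
// Condensed sketch of the hysteresis + debounce logic shown in the diff.
// Parameter names mirror createVad(); the model call and buffering are
// omitted, and the counter is reset on both transitions for simplicity.
type VadEvent = "SPEECH_START" | "SPEECH_END";

function makeDetector(
  activationThreshold: number,
  deactivationThreshold: number,
  debounceFrames: number
) {
  let isSpeechActive = false;
  let framesSinceStateChange = 0;

  // Call once per frame with the raw speech score from the model.
  return function onFrame(rawScore: number, callback: (event: VadEvent) => void) {
    framesSinceStateChange++;
    if (isSpeechActive) {
      // Leave speech only after the score drops below the lower threshold
      // and the debounce window has elapsed.
      if (rawScore < deactivationThreshold && framesSinceStateChange >= debounceFrames) {
        isSpeechActive = false;
        framesSinceStateChange = 0;
        callback("SPEECH_END");
      }
    } else if (rawScore > activationThreshold && framesSinceStateChange >= debounceFrames) {
      // Enter speech only after the score exceeds the upper threshold.
      isSpeechActive = true;
      framesSinceStateChange = 0;
      callback("SPEECH_START");
    }
  };
}
```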
package/dist/vadv5/SileroVadModel.js
CHANGED

@@ -54,7 +54,10 @@ class SileroVadModel {
         this._state = getNewState(this.ort);
     }
     async process(audioFrame) {
-        const inputTensor = new this.ort.Tensor("float32", audioFrame, [
+        const inputTensor = new this.ort.Tensor("float32", audioFrame, [
+            1,
+            audioFrame.length
+        ]);
         const feeds = {
             input: inputTensor,
             state: this._state,
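The updated process() builds the input tensor with an explicit [1, audioFrame.length] shape, i.e., a single-item batch. Below is a minimal sketch of running one window through an onnxruntime-node session that way; the input names ("input", "state", "sr") and the 16 kHz sample-rate tensor follow the usual Silero v5 layout and are assumptions beyond what this diff shows.

```ts
// Minimal sketch of scoring one audio window with an explicit [1, N] input
// shape, as the updated process() does. The input names and the sample-rate
// tensor are assumptions; only the [1, audioFrame.length] shape comes from
// this diff.
import * as ort from "onnxruntime-node";

async function scoreWindow(
  session: ort.InferenceSession,
  state: ort.Tensor,
  audioFrame: Float32Array
) {
  // Batch of one: shape [1, audioFrame.length] instead of a flat 1-D tensor.
  const input = new ort.Tensor("float32", audioFrame, [1, audioFrame.length]);
  const sr = new ort.Tensor("int64", BigInt64Array.from([16000n]), [1]);
  return session.run({ input, state, sr });
}
```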
package/dist/vadv5/createVad.js
CHANGED

@@ -59,8 +59,8 @@ const SileroVadModel_1 = require("./SileroVadModel");
 const logger_1 = require("@fonoster/logger");
 const path_1 = require("path");
 const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
-const FULL_FRAME_SIZE = 1024; //
-const BUFFER_SIZE = 512; //
+const FULL_FRAME_SIZE = 1024; // 64ms @ 16kHz
+const BUFFER_SIZE = 512; // 32ms @ 16kHz
 async function createVad(params) {
     const { pathToModel, activationThreshold, deactivationThreshold, debounceFrames } = params;
     const effectivePath = pathToModel || (0, path_1.join)(__dirname, "..", "..", "silero_vad_v5.onnx");
@@ -88,7 +88,7 @@ async function createVad(params) {
         // Use the last BUFFER_SIZE samples from the full frame.
         const frame = fullFrame.slice(fullFrame.length - BUFFER_SIZE);
         const result = await silero.process(new Float32Array(frame));
-        const rawScore = result.isSpeech;
+        const rawScore = result.isSpeech;
         logger.silly("Frame processing", {
             rawScore,
             isSpeechActive,
@@ -98,7 +98,8 @@ async function createVad(params) {
         framesSinceStateChange++;
         if (isSpeechActive) {
             // If already in speech, check if the score has dropped below deactivationThreshold
-            if (rawScore < deactivationThreshold &&
+            if (rawScore < deactivationThreshold &&
+                framesSinceStateChange >= debounceFrames) {
                 callback("SPEECH_END");
                 resetState();
                 logger.silly("Speech end detected", { rawScore });
@@ -107,7 +108,8 @@ async function createVad(params) {
         }
         else {
             // If currently not speaking, check if the score is above activationThreshold
-            if (rawScore > activationThreshold &&
+            if (rawScore > activationThreshold &&
+                framesSinceStateChange >= debounceFrames) {
                 isSpeechActive = true;
                 framesSinceStateChange = 0;
                 callback("SPEECH_START");
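The updated comments document the frame math: at 16 kHz, 1024 samples is 64 ms and 512 samples is 32 ms, and only the trailing 512 samples of each full frame are handed to the model, matching the 512-sample window Silero v5 expects at 16 kHz. A small sketch of that framing step, with scoreWindow() standing in for silero.process():

```ts
// Sketch of the framing step around these constants: accumulate samples,
// peel off 1024-sample (64 ms @ 16 kHz) frames, and score only the trailing
// 512 samples (32 ms). scoreWindow() is a stand-in for silero.process().
const FULL_FRAME_SIZE = 1024; // 64ms @ 16kHz
const BUFFER_SIZE = 512; // 32ms @ 16kHz

async function drainFrames(
  audioBuffer: number[],
  scoreWindow: (frame: Float32Array) => Promise<number>
): Promise<number[]> {
  const scores: number[] = [];
  while (audioBuffer.length >= FULL_FRAME_SIZE) {
    const fullFrame = audioBuffer.splice(0, FULL_FRAME_SIZE);
    // Only the last BUFFER_SIZE samples are fed to the model.
    const frame = fullFrame.slice(fullFrame.length - BUFFER_SIZE);
    scores.push(await scoreWindow(new Float32Array(frame)));
  }
  return scores;
}
```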
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@fonoster/autopilot",
-  "version": "0.8.
+  "version": "0.8.51",
   "description": "Voice AI for the Fonoster platform",
   "author": "Pedro Sanders <psanders@fonoster.com>",
   "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -33,11 +33,11 @@
   },
   "dependencies": {
     "@aws-sdk/client-s3": "^3.712.0",
-    "@fonoster/common": "^0.8.
-    "@fonoster/logger": "^0.8.
-    "@fonoster/sdk": "^0.8.
-    "@fonoster/types": "^0.8.
-    "@fonoster/voice": "^0.8.
+    "@fonoster/common": "^0.8.51",
+    "@fonoster/logger": "^0.8.51",
+    "@fonoster/sdk": "^0.8.51",
+    "@fonoster/types": "^0.8.51",
+    "@fonoster/voice": "^0.8.51",
     "@langchain/community": "^0.3.19",
     "@langchain/core": "^0.3.23",
     "@langchain/groq": "^0.1.2",
@@ -55,5 +55,5 @@
   "devDependencies": {
     "typescript": "^5.5.4"
   },
-  "gitHead": "
+  "gitHead": "67a0f500926304550c2698943ba6e7a7aa4b9f06"
 }
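The @fonoster/* dependencies are bumped in lockstep with the package version. The caret ranges ("^0.8.51") admit later 0.8.x releases but exclude 0.9.0; a quick check with the semver package (used here purely for illustration, not a dependency shown in this diff):

```ts
// What the "^0.8.51" ranges above allow, checked with the semver package
// (used only for this illustration; it is not a dependency in this diff).
import { satisfies } from "semver";

console.log(satisfies("0.8.52", "^0.8.51")); // true  -- later 0.8.x patches match
console.log(satisfies("0.9.0", "^0.8.51"));  // false -- a minor bump does not
```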