npm - @fonoster/autopilot - Versions diffs - 0.7.18 → 0.7.20 - Mend

@fonoster/autopilot 0.7.18 → 0.7.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/README.md +1 -1
package/dist/assistants/AssistantSchema.d.ts +76 -0
package/dist/assistants/AssistantSchema.js +8 -1
package/dist/handleVoiceRequest.js +1 -1
package/dist/machine/machine.d.ts +11 -1
package/dist/machine/machine.js +13 -0
package/dist/machine/types.d.ts +1 -0
package/dist/models/openai/types.d.ts +3 -1
package/dist/models/openai/types.js +2 -0
package/dist/vad/SileroVad.d.ts +11 -1
package/dist/vad/SileroVad.js +6 -2
package/dist/vad/makeVad.d.ts +6 -1
package/dist/vad/makeVad.js +18 -11
package/package.json +2 -2

package/README.md CHANGED Viewed

@@ -88,7 +88,7 @@ The Autopilot supports multiple language model providers. The following is a lis
 | Provider   | Description                                                | Supported models
 |------------|------------------------------------------------------------|------------------------------------------------------------------------------|
-| OpenAI     | OpenAI provides various GPT models for conversational AI   | `gpt-4o`, `gpt-4o-mini`                                                      |
+| OpenAI     | OpenAI provides various GPT models for conversational AI   | `gpt-4o`, `gpt-4o-mini`, `gpt-3.5-turbo`, `gpt-4-turbo`                      |
 | Groq       | Groq offers high-performance AI models optimized for speed | `gemma-7b-it`, `llama3-groq-70b-8192-tool-use-preview`, `llama3-1-8b-instant` |
 | Ollama     | Self-hosted Ollama models                                  | `llama3.1`                                                                   |

package/dist/assistants/AssistantSchema.d.ts CHANGED Viewed

@@ -6,6 +6,7 @@ declare const conversationSettingsSchema: z.ZodObject<{
     goodbyeMessage: z.ZodString;
     systemErrorMessage: z.ZodString;
     initialDtmf: z.ZodNullable<z.ZodOptional<z.ZodString>>;
+    maxSpeechWaitTimeout: z.ZodNumber;
     transferOptions: z.ZodNullable<z.ZodOptional<z.ZodObject<{
         phoneNumber: z.ZodString;
         message: z.ZodString;
@@ -32,11 +33,34 @@ declare const conversationSettingsSchema: z.ZodObject<{
         timeout: number;
         maxTimeoutCount: number;
     }>>>;
+    vad: z.ZodObject<{
+        pathToModel: z.ZodOptional<z.ZodString>;
+        activationThreshold: z.ZodNumber;
+        deactivationThreshold: z.ZodNumber;
+        debounceFrames: z.ZodNumber;
+    }, "strip", z.ZodTypeAny, {
+        activationThreshold: number;
+        deactivationThreshold: number;
+        debounceFrames: number;
+        pathToModel?: string | undefined;
+    }, {
+        activationThreshold: number;
+        deactivationThreshold: number;
+        debounceFrames: number;
+        pathToModel?: string | undefined;
+    }>;
 }, "strip", z.ZodTypeAny, {
     firstMessage: string;
     systemTemplate: string;
     goodbyeMessage: string;
     systemErrorMessage: string;
+    maxSpeechWaitTimeout: number;
+    vad: {
+        activationThreshold: number;
+        deactivationThreshold: number;
+        debounceFrames: number;
+        pathToModel?: string | undefined;
+    };
     initialDtmf?: string | null | undefined;
     transferOptions?: {
         message: string;
@@ -53,6 +77,13 @@ declare const conversationSettingsSchema: z.ZodObject<{
     systemTemplate: string;
     goodbyeMessage: string;
     systemErrorMessage: string;
+    maxSpeechWaitTimeout: number;
+    vad: {
+        activationThreshold: number;
+        deactivationThreshold: number;
+        debounceFrames: number;
+        pathToModel?: string | undefined;
+    };
     initialDtmf?: string | null | undefined;
     transferOptions?: {
         message: string;
@@ -267,6 +298,7 @@ declare const assistantSchema: z.ZodObject<{
         goodbyeMessage: z.ZodString;
         systemErrorMessage: z.ZodString;
         initialDtmf: z.ZodNullable<z.ZodOptional<z.ZodString>>;
+        maxSpeechWaitTimeout: z.ZodNumber;
         transferOptions: z.ZodNullable<z.ZodOptional<z.ZodObject<{
             phoneNumber: z.ZodString;
             message: z.ZodString;
@@ -293,11 +325,34 @@ declare const assistantSchema: z.ZodObject<{
             timeout: number;
             maxTimeoutCount: number;
         }>>>;
+        vad: z.ZodObject<{
+            pathToModel: z.ZodOptional<z.ZodString>;
+            activationThreshold: z.ZodNumber;
+            deactivationThreshold: z.ZodNumber;
+            debounceFrames: z.ZodNumber;
+        }, "strip", z.ZodTypeAny, {
+            activationThreshold: number;
+            deactivationThreshold: number;
+            debounceFrames: number;
+            pathToModel?: string | undefined;
+        }, {
+            activationThreshold: number;
+            deactivationThreshold: number;
+            debounceFrames: number;
+            pathToModel?: string | undefined;
+        }>;
     }, "strip", z.ZodTypeAny, {
         firstMessage: string;
         systemTemplate: string;
         goodbyeMessage: string;
         systemErrorMessage: string;
+        maxSpeechWaitTimeout: number;
+        vad: {
+            activationThreshold: number;
+            deactivationThreshold: number;
+            debounceFrames: number;
+            pathToModel?: string | undefined;
+        };
         initialDtmf?: string | null | undefined;
         transferOptions?: {
             message: string;
@@ -314,6 +369,13 @@ declare const assistantSchema: z.ZodObject<{
         systemTemplate: string;
         goodbyeMessage: string;
         systemErrorMessage: string;
+        maxSpeechWaitTimeout: number;
+        vad: {
+            activationThreshold: number;
+            deactivationThreshold: number;
+            debounceFrames: number;
+            pathToModel?: string | undefined;
+        };
         initialDtmf?: string | null | undefined;
         transferOptions?: {
             message: string;
@@ -527,6 +589,13 @@ declare const assistantSchema: z.ZodObject<{
         systemTemplate: string;
         goodbyeMessage: string;
         systemErrorMessage: string;
+        maxSpeechWaitTimeout: number;
+        vad: {
+            activationThreshold: number;
+            deactivationThreshold: number;
+            debounceFrames: number;
+            pathToModel?: string | undefined;
+        };
         initialDtmf?: string | null | undefined;
         transferOptions?: {
             message: string;
@@ -578,6 +647,13 @@ declare const assistantSchema: z.ZodObject<{
         systemTemplate: string;
         goodbyeMessage: string;
         systemErrorMessage: string;
+        maxSpeechWaitTimeout: number;
+        vad: {
+            activationThreshold: number;
+            deactivationThreshold: number;
+            debounceFrames: number;
+            pathToModel?: string | undefined;
+        };
         initialDtmf?: string | null | undefined;
         transferOptions?: {
             message: string;

package/dist/assistants/AssistantSchema.js CHANGED Viewed

@@ -28,6 +28,7 @@ const conversationSettingsSchema = zod_1.z.object({
     goodbyeMessage: zod_1.z.string(),
     systemErrorMessage: zod_1.z.string(),
     initialDtmf: zod_1.z.string().optional().nullable(),
+    maxSpeechWaitTimeout: zod_1.z.number(),
     transferOptions: zod_1.z
         .object({
         phoneNumber: zod_1.z.string(),
@@ -43,7 +44,13 @@ const conversationSettingsSchema = zod_1.z.object({
         maxTimeoutCount: zod_1.z.number()
     })
         .optional()
-        .nullable()
+        .nullable(),
+    vad: zod_1.z.object({
+        pathToModel: zod_1.z.string().optional(),
+        activationThreshold: zod_1.z.number(),
+        deactivationThreshold: zod_1.z.number(),
+        debounceFrames: zod_1.z.number()
+    })
 });
 exports.conversationSettingsSchema = conversationSettingsSchema;
 const languageModelConfigSchema = zod_1.z.object({

package/dist/handleVoiceRequest.js CHANGED Viewed

@@ -60,7 +60,7 @@ async function handleVoiceRequest(req, res) {
     const assistantConfig = (0, loadAssistantConfig_1.loadAssistantConfig)();
     const knowledgeBase = await (0, loadKnowledgeBase_1.loadKnowledgeBase)();
     const voice = new _1.VoiceImpl(sessionRef, res);
-    const vad = new _1.SileroVad();
+    const vad = new _1.SileroVad(assistantConfig.conversationSettings.vad);
     const languageModel = (0, createLanguageModel_1.createLanguageModel)({
         voice,
         assistantConfig,

package/dist/machine/machine.d.ts CHANGED Viewed

@@ -79,7 +79,7 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
         type: "isNotSpeaking";
         params: unknown;
     };
-}>, "IDLE_TIMEOUT", "hangup" | "greeting" | "idle" | "waitingForUserRequest" | "transitioningToIdle" | "updatingSpeech" | "processingUserRequest", string, {
+}>, "IDLE_TIMEOUT" | "MAX_SPEECH_WAIT_TIMEOUT", "hangup" | "greeting" | "idle" | "waitingForUserRequest" | "transitioningToIdle" | "updatingSpeech" | "processingUserRequest", string, {
     conversationSettings: ConversationSettings;
     languageModel: LanguageModel;
     voice: Voice;
@@ -157,6 +157,7 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
         idleTimeout: number;
         maxIdleTimeoutCount: number;
         idleTimeoutCount: number;
+        maxSpeechWaitTimeout: number;
         speechResponseStartTime: number;
         speechResponseTime: number;
         isSpeaking: false;
@@ -178,6 +179,10 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
                     readonly target: "waitingForUserRequest";
                     readonly description: "Event from VAD system.";
                 };
+                readonly SPEECH_RESULT: {
+                    readonly target: "waitingForUserRequest";
+                    readonly description: "Event from Speech to Text provider.";
+                };
             };
             readonly after: {
                 readonly IDLE_TIMEOUT: readonly [{
@@ -253,6 +258,11 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
                     };
                 }];
             };
+            readonly after: {
+                readonly MAX_SPEECH_WAIT_TIMEOUT: {
+                    readonly target: "processingUserRequest";
+                };
+            };
         };
         readonly processingUserRequest: {
             readonly on: {

package/dist/machine/machine.js CHANGED Viewed

@@ -124,6 +124,9 @@ const machine = (0, xstate_1.setup)({
     delays: {
         IDLE_TIMEOUT: ({ context }) => {
             return context.idleTimeout;
+        },
+        MAX_SPEECH_WAIT_TIMEOUT: ({ context }) => {
+            return context.maxSpeechWaitTimeout;
         }
     },
     actors: {
@@ -191,6 +194,7 @@ const machine = (0, xstate_1.setup)({
         idleTimeout: input.conversationSettings.idleOptions?.timeout || 10000,
         maxIdleTimeoutCount: input.conversationSettings.idleOptions?.maxTimeoutCount || 3,
         idleTimeoutCount: 0,
+        maxSpeechWaitTimeout: input.conversationSettings.maxSpeechWaitTimeout,
         speechResponseStartTime: 0,
         speechResponseTime: 0,
         isSpeaking: false
@@ -211,6 +215,10 @@ const machine = (0, xstate_1.setup)({
                 SPEECH_START: {
                     target: "waitingForUserRequest",
                     description: "Event from VAD system."
+                },
+                SPEECH_RESULT: {
+                    target: "waitingForUserRequest",
+                    description: "Event from Speech to Text provider."
                 }
             },
             after: {
@@ -303,6 +311,11 @@ const machine = (0, xstate_1.setup)({
                         }
                     }
                 ]
+            },
+            after: {
+                MAX_SPEECH_WAIT_TIMEOUT: {
+                    target: "processingUserRequest"
+                }
             }
         },
         processingUserRequest: {

package/dist/machine/types.d.ts CHANGED Viewed

@@ -13,6 +13,7 @@ type AutopilotContext = {
     idleTimeout: number;
     idleTimeoutCount: number;
     maxIdleTimeoutCount: number;
+    maxSpeechWaitTimeout: number;
     speechBuffer: string;
     speechResponseStartTime: number;
     speechResponseTime: number;

package/dist/models/openai/types.d.ts CHANGED Viewed

@@ -1,7 +1,9 @@
 import { BaseModelParams } from "../types";
 declare enum OpenAIModel {
     GPT_4O = "gpt-4o",
-    GPT_4O_MINI = "gpt-4o-mini"
+    GPT_4O_MINI = "gpt-4o-mini",
+    GPT_3_5_TURBO = "gpt-3.5-turbo",
+    GPT_4_TURBO = "gpt-4-turbo"
 }
 type OpenAIParams = BaseModelParams & {
     model: OpenAIModel;

package/dist/models/openai/types.js CHANGED Viewed

@@ -5,4 +5,6 @@ var OpenAIModel;
 (function (OpenAIModel) {
     OpenAIModel["GPT_4O"] = "gpt-4o";
     OpenAIModel["GPT_4O_MINI"] = "gpt-4o-mini";
+    OpenAIModel["GPT_3_5_TURBO"] = "gpt-3.5-turbo";
+    OpenAIModel["GPT_4_TURBO"] = "gpt-4-turbo";
 })(OpenAIModel || (exports.OpenAIModel = OpenAIModel = {}));

package/dist/vad/SileroVad.d.ts CHANGED Viewed

@@ -1,7 +1,17 @@
 import { Vad } from "./types";
 declare class SileroVad implements Vad {
     private vad;
-    constructor();
+    private params;
+    constructor(params: {
+        pathToModel?: string;
+        activationThreshold: number;
+        deactivationThreshold: number;
+        debounceFrames: number;
+    });
+    pathToModel?: string;
+    activationThreshold: number;
+    deactivationThreshold: number;
+    debounceFrames: number;
     private init;
     processChunk(data: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void): void;
 }

package/dist/vad/SileroVad.js CHANGED Viewed

@@ -20,14 +20,18 @@ exports.SileroVad = void 0;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+const logger_1 = require("@fonoster/logger");
 const makeVad_1 = require("./makeVad");
+const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
 class SileroVad {
-    constructor() {
+    constructor(params) {
+        logger.verbose("starting instance of silero vad", { ...params });
+        this.params = params;
         this.init();
     }
     async init() {
         // FIXME: It feels strange to do this in the constructor
-        this.vad = await (0, makeVad_1.makeVad)();
+        this.vad = await (0, makeVad_1.makeVad)(this.params);
     }
     processChunk(data, callback) {
         if (!this.vad) {

package/dist/vad/makeVad.d.ts CHANGED Viewed

@@ -1,2 +1,7 @@
-declare function makeVad(pathToModel?: string): Promise<(chunk: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void) => Promise<void>>;
+declare function makeVad(params: {
+    pathToModel?: string;
+    activationThreshold: number;
+    deactivationThreshold: number;
+    debounceFrames: number;
+}): Promise<(chunk: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void) => Promise<void>>;
 export { makeVad };

package/dist/vad/makeVad.js CHANGED Viewed

@@ -50,11 +50,14 @@ const chunkToFloat32Array_1 = require("./chunkToFloat32Array");
 const SileroVadModel_1 = require("./SileroVadModel");
 const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
 const BUFFER_SIZE = 16000;
-async function makeVad(pathToModel) {
+async function makeVad(params) {
+    const { pathToModel, activationThreshold, deactivationThreshold, debounceFrames } = params;
     const effectivePath = pathToModel || (0, path_1.join)(__dirname, "..", "..", "silero_vad.onnx");
     const silero = await SileroVadModel_1.SileroVadModel.new(ort, effectivePath);
     let audioBuffer = [];
     let isSpeechActive = false;
+    let consecutiveSpeechFrames = 0;
+    let consecutiveNonSpeechFrames = 0;
     return async function process(chunk, callback) {
         const float32Array = (0, chunkToFloat32Array_1.chunkToFloat32Array)(chunk);
         audioBuffer.push(...float32Array);
@@ -65,20 +68,24 @@ async function makeVad(pathToModel) {
             const remainingBuffer = buffer.slice(BUFFER_SIZE);
             const result = await silero.process(new Float32Array(audioFrame));
             logger.silly("last vad result", { ...result });
-            if (result.isSpeech > 0.5) {
-                if (!isSpeechActive) {
+            if (result.isSpeech > activationThreshold) {
+                consecutiveNonSpeechFrames = 0; // Reset non-speech counter
+                consecutiveSpeechFrames++;
+                if (consecutiveSpeechFrames >= debounceFrames && !isSpeechActive) {
                     isSpeechActive = true;
                     callback("SPEECH_START");
-                    return processBuffer(remainingBuffer);
                 }
             }
-            else if (isSpeechActive) {
-                isSpeechActive = false;
-                callback("SPEECH_END");
-                // WARNING: I'm unsure if this has any effect on the model
-                // but it seems to work fine to ensure the model works optimally
-                silero.resetState();
-                return processBuffer(remainingBuffer);
+            else {
+                consecutiveSpeechFrames = 0; // Reset speech counter
+                consecutiveNonSpeechFrames++;
+                if (consecutiveNonSpeechFrames >= debounceFrames &&
+                    isSpeechActive &&
+                    result.isSpeech < deactivationThreshold) {
+                    isSpeechActive = false;
+                    callback("SPEECH_END");
+                    silero.resetState(); // Reset VAD state after speech ends
+                }
             }
             return processBuffer(remainingBuffer);
         };

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@fonoster/autopilot",
-  "version": "0.7.18",
+  "version": "0.7.20",
   "description": "Voice AI for the Fonoster platform",
   "author": "Pedro Sanders <psanders@fonoster.com>",
   "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -56,5 +56,5 @@
   "devDependencies": {
     "typescript": "^5.5.4"
   },
-  "gitHead": "4150dcb8086de182d0650df0c6d990ee76658058"
+  "gitHead": "bde17656cad787fd22127beaa5d8617e38be2eb6"
 }