npm - @fonoster/autopilot - Versions diffs - 0.7.19 → 0.7.21 - Mend

@fonoster/autopilot 0.7.19 → 0.7.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16)

package/README.md +1 -1
package/dist/Autopilot.d.ts +1 -0
package/dist/Autopilot.js +31 -16
package/dist/assistants/AssistantSchema.d.ts +8 -0
package/dist/assistants/AssistantSchema.js +1 -0
package/dist/handleVoiceRequest.js +0 -2
package/dist/machine/machine.d.ts +11 -1
package/dist/machine/machine.js +13 -0
package/dist/machine/types.d.ts +1 -0
package/dist/models/openai/types.d.ts +3 -1
package/dist/models/openai/types.js +2 -0
package/dist/types.d.ts +0 -2
package/dist/vad/types.d.ts +3 -2
package/dist/vadWorker.d.ts +1 -0
package/dist/vadWorker.js +28 -0
package/package.json +2 -2

package/README.md CHANGED Viewed

@@ -88,7 +88,7 @@ The Autopilot supports multiple language model providers. The following is a lis
 | Provider   | Description                                                | Supported models
 |------------|------------------------------------------------------------|------------------------------------------------------------------------------|
-| OpenAI     | OpenAI provides various GPT models for conversational AI   | `gpt-4o`, `gpt-4o-mini`                                                      |
+| OpenAI     | OpenAI provides various GPT models for conversational AI   | `gpt-4o`, `gpt-4o-mini`, `gpt-3.5-turbo`, `gpt-4-turbo`                      |
 | Groq       | Groq offers high-performance AI models optimized for speed | `gemm-7b-it`, `llama3-groq-70b-8192-tool-use-preview`, `llama3-1-8b-instant` |
 | Ollama     | Self-hosted Ollama models                                  | `lama3.1`                                                                    |

package/dist/Autopilot.d.ts CHANGED Viewed

@@ -2,6 +2,7 @@ import { AutopilotParams } from "./types";
 declare class Autopilot {
     private params;
     private actor;
+    private vadWorker;
     constructor(params: AutopilotParams);
     start(): void;
     stop(): void;

package/dist/Autopilot.js CHANGED Viewed

@@ -1,4 +1,7 @@
 "use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.Autopilot = void 0;
 /*
@@ -19,6 +22,8 @@ exports.Autopilot = void 0;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+const path_1 = __importDefault(require("path"));
+const worker_threads_1 = require("worker_threads");
 const logger_1 = require("@fonoster/logger");
 const xstate_1 = require("xstate");
 const machine_1 = require("./machine/machine");
@@ -27,6 +32,10 @@ class Autopilot {
     constructor(params) {
         this.params = params;
         const { voice, languageModel, conversationSettings } = this.params;
+        const vadWorkerPath = path_1.default.resolve(__dirname, "../dist", "./vadWorker");
+        this.vadWorker = new worker_threads_1.Worker(vadWorkerPath, {
+            workerData: conversationSettings.vad
+        });
         this.actor = (0, xstate_1.createActor)(machine_1.machine, {
             input: {
                 conversationSettings,
@@ -42,30 +51,36 @@ class Autopilot {
         });
         this.setupVoiceStream();
         this.setupSpeechGathering();
+        this.vadWorker.on("error", (err) => {
+            logger.error("vad worker error", err);
+        });
+        this.vadWorker.on("exit", (code) => {
+            if (code !== 0) {
+                logger.error("vad worker stopped with exit code", { code });
+            }
+        });
     }
     stop() {
         logger.verbose("stopping autopilot");
         this.actor.stop();
+        this.vadWorker.terminate();
     }
     async setupVoiceStream() {
-        const { voice, vad } = this.params;
+        const { voice } = this.params;
         const stream = await voice.stream();
-        stream.onData(this.handleVoicePayload(vad));
+        stream.onData(this.handleVoicePayload.bind(this));
+        this.vadWorker.on("message", (event) => {
+            logger.verbose("received speech event from vad", { event });
+            this.actor.send({ type: event });
+        });
     }
-    handleVoicePayload(vad) {
-        return (chunk) => {
-            try {
-                vad.processChunk(chunk, (event) => {
-                    if (["SPEECH_START", "SPEECH_END"].includes(event)) {
-                        logger.verbose("received speech event", { event });
-                        this.actor.send({ type: event });
-                    }
-                });
-            }
-            catch (err) {
-                logger.error("an error occurred while processing vad", err);
-            }
-        };
+    handleVoicePayload(chunk) {
+        try {
+            this.vadWorker.postMessage(chunk);
+        }
+        catch (err) {
+            logger.error("an error occurred while processing vad", err);
+        }
     }
     async setupSpeechGathering() {
         const { voice } = this.params;

package/dist/assistants/AssistantSchema.d.ts CHANGED Viewed

@@ -6,6 +6,7 @@ declare const conversationSettingsSchema: z.ZodObject<{
     goodbyeMessage: z.ZodString;
     systemErrorMessage: z.ZodString;
     initialDtmf: z.ZodNullable<z.ZodOptional<z.ZodString>>;
+    maxSpeechWaitTimeout: z.ZodNumber;
     transferOptions: z.ZodNullable<z.ZodOptional<z.ZodObject<{
         phoneNumber: z.ZodString;
         message: z.ZodString;
@@ -53,6 +54,7 @@ declare const conversationSettingsSchema: z.ZodObject<{
     systemTemplate: string;
     goodbyeMessage: string;
     systemErrorMessage: string;
+    maxSpeechWaitTimeout: number;
     vad: {
         activationThreshold: number;
         deactivationThreshold: number;
@@ -75,6 +77,7 @@ declare const conversationSettingsSchema: z.ZodObject<{
     systemTemplate: string;
     goodbyeMessage: string;
     systemErrorMessage: string;
+    maxSpeechWaitTimeout: number;
     vad: {
         activationThreshold: number;
         deactivationThreshold: number;
@@ -295,6 +298,7 @@ declare const assistantSchema: z.ZodObject<{
         goodbyeMessage: z.ZodString;
         systemErrorMessage: z.ZodString;
         initialDtmf: z.ZodNullable<z.ZodOptional<z.ZodString>>;
+        maxSpeechWaitTimeout: z.ZodNumber;
         transferOptions: z.ZodNullable<z.ZodOptional<z.ZodObject<{
             phoneNumber: z.ZodString;
             message: z.ZodString;
@@ -342,6 +346,7 @@ declare const assistantSchema: z.ZodObject<{
         systemTemplate: string;
         goodbyeMessage: string;
         systemErrorMessage: string;
+        maxSpeechWaitTimeout: number;
         vad: {
             activationThreshold: number;
             deactivationThreshold: number;
@@ -364,6 +369,7 @@ declare const assistantSchema: z.ZodObject<{
         systemTemplate: string;
         goodbyeMessage: string;
         systemErrorMessage: string;
+        maxSpeechWaitTimeout: number;
         vad: {
             activationThreshold: number;
             deactivationThreshold: number;
@@ -583,6 +589,7 @@ declare const assistantSchema: z.ZodObject<{
         systemTemplate: string;
         goodbyeMessage: string;
         systemErrorMessage: string;
+        maxSpeechWaitTimeout: number;
         vad: {
             activationThreshold: number;
             deactivationThreshold: number;
@@ -640,6 +647,7 @@ declare const assistantSchema: z.ZodObject<{
         systemTemplate: string;
         goodbyeMessage: string;
         systemErrorMessage: string;
+        maxSpeechWaitTimeout: number;
         vad: {
             activationThreshold: number;
             deactivationThreshold: number;

package/dist/assistants/AssistantSchema.js CHANGED Viewed

@@ -28,6 +28,7 @@ const conversationSettingsSchema = zod_1.z.object({
     goodbyeMessage: zod_1.z.string(),
     systemErrorMessage: zod_1.z.string(),
     initialDtmf: zod_1.z.string().optional().nullable(),
+    maxSpeechWaitTimeout: zod_1.z.number(),
     transferOptions: zod_1.z
         .object({
         phoneNumber: zod_1.z.string(),

package/dist/handleVoiceRequest.js CHANGED Viewed

@@ -60,7 +60,6 @@ async function handleVoiceRequest(req, res) {
     const assistantConfig = (0, loadAssistantConfig_1.loadAssistantConfig)();
     const knowledgeBase = await (0, loadKnowledgeBase_1.loadKnowledgeBase)();
     const voice = new _1.VoiceImpl(sessionRef, res);
-    const vad = new _1.SileroVad(assistantConfig.conversationSettings.vad);
     const languageModel = (0, createLanguageModel_1.createLanguageModel)({
         voice,
         assistantConfig,
@@ -74,7 +73,6 @@ async function handleVoiceRequest(req, res) {
     const autopilot = new _1.default({
         conversationSettings: assistantConfig.conversationSettings,
         voice,
-        vad,
         languageModel
     });
     autopilot.start();

package/dist/machine/machine.d.ts CHANGED Viewed

@@ -79,7 +79,7 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
         type: "isNotSpeaking";
         params: unknown;
     };
-}>, "IDLE_TIMEOUT", "hangup" | "greeting" | "idle" | "waitingForUserRequest" | "transitioningToIdle" | "updatingSpeech" | "processingUserRequest", string, {
+}>, "IDLE_TIMEOUT" | "MAX_SPEECH_WAIT_TIMEOUT", "hangup" | "greeting" | "idle" | "waitingForUserRequest" | "transitioningToIdle" | "updatingSpeech" | "processingUserRequest", string, {
     conversationSettings: ConversationSettings;
     languageModel: LanguageModel;
     voice: Voice;
@@ -157,6 +157,7 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
         idleTimeout: number;
         maxIdleTimeoutCount: number;
         idleTimeoutCount: number;
+        maxSpeechWaitTimeout: number;
         speechResponseStartTime: number;
         speechResponseTime: number;
         isSpeaking: false;
@@ -178,6 +179,10 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
                     readonly target: "waitingForUserRequest";
                     readonly description: "Event from VAD system.";
                 };
+                readonly SPEECH_RESULT: {
+                    readonly target: "waitingForUserRequest";
+                    readonly description: "Event from Speech to Text provider.";
+                };
             };
             readonly after: {
                 readonly IDLE_TIMEOUT: readonly [{
@@ -253,6 +258,11 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
                     };
                 }];
             };
+            readonly after: {
+                readonly MAX_SPEECH_WAIT_TIMEOUT: {
+                    readonly target: "processingUserRequest";
+                };
+            };
         };
         readonly processingUserRequest: {
             readonly on: {

package/dist/machine/machine.js CHANGED Viewed

@@ -124,6 +124,9 @@ const machine = (0, xstate_1.setup)({
     delays: {
         IDLE_TIMEOUT: ({ context }) => {
             return context.idleTimeout;
+        },
+        MAX_SPEECH_WAIT_TIMEOUT: ({ context }) => {
+            return context.maxSpeechWaitTimeout;
         }
     },
     actors: {
@@ -191,6 +194,7 @@ const machine = (0, xstate_1.setup)({
         idleTimeout: input.conversationSettings.idleOptions?.timeout || 10000,
         maxIdleTimeoutCount: input.conversationSettings.idleOptions?.maxTimeoutCount || 3,
         idleTimeoutCount: 0,
+        maxSpeechWaitTimeout: input.conversationSettings.maxSpeechWaitTimeout,
         speechResponseStartTime: 0,
         speechResponseTime: 0,
         isSpeaking: false
@@ -211,6 +215,10 @@ const machine = (0, xstate_1.setup)({
                 SPEECH_START: {
                     target: "waitingForUserRequest",
                     description: "Event from VAD system."
+                },
+                SPEECH_RESULT: {
+                    target: "waitingForUserRequest",
+                    description: "Event from Speech to Text provider."
                 }
             },
             after: {
@@ -303,6 +311,11 @@ const machine = (0, xstate_1.setup)({
                         }
                     }
                 ]
+            },
+            after: {
+                MAX_SPEECH_WAIT_TIMEOUT: {
+                    target: "processingUserRequest"
+                }
             }
         },
         processingUserRequest: {

package/dist/machine/types.d.ts CHANGED Viewed

@@ -13,6 +13,7 @@ type AutopilotContext = {
     idleTimeout: number;
     idleTimeoutCount: number;
     maxIdleTimeoutCount: number;
+    maxSpeechWaitTimeout: number;
     speechBuffer: string;
     speechResponseStartTime: number;
     speechResponseTime: number;

package/dist/models/openai/types.d.ts CHANGED Viewed

@@ -1,7 +1,9 @@
 import { BaseModelParams } from "../types";
 declare enum OpenAIModel {
     GPT_4O = "gpt-4o",
-    GPT_4O_MINI = "gpt-4o-mini"
+    GPT_4O_MINI = "gpt-4o-mini",
+    GPT_3_5_TURBO = "gpt-3.5-turbo",
+    GPT_4_TURBO = "gpt-4-turbo"
 }
 type OpenAIParams = BaseModelParams & {
     model: OpenAIModel;

package/dist/models/openai/types.js CHANGED Viewed

@@ -5,4 +5,6 @@ var OpenAIModel;
 (function (OpenAIModel) {
     OpenAIModel["GPT_4O"] = "gpt-4o";
     OpenAIModel["GPT_4O_MINI"] = "gpt-4o-mini";
+    OpenAIModel["GPT_3_5_TURBO"] = "gpt-3.5-turbo";
+    OpenAIModel["GPT_4_TURBO"] = "gpt-4-turbo";
 })(OpenAIModel || (exports.OpenAIModel = OpenAIModel = {}));

package/dist/types.d.ts CHANGED Viewed

@@ -1,6 +1,5 @@
 import { ConversationSettings } from "./assistants";
 import { LanguageModel } from "./models";
-import { Vad } from "./vad";
 import { Voice } from "./voice";
 declare enum LANGUAGE_MODEL_PROVIDER {
     OPENAI = "openai",
@@ -9,7 +8,6 @@ declare enum LANGUAGE_MODEL_PROVIDER {
 }
 type AutopilotParams = {
     voice: Voice;
-    vad: Vad;
     conversationSettings: ConversationSettings;
     languageModel: LanguageModel;
 };

package/dist/vad/types.d.ts CHANGED Viewed

@@ -1,5 +1,6 @@
+type VadEvent = "SPEECH_START" | "SPEECH_END";
 type Vad = {
-    processChunk: (chunk: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void) => void;
+    processChunk: (chunk: Uint8Array, callback: (event: VadEvent) => void) => void;
 };
 type SpeechProbabilities = {
     notSpeech: number;
@@ -16,4 +17,4 @@ type ONNXRuntimeAPI = {
         new (type: "float32", data: Float32Array, dims: [1, number]): unknown;
     };
 };
-export { ONNXRuntimeAPI, SpeechProbabilities, Vad };
+export { ONNXRuntimeAPI, SpeechProbabilities, Vad, VadEvent };

package/dist/vadWorker.d.ts ADDED Viewed

@@ -0,0 +1 @@
+export {};

package/dist/vadWorker.js ADDED Viewed

@@ -0,0 +1,28 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *    https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const worker_threads_1 = require("worker_threads");
+const SileroVad_1 = require("./vad/SileroVad");
+const vad = new SileroVad_1.SileroVad(worker_threads_1.workerData);
+worker_threads_1.parentPort?.on("message", (chunk) => {
+    vad.processChunk(chunk, (voiceActivity) => {
+        worker_threads_1.parentPort?.postMessage(voiceActivity);
+    });
+});

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@fonoster/autopilot",
-  "version": "0.7.19",
+  "version": "0.7.21",
   "description": "Voice AI for the Fonoster platform",
   "author": "Pedro Sanders <psanders@fonoster.com>",
   "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -56,5 +56,5 @@
   "devDependencies": {
     "typescript": "^5.5.4"
   },
-  "gitHead": "5250aa76f6c4b72a3b26beabe71ea7a7c227d7c1"
+  "gitHead": "0de74ab45f5fe25b0f096ad02bab2be00be53d89"
 }