@fonoster/autopilot 0.7.0 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/Autopilot.d.ts +8 -0
- package/dist/Autopilot.js +46 -18
- package/dist/assistants/AssistantSchema.d.ts +25 -0
- package/dist/assistants/AssistantSchema.js +32 -0
- package/dist/assistants/index.d.ts +2 -0
- package/dist/assistants/index.js +4 -1
- package/dist/assistants/loadAndValidateAssistant.d.ts +3 -0
- package/dist/assistants/loadAndValidateAssistant.js +46 -0
- package/dist/assistants/types.d.ts +8 -4
- package/dist/machine/machine.d.ts +75 -20
- package/dist/machine/machine.js +77 -46
- package/dist/machine/types.d.ts +9 -1
- package/dist/runner.d.ts +2 -0
- package/dist/{demo.js → runner.js} +10 -21
- package/dist/types.d.ts +2 -9
- package/dist/vad/SileroVadModel.d.ts +15 -0
- package/dist/vad/SileroVadModel.js +65 -0
- package/dist/vad/chunkToFloat32Array.d.ts +2 -0
- package/dist/vad/chunkToFloat32Array.js +25 -0
- package/dist/vad/index.d.ts +3 -0
- package/dist/vad/index.js +38 -0
- package/dist/vad/makeVad.d.ts +2 -0
- package/dist/vad/makeVad.js +83 -0
- package/dist/vad/micVadTest.js +48 -0
- package/dist/vad/types.d.ts +17 -0
- package/dist/vad/types.js +2 -0
- package/package.json +13 -6
- package/silero_vad.onnx +0 -0
- package/dist/assistants/examples.d.ts +0 -10
- package/dist/assistants/examples.js +0 -96
- /package/dist/{demo.d.ts → vad/micVadTest.d.ts} +0 -0
package/README.md
CHANGED
@@ -1,3 +1,3 @@
 <a href="https://gitpod.io/#https://github.com/fonoster/fonoster"> <img src="https://img.shields.io/badge/Contribute%20with-Gitpod-908a85?logo=gitpod" alt="Contribute with Gitpod" />
 
-This module is part of the [Fonoster](https://fonoster.com) project. By itself, it does not do much. It is intended to be used as a dependency for other modules. For more information about the project, please visit [https://github.com/fonoster/fonoster](https://github.com/fonoster/fonoster)
+This module is part of the [Fonoster](https://fonoster.com) project. By itself, it does not do much. It is intended to be used as a dependency for other modules. For more information about the project, please visit [https://github.com/fonoster/fonoster](https://github.com/fonoster/fonoster).
package/dist/Autopilot.d.ts
CHANGED
@@ -1,7 +1,15 @@
 import { AutopilotConfig } from "./types";
 declare class Autopilot {
     private config;
+    private assistant;
+    private actor;
+    private voice;
     constructor(config: AutopilotConfig);
     start(): void;
+    private createActor;
+    private subscribeToActorState;
+    private setupVoiceStream;
+    private handleVoicePayload;
+    private setupSpeechGathering;
 }
 export { Autopilot };
package/dist/Autopilot.js
CHANGED
@@ -24,34 +24,62 @@ const logger_1 = require("@fonoster/logger");
 const xstate_1 = require("xstate");
 const assistants_1 = require("./assistants");
 const machine_1 = require("./machine/machine");
+const vad_1 = require("./vad");
 const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
 class Autopilot {
     constructor(config) {
         this.config = config;
-        this.
+        this.assistant = (0, assistants_1.makeAssistant)(config.assistantConfig);
+        this.actor = this.createActor();
+        this.voice = config.voice;
     }
     start() {
-
-
-
-
-
-
-
-
+        this.actor.start();
+        this.setupSpeechGathering();
+        this.setupVoiceStream();
+        this.subscribeToActorState();
+    }
+    createActor() {
+        const { voice } = this.config;
+        const { firstMessage } = this.config.assistantConfig;
+        return (0, xstate_1.createActor)(machine_1.machine, {
+            input: { firstMessage, voice, assistant: this.assistant }
         });
-
-
+    }
+    subscribeToActorState() {
+        this.actor.subscribe((state) => {
            logger.verbose("actor's new state is", { state: state.value });
        });
-
-
+    }
+    async setupVoiceStream() {
+        const stream = await this.config.voice.stream({
+            direction: common_1.StreamDirection.OUT
+        });
+        const vad = await (0, vad_1.makeVad)();
+        stream.onPayload(this.handleVoicePayload(vad));
+    }
+    handleVoicePayload(vad) {
+        return async (payload) => {
+            try {
+                // TODO: Investigate why we need to cast this to Float32Array
+                const data = payload.data;
+                await vad(data, (event) => {
+                    if (event === "SPEECH_START" || event === "SPEECH_END") {
+                        this.actor.send({ type: event });
+                    }
+                });
+            }
+            catch (err) {
+                logger.error("an error occurred while processing vad", err);
+            }
+        };
+    }
+    async setupSpeechGathering() {
+        const stream = await this.voice.sgather({
            source: common_1.StreamGatherSource.SPEECH
-        })
-
-
-            actor.send({ type: "HUMAN_PROMPT", speech: payload.speech });
-        });
+        });
+        stream.onPayload((payload) => {
+            this.actor.send({ type: "HUMAN_PROMPT", speech: payload.speech });
        });
    }
 }
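The rewritten Autopilot feeds one XState actor from two independent sources: the media stream runs through the VAD and surfaces only as SPEECH_START/SPEECH_END boundary events, while the speech gatherer delivers transcripts as HUMAN_PROMPT events. A minimal sketch of that fan-in, with hypothetical stream shapes (only the event names and the VAD callback contract come from the diff above; the stream types are stand-ins, not the @fonoster/voice API):

```typescript
// Sketch of the two event feeds converging on one actor.
type AutopilotEvent =
  | { type: "SPEECH_START" }
  | { type: "SPEECH_END" }
  | { type: "HUMAN_PROMPT"; speech: string };

type Vad = (chunk: Float32Array, cb: (event: string) => void) => Promise<void>;

function wireStreams(
  actor: { send: (event: AutopilotEvent) => void },
  audioChunks: AsyncIterable<Float32Array>, // hypothetical audio feed
  transcripts: AsyncIterable<string>,       // hypothetical transcript feed
  vad: Vad
) {
  // Raw audio: only speech boundaries reach the state machine.
  void (async () => {
    for await (const chunk of audioChunks) {
      await vad(chunk, (event) => {
        if (event === "SPEECH_START" || event === "SPEECH_END") {
          actor.send({ type: event });
        }
      });
    }
  })();
  // Transcripts: every utterance becomes a HUMAN_PROMPT.
  void (async () => {
    for await (const speech of transcripts) {
      actor.send({ type: "HUMAN_PROMPT", speech });
    }
  })();
}
```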
package/dist/assistants/AssistantSchema.d.ts
ADDED

@@ -0,0 +1,25 @@
+import { z } from "zod";
+import { Model } from "./types";
+declare const AssistantSchema: z.ZodObject<{
+    name: z.ZodString;
+    firstMessage: z.ZodString;
+    systemTemplate: z.ZodString;
+    model: z.ZodNativeEnum<typeof Model>;
+    temperature: z.ZodNumber;
+    maxTokens: z.ZodNumber;
+}, "strip", z.ZodTypeAny, {
+    systemTemplate: string;
+    model: Model;
+    temperature: number;
+    maxTokens: number;
+    name: string;
+    firstMessage: string;
+}, {
+    systemTemplate: string;
+    model: Model;
+    temperature: number;
+    maxTokens: number;
+    name: string;
+    firstMessage: string;
+}>;
+export { AssistantSchema };

package/dist/assistants/AssistantSchema.js
ADDED

@@ -0,0 +1,32 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.AssistantSchema = void 0;
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const zod_1 = require("zod");
+const types_1 = require("./types");
+const AssistantSchema = zod_1.z.object({
+    name: zod_1.z.string(),
+    firstMessage: zod_1.z.string(),
+    systemTemplate: zod_1.z.string(),
+    model: zod_1.z.nativeEnum(types_1.Model),
+    temperature: zod_1.z.number(),
+    maxTokens: zod_1.z.number()
+});
+exports.AssistantSchema = AssistantSchema;
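Since AssistantSchema is a plain zod object, it can also be used directly; a small sketch (the import path assumes the re-export added in the next file, and the literal values are made up):

```typescript
import { AssistantSchema } from "@fonoster/autopilot"; // assumed re-export path

const candidate = {
  name: "Front Desk",
  firstMessage: "Hi! How can I help you today?",
  systemTemplate: "You are a friendly phone assistant.",
  model: "gpt-4o-mini", // must be a value of the Model enum
  temperature: 0.7,
  maxTokens: 100
};

// safeParse reports schema violations without throwing, which is handy
// when you do not want the loader's hard process.exit(1) behavior.
const result = AssistantSchema.safeParse(candidate);
if (!result.success) {
  console.error(result.error.issues);
}
```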
package/dist/assistants/index.js
CHANGED
@@ -14,7 +14,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
 };
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.makeAssistant = void 0;
+exports.loadAndValidateAssistant = exports.makeAssistant = void 0;
 /*
  * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
  * http://github.com/fonoster/fonoster
@@ -35,4 +35,7 @@ exports.makeAssistant = void 0;
  */
 var assistants_1 = require("./assistants");
 Object.defineProperty(exports, "makeAssistant", { enumerable: true, get: function () { return assistants_1.makeAssistant; } });
+var loadAndValidateAssistant_1 = require("./loadAndValidateAssistant");
+Object.defineProperty(exports, "loadAndValidateAssistant", { enumerable: true, get: function () { return loadAndValidateAssistant_1.loadAndValidateAssistant; } });
+__exportStar(require("./AssistantSchema"), exports);
 __exportStar(require("./types"), exports);
package/dist/assistants/loadAndValidateAssistant.js
ADDED

@@ -0,0 +1,46 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.loadAndValidateAssistant = loadAndValidateAssistant;
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const fs_1 = __importDefault(require("fs"));
+const logger_1 = require("@fonoster/logger");
+const AssistantSchema_1 = require("./AssistantSchema");
+const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
+function loadAndValidateAssistant(path) {
+    if (!fs_1.default.existsSync(path)) {
+        logger.error("assistant file not found", { path });
+        process.exit(1);
+    }
+    try {
+        const fileContent = fs_1.default.readFileSync(path, "utf8");
+        const assistant = JSON.parse(fileContent);
+        return AssistantSchema_1.AssistantSchema.parse(assistant);
+    }
+    catch (e) {
+        logger.error("error parsing or validating assistant file", {
+            path,
+            error: e.message
+        });
+        process.exit(1);
+    }
+}
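Usage is a single call; note that the function terminates the process on a missing file or a schema violation rather than throwing. A hedged sketch (the file path and import specifier are hypothetical):

```typescript
import { loadAndValidateAssistant } from "@fonoster/autopilot"; // assumed export path

// Exits with code 1 if the file is missing or fails AssistantSchema.parse.
const assistant = loadAndValidateAssistant("./assistants/front-desk.json");
console.log(`loaded assistant: ${assistant.name}`);
```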
package/dist/assistants/types.d.ts
CHANGED

@@ -3,11 +3,15 @@ declare enum Model {
     GPT_4 = "gpt-4",
     GPT_4O_MINI = "gpt-4o-mini"
 }
-type
-
+type AssistantFromJson = {
+    name: string;
+    firstMessage: string;
+    systemTemplate: string;
     model: Model;
     temperature: number;
     maxTokens: number;
-    systemTemplate: string;
 };
-
+type AssistantConfig = AssistantFromJson & {
+    apiKey: string;
+};
+export { AssistantFromJson, AssistantConfig, Model };
package/dist/machine/machine.d.ts
CHANGED

@@ -1,9 +1,17 @@
-
+declare const machine: import("xstate").StateMachine<{
     firstMessage: string;
     voice: import("@fonoster/voice").VoiceResponse;
     assistant: import("../assistants/assistants").Assistant;
+    playbackRef: string;
+    speechBuffer: string;
+    speechResponseStartTime: number;
+    speechResponseTime: number;
 }, {
-    type: "
+    type: "SPEECH_START";
+} | {
+    type: "SPEECH_END";
+} | {
+    type: "SESSION_END";
 } | {
     type: "HUMAN_PROMPT";
     speech: string;
@@ -12,8 +20,12 @@ export declare const machine: import("xstate").StateMachine<{
         type: "sendGreeting";
         params: unknown;
     };
-
-        type: "
+    interruptMachineSpeaking: {
+        type: "interruptMachineSpeaking";
+        params: unknown;
+    };
+    appendSpeech: {
+        type: "appendSpeech";
         params: unknown;
     };
     processHumanRequest: {
@@ -24,7 +36,10 @@ export declare const machine: import("xstate").StateMachine<{
         type: "hangup";
         params: unknown;
     };
-}>,
+}>, {
+    type: "hasSpeechBuffer";
+    params: unknown;
+}, never, "hangup" | "welcome" | "machineListening" | "humanSpeaking", string, {
     firstMessage: string;
     voice: import("@fonoster/voice").VoiceResponse;
     assistant: import("../assistants/assistants").Assistant;
@@ -48,13 +63,25 @@ export declare const machine: import("xstate").StateMachine<{
     firstMessage: string;
     voice: import("@fonoster/voice").VoiceResponse;
     assistant: import("../assistants/assistants").Assistant;
+    playbackRef: string;
+    speechBuffer: string;
+    speechResponseStartTime: number;
+    speechResponseTime: number;
 }, {
-    type: "
+    type: "SPEECH_START";
+} | {
+    type: "SPEECH_END";
+} | {
+    type: "SESSION_END";
 } | {
     type: "HUMAN_PROMPT";
     speech: string;
 }, Record<string, import("xstate").AnyActorRef | undefined>, import("xstate").StateValue, string, unknown, any, any>, {
-    type: "
+    type: "SPEECH_START";
+} | {
+    type: "SPEECH_END";
+} | {
+    type: "SESSION_END";
 } | {
     type: "HUMAN_PROMPT";
     speech: string;
@@ -63,46 +90,74 @@ export declare const machine: import("xstate").StateMachine<{
     firstMessage: string;
     voice: import("@fonoster/voice").VoiceResponse;
     assistant: import("@langchain/core/runnables").Runnable<any, string, import("@langchain/core/runnables").RunnableConfig>;
+    playbackRef: string;
+    speechBuffer: string;
+    speechResponseStartTime: number;
+    speechResponseTime: number;
 };
 readonly id: "fnAI";
 readonly initial: "welcome";
 readonly states: {
     readonly welcome: {
-        readonly always: {
-            readonly target: "active";
-        };
         readonly entry: {
             readonly type: "sendGreeting";
         };
+        readonly always: {
+            readonly target: "machineListening";
+        };
         readonly description: "The initial state where the AI greets the Human.";
     };
-    readonly
+    readonly machineListening: {
         readonly on: {
+            readonly SPEECH_START: {
+                readonly target: "humanSpeaking";
+                readonly description: "This must be triggered by a VAD or similar system.";
+            };
             readonly HUMAN_PROMPT: {
-                readonly target: "active";
                 readonly actions: {
-                    readonly type: "
+                    readonly type: "appendSpeech";
                 };
-                readonly description: "
+                readonly description: "Appends the speech to the buffer.";
             };
         };
-        readonly description: "The state where the AI is actively
+        readonly description: "The state where the AI is actively listening in conversation.";
     };
     readonly humanSpeaking: {
-        readonly always: {
-            readonly target: "active";
-        };
         readonly entry: {
-            readonly type: "
+            readonly type: "interruptMachineSpeaking";
+        };
+        readonly on: {
+            readonly HUMAN_PROMPT: {
+                readonly actions: {
+                    readonly type: "appendSpeech";
+                };
+                readonly description: "Appends the speech to the buffer.";
+            };
+            readonly SPEECH_END: {
+                readonly target: "machineListening";
+                readonly actions: {
+                    readonly type: "processHumanRequest";
+                };
+                readonly guard: {
+                    readonly type: "hasSpeechBuffer";
+                };
+                readonly description: "This must be triggered by a VAD or similar system.";
+            };
        };
        readonly description: "The state where the AI detects Human speech while it is speaking.";
    };
    readonly hangup: {
        readonly type: "final";
-        readonly description: "The final state where the AI terminates the conversation due to inactivity.";
        readonly entry: {
            readonly type: "hangup";
        };
+        readonly on: {
+            readonly SESSION_END: {
+                readonly target: "hangup";
+            };
+        };
+        readonly description: "The final state where the AI terminates the conversation due to inactivity.";
    };
 };
 }>;
+export { machine };
package/dist/machine/machine.js
CHANGED
@@ -1,13 +1,4 @@
 "use strict";
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.machine = void 0;
 /*
@@ -28,84 +19,124 @@ exports.machine = void 0;
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
+const common_1 = require("@fonoster/common");
 const logger_1 = require("@fonoster/logger");
+const uuid_1 = require("uuid");
 const xstate_1 = require("xstate");
 const types_1 = require("./types");
 const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
-
+const machine = (0, xstate_1.setup)({
     types: types_1.types,
     actions: {
-        sendGreeting: function (
-
-
-
+        sendGreeting: async function ({ context }) {
+            await context.voice.answer();
+            await context.voice.say(context.firstMessage, {
+                playbackRef: context.playbackRef
            });
        },
-
-            logger.verbose("
-
-        processHumanRequest: function (_a) {
-            return __awaiter(this, arguments, void 0, function* ({ context, event }) {
-                const speech = event.speech;
-                logger.verbose("human request", { speech });
-                const response = yield context.assistant.invoke({
-                    text: speech
-                });
-                logger.verbose("assistant response", { response });
-                yield context.voice.say(response);
+        interruptMachineSpeaking: async function ({ context }) {
+            logger.verbose("interrupting the machine", {
+                playbackRef: context.playbackRef
            });
+            await context.voice.playbackControl(context.playbackRef, common_1.PlaybackControlAction.STOP);
+        },
+        appendSpeech: function ({ context, event }) {
+            const speech = event.speech;
+            context.speechBuffer = (context.speechBuffer || "") + " " + speech;
+            context.speechResponseStartTime = Date.now();
+            logger.verbose("appended speech", { speechBuffer: context.speechBuffer });
        },
-
-
-
+        processHumanRequest: async function ({ context }) {
+            const speech = context.speechBuffer.trim();
+            logger.verbose("processing human request", { speech });
+            const response = await context.assistant.invoke({
+                text: speech
+            });
+            const speechResponseTime = Date.now() - context.speechResponseStartTime;
+            context.speechResponseTime = speechResponseTime;
+            logger.verbose("assistant response", {
+                response,
+                responseTime: speechResponseTime
            });
+            await context.voice.say(response, { playbackRef: context.playbackRef });
+            // Clear the speech buffer and reset response timing
+            context.speechBuffer = "";
+            context.speechResponseStartTime = 0;
+        },
+        hangup: async function ({ context }) {
+            await context.voice.hangup();
+        }
+    },
+    guards: {
+        hasSpeechBuffer: function ({ context }) {
+            return context.speechBuffer?.trim().length > 0;
        }
    }
 }).createMachine({
     context: ({ input }) => ({
         firstMessage: input.firstMessage,
         voice: input.voice,
-        assistant: input.assistant
+        assistant: input.assistant,
+        playbackRef: (0, uuid_1.v4)(),
+        speechBuffer: "",
+        speechResponseStartTime: 0,
+        speechResponseTime: 0
     }),
     id: "fnAI",
     initial: "welcome",
     states: {
         welcome: {
-            always: {
-                target: "active"
-            },
             entry: {
                 type: "sendGreeting"
             },
+            always: {
+                target: "machineListening"
+            },
             description: "The initial state where the AI greets the Human."
         },
-
+        machineListening: {
             on: {
+                SPEECH_START: {
+                    target: "humanSpeaking",
+                    description: "This must be triggered by a VAD or similar system."
+                },
                 HUMAN_PROMPT: {
-
-
-                        type: "processHumanRequest"
-                    },
-                    description: "This must be triggered when speech to text ends."
+                    actions: { type: "appendSpeech" },
+                    description: "Appends the speech to the buffer."
                }
            },
-            description: "The state where the AI is actively
+            description: "The state where the AI is actively listening in conversation."
        },
        humanSpeaking: {
-            always: {
-                target: "active"
-            },
            entry: {
-                type: "
+                type: "interruptMachineSpeaking"
+            },
+            on: {
+                HUMAN_PROMPT: {
+                    actions: { type: "appendSpeech" },
+                    description: "Appends the speech to the buffer."
+                },
+                SPEECH_END: {
+                    target: "machineListening",
+                    actions: { type: "processHumanRequest" },
+                    guard: { type: "hasSpeechBuffer" },
+                    description: "This must be triggered by a VAD or similar system."
+                }
            },
            description: "The state where the AI detects Human speech while it is speaking."
        },
        hangup: {
            type: "final",
-            description: "The final state where the AI terminates the conversation due to inactivity.",
            entry: {
                type: "hangup"
-            }
+            },
+            on: {
+                SESSION_END: {
+                    target: "hangup"
+                }
+            },
+            description: "The final state where the AI terminates the conversation due to inactivity."
        }
    }
 });
+exports.machine = machine;
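The machine can be exercised in isolation with the standard xstate v5 actor API; a sketch with hypothetical voice/assistant test doubles (only the event names, state names, and `input` shape come from the code above; the real types are VoiceResponse and a LangChain Runnable):

```typescript
import { createActor } from "xstate";
import { machine } from "./machine/machine"; // path within dist; adjust as needed

// Minimal test doubles standing in for the real voice session and assistant.
const voiceStub = {
  answer: async () => {},
  say: async (_text: string, _opts?: { playbackRef?: string }) => {},
  playbackControl: async (_ref: string, _action: string) => {},
  hangup: async () => {}
};
const assistantStub = {
  invoke: async ({ text }: { text: string }) => `You said: ${text}`
};

const actor = createActor(machine, {
  input: {
    firstMessage: "Hello!",
    voice: voiceStub as never,        // cast: stub omits most VoiceResponse members
    assistant: assistantStub as never
  }
});
actor.subscribe((state) => console.log(state.value));

actor.start();                        // welcome -> (sendGreeting) -> machineListening
actor.send({ type: "SPEECH_START" }); // -> humanSpeaking, interrupts playback
actor.send({ type: "HUMAN_PROMPT", speech: "What are your hours?" }); // buffered
actor.send({ type: "SPEECH_END" });   // hasSpeechBuffer passes -> processHumanRequest
```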
package/dist/machine/types.d.ts
CHANGED
@@ -5,6 +5,10 @@ declare const types: {
         firstMessage: string;
         voice: VoiceResponse;
         assistant: Assistant;
+        playbackRef: string;
+        speechBuffer: string;
+        speechResponseStartTime: number;
+        speechResponseTime: number;
     };
     input: {
         firstMessage: string;
@@ -12,7 +16,11 @@ declare const types: {
         assistant: Assistant;
     };
     events: {
-        type: "
+        type: "SPEECH_START";
+    } | {
+        type: "SPEECH_END";
+    } | {
+        type: "SESSION_END";
     } | {
         type: "HUMAN_PROMPT";
         speech: string;
package/dist/runner.d.ts
ADDED

package/dist/{demo.js → runner.js}
CHANGED
@@ -1,13 +1,5 @@
+#!/usr/bin/env node
 "use strict";
-var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
-    function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
-    return new (P || (P = Promise))(function (resolve, reject) {
-        function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
-        function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
-        function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
-        step((generator = generator.apply(thisArg, _arguments || [])).next());
-    });
-};
 var __importDefault = (this && this.__importDefault) || function (mod) {
     return (mod && mod.__esModule) ? mod : { "default": mod };
 };
@@ -32,26 +24,23 @@ Object.defineProperty(exports, "__esModule", { value: true });
  */
 const logger_1 = require("@fonoster/logger");
 const voice_1 = __importDefault(require("@fonoster/voice"));
-const
-const types_1 = require("./assistants/types");
+const assistants_1 = require("./assistants");
 const Autopilot_1 = require("./Autopilot");
 const envs_1 = require("./envs");
 const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
-
-new voice_1.default({ skipIdentity
+const skipIdentity = process.env.NODE_ENV === "dev";
+new voice_1.default({ skipIdentity }).listen(async (req, voice) => {
     const { ingressNumber, sessionRef, appRef } = req;
     logger.verbose("voice request", { ingressNumber, sessionRef, appRef });
-    const
+    const assistantPath = process.argv[2];
+    const assistant = (0, assistants_1.loadAndValidateAssistant)(assistantPath);
+    logger.verbose("interacting with assistant", { name: assistant.name });
     const autopilot = new Autopilot_1.Autopilot({
         voice,
-        firstMessage,
         assistantConfig: {
-
-
-            temperature: 0.9,
-            maxTokens: 100,
-            systemTemplate
+            ...assistant,
+            apiKey: envs_1.OPENAI_API_KEY
        }
    });
    autopilot.start();
-})
+});
package/dist/types.d.ts
CHANGED
@@ -1,14 +1,7 @@
 import { VoiceResponse } from "@fonoster/voice";
-import {
+import { AssistantConfig } from "./assistants/types";
 type AutopilotConfig = {
     voice: VoiceResponse;
-
-    assistantConfig: {
-        apiKey: string;
-        model: Model;
-        temperature: number;
-        maxTokens: number;
-        systemTemplate: string;
-    };
+    assistantConfig: AssistantConfig;
 };
 export { AutopilotConfig };
package/dist/vad/SileroVadModel.d.ts
ADDED

@@ -0,0 +1,15 @@
+import { ONNXRuntimeAPI, SpeechProbabilities } from "./types";
+declare class SileroVadModel {
+    private ort;
+    private pathToModel;
+    _session: any;
+    _h: unknown;
+    _c: unknown;
+    _sr: unknown;
+    constructor(ort: ONNXRuntimeAPI, pathToModel: string);
+    static new: (ort: ONNXRuntimeAPI, pathToModel: string) => Promise<SileroVadModel>;
+    init(): Promise<void>;
+    process(audioFrame: Float32Array): Promise<SpeechProbabilities>;
+    resetState(): void;
+}
+export { SileroVadModel };
package/dist/vad/SileroVadModel.js
ADDED

@@ -0,0 +1,65 @@
+"use strict";
+var _a;
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.SileroVadModel = void 0;
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const fs_1 = require("fs");
+class SileroVadModel {
+    constructor(ort, pathToModel) {
+        this.ort = ort;
+        this.pathToModel = pathToModel;
+    }
+    async init() {
+        const modelArrayBuffer = (0, fs_1.readFileSync)(this.pathToModel).buffer;
+        this._session = await this.ort.InferenceSession.create(modelArrayBuffer);
+        this._sr = new this.ort.Tensor("int64", [16000n]);
+        this.resetState();
+    }
+    async process(audioFrame) {
+        const t = new this.ort.Tensor("float32", audioFrame, [
+            1,
+            audioFrame.length
+        ]);
+        const inputs = {
+            input: t,
+            h: this._h,
+            c: this._c,
+            sr: this._sr
+        };
+        const out = await this._session.run(inputs);
+        this._h = out.hn;
+        this._c = out.cn;
+        const [isSpeech] = out.output.data;
+        const notSpeech = 1 - isSpeech;
+        return { notSpeech, isSpeech };
+    }
+    resetState() {
+        const zeroes = Array(2 * 64).fill(0);
+        this._h = new this.ort.Tensor("float32", zeroes, [2, 1, 64]);
+        this._c = new this.ort.Tensor("float32", zeroes, [2, 1, 64]);
+    }
+}
+exports.SileroVadModel = SileroVadModel;
+_a = SileroVadModel;
+SileroVadModel.new = async (ort, pathToModel) => {
+    const model = new _a(ort, pathToModel);
+    await model.init();
+    return model;
+};
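The model keeps its recurrent state (`_h`, `_c`) between calls, so frames from one audio stream must be processed in order, and `resetState()` should separate unrelated streams. A direct-use sketch (the import path and model path are assumptions; the one-second 16 kHz frame size follows the package's own usage in makeVad below):

```typescript
import * as ort from "onnxruntime-node";
import { SileroVadModel } from "@fonoster/autopilot"; // assumed re-export from dist/vad

async function main() {
  // The package ships silero_vad.onnx at its root; this path is assumed.
  const model = await SileroVadModel.new(ort, "./silero_vad.onnx");

  const frame = new Float32Array(16000); // one second of 16 kHz silence
  const { isSpeech, notSpeech } = await model.process(frame);
  console.log({ isSpeech, notSpeech }); // complementary probabilities

  model.resetState(); // clear LSTM state before a new, unrelated stream
}

main().catch(console.error);
```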
package/dist/vad/chunkToFloat32Array.js
ADDED

@@ -0,0 +1,25 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.chunkToFloat32Array = chunkToFloat32Array;
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+function chunkToFloat32Array(chunk) {
+    const int16Array = new Int16Array(chunk.buffer, chunk.byteOffset, chunk.byteLength / Int16Array.BYTES_PER_ELEMENT);
+    return new Float32Array(Array.from(int16Array, (sample) => sample / 32768.0));
+}
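The conversion reinterprets the incoming byte chunk as signed 16-bit PCM and normalizes each sample into [-1, 1) by dividing by 32768. A small self-check with hand-picked values:

```typescript
// PCM16 -> Float32 normalization, mirroring chunkToFloat32Array above.
const samples = Int16Array.from([-32768, -16384, 0, 16384, 32767]);
const chunk = Buffer.from(samples.buffer); // stand-in for a network audio chunk

const int16 = new Int16Array(
  chunk.buffer,
  chunk.byteOffset,
  chunk.byteLength / Int16Array.BYTES_PER_ELEMENT
);
const floats = Float32Array.from(int16, (sample) => sample / 32768.0);

console.log(Array.from(floats)); // [-1, -0.5, 0, 0.5, ~0.99997]
```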
package/dist/vad/index.js
ADDED

@@ -0,0 +1,38 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __exportStar = (this && this.__exportStar) || function(m, exports) {
+    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+/* eslint-disable no-loops/no-loops */
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+__exportStar(require("./SileroVadModel"), exports);
+__exportStar(require("./types"), exports);
+__exportStar(require("./makeVad"), exports);
package/dist/vad/makeVad.js
ADDED

@@ -0,0 +1,83 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.makeVad = makeVad;
+/* eslint-disable no-loops/no-loops */
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const path_1 = require("path");
+const ort = __importStar(require("onnxruntime-node"));
+const chunkToFloat32Array_1 = require("./chunkToFloat32Array");
+const SileroVadModel_1 = require("./SileroVadModel");
+const BUFFER_SIZE = 16000;
+async function makeVad(pathToModel) {
+    const effectivePath = pathToModel || (0, path_1.join)(__dirname, "..", "..", "silero_vad.onnx");
+    const silero = await SileroVadModel_1.SileroVadModel.new(ort, effectivePath);
+    let audioBuffer = [];
+    let isSpeechActive = false;
+    return async function process(chunk, callback) {
+        const float32Array = (0, chunkToFloat32Array_1.chunkToFloat32Array)(chunk);
+        audioBuffer.push(...float32Array);
+        const processBuffer = async (buffer) => {
+            if (buffer.length < BUFFER_SIZE)
+                return buffer;
+            const audioFrame = buffer.slice(0, BUFFER_SIZE);
+            const remainingBuffer = buffer.slice(BUFFER_SIZE);
+            const result = await silero.process(new Float32Array(audioFrame));
+            if (result.isSpeech > 0.5) {
+                if (!isSpeechActive) {
+                    isSpeechActive = true;
+                    callback("SPEECH_START", {});
+                    return processBuffer(remainingBuffer);
+                }
+            }
+            else {
+                if (isSpeechActive) {
+                    isSpeechActive = false;
+                    callback("SPEECH_END", {});
+                    return processBuffer(remainingBuffer);
+                }
+            }
+            return processBuffer(remainingBuffer);
+        };
+        audioBuffer = await processBuffer(audioBuffer);
+    };
+}
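Worth noting from the code above: makeVad accumulates samples and only runs Silero once it holds a full BUFFER_SIZE frame (16000 samples, i.e. one second at 16 kHz), draining the buffer frame by frame through the recursive processBuffer. SPEECH_START and SPEECH_END fire only when the 0.5 probability threshold is crossed in the corresponding direction, not on every chunk, so downstream consumers see clean boundary events at roughly one-second granularity.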
package/dist/vad/micVadTest.js
ADDED

@@ -0,0 +1,48 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+/*
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
+ * http://github.com/fonoster/fonoster
+ *
+ * This file is part of Fonoster
+ *
+ * Licensed under the MIT License (the "License");
+ * you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://opensource.org/licenses/MIT
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+const logger_1 = require("@fonoster/logger");
+const node_record_lpcm16_1 = __importDefault(require("node-record-lpcm16"));
+const makeVad_1 = require("./makeVad");
+const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
+async function main() {
+    const vad = await (0, makeVad_1.makeVad)();
+    // Start recording from the default microphone
+    const mic = node_record_lpcm16_1.default
+        .record({
+        sampleRate: 16000, // 16 kHz sample rate
+        channels: 1,
+        threshold: 0.5
+    })
+        .stream();
+    mic.on("data", async (data) => {
+        const chunk = new Float32Array(data.buffer);
+        await vad(chunk, (event, _data) => {
+            logger.info("vad event:", { event, data: _data });
+        });
+    });
+    mic.on("error", (err) => {
+        logger.error("an error occurred:", { err });
+    });
+}
+main().catch(logger.error);
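micVadTest.js is a standalone harness rather than part of the library surface: it records from the default microphone via node-record-lpcm16 (added as a devDependency in the package.json change below) and logs raw VAD events, which is useful for sanity-checking the bundled Silero model locally.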
package/dist/vad/types.d.ts
ADDED

@@ -0,0 +1,17 @@
+type Vad = (chunk: Float32Array, cb: (event: string) => void) => Promise<void>;
+type SpeechProbabilities = {
+    notSpeech: number;
+    isSpeech: number;
+};
+type ONNXRuntimeAPI = {
+    InferenceSession: {
+        create(modelArrayBuffer: ArrayBuffer): Promise<unknown>;
+    };
+    Tensor: {
+        new (type: "int64", dims: [16000n]): unknown;
+        new (type: "float32", data: number[], dims: [2, 1, 64]): unknown;
+        new (type: "float32", data: Float32Array, dims: [1, number]): unknown;
+        new (type: "float32", data: Float32Array, dims: [1, number]): unknown;
+    };
+};
+export { SpeechProbabilities, ONNXRuntimeAPI, Vad };
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@fonoster/autopilot",
-  "version": "0.7.
+  "version": "0.7.3",
   "description": "Voice AI for the Fonoster platform",
   "author": "Pedro Sanders <psanders@fonoster.com>",
   "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -16,8 +16,12 @@
     "build": "tsc -b tsconfig.json",
     "clean": "rimraf ./dist node_modules tsconfig.tsbuildinfo"
   },
+  "bin": {
+    "autopilot": "./dist/runner.js"
+  },
   "files": [
-    "dist"
+    "dist",
+    "silero_vad.onnx"
   ],
   "publishConfig": {
     "access": "public"
@@ -30,14 +34,17 @@
     "url": "https://github.com/fonoster/fonoster/issues"
   },
   "dependencies": {
-    "@fonoster/logger": "^0.7.
-    "@fonoster/voice": "^0.7.
+    "@fonoster/logger": "^0.7.2",
+    "@fonoster/voice": "^0.7.3",
     "@langchain/openai": "^0.2.7",
     "dotenv": "^16.4.5",
-    "
+    "onnxruntime-node": "^1.19.0",
+    "xstate": "^5.17.3",
+    "zod": "^3.23.8"
   },
   "devDependencies": {
+    "node-record-lpcm16": "^1.0.1",
     "typescript": "^5.5.4"
   },
-  "gitHead": "
+  "gitHead": "97aa6649691819fe8a704b96bc62b1b142162393"
 }
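Two packaging changes stand out: the new `bin` entry exposes the runner as an `autopilot` command, so after an install something like `autopilot ./assistant.json` starts the voice server with the assistant file passed via `process.argv[2]`; and `silero_vad.onnx` is added to `files` so the model ships in the tarball where `makeVad` resolves it relative to `dist`. The exact invocation and file path here are illustrative.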
package/silero_vad.onnx
ADDED
Binary file
package/dist/assistants/examples.js
REMOVED

@@ -1,96 +0,0 @@
-"use strict";
-Object.defineProperty(exports, "__esModule", { value: true });
-exports.OLIVIA_AI_PHONE_ASSISTANT = exports.RESTAURANT_PHONE_ASSISTANT = void 0;
-/*
- * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
- * http://github.com/fonoster/fonoster
- *
- * This file is part of Fonoster
- *
- * Licensed under the MIT License (the "License");
- * you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * https://opensource.org/licenses/MIT
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-exports.RESTAURANT_PHONE_ASSISTANT = {
-    name: "Restaurant Phone Assistant",
-    firstMessage: "Hello, I'm Martha the AI assistant from Restaurant La Casa. How can I help you today?",
-    template: `
-## La casa AI Phone Assistant (Martha)
-
-### Mission Statement
-
-To assist users in navigating our restaurant's offerings. This includes providing information on the menu,
-handling reservations, and updating on special events.
-
-### Interaction Modes
-
-- **Age Restriction:** None; suitable for all ages.
-- **Meal Options:** Brunch, Lunch, Dinner.
-- **Special Notes:** Users should specify any dietary restrictions or preferences.
-
-### Available Links
-
-Since you are a AI Phone assistant, you can't browse the web. However, you can send links to the user's phone via SMS.
-Here are some useful links for Restaurant La Casa:
-
-- [Menu Information](https://www.lacasarestaurant.com/menu)
-- [Make a Reservation](https://www.lacasarestaurant.com/reservations)
-- [Special Events Details](https://www.lacasarestaurant.com/events)
-
-### Hours of Operation
-
-- **Brunch:** 9:00 AM - 12:00 PM
-- **Lunch:** 12:00 PM - 3:00 PM
-- **Dinner:** 5:00 PM - 10:00 PM
-
-### Special Instructions
-
-Provide accurate and timely responses to user inquiries. Stay on brand and maintain a friendly and professional tone.
-Only provide information that is relevant to the user's request. If the user asks for something that is not within the scope of the system,
-politely inform them that you are unable to assist with that request.
-
-In case of any medical emergency instruct the user to call 911 immediately.
-
-Make sure all response are readable by a TTS engine. For example, when reading the hours of operation, say "Brunch is served from 9:00 AM to 12:00 PM.".
-Similarly, when providing links, say "I have sent you a link to the menu."
-`
-};
-exports.OLIVIA_AI_PHONE_ASSISTANT = {
-    name: "Olivia AI Phone Assistant",
-    firstMessage: "Hello, I'm Olivia your friendly AI. What would you like to chat about today?",
-    template: `
-## Olivia the friendly AI
-
-### Mission Statement
-
-Olivia is designed to help users by researching topics, bringing news updates, and telling engaging stories. Our goal is to provide accurate information and captivating narratives in a timely manner.
-
-### Available Links
-
-As an AI, Olivia can browse the web and retrieve information in real-time. Here are some resources that Olivia may use to enhance the user experience:
-
-- [Latest News](https://www.news.com)
-- [Popular Topics](https://www.populartopics.com)
-- [Story Archive](https://www.storyarchive.com)
-
-### Special Instructions
-
-Provide accurate and up-to-date information on requested topics. Maintain a friendly, engaging, and creative tone. Ensure that all responses are clear and easy to understand. If a topic or request is beyond Olivia's capabilities, politely inform the user and suggest alternative sources if possible.
-
-In case of urgent or critical news, inform users to consult trusted news sources immediately.
-
-Make sure all responses are concise and engaging. For instance, when delivering news updates, say "Here’s the latest update on the topic." When telling stories, ensure the narrative is compelling and well-structured.
-
-When telling story, begin with conversation starters like "Sure, I have a great story for you!" or "Let me tell you a fascinating tale!".
-
-When you finish a story, or news summary, end with a conversation prompt like "What do you think?" or "Would you like to hear more?".
-`
-};

package/dist/{demo.d.ts → vad/micVadTest.d.ts}
RENAMED
File without changes