@fonoster/autopilot 0.7.19 → 0.7.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -88,7 +88,7 @@ The Autopilot supports multiple language model providers. The following is a lis
88
88
 
89
89
  | Provider | Description | Supported models
90
90
  |------------|------------------------------------------------------------|------------------------------------------------------------------------------|
91
- | OpenAI | OpenAI provides various GPT models for conversational AI | `gpt-4o`, `gpt-4o-mini` |
91
+ | OpenAI | OpenAI provides various GPT models for conversational AI | `gpt-4o`, `gpt-4o-mini`, `gpt-3.5-turbo`, `gpt-4-turbo` |
92
92
  | Groq | Groq offers high-performance AI models optimized for speed | `gemma-7b-it`, `llama3-groq-70b-8192-tool-use-preview`, `llama3-1-8b-instant` |
93
93
  | Ollama | Self-hosted Ollama models | `llama3.1` |
94
94
 
@@ -2,6 +2,7 @@ import { AutopilotParams } from "./types";
2
2
  declare class Autopilot {
3
3
  private params;
4
4
  private actor;
5
+ private vadWorker;
5
6
  constructor(params: AutopilotParams);
6
7
  start(): void;
7
8
  stop(): void;
package/dist/Autopilot.js CHANGED
@@ -1,4 +1,7 @@
1
1
  "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
2
5
  Object.defineProperty(exports, "__esModule", { value: true });
3
6
  exports.Autopilot = void 0;
4
7
  /*
@@ -19,6 +22,8 @@ exports.Autopilot = void 0;
19
22
  * See the License for the specific language governing permissions and
20
23
  * limitations under the License.
21
24
  */
25
+ const path_1 = __importDefault(require("path"));
26
+ const worker_threads_1 = require("worker_threads");
22
27
  const logger_1 = require("@fonoster/logger");
23
28
  const xstate_1 = require("xstate");
24
29
  const machine_1 = require("./machine/machine");
@@ -27,6 +32,10 @@ class Autopilot {
27
32
  constructor(params) {
28
33
  this.params = params;
29
34
  const { voice, languageModel, conversationSettings } = this.params;
35
+ const vadWorkerPath = path_1.default.resolve(__dirname, "../dist", "./vadWorker");
36
+ this.vadWorker = new worker_threads_1.Worker(vadWorkerPath, {
37
+ workerData: conversationSettings.vad
38
+ });
30
39
  this.actor = (0, xstate_1.createActor)(machine_1.machine, {
31
40
  input: {
32
41
  conversationSettings,
@@ -42,30 +51,36 @@ class Autopilot {
42
51
  });
43
52
  this.setupVoiceStream();
44
53
  this.setupSpeechGathering();
54
+ this.vadWorker.on("error", (err) => {
55
+ logger.error("vad worker error", err);
56
+ });
57
+ this.vadWorker.on("exit", (code) => {
58
+ if (code !== 0) {
59
+ logger.error("vad worker stopped with exit code", { code });
60
+ }
61
+ });
45
62
  }
46
63
  stop() {
47
64
  logger.verbose("stopping autopilot");
48
65
  this.actor.stop();
66
+ this.vadWorker.terminate();
49
67
  }
50
68
  async setupVoiceStream() {
51
- const { voice, vad } = this.params;
69
+ const { voice } = this.params;
52
70
  const stream = await voice.stream();
53
- stream.onData(this.handleVoicePayload(vad));
71
+ stream.onData(this.handleVoicePayload.bind(this));
72
+ this.vadWorker.on("message", (event) => {
73
+ logger.verbose("received speech event from vad", { event });
74
+ this.actor.send({ type: event });
75
+ });
54
76
  }
55
- handleVoicePayload(vad) {
56
- return (chunk) => {
57
- try {
58
- vad.processChunk(chunk, (event) => {
59
- if (["SPEECH_START", "SPEECH_END"].includes(event)) {
60
- logger.verbose("received speech event", { event });
61
- this.actor.send({ type: event });
62
- }
63
- });
64
- }
65
- catch (err) {
66
- logger.error("an error occurred while processing vad", err);
67
- }
68
- };
77
+ handleVoicePayload(chunk) {
78
+ try {
79
+ this.vadWorker.postMessage(chunk);
80
+ }
81
+ catch (err) {
82
+ logger.error("an error occurred while processing vad", err);
83
+ }
69
84
  }
70
85
  async setupSpeechGathering() {
71
86
  const { voice } = this.params;
@@ -6,6 +6,7 @@ declare const conversationSettingsSchema: z.ZodObject<{
6
6
  goodbyeMessage: z.ZodString;
7
7
  systemErrorMessage: z.ZodString;
8
8
  initialDtmf: z.ZodNullable<z.ZodOptional<z.ZodString>>;
9
+ maxSpeechWaitTimeout: z.ZodNumber;
9
10
  transferOptions: z.ZodNullable<z.ZodOptional<z.ZodObject<{
10
11
  phoneNumber: z.ZodString;
11
12
  message: z.ZodString;
@@ -53,6 +54,7 @@ declare const conversationSettingsSchema: z.ZodObject<{
53
54
  systemTemplate: string;
54
55
  goodbyeMessage: string;
55
56
  systemErrorMessage: string;
57
+ maxSpeechWaitTimeout: number;
56
58
  vad: {
57
59
  activationThreshold: number;
58
60
  deactivationThreshold: number;
@@ -75,6 +77,7 @@ declare const conversationSettingsSchema: z.ZodObject<{
75
77
  systemTemplate: string;
76
78
  goodbyeMessage: string;
77
79
  systemErrorMessage: string;
80
+ maxSpeechWaitTimeout: number;
78
81
  vad: {
79
82
  activationThreshold: number;
80
83
  deactivationThreshold: number;
@@ -295,6 +298,7 @@ declare const assistantSchema: z.ZodObject<{
295
298
  goodbyeMessage: z.ZodString;
296
299
  systemErrorMessage: z.ZodString;
297
300
  initialDtmf: z.ZodNullable<z.ZodOptional<z.ZodString>>;
301
+ maxSpeechWaitTimeout: z.ZodNumber;
298
302
  transferOptions: z.ZodNullable<z.ZodOptional<z.ZodObject<{
299
303
  phoneNumber: z.ZodString;
300
304
  message: z.ZodString;
@@ -342,6 +346,7 @@ declare const assistantSchema: z.ZodObject<{
342
346
  systemTemplate: string;
343
347
  goodbyeMessage: string;
344
348
  systemErrorMessage: string;
349
+ maxSpeechWaitTimeout: number;
345
350
  vad: {
346
351
  activationThreshold: number;
347
352
  deactivationThreshold: number;
@@ -364,6 +369,7 @@ declare const assistantSchema: z.ZodObject<{
364
369
  systemTemplate: string;
365
370
  goodbyeMessage: string;
366
371
  systemErrorMessage: string;
372
+ maxSpeechWaitTimeout: number;
367
373
  vad: {
368
374
  activationThreshold: number;
369
375
  deactivationThreshold: number;
@@ -583,6 +589,7 @@ declare const assistantSchema: z.ZodObject<{
583
589
  systemTemplate: string;
584
590
  goodbyeMessage: string;
585
591
  systemErrorMessage: string;
592
+ maxSpeechWaitTimeout: number;
586
593
  vad: {
587
594
  activationThreshold: number;
588
595
  deactivationThreshold: number;
@@ -640,6 +647,7 @@ declare const assistantSchema: z.ZodObject<{
640
647
  systemTemplate: string;
641
648
  goodbyeMessage: string;
642
649
  systemErrorMessage: string;
650
+ maxSpeechWaitTimeout: number;
643
651
  vad: {
644
652
  activationThreshold: number;
645
653
  deactivationThreshold: number;
@@ -28,6 +28,7 @@ const conversationSettingsSchema = zod_1.z.object({
28
28
  goodbyeMessage: zod_1.z.string(),
29
29
  systemErrorMessage: zod_1.z.string(),
30
30
  initialDtmf: zod_1.z.string().optional().nullable(),
31
+ maxSpeechWaitTimeout: zod_1.z.number(),
31
32
  transferOptions: zod_1.z
32
33
  .object({
33
34
  phoneNumber: zod_1.z.string(),
@@ -60,7 +60,6 @@ async function handleVoiceRequest(req, res) {
60
60
  const assistantConfig = (0, loadAssistantConfig_1.loadAssistantConfig)();
61
61
  const knowledgeBase = await (0, loadKnowledgeBase_1.loadKnowledgeBase)();
62
62
  const voice = new _1.VoiceImpl(sessionRef, res);
63
- const vad = new _1.SileroVad(assistantConfig.conversationSettings.vad);
64
63
  const languageModel = (0, createLanguageModel_1.createLanguageModel)({
65
64
  voice,
66
65
  assistantConfig,
@@ -74,7 +73,6 @@ async function handleVoiceRequest(req, res) {
74
73
  const autopilot = new _1.default({
75
74
  conversationSettings: assistantConfig.conversationSettings,
76
75
  voice,
77
- vad,
78
76
  languageModel
79
77
  });
80
78
  autopilot.start();
@@ -79,7 +79,7 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
79
79
  type: "isNotSpeaking";
80
80
  params: unknown;
81
81
  };
82
- }>, "IDLE_TIMEOUT", "hangup" | "greeting" | "idle" | "waitingForUserRequest" | "transitioningToIdle" | "updatingSpeech" | "processingUserRequest", string, {
82
+ }>, "IDLE_TIMEOUT" | "MAX_SPEECH_WAIT_TIMEOUT", "hangup" | "greeting" | "idle" | "waitingForUserRequest" | "transitioningToIdle" | "updatingSpeech" | "processingUserRequest", string, {
83
83
  conversationSettings: ConversationSettings;
84
84
  languageModel: LanguageModel;
85
85
  voice: Voice;
@@ -157,6 +157,7 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
157
157
  idleTimeout: number;
158
158
  maxIdleTimeoutCount: number;
159
159
  idleTimeoutCount: number;
160
+ maxSpeechWaitTimeout: number;
160
161
  speechResponseStartTime: number;
161
162
  speechResponseTime: number;
162
163
  isSpeaking: false;
@@ -178,6 +179,10 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
178
179
  readonly target: "waitingForUserRequest";
179
180
  readonly description: "Event from VAD system.";
180
181
  };
182
+ readonly SPEECH_RESULT: {
183
+ readonly target: "waitingForUserRequest";
184
+ readonly description: "Event from Speech to Text provider.";
185
+ };
181
186
  };
182
187
  readonly after: {
183
188
  readonly IDLE_TIMEOUT: readonly [{
@@ -253,6 +258,11 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
253
258
  };
254
259
  }];
255
260
  };
261
+ readonly after: {
262
+ readonly MAX_SPEECH_WAIT_TIMEOUT: {
263
+ readonly target: "processingUserRequest";
264
+ };
265
+ };
256
266
  };
257
267
  readonly processingUserRequest: {
258
268
  readonly on: {
@@ -124,6 +124,9 @@ const machine = (0, xstate_1.setup)({
124
124
  delays: {
125
125
  IDLE_TIMEOUT: ({ context }) => {
126
126
  return context.idleTimeout;
127
+ },
128
+ MAX_SPEECH_WAIT_TIMEOUT: ({ context }) => {
129
+ return context.maxSpeechWaitTimeout;
127
130
  }
128
131
  },
129
132
  actors: {
@@ -191,6 +194,7 @@ const machine = (0, xstate_1.setup)({
191
194
  idleTimeout: input.conversationSettings.idleOptions?.timeout || 10000,
192
195
  maxIdleTimeoutCount: input.conversationSettings.idleOptions?.maxTimeoutCount || 3,
193
196
  idleTimeoutCount: 0,
197
+ maxSpeechWaitTimeout: input.conversationSettings.maxSpeechWaitTimeout,
194
198
  speechResponseStartTime: 0,
195
199
  speechResponseTime: 0,
196
200
  isSpeaking: false
@@ -211,6 +215,10 @@ const machine = (0, xstate_1.setup)({
211
215
  SPEECH_START: {
212
216
  target: "waitingForUserRequest",
213
217
  description: "Event from VAD system."
218
+ },
219
+ SPEECH_RESULT: {
220
+ target: "waitingForUserRequest",
221
+ description: "Event from Speech to Text provider."
214
222
  }
215
223
  },
216
224
  after: {
@@ -303,6 +311,11 @@ const machine = (0, xstate_1.setup)({
303
311
  }
304
312
  }
305
313
  ]
314
+ },
315
+ after: {
316
+ MAX_SPEECH_WAIT_TIMEOUT: {
317
+ target: "processingUserRequest"
318
+ }
306
319
  }
307
320
  },
308
321
  processingUserRequest: {
@@ -13,6 +13,7 @@ type AutopilotContext = {
13
13
  idleTimeout: number;
14
14
  idleTimeoutCount: number;
15
15
  maxIdleTimeoutCount: number;
16
+ maxSpeechWaitTimeout: number;
16
17
  speechBuffer: string;
17
18
  speechResponseStartTime: number;
18
19
  speechResponseTime: number;
@@ -1,7 +1,9 @@
1
1
  import { BaseModelParams } from "../types";
2
2
  declare enum OpenAIModel {
3
3
  GPT_4O = "gpt-4o",
4
- GPT_4O_MINI = "gpt-4o-mini"
4
+ GPT_4O_MINI = "gpt-4o-mini",
5
+ GPT_3_5_TURBO = "gpt-3.5-turbo",
6
+ GPT_4_TURBO = "gpt-4-turbo"
5
7
  }
6
8
  type OpenAIParams = BaseModelParams & {
7
9
  model: OpenAIModel;
@@ -5,4 +5,6 @@ var OpenAIModel;
5
5
  (function (OpenAIModel) {
6
6
  OpenAIModel["GPT_4O"] = "gpt-4o";
7
7
  OpenAIModel["GPT_4O_MINI"] = "gpt-4o-mini";
8
+ OpenAIModel["GPT_3_5_TURBO"] = "gpt-3.5-turbo";
9
+ OpenAIModel["GPT_4_TURBO"] = "gpt-4-turbo";
8
10
  })(OpenAIModel || (exports.OpenAIModel = OpenAIModel = {}));
package/dist/types.d.ts CHANGED
@@ -1,6 +1,5 @@
1
1
  import { ConversationSettings } from "./assistants";
2
2
  import { LanguageModel } from "./models";
3
- import { Vad } from "./vad";
4
3
  import { Voice } from "./voice";
5
4
  declare enum LANGUAGE_MODEL_PROVIDER {
6
5
  OPENAI = "openai",
@@ -9,7 +8,6 @@ declare enum LANGUAGE_MODEL_PROVIDER {
9
8
  }
10
9
  type AutopilotParams = {
11
10
  voice: Voice;
12
- vad: Vad;
13
11
  conversationSettings: ConversationSettings;
14
12
  languageModel: LanguageModel;
15
13
  };
@@ -1,5 +1,6 @@
1
+ type VadEvent = "SPEECH_START" | "SPEECH_END";
1
2
  type Vad = {
2
- processChunk: (chunk: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void) => void;
3
+ processChunk: (chunk: Uint8Array, callback: (event: VadEvent) => void) => void;
3
4
  };
4
5
  type SpeechProbabilities = {
5
6
  notSpeech: number;
@@ -16,4 +17,4 @@ type ONNXRuntimeAPI = {
16
17
  new (type: "float32", data: Float32Array, dims: [1, number]): unknown;
17
18
  };
18
19
  };
19
- export { ONNXRuntimeAPI, SpeechProbabilities, Vad };
20
+ export { ONNXRuntimeAPI, SpeechProbabilities, Vad, VadEvent };
@@ -0,0 +1 @@
1
+ export {};
@@ -0,0 +1,28 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ /*
4
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
5
+ * http://github.com/fonoster/fonoster
6
+ *
7
+ * This file is part of Fonoster
8
+ *
9
+ * Licensed under the MIT License (the "License");
10
+ * you may not use this file except in compliance with
11
+ * the License. You may obtain a copy of the License at
12
+ *
13
+ * https://opensource.org/licenses/MIT
14
+ *
15
+ * Unless required by applicable law or agreed to in writing, software
16
+ * distributed under the License is distributed on an "AS IS" BASIS,
17
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18
+ * See the License for the specific language governing permissions and
19
+ * limitations under the License.
20
+ */
21
+ const worker_threads_1 = require("worker_threads");
22
+ const SileroVad_1 = require("./vad/SileroVad");
23
+ const vad = new SileroVad_1.SileroVad(worker_threads_1.workerData);
24
+ worker_threads_1.parentPort?.on("message", (chunk) => {
25
+ vad.processChunk(chunk, (voiceActivity) => {
26
+ worker_threads_1.parentPort?.postMessage(voiceActivity);
27
+ });
28
+ });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fonoster/autopilot",
3
- "version": "0.7.19",
3
+ "version": "0.7.21",
4
4
  "description": "Voice AI for the Fonoster platform",
5
5
  "author": "Pedro Sanders <psanders@fonoster.com>",
6
6
  "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -56,5 +56,5 @@
56
56
  "devDependencies": {
57
57
  "typescript": "^5.5.4"
58
58
  },
59
- "gitHead": "5250aa76f6c4b72a3b26beabe71ea7a7c227d7c1"
59
+ "gitHead": "0de74ab45f5fe25b0f096ad02bab2be00be53d89"
60
60
  }