@fonoster/autopilot 0.7.19 → 0.7.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/Autopilot.d.ts +1 -0
- package/dist/Autopilot.js +31 -16
- package/dist/assistants/AssistantSchema.d.ts +8 -0
- package/dist/assistants/AssistantSchema.js +1 -0
- package/dist/handleVoiceRequest.js +0 -2
- package/dist/machine/machine.d.ts +11 -1
- package/dist/machine/machine.js +13 -0
- package/dist/machine/types.d.ts +1 -0
- package/dist/models/openai/types.d.ts +3 -1
- package/dist/models/openai/types.js +2 -0
- package/dist/types.d.ts +0 -2
- package/dist/vad/types.d.ts +3 -2
- package/dist/vadWorker.d.ts +1 -0
- package/dist/vadWorker.js +28 -0
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -88,7 +88,7 @@ The Autopilot supports multiple language model providers. The following is a lis
|
|
|
88
88
|
|
|
89
89
|
| Provider | Description | Supported models
|
|
90
90
|
|------------|------------------------------------------------------------|------------------------------------------------------------------------------|
|
|
91
|
-
| OpenAI | OpenAI provides various GPT models for conversational AI | `gpt-4o`, `gpt-4o-mini` |
|
|
91
|
+
| OpenAI | OpenAI provides various GPT models for conversational AI | `gpt-4o`, `gpt-4o-mini`, `gpt-3.5-turbo`, `gpt-4-turbo` |
|
|
92
92
|
| Groq | Groq offers high-performance AI models optimized for speed | `gemm-7b-it`, `llama3-groq-70b-8192-tool-use-preview`, `llama3-1-8b-instant` |
|
|
93
93
|
| Ollama | Self-hosted Ollama models | `lama3.1` |
|
|
94
94
|
|
package/dist/Autopilot.d.ts
CHANGED
package/dist/Autopilot.js
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
"use strict";
|
|
2
|
+
var __importDefault = (this && this.__importDefault) || function (mod) {
|
|
3
|
+
return (mod && mod.__esModule) ? mod : { "default": mod };
|
|
4
|
+
};
|
|
2
5
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
6
|
exports.Autopilot = void 0;
|
|
4
7
|
/*
|
|
@@ -19,6 +22,8 @@ exports.Autopilot = void 0;
|
|
|
19
22
|
* See the License for the specific language governing permissions and
|
|
20
23
|
* limitations under the License.
|
|
21
24
|
*/
|
|
25
|
+
const path_1 = __importDefault(require("path"));
|
|
26
|
+
const worker_threads_1 = require("worker_threads");
|
|
22
27
|
const logger_1 = require("@fonoster/logger");
|
|
23
28
|
const xstate_1 = require("xstate");
|
|
24
29
|
const machine_1 = require("./machine/machine");
|
|
@@ -27,6 +32,10 @@ class Autopilot {
|
|
|
27
32
|
constructor(params) {
|
|
28
33
|
this.params = params;
|
|
29
34
|
const { voice, languageModel, conversationSettings } = this.params;
|
|
35
|
+
const vadWorkerPath = path_1.default.resolve(__dirname, "../dist", "./vadWorker");
|
|
36
|
+
this.vadWorker = new worker_threads_1.Worker(vadWorkerPath, {
|
|
37
|
+
workerData: conversationSettings.vad
|
|
38
|
+
});
|
|
30
39
|
this.actor = (0, xstate_1.createActor)(machine_1.machine, {
|
|
31
40
|
input: {
|
|
32
41
|
conversationSettings,
|
|
@@ -42,30 +51,36 @@ class Autopilot {
|
|
|
42
51
|
});
|
|
43
52
|
this.setupVoiceStream();
|
|
44
53
|
this.setupSpeechGathering();
|
|
54
|
+
this.vadWorker.on("error", (err) => {
|
|
55
|
+
logger.error("vad worker error", err);
|
|
56
|
+
});
|
|
57
|
+
this.vadWorker.on("exit", (code) => {
|
|
58
|
+
if (code !== 0) {
|
|
59
|
+
logger.error("vad worker stopped with exit code", { code });
|
|
60
|
+
}
|
|
61
|
+
});
|
|
45
62
|
}
|
|
46
63
|
stop() {
|
|
47
64
|
logger.verbose("stopping autopilot");
|
|
48
65
|
this.actor.stop();
|
|
66
|
+
this.vadWorker.terminate();
|
|
49
67
|
}
|
|
50
68
|
async setupVoiceStream() {
|
|
51
|
-
const { voice
|
|
69
|
+
const { voice } = this.params;
|
|
52
70
|
const stream = await voice.stream();
|
|
53
|
-
stream.onData(this.handleVoicePayload(
|
|
71
|
+
stream.onData(this.handleVoicePayload.bind(this));
|
|
72
|
+
this.vadWorker.on("message", (event) => {
|
|
73
|
+
logger.verbose("received speech event from vad", { event });
|
|
74
|
+
this.actor.send({ type: event });
|
|
75
|
+
});
|
|
54
76
|
}
|
|
55
|
-
handleVoicePayload(
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
}
|
|
63
|
-
});
|
|
64
|
-
}
|
|
65
|
-
catch (err) {
|
|
66
|
-
logger.error("an error occurred while processing vad", err);
|
|
67
|
-
}
|
|
68
|
-
};
|
|
77
|
+
handleVoicePayload(chunk) {
|
|
78
|
+
try {
|
|
79
|
+
this.vadWorker.postMessage(chunk);
|
|
80
|
+
}
|
|
81
|
+
catch (err) {
|
|
82
|
+
logger.error("an error occurred while processing vad", err);
|
|
83
|
+
}
|
|
69
84
|
}
|
|
70
85
|
async setupSpeechGathering() {
|
|
71
86
|
const { voice } = this.params;
|
|
package/dist/assistants/AssistantSchema.d.ts
CHANGED
|
@@ -6,6 +6,7 @@ declare const conversationSettingsSchema: z.ZodObject<{
|
|
|
6
6
|
goodbyeMessage: z.ZodString;
|
|
7
7
|
systemErrorMessage: z.ZodString;
|
|
8
8
|
initialDtmf: z.ZodNullable<z.ZodOptional<z.ZodString>>;
|
|
9
|
+
maxSpeechWaitTimeout: z.ZodNumber;
|
|
9
10
|
transferOptions: z.ZodNullable<z.ZodOptional<z.ZodObject<{
|
|
10
11
|
phoneNumber: z.ZodString;
|
|
11
12
|
message: z.ZodString;
|
|
@@ -53,6 +54,7 @@ declare const conversationSettingsSchema: z.ZodObject<{
|
|
|
53
54
|
systemTemplate: string;
|
|
54
55
|
goodbyeMessage: string;
|
|
55
56
|
systemErrorMessage: string;
|
|
57
|
+
maxSpeechWaitTimeout: number;
|
|
56
58
|
vad: {
|
|
57
59
|
activationThreshold: number;
|
|
58
60
|
deactivationThreshold: number;
|
|
@@ -75,6 +77,7 @@ declare const conversationSettingsSchema: z.ZodObject<{
|
|
|
75
77
|
systemTemplate: string;
|
|
76
78
|
goodbyeMessage: string;
|
|
77
79
|
systemErrorMessage: string;
|
|
80
|
+
maxSpeechWaitTimeout: number;
|
|
78
81
|
vad: {
|
|
79
82
|
activationThreshold: number;
|
|
80
83
|
deactivationThreshold: number;
|
|
@@ -295,6 +298,7 @@ declare const assistantSchema: z.ZodObject<{
|
|
|
295
298
|
goodbyeMessage: z.ZodString;
|
|
296
299
|
systemErrorMessage: z.ZodString;
|
|
297
300
|
initialDtmf: z.ZodNullable<z.ZodOptional<z.ZodString>>;
|
|
301
|
+
maxSpeechWaitTimeout: z.ZodNumber;
|
|
298
302
|
transferOptions: z.ZodNullable<z.ZodOptional<z.ZodObject<{
|
|
299
303
|
phoneNumber: z.ZodString;
|
|
300
304
|
message: z.ZodString;
|
|
@@ -342,6 +346,7 @@ declare const assistantSchema: z.ZodObject<{
|
|
|
342
346
|
systemTemplate: string;
|
|
343
347
|
goodbyeMessage: string;
|
|
344
348
|
systemErrorMessage: string;
|
|
349
|
+
maxSpeechWaitTimeout: number;
|
|
345
350
|
vad: {
|
|
346
351
|
activationThreshold: number;
|
|
347
352
|
deactivationThreshold: number;
|
|
@@ -364,6 +369,7 @@ declare const assistantSchema: z.ZodObject<{
|
|
|
364
369
|
systemTemplate: string;
|
|
365
370
|
goodbyeMessage: string;
|
|
366
371
|
systemErrorMessage: string;
|
|
372
|
+
maxSpeechWaitTimeout: number;
|
|
367
373
|
vad: {
|
|
368
374
|
activationThreshold: number;
|
|
369
375
|
deactivationThreshold: number;
|
|
@@ -583,6 +589,7 @@ declare const assistantSchema: z.ZodObject<{
|
|
|
583
589
|
systemTemplate: string;
|
|
584
590
|
goodbyeMessage: string;
|
|
585
591
|
systemErrorMessage: string;
|
|
592
|
+
maxSpeechWaitTimeout: number;
|
|
586
593
|
vad: {
|
|
587
594
|
activationThreshold: number;
|
|
588
595
|
deactivationThreshold: number;
|
|
@@ -640,6 +647,7 @@ declare const assistantSchema: z.ZodObject<{
|
|
|
640
647
|
systemTemplate: string;
|
|
641
648
|
goodbyeMessage: string;
|
|
642
649
|
systemErrorMessage: string;
|
|
650
|
+
maxSpeechWaitTimeout: number;
|
|
643
651
|
vad: {
|
|
644
652
|
activationThreshold: number;
|
|
645
653
|
deactivationThreshold: number;
|
|
package/dist/assistants/AssistantSchema.js
CHANGED
|
@@ -28,6 +28,7 @@ const conversationSettingsSchema = zod_1.z.object({
|
|
|
28
28
|
goodbyeMessage: zod_1.z.string(),
|
|
29
29
|
systemErrorMessage: zod_1.z.string(),
|
|
30
30
|
initialDtmf: zod_1.z.string().optional().nullable(),
|
|
31
|
+
maxSpeechWaitTimeout: zod_1.z.number(),
|
|
31
32
|
transferOptions: zod_1.z
|
|
32
33
|
.object({
|
|
33
34
|
phoneNumber: zod_1.z.string(),
|
|
package/dist/handleVoiceRequest.js
CHANGED
|
@@ -60,7 +60,6 @@ async function handleVoiceRequest(req, res) {
|
|
|
60
60
|
const assistantConfig = (0, loadAssistantConfig_1.loadAssistantConfig)();
|
|
61
61
|
const knowledgeBase = await (0, loadKnowledgeBase_1.loadKnowledgeBase)();
|
|
62
62
|
const voice = new _1.VoiceImpl(sessionRef, res);
|
|
63
|
-
const vad = new _1.SileroVad(assistantConfig.conversationSettings.vad);
|
|
64
63
|
const languageModel = (0, createLanguageModel_1.createLanguageModel)({
|
|
65
64
|
voice,
|
|
66
65
|
assistantConfig,
|
|
@@ -74,7 +73,6 @@ async function handleVoiceRequest(req, res) {
|
|
|
74
73
|
const autopilot = new _1.default({
|
|
75
74
|
conversationSettings: assistantConfig.conversationSettings,
|
|
76
75
|
voice,
|
|
77
|
-
vad,
|
|
78
76
|
languageModel
|
|
79
77
|
});
|
|
80
78
|
autopilot.start();
|
|
package/dist/machine/machine.d.ts
CHANGED
|
@@ -79,7 +79,7 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
|
|
|
79
79
|
type: "isNotSpeaking";
|
|
80
80
|
params: unknown;
|
|
81
81
|
};
|
|
82
|
-
}>, "IDLE_TIMEOUT", "hangup" | "greeting" | "idle" | "waitingForUserRequest" | "transitioningToIdle" | "updatingSpeech" | "processingUserRequest", string, {
|
|
82
|
+
}>, "IDLE_TIMEOUT" | "MAX_SPEECH_WAIT_TIMEOUT", "hangup" | "greeting" | "idle" | "waitingForUserRequest" | "transitioningToIdle" | "updatingSpeech" | "processingUserRequest", string, {
|
|
83
83
|
conversationSettings: ConversationSettings;
|
|
84
84
|
languageModel: LanguageModel;
|
|
85
85
|
voice: Voice;
|
|
@@ -157,6 +157,7 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
|
|
|
157
157
|
idleTimeout: number;
|
|
158
158
|
maxIdleTimeoutCount: number;
|
|
159
159
|
idleTimeoutCount: number;
|
|
160
|
+
maxSpeechWaitTimeout: number;
|
|
160
161
|
speechResponseStartTime: number;
|
|
161
162
|
speechResponseTime: number;
|
|
162
163
|
isSpeaking: false;
|
|
@@ -178,6 +179,10 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
|
|
|
178
179
|
readonly target: "waitingForUserRequest";
|
|
179
180
|
readonly description: "Event from VAD system.";
|
|
180
181
|
};
|
|
182
|
+
readonly SPEECH_RESULT: {
|
|
183
|
+
readonly target: "waitingForUserRequest";
|
|
184
|
+
readonly description: "Event from Speech to Text provider.";
|
|
185
|
+
};
|
|
181
186
|
};
|
|
182
187
|
readonly after: {
|
|
183
188
|
readonly IDLE_TIMEOUT: readonly [{
|
|
@@ -253,6 +258,11 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
|
|
|
253
258
|
};
|
|
254
259
|
}];
|
|
255
260
|
};
|
|
261
|
+
readonly after: {
|
|
262
|
+
readonly MAX_SPEECH_WAIT_TIMEOUT: {
|
|
263
|
+
readonly target: "processingUserRequest";
|
|
264
|
+
};
|
|
265
|
+
};
|
|
256
266
|
};
|
|
257
267
|
readonly processingUserRequest: {
|
|
258
268
|
readonly on: {
|
package/dist/machine/machine.js
CHANGED
|
@@ -124,6 +124,9 @@ const machine = (0, xstate_1.setup)({
|
|
|
124
124
|
delays: {
|
|
125
125
|
IDLE_TIMEOUT: ({ context }) => {
|
|
126
126
|
return context.idleTimeout;
|
|
127
|
+
},
|
|
128
|
+
MAX_SPEECH_WAIT_TIMEOUT: ({ context }) => {
|
|
129
|
+
return context.maxSpeechWaitTimeout;
|
|
127
130
|
}
|
|
128
131
|
},
|
|
129
132
|
actors: {
|
|
@@ -191,6 +194,7 @@ const machine = (0, xstate_1.setup)({
|
|
|
191
194
|
idleTimeout: input.conversationSettings.idleOptions?.timeout || 10000,
|
|
192
195
|
maxIdleTimeoutCount: input.conversationSettings.idleOptions?.maxTimeoutCount || 3,
|
|
193
196
|
idleTimeoutCount: 0,
|
|
197
|
+
maxSpeechWaitTimeout: input.conversationSettings.maxSpeechWaitTimeout,
|
|
194
198
|
speechResponseStartTime: 0,
|
|
195
199
|
speechResponseTime: 0,
|
|
196
200
|
isSpeaking: false
|
|
@@ -211,6 +215,10 @@ const machine = (0, xstate_1.setup)({
|
|
|
211
215
|
SPEECH_START: {
|
|
212
216
|
target: "waitingForUserRequest",
|
|
213
217
|
description: "Event from VAD system."
|
|
218
|
+
},
|
|
219
|
+
SPEECH_RESULT: {
|
|
220
|
+
target: "waitingForUserRequest",
|
|
221
|
+
description: "Event from Speech to Text provider."
|
|
214
222
|
}
|
|
215
223
|
},
|
|
216
224
|
after: {
|
|
@@ -303,6 +311,11 @@ const machine = (0, xstate_1.setup)({
|
|
|
303
311
|
}
|
|
304
312
|
}
|
|
305
313
|
]
|
|
314
|
+
},
|
|
315
|
+
after: {
|
|
316
|
+
MAX_SPEECH_WAIT_TIMEOUT: {
|
|
317
|
+
target: "processingUserRequest"
|
|
318
|
+
}
|
|
306
319
|
}
|
|
307
320
|
},
|
|
308
321
|
processingUserRequest: {
|
package/dist/machine/types.d.ts
CHANGED
|
package/dist/models/openai/types.d.ts
CHANGED
|
@@ -1,7 +1,9 @@
|
|
|
1
1
|
import { BaseModelParams } from "../types";
|
|
2
2
|
declare enum OpenAIModel {
|
|
3
3
|
GPT_4O = "gpt-4o",
|
|
4
|
-
GPT_4O_MINI = "gpt-4o-mini"
|
|
4
|
+
GPT_4O_MINI = "gpt-4o-mini",
|
|
5
|
+
GPT_3_5_TURBO = "gpt-3.5-turbo",
|
|
6
|
+
GPT_4_TURBO = "gpt-4-turbo"
|
|
5
7
|
}
|
|
6
8
|
type OpenAIParams = BaseModelParams & {
|
|
7
9
|
model: OpenAIModel;
|
|
package/dist/models/openai/types.js
CHANGED
|
@@ -5,4 +5,6 @@ var OpenAIModel;
|
|
|
5
5
|
(function (OpenAIModel) {
|
|
6
6
|
OpenAIModel["GPT_4O"] = "gpt-4o";
|
|
7
7
|
OpenAIModel["GPT_4O_MINI"] = "gpt-4o-mini";
|
|
8
|
+
OpenAIModel["GPT_3_5_TURBO"] = "gpt-3.5-turbo";
|
|
9
|
+
OpenAIModel["GPT_4_TURBO"] = "gpt-4-turbo";
|
|
8
10
|
})(OpenAIModel || (exports.OpenAIModel = OpenAIModel = {}));
|
package/dist/types.d.ts
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import { ConversationSettings } from "./assistants";
|
|
2
2
|
import { LanguageModel } from "./models";
|
|
3
|
-
import { Vad } from "./vad";
|
|
4
3
|
import { Voice } from "./voice";
|
|
5
4
|
declare enum LANGUAGE_MODEL_PROVIDER {
|
|
6
5
|
OPENAI = "openai",
|
|
@@ -9,7 +8,6 @@ declare enum LANGUAGE_MODEL_PROVIDER {
|
|
|
9
8
|
}
|
|
10
9
|
type AutopilotParams = {
|
|
11
10
|
voice: Voice;
|
|
12
|
-
vad: Vad;
|
|
13
11
|
conversationSettings: ConversationSettings;
|
|
14
12
|
languageModel: LanguageModel;
|
|
15
13
|
};
|
package/dist/vad/types.d.ts
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
+
type VadEvent = "SPEECH_START" | "SPEECH_END";
|
|
1
2
|
type Vad = {
|
|
2
|
-
processChunk: (chunk: Uint8Array, callback: (event:
|
|
3
|
+
processChunk: (chunk: Uint8Array, callback: (event: VadEvent) => void) => void;
|
|
3
4
|
};
|
|
4
5
|
type SpeechProbabilities = {
|
|
5
6
|
notSpeech: number;
|
|
@@ -16,4 +17,4 @@ type ONNXRuntimeAPI = {
|
|
|
16
17
|
new (type: "float32", data: Float32Array, dims: [1, number]): unknown;
|
|
17
18
|
};
|
|
18
19
|
};
|
|
19
|
-
export { ONNXRuntimeAPI, SpeechProbabilities, Vad };
|
|
20
|
+
export { ONNXRuntimeAPI, SpeechProbabilities, Vad, VadEvent };
|
|
package/dist/vadWorker.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
package/dist/vadWorker.js
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
/*
|
|
4
|
+
* Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
|
|
5
|
+
* http://github.com/fonoster/fonoster
|
|
6
|
+
*
|
|
7
|
+
* This file is part of Fonoster
|
|
8
|
+
*
|
|
9
|
+
* Licensed under the MIT License (the "License");
|
|
10
|
+
* you may not use this file except in compliance with
|
|
11
|
+
* the License. You may obtain a copy of the License at
|
|
12
|
+
*
|
|
13
|
+
* https://opensource.org/licenses/MIT
|
|
14
|
+
*
|
|
15
|
+
* Unless required by applicable law or agreed to in writing, software
|
|
16
|
+
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
17
|
+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
18
|
+
* See the License for the specific language governing permissions and
|
|
19
|
+
* limitations under the License.
|
|
20
|
+
*/
|
|
21
|
+
const worker_threads_1 = require("worker_threads");
|
|
22
|
+
const SileroVad_1 = require("./vad/SileroVad");
|
|
23
|
+
const vad = new SileroVad_1.SileroVad(worker_threads_1.workerData);
|
|
24
|
+
worker_threads_1.parentPort?.on("message", (chunk) => {
|
|
25
|
+
vad.processChunk(chunk, (voiceActivity) => {
|
|
26
|
+
worker_threads_1.parentPort?.postMessage(voiceActivity);
|
|
27
|
+
});
|
|
28
|
+
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fonoster/autopilot",
|
|
3
|
-
"version": "0.7.19",
|
|
3
|
+
"version": "0.7.21",
|
|
4
4
|
"description": "Voice AI for the Fonoster platform",
|
|
5
5
|
"author": "Pedro Sanders <psanders@fonoster.com>",
|
|
6
6
|
"homepage": "https://github.com/fonoster/fonoster#readme",
|
|
@@ -56,5 +56,5 @@
|
|
|
56
56
|
"devDependencies": {
|
|
57
57
|
"typescript": "^5.5.4"
|
|
58
58
|
},
|
|
59
|
-
"gitHead": "
|
|
59
|
+
"gitHead": "0de74ab45f5fe25b0f096ad02bab2be00be53d89"
|
|
60
60
|
}
|