@fonoster/autopilot 0.8.37 → 0.8.40
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/Autopilot.js +3 -2
- package/dist/loadAssistantFromAPI.js +2 -1
- package/dist/machine/machine.d.ts +8 -26
- package/dist/machine/machine.js +73 -51
- package/package.json +7 -7
package/dist/Autopilot.js
CHANGED
@@ -73,6 +73,9 @@ class Autopilot {
             if (event === "SPEECH_START") {
                 this.actor.send({ type: "SPEECH_START" });
             }
+            else if (event === "SPEECH_END") {
+                this.actor.send({ type: "SPEECH_END" });
+            }
         });
     }
     handleVoicePayload(chunk) {
@@ -89,8 +92,6 @@ class Autopilot {
         stream.onData((speech) => {
             logger.verbose("received speech result", { speech });
             if (speech) {
-                // Testing using STT for both VAD and STT (experimental)
-                this.actor.send({ type: "SPEECH_END" });
                 this.actor.send({ type: "SPEECH_RESULT", speech });
             }
         });
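
The voice-activity handler now forwards SPEECH_END to the state machine as its own event, and the STT result handler stops synthesizing SPEECH_END on every transcript (the experimental behavior removed in the second hunk). A minimal sketch of the new wiring, assuming XState v5 and a hypothetical onVadEvent callback; only the two event names come from the diff:

import { createActor, setup } from "xstate";

// Hypothetical stand-in for the VAD event names Autopilot.js receives.
type VadEvent = "SPEECH_START" | "SPEECH_END";

const speechEvents = setup({
  types: {} as { events: { type: "SPEECH_START" } | { type: "SPEECH_END" } }
}).createMachine({
  on: {
    SPEECH_START: { actions: () => console.log("caller started speaking") },
    SPEECH_END: { actions: () => console.log("caller stopped speaking") }
  }
});

const actor = createActor(speechEvents).start();

// Mirrors the updated handler: both VAD events are forwarded to the actor,
// instead of SPEECH_END being derived from STT results.
function onVadEvent(event: VadEvent) {
  if (event === "SPEECH_START") {
    actor.send({ type: "SPEECH_START" });
  } else if (event === "SPEECH_END") {
    actor.send({ type: "SPEECH_END" });
  }
}

onVadEvent("SPEECH_START");
onVadEvent("SPEECH_END");
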
package/dist/loadAssistantFromAPI.js
CHANGED
@@ -57,6 +57,7 @@ const common_1 = require("@fonoster/common");
 const SDK = __importStar(require("@fonoster/sdk"));
 const envs_1 = require("./envs");
 const logger_1 = require("@fonoster/logger");
+const common_2 = require("@fonoster/common");
 const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
 function loadAssistantFromAPI(req,
 // TODO: Add validation for integrations
@@ -85,7 +86,7 @@ integrations) {
         const credentials = (0, common_1.findIntegrationsCredentials)(integrations, app.intelligence?.productRef);
         const assistantConfig = app.intelligence?.config;
         assistantConfig.languageModel.apiKey = credentials?.apiKey;
-        resolve(assistantConfig);
+        resolve(common_2.assistantSchema.parse(assistantConfig));
     })
         .catch((err) => {
         reject(new Error(`Failed to load assistant config from API: ${err}`));
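
The assistant config fetched from the API is now validated with assistantSchema (imported from @fonoster/common) before it is resolved, so a malformed assistant definition fails fast with a descriptive error instead of surfacing later in the call flow. A small sketch of the same pattern, assuming a zod-style schema; the schema shape below is illustrative and much smaller than the real assistantSchema:

import { z } from "zod";

// Illustrative stand-in for @fonoster/common's assistantSchema.
const assistantSchema = z.object({
  languageModel: z.object({
    provider: z.string(),
    apiKey: z.string().optional()
  })
});

type AssistantConfig = z.infer<typeof assistantSchema>;

function loadAssistantConfig(raw: unknown, apiKey?: string): AssistantConfig {
  // parse() throws a ZodError listing every offending field,
  // which is what the updated resolve() call relies on.
  const config = assistantSchema.parse(raw);
  config.languageModel.apiKey = apiKey;
  return config;
}

console.log(loadAssistantConfig({ languageModel: { provider: "openai" } }, "sk-test"));
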
package/dist/machine/machine.d.ts
CHANGED
@@ -66,20 +66,11 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
         type: "setSpeakingDone";
         params: import("xstate").NonReducibleUnknown;
     };
-
-
-
-        params: unknown;
-    };
-    hasSpeechResult: {
-        type: "hasSpeechResult";
-        params: unknown;
-    };
-    isNotSpeaking: {
-        type: "isNotSpeaking";
-        params: unknown;
+    resetState: {
+        type: "resetState";
+        params: import("xstate").NonReducibleUnknown;
     };
-}>, "IDLE_TIMEOUT" | "MAX_SPEECH_WAIT_TIMEOUT", {}, string, {
+}>, never, "IDLE_TIMEOUT" | "MAX_SPEECH_WAIT_TIMEOUT" | "SESSION_TIMEOUT", {}, string, {
     conversationSettings: ConversationSettings;
     languageModel: LanguageModel;
     voice: Voice;
@@ -143,20 +134,11 @@ declare const machine: import("xstate").StateMachine<AutopilotContext, {
         type: "setSpeakingDone";
         params: import("xstate").NonReducibleUnknown;
     };
-
-
-
-        params: unknown;
-    };
-    hasSpeechResult: {
-        type: "hasSpeechResult";
-        params: unknown;
-    };
-    isNotSpeaking: {
-        type: "isNotSpeaking";
-        params: unknown;
+    resetState: {
+        type: "resetState";
+        params: import("xstate").NonReducibleUnknown;
     };
-}>, "IDLE_TIMEOUT" | "MAX_SPEECH_WAIT_TIMEOUT", string, {
+}>, never, "IDLE_TIMEOUT" | "MAX_SPEECH_WAIT_TIMEOUT" | "SESSION_TIMEOUT", string, {
     conversationSettings: ConversationSettings;
     languageModel: LanguageModel;
     voice: Voice;
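
The regenerated declaration mirrors the rebuilt machine below: a resetState action appears, several named guard entries drop out (machine.js now composes guards inline with and()/not()), and SESSION_TIMEOUT joins the delay union. For orientation, a hedged sketch of the context fields the new machine reads; only conversationSettings, languageModel and voice appear in this declaration, the remaining fields are inferred from the machine.js hunks and may not match the real AutopilotContext exactly:

// Inferred sketch of the context the rebuilt machine relies on.
// Only fields referenced in the machine.js diff below are listed.
interface AutopilotContextSketch {
  conversationSettings: unknown;   // ConversationSettings in the real typings
  languageModel: unknown;          // LanguageModel in the real typings
  voice: { say(text: string): Promise<void> };
  speechBuffer: string;
  isSpeaking: boolean;
  idleTimeoutCount: number;
  maxIdleTimeoutCount: number;
  maxSpeechWaitTimeout: number;
  sessionStartTime: number;        // Date.now() captured when the machine starts
  maxSessionDuration: number;      // drives the new SESSION_TIMEOUT delay
  systemErrorMessage: string;
}

export type { AutopilotContextSketch };
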
package/dist/machine/machine.js
CHANGED
@@ -78,7 +78,10 @@ const machine = (0, xstate_1.setup)({
                 speech: event.speech
             });
             const speech = event.speech;
-
+            if (!speech) {
+                return context;
+            }
             context.speechBuffer = ((context.speechBuffer ?? "") +
                 " " +
                 speech).trimStart();
             return context;
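
The appendSpeech action now returns early when the event carries no speech text and tolerates an unset buffer before concatenating. A compact sketch of the same assign action in XState v5; the context and event shapes follow the diff, the rest is illustrative:

import { assign, createActor, setup } from "xstate";

const appendSketch = setup({
  types: {} as {
    context: { speechBuffer: string };
    events: { type: "SPEECH_RESULT"; speech?: string };
  }
}).createMachine({
  context: { speechBuffer: "" },
  on: {
    SPEECH_RESULT: {
      actions: assign(({ context, event }) => {
        if (!event.speech) {
          return context; // nothing to append, keep the buffer untouched
        }
        return {
          speechBuffer: (context.speechBuffer + " " + event.speech).trimStart()
        };
      })
    }
  }
});

const actor = createActor(appendSketch).start();
actor.send({ type: "SPEECH_RESULT", speech: "hello" });
actor.send({ type: "SPEECH_RESULT" }); // ignored instead of appending an empty value
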
@@ -103,24 +106,33 @@ const machine = (0, xstate_1.setup)({
             });
             context.isSpeaking = false;
             return context;
+        }),
+        resetState: (0, xstate_1.assign)(({ context }) => {
+            logger.verbose("called resetState action");
+            return {
+                ...context,
+                speechBuffer: "",
+                idleTimeoutCount: 0,
+                isSpeaking: false
+            };
         })
     },
     guards: {
         idleTimeoutCountExceedsMax: function ({ context }) {
             logger.verbose("called idleTimeoutCountExceedsMax guard", {
-                idleTimeoutCount: context.idleTimeoutCount,
+                idleTimeoutCount: context.idleTimeoutCount + 1,
                 maxIdleTimeoutCount: context.maxIdleTimeoutCount
             });
-            return context.idleTimeoutCount
+            return context.idleTimeoutCount + 1 > context.maxIdleTimeoutCount;
         },
         hasSpeechResult: function ({ context }) {
             return context.speechBuffer !== "";
         },
-
-        logger.verbose("called
+        isSpeaking: function ({ context }) {
+            logger.verbose("called isSpeaking guard", {
                 isSpeaking: context.isSpeaking
             });
-            return
+            return context.isSpeaking;
         }
     },
     delays: {
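
Two behavioral points here: a resetState action clears the speech buffer, the idle counter and the speaking flag in one place, the new isSpeaking guard simply reports context.isSpeaking, and idleTimeoutCountExceedsMax now counts the timeout that is currently firing (context.idleTimeoutCount + 1). A small plain-TypeScript sketch of that counting rule, with names taken from the diff:

// The guard runs before increaseIdleTimeoutCount, so the timeout that is
// firing right now has not been added to the counter yet.
function idleTimeoutCountExceedsMax(idleTimeoutCount: number, maxIdleTimeoutCount: number): boolean {
  return idleTimeoutCount + 1 > maxIdleTimeoutCount;
}

// With maxIdleTimeoutCount = 3 the caller is prompted three times and the
// fourth consecutive idle timeout hangs up.
for (let count = 0; count < 4; count++) {
  console.log(count, idleTimeoutCountExceedsMax(count, 3));
}
// 0 false, 1 false, 2 false, 3 true
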
@@ -129,6 +141,10 @@ const machine = (0, xstate_1.setup)({
         },
         MAX_SPEECH_WAIT_TIMEOUT: ({ context }) => {
             return context.maxSpeechWaitTimeout;
+        },
+        SESSION_TIMEOUT: ({ context }) => {
+            const elapsed = Date.now() - context.sessionStartTime;
+            return Math.max(0, context.maxSessionDuration - elapsed);
         }
     },
     actors: {
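
SESSION_TIMEOUT is a dynamic delay computed from the session start timestamp, so the remaining time shrinks as the call progresses and is clamped at zero. A runnable XState v5 sketch of the same delay; field names follow the diff, durations are shortened for the demo:

import { createActor, setup } from "xstate";

const sessionSketch = setup({
  types: {} as {
    context: { sessionStartTime: number; maxSessionDuration: number };
  },
  delays: {
    // Remaining session time, clamped at zero so an already-expired
    // session times out immediately.
    SESSION_TIMEOUT: ({ context }) =>
      Math.max(0, context.maxSessionDuration - (Date.now() - context.sessionStartTime))
  }
}).createMachine({
  context: { sessionStartTime: Date.now(), maxSessionDuration: 2000 },
  initial: "active",
  states: {
    active: {
      after: { SESSION_TIMEOUT: { target: "hangup" } }
    },
    hangup: { type: "final" }
  }
});

const actor = createActor(sessionSketch);
actor.subscribe((snapshot) => {
  if (snapshot.status === "done") {
    console.log("session ended:", snapshot.value); // "hangup", roughly 2s after start
  }
});
actor.start();
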
@@ -175,6 +191,18 @@ const machine = (0, xstate_1.setup)({
             await context.voice.say(context.systemErrorMessage);
         }
     })
+    },
+    on: {
+        ERROR: {
+            target: "systemError",
+            actions: "logError"
+        }
+    },
+    after: {
+        SESSION_TIMEOUT: {
+            target: "hangup",
+            actions: ["goodbye"]
+        }
     }
 }).createMachine({
     context: ({ input }) => ({
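
This hunk adds machine-wide handling: an ERROR event moves the conversation to the new systemError state (logging via logError on the way) and SESSION_TIMEOUT ends the call through hangup with a goodbye. A sketch of the same intent using root-level on/after handlers; it does not claim to mirror the package's exact object layout, and the action bodies are illustrative:

import { createActor, setup } from "xstate";

const globalHandlers = setup({
  types: {} as {
    context: { sessionStartTime: number; maxSessionDuration: number };
    events: { type: "ERROR"; error?: unknown };
  },
  actions: {
    logError: () => console.error("autopilot error"),
    goodbye: () => console.log("saying goodbye before hangup")
  },
  delays: {
    SESSION_TIMEOUT: ({ context }) =>
      Math.max(0, context.maxSessionDuration - (Date.now() - context.sessionStartTime))
  }
}).createMachine({
  context: { sessionStartTime: Date.now(), maxSessionDuration: 1500 },
  initial: "idle",
  // Root-level handlers apply no matter which child state is active.
  on: { ERROR: { target: ".systemError", actions: "logError" } },
  after: { SESSION_TIMEOUT: { target: ".hangup", actions: ["goodbye"] } },
  states: {
    idle: {},
    systemError: {},
    hangup: { type: "final" }
  }
});

const actor = createActor(globalHandlers).start();
actor.send({ type: "ERROR" }); // → systemError; SESSION_TIMEOUT still ends the call
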
@@ -192,8 +220,9 @@ const machine = (0, xstate_1.setup)({
         maxIdleTimeoutCount: input.conversationSettings.idleOptions?.maxTimeoutCount || 3,
         idleTimeoutCount: 0,
         maxSpeechWaitTimeout: input.conversationSettings.maxSpeechWaitTimeout,
-
-
+        isSpeaking: false,
+        sessionStartTime: Date.now(),
+        maxSessionDuration: input.conversationSettings.maxSessionDuration
     }),
     id: "fnAI",
     initial: "greeting",
@@ -221,27 +250,25 @@ const machine = (0, xstate_1.setup)({
            IDLE_TIMEOUT: [
                {
                    target: "hangup",
-                    actions: {
-
-                    },
-                    guard: {
-                        type: "idleTimeoutCountExceedsMax"
-                    }
+                    actions: { type: "goodbye" },
+                    guard: (0, xstate_1.and)(["idleTimeoutCountExceedsMax", (0, xstate_1.not)("isSpeaking")])
                },
                {
-                    target: "
+                    target: "idleTransition",
+                    guard: (0, xstate_1.not)("isSpeaking"),
                    actions: [
-                        {
-
-                        },
-                        {
-                            type: "announceIdleTimeout"
-                        }
+                        { type: "increaseIdleTimeoutCount" },
+                        { type: "announceIdleTimeout" }
                    ]
                }
            ]
        }
    },
+    idleTransition: {
+        always: {
+            target: "idle"
+        }
+    },
    waitingForUserRequest: {
        always: {
            target: "updatingSpeech"
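
The IDLE_TIMEOUT branches now use XState's guard combinators: hangup fires only when the idle limit is reached and the caller is silent, and the announce branch bounces through the new idleTransition state so re-entering idle re-arms the IDLE_TIMEOUT timer. A runnable sketch of that pattern; guard, action and state names follow the diff, the delay and context values are illustrative:

import { and, assign, createActor, not, setup } from "xstate";

const idleSketch = setup({
  types: {} as {
    context: { idleTimeoutCount: number; maxIdleTimeoutCount: number; isSpeaking: boolean };
  },
  guards: {
    idleTimeoutCountExceedsMax: ({ context }) =>
      context.idleTimeoutCount + 1 > context.maxIdleTimeoutCount,
    isSpeaking: ({ context }) => context.isSpeaking
  },
  actions: {
    goodbye: () => console.log("goodbye"),
    increaseIdleTimeoutCount: assign(({ context }) => ({
      idleTimeoutCount: context.idleTimeoutCount + 1
    })),
    announceIdleTimeout: () => console.log("are you still there?")
  },
  delays: { IDLE_TIMEOUT: 1000 }
}).createMachine({
  context: { idleTimeoutCount: 0, maxIdleTimeoutCount: 3, isSpeaking: false },
  initial: "idle",
  states: {
    idle: {
      after: {
        IDLE_TIMEOUT: [
          {
            target: "hangup",
            actions: { type: "goodbye" },
            // Both conditions must hold: limit reached AND the caller is silent.
            guard: and(["idleTimeoutCountExceedsMax", not("isSpeaking")])
          },
          {
            // Bouncing through idleTransition re-enters idle,
            // which re-arms the IDLE_TIMEOUT timer.
            target: "idleTransition",
            guard: not("isSpeaking"),
            actions: [{ type: "increaseIdleTimeoutCount" }, { type: "announceIdleTimeout" }]
          }
        ]
      }
    },
    idleTransition: { always: { target: "idle" } },
    hangup: { type: "final" }
  }
});

createActor(idleSketch).start();
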
@@ -264,54 +291,40 @@ const machine = (0, xstate_1.setup)({
    hangup: {
        type: "final"
    },
-    transitioningToIdle: {
-        always: {
-            target: "idle"
-        }
-    },
    updatingSpeech: {
        on: {
+            SPEECH_END: {
+                actions: [
+                    {
+                        type: "setSpeakingDone"
+                    }
+                ]
+            },
            SPEECH_RESULT: [
                {
                    target: "processingUserRequest",
                    actions: {
                        type: "appendSpeech"
                    },
-                    guard:
-                        type: "isNotSpeaking"
-                    },
+                    guard: (0, xstate_1.not)("isSpeaking"),
                    description: "Speech result from the Speech to Text provider."
-                },
-                {
-                    target: "updatingSpeech",
-                    actions: {
-                        type: "appendSpeech"
-                    }
                }
-            ]
-
+            ]
+        },
+        after: {
+            MAX_SPEECH_WAIT_TIMEOUT: [
                {
                    target: "processingUserRequest",
-
-                        type: "setSpeakingDone"
-                    },
-                    guard: {
-                        type: "hasSpeechResult"
-                    },
-                    description: "Event from VAD or similar system."
+                    guard: (0, xstate_1.and)([(0, xstate_1.not)("isSpeaking"), "hasSpeechResult"])
                },
                {
-                    target: "
+                    target: "idle",
+                    guard: (0, xstate_1.not)("isSpeaking"),
                    actions: {
-                        type: "
+                        type: "announceIdleTimeout"
                    }
                }
            ]
-        },
-        after: {
-            MAX_SPEECH_WAIT_TIMEOUT: {
-                target: "processingUserRequest"
-            }
        }
    },
    processingUserRequest: {
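
updatingSpeech now handles SPEECH_END directly (marking speaking as done), accepts SPEECH_RESULT only while the caller is not speaking, and replaces the single MAX_SPEECH_WAIT_TIMEOUT transition with a guarded array: go process the request if there is buffered speech, otherwise fall back to idle with an idle announcement. A trimmed runnable sketch of that guarded after-array; names follow the diff, delays and context values are illustrative:

import { and, assign, createActor, not, setup } from "xstate";

const updatingSpeechSketch = setup({
  types: {} as {
    context: { speechBuffer: string; isSpeaking: boolean };
    events: { type: "SPEECH_END" } | { type: "SPEECH_RESULT"; speech: string };
  },
  guards: {
    hasSpeechResult: ({ context }) => context.speechBuffer !== "",
    isSpeaking: ({ context }) => context.isSpeaking
  },
  actions: {
    setSpeakingDone: assign({ isSpeaking: false }),
    appendSpeech: assign(({ context, event }) => ({
      speechBuffer: event.type === "SPEECH_RESULT"
        ? (context.speechBuffer + " " + event.speech).trimStart()
        : context.speechBuffer
    })),
    announceIdleTimeout: () => console.log("still there?")
  },
  delays: { MAX_SPEECH_WAIT_TIMEOUT: 500 }
}).createMachine({
  context: { speechBuffer: "", isSpeaking: true },
  initial: "updatingSpeech",
  states: {
    updatingSpeech: {
      on: {
        SPEECH_END: { actions: [{ type: "setSpeakingDone" }] },
        SPEECH_RESULT: {
          target: "processingUserRequest",
          actions: { type: "appendSpeech" },
          guard: not("isSpeaking")
        }
      },
      after: {
        MAX_SPEECH_WAIT_TIMEOUT: [
          { target: "processingUserRequest", guard: and([not("isSpeaking"), "hasSpeechResult"]) },
          { target: "idle", guard: not("isSpeaking"), actions: { type: "announceIdleTimeout" } }
        ]
      }
    },
    processingUserRequest: {},
    idle: {}
  }
});

const actor = createActor(updatingSpeechSketch).start();
actor.send({ type: "SPEECH_END" });                           // caller stopped talking
actor.send({ type: "SPEECH_RESULT", speech: "book a room" }); // → processingUserRequest
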
@@ -328,6 +341,15 @@ const machine = (0, xstate_1.setup)({
                target: "idle"
            }
        }
+    },
+    systemError: {
+        entry: "announceSystemError",
+        after: {
+            SYSTEM_ERROR_RECOVERY_TIMEOUT: {
+                target: "idle",
+                actions: "resetState"
+            }
+        }
    }
 }
 });
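
The new systemError state announces the failure on entry and, after a recovery timeout, returns to idle while resetState clears the buffer, the idle counter and the speaking flag, so one failed turn does not poison the rest of the call. A runnable sketch of that recover-and-reset pattern; SYSTEM_ERROR_RECOVERY_TIMEOUT is not defined in this diff's delays hunk, so its value below is assumed:

import { assign, createActor, setup } from "xstate";

const recoverySketch = setup({
  types: {} as {
    context: { speechBuffer: string; idleTimeoutCount: number; isSpeaking: boolean };
    events: { type: "ERROR" };
  },
  actions: {
    announceSystemError: () => console.log("sorry, something went wrong"),
    // Same shape as the resetState action added in machine.js.
    resetState: assign({ speechBuffer: "", idleTimeoutCount: 0, isSpeaking: false })
  },
  delays: { SYSTEM_ERROR_RECOVERY_TIMEOUT: 1000 } // assumed value for the demo
}).createMachine({
  context: { speechBuffer: "half a sentence", idleTimeoutCount: 2, isSpeaking: true },
  initial: "idle",
  states: {
    idle: { on: { ERROR: { target: "systemError" } } },
    systemError: {
      entry: "announceSystemError",
      after: {
        SYSTEM_ERROR_RECOVERY_TIMEOUT: { target: "idle", actions: "resetState" }
      }
    }
  }
});

const actor = createActor(recoverySketch).start();
actor.send({ type: "ERROR" }); // announce, then return to idle with a clean context
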
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@fonoster/autopilot",
-  "version": "0.8.37",
+  "version": "0.8.40",
   "description": "Voice AI for the Fonoster platform",
   "author": "Pedro Sanders <psanders@fonoster.com>",
   "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -33,11 +33,11 @@
   },
   "dependencies": {
     "@aws-sdk/client-s3": "^3.712.0",
-    "@fonoster/common": "^0.8.
-    "@fonoster/logger": "^0.8.
-    "@fonoster/sdk": "^0.8.
-    "@fonoster/types": "^0.8.
-    "@fonoster/voice": "^0.8.
+    "@fonoster/common": "^0.8.40",
+    "@fonoster/logger": "^0.8.40",
+    "@fonoster/sdk": "^0.8.40",
+    "@fonoster/types": "^0.8.40",
+    "@fonoster/voice": "^0.8.40",
     "@langchain/community": "^0.3.19",
     "@langchain/core": "^0.3.23",
     "@langchain/groq": "^0.1.2",
@@ -55,5 +55,5 @@
   "devDependencies": {
     "typescript": "^5.5.4"
   },
-  "gitHead": "
+  "gitHead": "2bbf3dadb6e1e178c4eefdeb4291a2e05eef0f2b"
 }