@fonoster/autopilot 0.7.17 → 0.7.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. package/README.md +3 -2
  2. package/dist/assistants/AssistantSchema.d.ts +86 -6
  3. package/dist/assistants/AssistantSchema.js +7 -1
  4. package/dist/createLanguageModel.d.ts +7 -2
  5. package/dist/createLanguageModel.js +5 -3
  6. package/dist/handleVoiceRequest.js +17 -6
  7. package/dist/machine/machine.d.ts +81 -52
  8. package/dist/machine/machine.js +58 -57
  9. package/dist/machine/types.d.ts +22 -0
  10. package/dist/machine/types.js +2 -0
  11. package/dist/models/AbstractLanguageModel.d.ts +3 -1
  12. package/dist/models/AbstractLanguageModel.js +22 -13
  13. package/dist/models/LanguageModelFactory.d.ts +3 -2
  14. package/dist/models/LanguageModelFactory.js +2 -2
  15. package/dist/models/createPromptTemplate.d.ts +5 -1
  16. package/dist/models/createPromptTemplate.js +5 -2
  17. package/dist/models/groq/Groq.d.ts +2 -1
  18. package/dist/models/groq/Groq.js +2 -2
  19. package/dist/models/ollama/Ollama.d.ts +2 -1
  20. package/dist/models/ollama/Ollama.js +2 -2
  21. package/dist/models/openai/OpenAI.d.ts +2 -1
  22. package/dist/models/openai/OpenAI.js +2 -2
  23. package/dist/models/toolInvocation.d.ts +12 -0
  24. package/dist/models/toolInvocation.js +42 -0
  25. package/dist/models/types.d.ts +3 -0
  26. package/dist/tools/ToolSchema.d.ts +3 -0
  27. package/dist/tools/ToolSchema.js +1 -0
  28. package/dist/tools/ToolsCatalog.d.ts +1 -0
  29. package/dist/tools/ToolsCatalog.js +3 -0
  30. package/dist/tools/sendRequest.js +7 -1
  31. package/dist/vad/SileroVad.d.ts +11 -1
  32. package/dist/vad/SileroVad.js +6 -2
  33. package/dist/vad/makeVad.d.ts +6 -1
  34. package/dist/vad/makeVad.js +18 -11
  35. package/package.json +6 -6
package/README.md CHANGED
@@ -88,8 +88,9 @@ The Autopilot supports multiple language model providers. The following is a lis
88
88
 
89
89
  | Provider | Description | Supported models
90
90
  |------------|------------------------------------------------------------|------------------------------------------------------------------------------|
91
- | OpenAI | OpenAI provides various GPT models for conversational AI | `gpt-4o`, `gpt-40-mini` |
91
+ | OpenAI | OpenAI provides various GPT models for conversational AI | `gpt-4o`, `gpt-4o-mini` |
92
92
  | Groq | Groq offers high-performance AI models optimized for speed | `gemma-7b-it`, `llama3-groq-70b-8192-tool-use-preview`, `llama3-1-8b-instant` |
93
+ | Ollama | Self-hosted Ollama models | `llama3.1` |
93
94
 
94
95
  ## Adding Knowledge Base
95
96
 
@@ -141,7 +142,7 @@ You can configure a new tool by adding a new entry in the `tools` array in the c
141
142
 
142
143
  In addition to the `get` operation type, you can use the `post` operation type. The `post` operation type is used when sending data to the tool. When sending a post, you can optionally set `waitForResponse` to false, which will "fire and forget" the request. The default behavior is to wait for the response.
143
144
 
144
- If your tool needs the number of the caller or the number that received the call, you can use the reserved variables `ingressNumber` and `callerNumber`. Similarly, you can use the reserved variable `callReceivedAt` to get the date and time when the call was received in `ISO 8601` format.
145
+ If your tool needs the number of the caller or the number that received the call, you can use the reserved variables `ingressNumber` and `callerNumber`. Similarly, you can use the reserved variable `callReceivedAt` to get the date and time when the call was received in `ISO 8601` format, and the reserved variable `callDirection` to check whether the call originated from the PSTN.
145
146
 
146
147
  The expected format for the response is a JSON object with the following structure:
147
148
 
@@ -32,11 +32,33 @@ declare const conversationSettingsSchema: z.ZodObject<{
32
32
  timeout: number;
33
33
  maxTimeoutCount: number;
34
34
  }>>>;
35
+ vad: z.ZodObject<{
36
+ pathToModel: z.ZodOptional<z.ZodString>;
37
+ activationThreshold: z.ZodNumber;
38
+ deactivationThreshold: z.ZodNumber;
39
+ debounceFrames: z.ZodNumber;
40
+ }, "strip", z.ZodTypeAny, {
41
+ activationThreshold: number;
42
+ deactivationThreshold: number;
43
+ debounceFrames: number;
44
+ pathToModel?: string | undefined;
45
+ }, {
46
+ activationThreshold: number;
47
+ deactivationThreshold: number;
48
+ debounceFrames: number;
49
+ pathToModel?: string | undefined;
50
+ }>;
35
51
  }, "strip", z.ZodTypeAny, {
36
- systemTemplate: string;
37
52
  firstMessage: string;
53
+ systemTemplate: string;
38
54
  goodbyeMessage: string;
39
55
  systemErrorMessage: string;
56
+ vad: {
57
+ activationThreshold: number;
58
+ deactivationThreshold: number;
59
+ debounceFrames: number;
60
+ pathToModel?: string | undefined;
61
+ };
40
62
  initialDtmf?: string | null | undefined;
41
63
  transferOptions?: {
42
64
  message: string;
@@ -49,10 +71,16 @@ declare const conversationSettingsSchema: z.ZodObject<{
49
71
  maxTimeoutCount: number;
50
72
  } | null | undefined;
51
73
  }, {
52
- systemTemplate: string;
53
74
  firstMessage: string;
75
+ systemTemplate: string;
54
76
  goodbyeMessage: string;
55
77
  systemErrorMessage: string;
78
+ vad: {
79
+ activationThreshold: number;
80
+ deactivationThreshold: number;
81
+ debounceFrames: number;
82
+ pathToModel?: string | undefined;
83
+ };
56
84
  initialDtmf?: string | null | undefined;
57
85
  transferOptions?: {
58
86
  message: string;
@@ -129,6 +157,7 @@ declare const languageModelConfigSchema: z.ZodObject<{
129
157
  }>;
130
158
  required?: string[] | undefined;
131
159
  }>;
160
+ requestStartMessage: z.ZodOptional<z.ZodString>;
132
161
  operation: z.ZodEffects<z.ZodObject<{
133
162
  type: z.ZodNativeEnum<typeof import("../tools/ToolSchema").AllowedOperations>;
134
163
  url: z.ZodOptional<z.ZodString>;
@@ -173,6 +202,7 @@ declare const languageModelConfigSchema: z.ZodObject<{
173
202
  waitForResponse?: boolean | undefined;
174
203
  headers?: Record<string, string> | undefined;
175
204
  };
205
+ requestStartMessage?: string | undefined;
176
206
  }, {
177
207
  name: string;
178
208
  description: string;
@@ -191,6 +221,7 @@ declare const languageModelConfigSchema: z.ZodObject<{
191
221
  waitForResponse?: boolean | undefined;
192
222
  headers?: Record<string, string> | undefined;
193
223
  };
224
+ requestStartMessage?: string | undefined;
194
225
  }>, "many">;
195
226
  }, "strip", z.ZodTypeAny, {
196
227
  model: string;
@@ -217,6 +248,7 @@ declare const languageModelConfigSchema: z.ZodObject<{
217
248
  waitForResponse?: boolean | undefined;
218
249
  headers?: Record<string, string> | undefined;
219
250
  };
251
+ requestStartMessage?: string | undefined;
220
252
  }[];
221
253
  temperature: number;
222
254
  maxTokens: number;
@@ -248,6 +280,7 @@ declare const languageModelConfigSchema: z.ZodObject<{
248
280
  waitForResponse?: boolean | undefined;
249
281
  headers?: Record<string, string> | undefined;
250
282
  };
283
+ requestStartMessage?: string | undefined;
251
284
  }[];
252
285
  temperature: number;
253
286
  maxTokens: number;
@@ -288,11 +321,33 @@ declare const assistantSchema: z.ZodObject<{
288
321
  timeout: number;
289
322
  maxTimeoutCount: number;
290
323
  }>>>;
324
+ vad: z.ZodObject<{
325
+ pathToModel: z.ZodOptional<z.ZodString>;
326
+ activationThreshold: z.ZodNumber;
327
+ deactivationThreshold: z.ZodNumber;
328
+ debounceFrames: z.ZodNumber;
329
+ }, "strip", z.ZodTypeAny, {
330
+ activationThreshold: number;
331
+ deactivationThreshold: number;
332
+ debounceFrames: number;
333
+ pathToModel?: string | undefined;
334
+ }, {
335
+ activationThreshold: number;
336
+ deactivationThreshold: number;
337
+ debounceFrames: number;
338
+ pathToModel?: string | undefined;
339
+ }>;
291
340
  }, "strip", z.ZodTypeAny, {
292
- systemTemplate: string;
293
341
  firstMessage: string;
342
+ systemTemplate: string;
294
343
  goodbyeMessage: string;
295
344
  systemErrorMessage: string;
345
+ vad: {
346
+ activationThreshold: number;
347
+ deactivationThreshold: number;
348
+ debounceFrames: number;
349
+ pathToModel?: string | undefined;
350
+ };
296
351
  initialDtmf?: string | null | undefined;
297
352
  transferOptions?: {
298
353
  message: string;
@@ -305,10 +360,16 @@ declare const assistantSchema: z.ZodObject<{
305
360
  maxTimeoutCount: number;
306
361
  } | null | undefined;
307
362
  }, {
308
- systemTemplate: string;
309
363
  firstMessage: string;
364
+ systemTemplate: string;
310
365
  goodbyeMessage: string;
311
366
  systemErrorMessage: string;
367
+ vad: {
368
+ activationThreshold: number;
369
+ deactivationThreshold: number;
370
+ debounceFrames: number;
371
+ pathToModel?: string | undefined;
372
+ };
312
373
  initialDtmf?: string | null | undefined;
313
374
  transferOptions?: {
314
375
  message: string;
@@ -385,6 +446,7 @@ declare const assistantSchema: z.ZodObject<{
385
446
  }>;
386
447
  required?: string[] | undefined;
387
448
  }>;
449
+ requestStartMessage: z.ZodOptional<z.ZodString>;
388
450
  operation: z.ZodEffects<z.ZodObject<{
389
451
  type: z.ZodNativeEnum<typeof import("../tools/ToolSchema").AllowedOperations>;
390
452
  url: z.ZodOptional<z.ZodString>;
@@ -429,6 +491,7 @@ declare const assistantSchema: z.ZodObject<{
429
491
  waitForResponse?: boolean | undefined;
430
492
  headers?: Record<string, string> | undefined;
431
493
  };
494
+ requestStartMessage?: string | undefined;
432
495
  }, {
433
496
  name: string;
434
497
  description: string;
@@ -447,6 +510,7 @@ declare const assistantSchema: z.ZodObject<{
447
510
  waitForResponse?: boolean | undefined;
448
511
  headers?: Record<string, string> | undefined;
449
512
  };
513
+ requestStartMessage?: string | undefined;
450
514
  }>, "many">;
451
515
  }, "strip", z.ZodTypeAny, {
452
516
  model: string;
@@ -473,6 +537,7 @@ declare const assistantSchema: z.ZodObject<{
473
537
  waitForResponse?: boolean | undefined;
474
538
  headers?: Record<string, string> | undefined;
475
539
  };
540
+ requestStartMessage?: string | undefined;
476
541
  }[];
477
542
  temperature: number;
478
543
  maxTokens: number;
@@ -504,6 +569,7 @@ declare const assistantSchema: z.ZodObject<{
504
569
  waitForResponse?: boolean | undefined;
505
570
  headers?: Record<string, string> | undefined;
506
571
  };
572
+ requestStartMessage?: string | undefined;
507
573
  }[];
508
574
  temperature: number;
509
575
  maxTokens: number;
@@ -513,10 +579,16 @@ declare const assistantSchema: z.ZodObject<{
513
579
  }>;
514
580
  }, "strip", z.ZodTypeAny, {
515
581
  conversationSettings: {
516
- systemTemplate: string;
517
582
  firstMessage: string;
583
+ systemTemplate: string;
518
584
  goodbyeMessage: string;
519
585
  systemErrorMessage: string;
586
+ vad: {
587
+ activationThreshold: number;
588
+ deactivationThreshold: number;
589
+ debounceFrames: number;
590
+ pathToModel?: string | undefined;
591
+ };
520
592
  initialDtmf?: string | null | undefined;
521
593
  transferOptions?: {
522
594
  message: string;
@@ -554,6 +626,7 @@ declare const assistantSchema: z.ZodObject<{
554
626
  waitForResponse?: boolean | undefined;
555
627
  headers?: Record<string, string> | undefined;
556
628
  };
629
+ requestStartMessage?: string | undefined;
557
630
  }[];
558
631
  temperature: number;
559
632
  maxTokens: number;
@@ -563,10 +636,16 @@ declare const assistantSchema: z.ZodObject<{
563
636
  };
564
637
  }, {
565
638
  conversationSettings: {
566
- systemTemplate: string;
567
639
  firstMessage: string;
640
+ systemTemplate: string;
568
641
  goodbyeMessage: string;
569
642
  systemErrorMessage: string;
643
+ vad: {
644
+ activationThreshold: number;
645
+ deactivationThreshold: number;
646
+ debounceFrames: number;
647
+ pathToModel?: string | undefined;
648
+ };
570
649
  initialDtmf?: string | null | undefined;
571
650
  transferOptions?: {
572
651
  message: string;
@@ -604,6 +683,7 @@ declare const assistantSchema: z.ZodObject<{
604
683
  waitForResponse?: boolean | undefined;
605
684
  headers?: Record<string, string> | undefined;
606
685
  };
686
+ requestStartMessage?: string | undefined;
607
687
  }[];
608
688
  temperature: number;
609
689
  maxTokens: number;
@@ -43,7 +43,13 @@ const conversationSettingsSchema = zod_1.z.object({
43
43
  maxTimeoutCount: zod_1.z.number()
44
44
  })
45
45
  .optional()
46
- .nullable()
46
+ .nullable(),
47
+ vad: zod_1.z.object({
48
+ pathToModel: zod_1.z.string().optional(),
49
+ activationThreshold: zod_1.z.number(),
50
+ deactivationThreshold: zod_1.z.number(),
51
+ debounceFrames: zod_1.z.number()
52
+ })
47
53
  });
48
54
  exports.conversationSettingsSchema = conversationSettingsSchema;
49
55
  const languageModelConfigSchema = zod_1.z.object({
@@ -1,3 +1,8 @@
1
- import { AssistantConfig, FilesKnowledgeBase, TelephonyContext } from ".";
2
- declare function createLanguageModel(assistantConfig: AssistantConfig, knowledgeBase: FilesKnowledgeBase, telephonyContext: TelephonyContext): import("./models/AbstractLanguageModel").AbstractLanguageModel;
1
+ import { AssistantConfig, FilesKnowledgeBase, TelephonyContext, Voice } from ".";
2
+ declare function createLanguageModel(params: {
3
+ voice: Voice;
4
+ assistantConfig: AssistantConfig;
5
+ knowledgeBase: FilesKnowledgeBase;
6
+ telephonyContext: TelephonyContext;
7
+ }): import("./models/AbstractLanguageModel").AbstractLanguageModel;
3
8
  export { createLanguageModel };
@@ -20,14 +20,16 @@ exports.createLanguageModel = createLanguageModel;
20
20
  * limitations under the License.
21
21
  */
22
22
  const _1 = require(".");
23
- function createLanguageModel(assistantConfig, knowledgeBase, telephonyContext) {
23
+ function createLanguageModel(params) {
24
+ const { voice, assistantConfig, knowledgeBase, telephonyContext } = params;
24
25
  const { languageModel: languageModelSettings, conversationSettings } = assistantConfig;
25
26
  return _1.LanguageModelFactory.getLanguageModel(languageModelSettings.provider, {
26
- apiKey: languageModelSettings.apiKey,
27
27
  // @ts-expect-error don't know the model type here
28
28
  model: languageModelSettings.model,
29
+ apiKey: languageModelSettings.apiKey,
29
30
  maxTokens: languageModelSettings.maxTokens,
30
31
  temperature: languageModelSettings.temperature,
32
+ firstMessage: conversationSettings.firstMessage,
31
33
  systemTemplate: conversationSettings.systemTemplate,
32
34
  baseUrl: languageModelSettings.baseUrl,
33
35
  knowledgeBase,
@@ -36,5 +38,5 @@ function createLanguageModel(assistantConfig, knowledgeBase, telephonyContext) {
36
38
  _1.hangupToolDefinition,
37
39
  _1.transferToolDefinition
38
40
  ]
39
- }, telephonyContext);
41
+ }, voice, telephonyContext);
40
42
  }
@@ -50,15 +50,26 @@ const loadKnowledgeBase_1 = require("./loadKnowledgeBase");
50
50
  const _1 = __importStar(require("."));
51
51
  const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
52
52
  async function handleVoiceRequest(req, res) {
53
- const { ingressNumber, sessionRef, appRef } = req;
54
- logger.verbose("voice request", { ingressNumber, sessionRef, appRef });
53
+ const { ingressNumber, sessionRef, appRef, callDirection } = req;
54
+ logger.verbose("voice request", {
55
+ ingressNumber,
56
+ sessionRef,
57
+ appRef,
58
+ metadata: req.metadata
59
+ });
55
60
  const assistantConfig = (0, loadAssistantConfig_1.loadAssistantConfig)();
56
61
  const knowledgeBase = await (0, loadKnowledgeBase_1.loadKnowledgeBase)();
57
62
  const voice = new _1.VoiceImpl(sessionRef, res);
58
- const vad = new _1.SileroVad();
59
- const languageModel = (0, createLanguageModel_1.createLanguageModel)(assistantConfig, knowledgeBase, {
60
- ingressNumber: req.ingressNumber,
61
- callerNumber: req.callerNumber
63
+ const vad = new _1.SileroVad(assistantConfig.conversationSettings.vad);
64
+ const languageModel = (0, createLanguageModel_1.createLanguageModel)({
65
+ voice,
66
+ assistantConfig,
67
+ knowledgeBase,
68
+ telephonyContext: {
69
+ callDirection,
70
+ ingressNumber: req.ingressNumber,
71
+ callerNumber: req.callerNumber
72
+ }
62
73
  });
63
74
  const autopilot = new _1.default({
64
75
  conversationSettings: assistantConfig.conversationSettings,
@@ -1,25 +1,8 @@
1
+ import { AutopilotContext } from "./types";
1
2
  import { ConversationSettings } from "../assistants";
2
3
  import { LanguageModel } from "../models";
3
4
  import { Voice } from "../voice";
4
- declare const machine: import("xstate").StateMachine<{
5
- sessionRef: string;
6
- languageModel: LanguageModel;
7
- voice: Voice;
8
- firstMessage: string;
9
- goodbyeMessage: string;
10
- transferMessage?: string;
11
- transferPhoneNumber?: string;
12
- systemErrorMessage: string;
13
- idleMessage: string;
14
- idleTimeout: number;
15
- idleTimeoutCount: number;
16
- maxIdleTimeoutCount: number;
17
- speechBuffer: string;
18
- speechResponseStartTime: number;
19
- speechResponseTime: number;
20
- isSpeaking: boolean;
21
- knowledgeBaseSourceUrl?: string;
22
- }, {
5
+ declare const machine: import("xstate").StateMachine<AutopilotContext, {
23
6
  type: "SPEECH_START";
24
7
  } | {
25
8
  type: "SPEECH_END";
@@ -28,7 +11,17 @@ declare const machine: import("xstate").StateMachine<{
28
11
  speech: string;
29
12
  } | {
30
13
  type: "USER_REQUEST_PROCESSED";
31
- }, {}, never, import("xstate").Values<{
14
+ }, {
15
+ [x: string]: import("xstate").ActorRefFromLogic<import("xstate").PromiseActorLogic<void, {
16
+ context: AutopilotContext;
17
+ }, import("xstate").EventObject>> | undefined;
18
+ }, {
19
+ src: "doProcessUserRequest";
20
+ logic: import("xstate").PromiseActorLogic<void, {
21
+ context: AutopilotContext;
22
+ }, import("xstate").EventObject>;
23
+ id: string | undefined;
24
+ }, import("xstate").Values<{
32
25
  greetUser: {
33
26
  type: "greetUser";
34
27
  params: unknown;
@@ -45,10 +38,6 @@ declare const machine: import("xstate").StateMachine<{
45
38
  type: "interruptPlayback";
46
39
  params: unknown;
47
40
  };
48
- processUserRequest: {
49
- type: "processUserRequest";
50
- params: unknown;
51
- };
52
41
  announceIdleTimeout: {
53
42
  type: "announceIdleTimeout";
54
43
  params: unknown;
@@ -90,14 +79,39 @@ declare const machine: import("xstate").StateMachine<{
90
79
  type: "isNotSpeaking";
91
80
  params: unknown;
92
81
  };
93
- }>, "IDLE_TIMEOUT", "hangup" | "greeting" | "idle" | "waitingForUserRequest" | "hackingTimeout" | "updatingSpeech" | "processingUserRequest", string, {
82
+ }>, "IDLE_TIMEOUT", "hangup" | "greeting" | "idle" | "waitingForUserRequest" | "transitioningToIdle" | "updatingSpeech" | "processingUserRequest", string, {
94
83
  conversationSettings: ConversationSettings;
95
84
  languageModel: LanguageModel;
96
85
  voice: Voice;
97
86
  }, import("xstate").NonReducibleUnknown, import("xstate").EventObject, import("xstate").MetaObject, {
98
87
  readonly context: ({ input }: {
99
88
  spawn: {
100
- <TSrc extends never>(logic: TSrc, ...[options]: never): import("xstate").ActorRefFromLogic<never>;
89
+ <TSrc extends "doProcessUserRequest">(logic: TSrc, ...[options]: {
90
+ src: "doProcessUserRequest";
91
+ logic: import("xstate").PromiseActorLogic<void, {
92
+ context: AutopilotContext;
93
+ }, import("xstate").EventObject>;
94
+ id: string | undefined;
95
+ } extends infer T ? T extends {
96
+ src: "doProcessUserRequest";
97
+ logic: import("xstate").PromiseActorLogic<void, {
98
+ context: AutopilotContext;
99
+ }, import("xstate").EventObject>;
100
+ id: string | undefined;
101
+ } ? T extends {
102
+ src: TSrc;
103
+ } ? import("xstate").ConditionalRequired<[options?: ({
104
+ id?: T["id"] | undefined;
105
+ systemId?: string;
106
+ input?: import("xstate").InputFrom<T["logic"]> | undefined;
107
+ syncSnapshot?: boolean;
108
+ } & { [K in import("xstate").RequiredActorOptions<T>]: unknown; }) | undefined], import("xstate").IsNotNever<import("xstate").RequiredActorOptions<T>>> : never : never : never): import("xstate").ActorRefFromLogic<import("xstate").GetConcreteByKey<{
109
+ src: "doProcessUserRequest";
110
+ logic: import("xstate").PromiseActorLogic<void, {
111
+ context: AutopilotContext;
112
+ }, import("xstate").EventObject>;
113
+ id: string | undefined;
114
+ }, "src", TSrc>["logic"]>;
101
115
  <TLogic extends import("xstate").AnyActorLogic>(src: TLogic, options?: {
102
116
  id?: never;
103
117
  systemId?: string;
@@ -110,25 +124,7 @@ declare const machine: import("xstate").StateMachine<{
110
124
  languageModel: LanguageModel;
111
125
  voice: Voice;
112
126
  };
113
- self: import("xstate").ActorRef<import("xstate").MachineSnapshot<{
114
- sessionRef: string;
115
- languageModel: LanguageModel;
116
- voice: Voice;
117
- firstMessage: string;
118
- goodbyeMessage: string;
119
- transferMessage?: string;
120
- transferPhoneNumber?: string;
121
- systemErrorMessage: string;
122
- idleMessage: string;
123
- idleTimeout: number;
124
- idleTimeoutCount: number;
125
- maxIdleTimeoutCount: number;
126
- speechBuffer: string;
127
- speechResponseStartTime: number;
128
- speechResponseTime: number;
129
- isSpeaking: boolean;
130
- knowledgeBaseSourceUrl?: string;
131
- }, {
127
+ self: import("xstate").ActorRef<import("xstate").MachineSnapshot<AutopilotContext, {
132
128
  type: "SPEECH_START";
133
129
  } | {
134
130
  type: "SPEECH_END";
@@ -193,7 +189,7 @@ declare const machine: import("xstate").StateMachine<{
193
189
  readonly type: "idleTimeoutCountExceedsMax";
194
190
  };
195
191
  }, {
196
- readonly target: "hackingTimeout";
192
+ readonly target: "transitioningToIdle";
197
193
  readonly actions: readonly [{
198
194
  readonly type: "increaseIdleTimeoutCount";
199
195
  }, {
@@ -219,7 +215,7 @@ declare const machine: import("xstate").StateMachine<{
219
215
  readonly hangup: {
220
216
  readonly type: "final";
221
217
  };
222
- readonly hackingTimeout: {
218
+ readonly transitioningToIdle: {
223
219
  readonly always: {
224
220
  readonly target: "idle";
225
221
  };
@@ -264,14 +260,47 @@ declare const machine: import("xstate").StateMachine<{
264
260
  readonly target: "waitingForUserRequest";
265
261
  readonly description: "Event from VAD or similar system.";
266
262
  };
267
- readonly USER_REQUEST_PROCESSED: {
263
+ };
264
+ readonly invoke: {
265
+ readonly src: "doProcessUserRequest";
266
+ readonly input: ({ context }: {
267
+ context: AutopilotContext;
268
+ event: {
269
+ type: "SPEECH_START";
270
+ } | {
271
+ type: "SPEECH_END";
272
+ } | {
273
+ type: "SPEECH_RESULT";
274
+ speech: string;
275
+ } | {
276
+ type: "USER_REQUEST_PROCESSED";
277
+ };
278
+ self: import("xstate").ActorRef<import("xstate").MachineSnapshot<AutopilotContext, {
279
+ type: "SPEECH_START";
280
+ } | {
281
+ type: "SPEECH_END";
282
+ } | {
283
+ type: "SPEECH_RESULT";
284
+ speech: string;
285
+ } | {
286
+ type: "USER_REQUEST_PROCESSED";
287
+ }, Record<string, import("xstate").AnyActorRef>, import("xstate").StateValue, string, unknown, any, any>, {
288
+ type: "SPEECH_START";
289
+ } | {
290
+ type: "SPEECH_END";
291
+ } | {
292
+ type: "SPEECH_RESULT";
293
+ speech: string;
294
+ } | {
295
+ type: "USER_REQUEST_PROCESSED";
296
+ }, import("xstate").AnyEventObject>;
297
+ }) => {
298
+ context: AutopilotContext;
299
+ };
300
+ readonly onDone: {
268
301
  readonly target: "idle";
269
- readonly description: "Go back home.";
270
302
  };
271
303
  };
272
- readonly entry: {
273
- readonly type: "processUserRequest";
274
- };
275
304
  };
276
305
  };
277
306
  }>;
@@ -56,55 +56,6 @@ const machine = (0, xstate_1.setup)({
56
56
  });
57
57
  await context.voice.stopSpeech();
58
58
  },
59
- processUserRequest: async ({ context }) => {
60
- logger.verbose("called processUserRequest action", {
61
- speechBuffer: context.speechBuffer
62
- });
63
- // Stop any speech that might be playing
64
- await context.voice.stopSpeech();
65
- const speech = context.speechBuffer.trim();
66
- const languageModel = context.languageModel;
67
- const response = await languageModel.invoke(speech);
68
- const speechResponseTime = Date.now() - context.speechResponseStartTime;
69
- context.speechResponseTime = speechResponseTime;
70
- context.speechResponseStartTime = 0;
71
- logger.verbose("response from language model", {
72
- speechResponseTime
73
- });
74
- try {
75
- if (response.type === "say" && !response.content) {
76
- logger.verbose("call might already be hung up");
77
- (0, xstate_1.raise)({ type: "USER_REQUEST_PROCESSED" });
78
- return;
79
- }
80
- else if (response.type === "hangup") {
81
- const message = context.goodbyeMessage;
82
- await context.voice.say(message);
83
- await context.voice.hangup();
84
- return;
85
- }
86
- else if (response.type === "transfer") {
87
- logger.verbose("transferring call to a number in the pstn", {
88
- phoneNumber: context.transferPhoneNumber
89
- });
90
- const message = context.transferMessage;
91
- await context.voice.say(message);
92
- await context.voice.transfer(context.transferPhoneNumber, {
93
- record: true,
94
- timeout: 30
95
- });
96
- return;
97
- }
98
- await context.voice.say(response.content);
99
- }
100
- catch (error) {
101
- logger.error("error processing user request", {
102
- error
103
- });
104
- await context.voice.say(context.systemErrorMessage);
105
- }
106
- (0, xstate_1.raise)({ type: "USER_REQUEST_PROCESSED" });
107
- },
108
59
  announceIdleTimeout: async ({ context }) => {
109
60
  logger.verbose("called announceIdleTimeout action", {
110
61
  idleMessage: context.idleMessage
@@ -174,6 +125,56 @@ const machine = (0, xstate_1.setup)({
174
125
  IDLE_TIMEOUT: ({ context }) => {
175
126
  return context.idleTimeout;
176
127
  }
128
+ },
129
+ actors: {
130
+ doProcessUserRequest: (0, xstate_1.fromPromise)(async ({ input }) => {
131
+ const { context } = input;
132
+ logger.verbose("called processUserRequest action", {
133
+ speechBuffer: context.speechBuffer
134
+ });
135
+ // Stop any speech that might be playing
136
+ await context.voice.stopSpeech();
137
+ const speech = context.speechBuffer.trim();
138
+ const languageModel = context.languageModel;
139
+ const response = await languageModel.invoke(speech);
140
+ const speechResponseTime = Date.now() - context.speechResponseStartTime;
141
+ context.speechResponseTime = speechResponseTime;
142
+ context.speechResponseStartTime = 0;
143
+ logger.verbose("response from language model", {
144
+ speechResponseTime
145
+ });
146
+ try {
147
+ if (response.type === "say" && !response.content) {
148
+ logger.verbose("call might already be hung up");
149
+ return;
150
+ }
151
+ else if (response.type === "hangup") {
152
+ const message = context.goodbyeMessage;
153
+ await context.voice.say(message);
154
+ await context.voice.hangup();
155
+ return;
156
+ }
157
+ else if (response.type === "transfer") {
158
+ logger.verbose("transferring call to a number in the pstn", {
159
+ phoneNumber: context.transferPhoneNumber
160
+ });
161
+ const message = context.transferMessage;
162
+ await context.voice.say(message);
163
+ await context.voice.transfer(context.transferPhoneNumber, {
164
+ record: true,
165
+ timeout: 30
166
+ });
167
+ return;
168
+ }
169
+ await context.voice.say(response.content);
170
+ }
171
+ catch (error) {
172
+ logger.error("error processing user request", {
173
+ error
174
+ });
175
+ await context.voice.say(context.systemErrorMessage);
176
+ }
177
+ })
177
178
  }
178
179
  }).createMachine({
179
180
  context: ({ input }) => ({
@@ -224,7 +225,7 @@ const machine = (0, xstate_1.setup)({
224
225
  }
225
226
  },
226
227
  {
227
- target: "hackingTimeout",
228
+ target: "transitioningToIdle",
228
229
  actions: [
229
230
  {
230
231
  type: "increaseIdleTimeoutCount"
@@ -259,7 +260,7 @@ const machine = (0, xstate_1.setup)({
259
260
  hangup: {
260
261
  type: "final"
261
262
  },
262
- hackingTimeout: {
263
+ transitioningToIdle: {
263
264
  always: {
264
265
  target: "idle"
265
266
  }
@@ -309,14 +310,14 @@ const machine = (0, xstate_1.setup)({
309
310
  SPEECH_START: {
310
311
  target: "waitingForUserRequest",
311
312
  description: "Event from VAD or similar system."
312
- },
313
- USER_REQUEST_PROCESSED: {
314
- target: "idle",
315
- description: "Go back home."
316
313
  }
317
314
  },
318
- entry: {
319
- type: "processUserRequest"
315
+ invoke: {
316
+ src: "doProcessUserRequest",
317
+ input: ({ context }) => ({ context }),
318
+ onDone: {
319
+ target: "idle"
320
+ }
320
321
  }
321
322
  }
322
323
  }
@@ -0,0 +1,22 @@
1
+ import { LanguageModel } from "../models";
2
+ import { Voice } from "../voice";
3
+ type AutopilotContext = {
4
+ sessionRef: string;
5
+ languageModel: LanguageModel;
6
+ voice: Voice;
7
+ firstMessage: string;
8
+ goodbyeMessage: string;
9
+ transferMessage?: string;
10
+ transferPhoneNumber?: string;
11
+ systemErrorMessage: string;
12
+ idleMessage: string;
13
+ idleTimeout: number;
14
+ idleTimeoutCount: number;
15
+ maxIdleTimeoutCount: number;
16
+ speechBuffer: string;
17
+ speechResponseStartTime: number;
18
+ speechResponseTime: number;
19
+ isSpeaking: boolean;
20
+ knowledgeBaseSourceUrl?: string;
21
+ };
22
+ export { AutopilotContext };
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -1,9 +1,11 @@
1
1
  import { InvocationResult, LanguageModel, LanguageModelParams, TelephonyContext } from "./types";
2
+ import { Voice } from "../voice";
2
3
  declare abstract class AbstractLanguageModel implements LanguageModel {
3
4
  private chain;
4
5
  private chatHistory;
5
6
  private toolsCatalog;
6
- constructor(params: LanguageModelParams, telephonyContext: TelephonyContext);
7
+ private voice;
8
+ constructor(params: LanguageModelParams, voice: Voice, telephonyContext: TelephonyContext);
7
9
  invoke(text: string): Promise<InvocationResult>;
8
10
  }
9
11
  export { AbstractLanguageModel };
@@ -23,24 +23,33 @@ const logger_1 = require("@fonoster/logger");
23
23
  const chatHistory_1 = require("./chatHistory");
24
24
  const createChain_1 = require("./createChain");
25
25
  const createPromptTemplate_1 = require("./createPromptTemplate");
26
+ const toolInvocation_1 = require("./toolInvocation");
26
27
  const tools_1 = require("../tools");
27
28
  const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
28
29
  class AbstractLanguageModel {
29
- constructor(params, telephonyContext) {
30
- const { model, systemTemplate, knowledgeBase, tools } = params;
31
- const promptTemplate = (0, createPromptTemplate_1.createPromptTemplate)(systemTemplate, telephonyContext);
30
+ constructor(params, voice, telephonyContext) {
31
+ const { model, firstMessage, systemTemplate, knowledgeBase, tools } = params;
32
32
  this.chatHistory = (0, chatHistory_1.createChatHistory)();
33
33
  this.toolsCatalog = new tools_1.ToolsCatalog(tools);
34
+ this.voice = voice;
35
+ const promptTemplate = (0, createPromptTemplate_1.createPromptTemplate)({
36
+ firstMessage,
37
+ systemTemplate,
38
+ telephonyContext
39
+ });
34
40
  this.chain = (0, createChain_1.createChain)(model, knowledgeBase, promptTemplate, this.chatHistory);
35
41
  }
36
42
  async invoke(text) {
37
43
  const { chain, chatHistory, toolsCatalog } = this;
38
44
  const response = (await chain.invoke({ text }));
45
+ let firstInvocation = true;
39
46
  if (response.additional_kwargs?.tool_calls) {
40
47
  // eslint-disable-next-line no-loops/no-loops
41
48
  for (const toolCall of response.additional_kwargs.tool_calls) {
42
49
  const { arguments: args, name } = toolCall.function;
43
- logger.verbose(`invoking tool: ${name} with args: ${args}`);
50
+ logger.verbose(`invoking tool: ${name} with args: ${args}`, {
51
+ firstInvocation
52
+ });
44
53
  switch (name) {
45
54
  case "hangup":
46
55
  await chatHistory.addAIMessage("tool result: call hangup initiated");
@@ -49,15 +58,15 @@ class AbstractLanguageModel {
49
58
  await chatHistory.addAIMessage("tool result: call transfer initiated");
50
59
  return { type: "transfer" };
51
60
  default:
52
- try {
53
- const toolResult = await toolsCatalog.invokeTool(name, JSON.parse(args));
54
- logger.verbose("tool result: ", toolResult);
55
- await chatHistory.addAIMessage(`tool result: ${toolResult.result}`);
56
- }
57
- catch (error) {
58
- logger.error(`tool error: ${error.message}`);
59
- await chatHistory.addAIMessage(`tool error: ${error.message}`);
60
- }
61
+ await (0, toolInvocation_1.toolInvocation)({
62
+ args,
63
+ chatHistory,
64
+ firstInvocation,
65
+ toolName: name,
66
+ toolsCatalog,
67
+ voice: this.voice
68
+ });
69
+ firstInvocation = false;
61
70
  }
62
71
  }
63
72
  const finalResponse = (await chain.invoke({
@@ -4,7 +4,8 @@ import { OllamaParams } from "./ollama";
4
4
  import { OpenAIParams } from "./openai";
5
5
  import { BaseModelParams, TelephonyContext } from "./types";
6
6
  import { LANGUAGE_MODEL_PROVIDER } from "../types";
7
- type LanguageModelConstructor<T extends BaseModelParams = BaseModelParams> = new (options: T, telephonyContext: TelephonyContext) => AbstractLanguageModel;
7
+ import { Voice } from "../voice";
8
+ type LanguageModelConstructor<T extends BaseModelParams = BaseModelParams> = new (options: T, voice: Voice, telephonyContext: TelephonyContext) => AbstractLanguageModel;
8
9
  type LanguageModelConfigMap = {
9
10
  [LANGUAGE_MODEL_PROVIDER.OPENAI]: OpenAIParams;
10
11
  [LANGUAGE_MODEL_PROVIDER.GROQ]: GroqParams;
@@ -13,6 +14,6 @@ type LanguageModelConfigMap = {
13
14
  declare class LanguageModelFactory {
14
15
  private static languageModels;
15
16
  static registerLanguageModel<T extends BaseModelParams>(name: string, ctor: LanguageModelConstructor<T>): void;
16
- static getLanguageModel<T extends keyof LanguageModelConfigMap>(languageModel: T, config: LanguageModelConfigMap[T], telephonyContext: TelephonyContext): AbstractLanguageModel;
17
+ static getLanguageModel<T extends keyof LanguageModelConfigMap>(languageModel: T, config: LanguageModelConfigMap[T], voice: Voice, telephonyContext: TelephonyContext): AbstractLanguageModel;
17
18
  }
18
19
  export { LanguageModelFactory };
@@ -30,12 +30,12 @@ class LanguageModelFactory {
30
30
  logger.verbose("registering llm provider", { name });
31
31
  this.languageModels.set(name, ctor);
32
32
  }
33
- static getLanguageModel(languageModel, config, telephonyContext) {
33
+ static getLanguageModel(languageModel, config, voice, telephonyContext) {
34
34
  const LanguageModelConstructor = this.languageModels.get(`llm.${languageModel}`);
35
35
  if (!LanguageModelConstructor) {
36
36
  throw new Error(`Language model ${languageModel} not found`);
37
37
  }
38
- return new LanguageModelConstructor(config, telephonyContext);
38
+ return new LanguageModelConstructor(config, voice, telephonyContext);
39
39
  }
40
40
  }
41
41
  exports.LanguageModelFactory = LanguageModelFactory;
@@ -1,3 +1,7 @@
1
1
  import { ChatPromptTemplate } from "@langchain/core/prompts";
2
2
  import { TelephonyContext } from "./types";
3
- export declare function createPromptTemplate(systemTemplate: string, telephonyContext: TelephonyContext): ChatPromptTemplate<any, any>;
3
+ export declare function createPromptTemplate(params: {
4
+ firstMessage: string;
5
+ systemTemplate: string;
6
+ telephonyContext: TelephonyContext;
7
+ }): ChatPromptTemplate<any, any>;
@@ -20,14 +20,17 @@ exports.createPromptTemplate = createPromptTemplate;
20
20
  * limitations under the License.
21
21
  */
22
22
  const prompts_1 = require("@langchain/core/prompts");
23
- function createPromptTemplate(systemTemplate, telephonyContext) {
23
+ function createPromptTemplate(params) {
24
+ const { firstMessage, systemTemplate, telephonyContext } = params;
24
25
  return prompts_1.ChatPromptTemplate.fromMessages([
25
26
  new prompts_1.MessagesPlaceholder("history"),
27
+ prompts_1.SystemMessagePromptTemplate.fromTemplate(`firstMessage: ${firstMessage}`),
26
28
  prompts_1.SystemMessagePromptTemplate.fromTemplate(systemTemplate),
27
29
  prompts_1.SystemMessagePromptTemplate.fromTemplate("{context}"),
28
30
  prompts_1.SystemMessagePromptTemplate.fromTemplate(`callReceivedAt:${new Date().toISOString()}
29
31
  ingressNumber:${telephonyContext.ingressNumber}
30
- callerNumber:${telephonyContext.callerNumber}`),
32
+ callerNumber:${telephonyContext.callerNumber}
33
+ callDirection:${telephonyContext.callDirection}`),
31
34
  prompts_1.HumanMessagePromptTemplate.fromTemplate("{input}")
32
35
  ]);
33
36
  }
@@ -1,8 +1,9 @@
1
1
  import { GroqParams } from "./types";
2
+ import { Voice } from "../../voice";
2
3
  import { AbstractLanguageModel } from "../AbstractLanguageModel";
3
4
  import { TelephonyContext } from "../types";
4
5
  declare const LANGUAGE_MODEL_NAME = "llm.groq";
5
6
  declare class Groq extends AbstractLanguageModel {
6
- constructor(params: GroqParams, telephonyContext: TelephonyContext);
7
+ constructor(params: GroqParams, voice: Voice, telephonyContext: TelephonyContext);
7
8
  }
8
9
  export { Groq, LANGUAGE_MODEL_NAME };
@@ -7,7 +7,7 @@ const AbstractLanguageModel_1 = require("../AbstractLanguageModel");
7
7
  const LANGUAGE_MODEL_NAME = "llm.groq";
8
8
  exports.LANGUAGE_MODEL_NAME = LANGUAGE_MODEL_NAME;
9
9
  class Groq extends AbstractLanguageModel_1.AbstractLanguageModel {
10
- constructor(params, telephonyContext) {
10
+ constructor(params, voice, telephonyContext) {
11
11
  const model = new groq_1.ChatGroq({
12
12
  ...params
13
13
  }).bind({
@@ -16,7 +16,7 @@ class Groq extends AbstractLanguageModel_1.AbstractLanguageModel {
16
16
  super({
17
17
  ...params,
18
18
  model
19
- }, telephonyContext);
19
+ }, voice, telephonyContext);
20
20
  }
21
21
  }
22
22
  exports.Groq = Groq;
@@ -1,8 +1,9 @@
1
1
  import { OllamaParams } from "./types";
2
+ import { Voice } from "../../voice";
2
3
  import { AbstractLanguageModel } from "../AbstractLanguageModel";
3
4
  import { TelephonyContext } from "../types";
4
5
  declare const LANGUAGE_MODEL_NAME = "llm.ollama";
5
6
  declare class Ollama extends AbstractLanguageModel {
6
- constructor(params: OllamaParams, telephonyContext: TelephonyContext);
7
+ constructor(params: OllamaParams, voice: Voice, telephonyContext: TelephonyContext);
7
8
  }
8
9
  export { LANGUAGE_MODEL_NAME, Ollama };
@@ -7,7 +7,7 @@ const AbstractLanguageModel_1 = require("../AbstractLanguageModel");
7
7
  const LANGUAGE_MODEL_NAME = "llm.ollama";
8
8
  exports.LANGUAGE_MODEL_NAME = LANGUAGE_MODEL_NAME;
9
9
  class Ollama extends AbstractLanguageModel_1.AbstractLanguageModel {
10
- constructor(params, telephonyContext) {
10
+ constructor(params, voice, telephonyContext) {
11
11
  const model = new ollama_1.ChatOllama({
12
12
  ...params
13
13
  }).bind({
@@ -16,7 +16,7 @@ class Ollama extends AbstractLanguageModel_1.AbstractLanguageModel {
16
16
  super({
17
17
  ...params,
18
18
  model
19
- }, telephonyContext);
19
+ }, voice, telephonyContext);
20
20
  }
21
21
  }
22
22
  exports.Ollama = Ollama;
@@ -1,8 +1,9 @@
1
1
  import { OpenAIParams } from "./types";
2
+ import { Voice } from "../../voice";
2
3
  import { AbstractLanguageModel } from "../AbstractLanguageModel";
3
4
  import { TelephonyContext } from "../types";
4
5
  declare const LANGUAGE_MODEL_NAME = "llm.openai";
5
6
  declare class OpenAI extends AbstractLanguageModel {
6
- constructor(params: OpenAIParams, telephonyContext: TelephonyContext);
7
+ constructor(params: OpenAIParams, voice: Voice, telephonyContext: TelephonyContext);
7
8
  }
8
9
  export { LANGUAGE_MODEL_NAME, OpenAI };
@@ -7,7 +7,7 @@ const AbstractLanguageModel_1 = require("../AbstractLanguageModel");
7
7
  const LANGUAGE_MODEL_NAME = "llm.openai";
8
8
  exports.LANGUAGE_MODEL_NAME = LANGUAGE_MODEL_NAME;
9
9
  class OpenAI extends AbstractLanguageModel_1.AbstractLanguageModel {
10
- constructor(params, telephonyContext) {
10
+ constructor(params, voice, telephonyContext) {
11
11
  const model = new openai_1.ChatOpenAI({
12
12
  ...params
13
13
  }).bind({
@@ -16,7 +16,7 @@ class OpenAI extends AbstractLanguageModel_1.AbstractLanguageModel {
16
16
  super({
17
17
  ...params,
18
18
  model
19
- }, telephonyContext);
19
+ }, voice, telephonyContext);
20
20
  }
21
21
  }
22
22
  exports.OpenAI = OpenAI;
@@ -0,0 +1,12 @@
1
+ import { createChatHistory } from "./chatHistory";
2
+ import { ToolsCatalog } from "../tools";
3
+ import { Voice } from "../voice";
4
+ declare function toolInvocation(params: {
5
+ toolName: string;
6
+ chatHistory: ReturnType<typeof createChatHistory>;
7
+ toolsCatalog: ToolsCatalog;
8
+ firstInvocation: boolean;
9
+ args: string;
10
+ voice: Voice;
11
+ }): Promise<void>;
12
+ export { toolInvocation };
@@ -0,0 +1,42 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.toolInvocation = toolInvocation;
4
+ /*
5
+ * Copyright (C) 2024 by Fonoster Inc (https://fonoster.com)
6
+ * http://github.com/fonoster/fonoster
7
+ *
8
+ * This file is part of Fonoster
9
+ *
10
+ * Licensed under the MIT License (the "License");
11
+ * you may not use this file except in compliance with
12
+ * the License. You may obtain a copy of the License at
13
+ *
14
+ * https://opensource.org/licenses/MIT
15
+ *
16
+ * Unless required by applicable law or agreed to in writing, software
17
+ * distributed under the License is distributed on an "AS IS" BASIS,
18
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19
+ * See the License for the specific language governing permissions and
20
+ * limitations under the License.
21
+ */
22
+ const logger_1 = require("@fonoster/logger");
23
+ const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
24
+ async function toolInvocation(params) {
25
+ const { firstInvocation, voice, args, toolName, chatHistory, toolsCatalog } = params;
26
+ try {
27
+ if (firstInvocation) {
28
+ const tool = toolsCatalog.getTool(toolName);
29
+ const message = tool?.requestStartMessage ?? "";
30
+ if (message) {
31
+ await voice.say(message);
32
+ }
33
+ }
34
+ const toolResult = await toolsCatalog.invokeTool(toolName, JSON.parse(args));
35
+ logger.verbose("tool result: ", toolResult);
36
+ await chatHistory.addAIMessage(`tool result: ${toolResult.result}`);
37
+ }
38
+ catch (error) {
39
+ logger.error(`tool error: ${error.message}`);
40
+ await chatHistory.addAIMessage(`tool error: ${error.message}`);
41
+ }
42
+ }
@@ -1,3 +1,4 @@
1
+ import { CallDirection } from "@fonoster/types";
1
2
  import { BaseChatModel } from "@langchain/core/language_models/chat_models";
2
3
  import { KnowledgeBase } from "../knowledge";
3
4
  import { Tool } from "../tools/type";
@@ -5,6 +6,7 @@ type LanguageModel = {
5
6
  invoke: (text: string) => Promise<InvocationResult>;
6
7
  };
7
8
  type BaseModelParams = {
9
+ firstMessage: string;
8
10
  systemTemplate: string;
9
11
  knowledgeBase: KnowledgeBase;
10
12
  tools: Tool[];
@@ -18,6 +20,7 @@ type InvocationResult = {
18
20
  content?: string;
19
21
  };
20
22
  type TelephonyContext = {
23
+ callDirection: CallDirection;
21
24
  ingressNumber: string;
22
25
  callerNumber: string;
23
26
  };
@@ -48,6 +48,7 @@ declare const toolSchema: z.ZodObject<{
48
48
  }>;
49
49
  required?: string[] | undefined;
50
50
  }>;
51
+ requestStartMessage: z.ZodOptional<z.ZodString>;
51
52
  operation: z.ZodEffects<z.ZodObject<{
52
53
  type: z.ZodNativeEnum<typeof AllowedOperations>;
53
54
  url: z.ZodOptional<z.ZodString>;
@@ -92,6 +93,7 @@ declare const toolSchema: z.ZodObject<{
92
93
  waitForResponse?: boolean | undefined;
93
94
  headers?: Record<string, string> | undefined;
94
95
  };
96
+ requestStartMessage?: string | undefined;
95
97
  }, {
96
98
  name: string;
97
99
  description: string;
@@ -110,5 +112,6 @@ declare const toolSchema: z.ZodObject<{
110
112
  waitForResponse?: boolean | undefined;
111
113
  headers?: Record<string, string> | undefined;
112
114
  };
115
+ requestStartMessage?: string | undefined;
113
116
  }>;
114
117
  export { AllowedOperations, toolSchema };
@@ -45,6 +45,7 @@ const toolSchema = zod_1.z.object({
45
45
  properties: zod_1.z.record(propertySchema),
46
46
  required: zod_1.z.array(zod_1.z.string()).optional()
47
47
  }),
48
+ requestStartMessage: zod_1.z.string().optional(),
48
49
  operation: zod_1.z
49
50
  .object({
50
51
  type: zod_1.z.nativeEnum(AllowedOperations),
@@ -5,6 +5,7 @@ declare class ToolsCatalog {
5
5
  invokeTool(toolName: string, args: Record<string, unknown>): Promise<{
6
6
  result: string;
7
7
  }>;
8
+ getTool(toolName: string): Tool | undefined;
8
9
  addTool(toolDef: Tool): void;
9
10
  listTools(): Tool[];
10
11
  }
@@ -43,6 +43,9 @@ class ToolsCatalog {
43
43
  body: args
44
44
  });
45
45
  }
46
+ getTool(toolName) {
47
+ return this.tools.get(toolName);
48
+ }
46
49
  addTool(toolDef) {
47
50
  this.tools.set(toolDef.name, toolDef);
48
51
  }
@@ -19,18 +19,24 @@ exports.sendRequest = sendRequest;
19
19
  * See the License for the specific language governing permissions and
20
20
  * limitations under the License.
21
21
  */
22
+ const logger_1 = require("@fonoster/logger");
22
23
  const zod_1 = require("zod");
23
24
  const ToolSchema_1 = require("./ToolSchema");
24
25
  const responseSchema = zod_1.z.object({
25
26
  result: zod_1.z.string()
26
27
  });
28
+ const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
27
29
  async function sendRequest(input) {
28
30
  const { url, method, body, headers, waitForResponse } = input;
29
31
  const options = {
30
32
  method,
31
- headers,
33
+ headers: {
34
+ "Content-Type": "application/json",
35
+ ...headers
36
+ },
32
37
  body: method === ToolSchema_1.AllowedOperations.POST ? JSON.stringify(body) : undefined
33
38
  };
39
+ logger.verbose(`sending request to ${url}`, { body, method });
34
40
  if (waitForResponse && method === ToolSchema_1.AllowedOperations.POST) {
35
41
  setTimeout(() => fetch(url, options), 0);
36
42
  return { result: "request sent" };
@@ -1,7 +1,17 @@
1
1
  import { Vad } from "./types";
2
2
  declare class SileroVad implements Vad {
3
3
  private vad;
4
- constructor();
4
+ private params;
5
+ constructor(params: {
6
+ pathToModel?: string;
7
+ activationThreshold: number;
8
+ deactivationThreshold: number;
9
+ debounceFrames: number;
10
+ });
11
+ pathToModel?: string;
12
+ activationThreshold: number;
13
+ deactivationThreshold: number;
14
+ debounceFrames: number;
5
15
  private init;
6
16
  processChunk(data: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void): void;
7
17
  }
@@ -20,14 +20,18 @@ exports.SileroVad = void 0;
20
20
  * See the License for the specific language governing permissions and
21
21
  * limitations under the License.
22
22
  */
23
+ const logger_1 = require("@fonoster/logger");
23
24
  const makeVad_1 = require("./makeVad");
25
+ const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
24
26
  class SileroVad {
25
- constructor() {
27
+ constructor(params) {
28
+ logger.verbose("starting instance of silero vad", { ...params });
29
+ this.params = params;
26
30
  this.init();
27
31
  }
28
32
  async init() {
29
33
  // FIXME: It feels strange to do this in the constructor
30
- this.vad = await (0, makeVad_1.makeVad)();
34
+ this.vad = await (0, makeVad_1.makeVad)(this.params);
31
35
  }
32
36
  processChunk(data, callback) {
33
37
  if (!this.vad) {
@@ -1,2 +1,7 @@
1
- declare function makeVad(pathToModel?: string): Promise<(chunk: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void) => Promise<void>>;
1
+ declare function makeVad(params: {
2
+ pathToModel?: string;
3
+ activationThreshold: number;
4
+ deactivationThreshold: number;
5
+ debounceFrames: number;
6
+ }): Promise<(chunk: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void) => Promise<void>>;
2
7
  export { makeVad };
@@ -50,11 +50,14 @@ const chunkToFloat32Array_1 = require("./chunkToFloat32Array");
50
50
  const SileroVadModel_1 = require("./SileroVadModel");
51
51
  const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
52
52
  const BUFFER_SIZE = 16000;
53
- async function makeVad(pathToModel) {
53
+ async function makeVad(params) {
54
+ const { pathToModel, activationThreshold, deactivationThreshold, debounceFrames } = params;
54
55
  const effectivePath = pathToModel || (0, path_1.join)(__dirname, "..", "..", "silero_vad.onnx");
55
56
  const silero = await SileroVadModel_1.SileroVadModel.new(ort, effectivePath);
56
57
  let audioBuffer = [];
57
58
  let isSpeechActive = false;
59
+ let consecutiveSpeechFrames = 0;
60
+ let consecutiveNonSpeechFrames = 0;
58
61
  return async function process(chunk, callback) {
59
62
  const float32Array = (0, chunkToFloat32Array_1.chunkToFloat32Array)(chunk);
60
63
  audioBuffer.push(...float32Array);
@@ -65,20 +68,24 @@ async function makeVad(pathToModel) {
65
68
  const remainingBuffer = buffer.slice(BUFFER_SIZE);
66
69
  const result = await silero.process(new Float32Array(audioFrame));
67
70
  logger.silly("last vad result", { ...result });
68
- if (result.isSpeech > 0.5) {
69
- if (!isSpeechActive) {
71
+ if (result.isSpeech > activationThreshold) {
72
+ consecutiveNonSpeechFrames = 0; // Reset non-speech counter
73
+ consecutiveSpeechFrames++;
74
+ if (consecutiveSpeechFrames >= debounceFrames && !isSpeechActive) {
70
75
  isSpeechActive = true;
71
76
  callback("SPEECH_START");
72
- return processBuffer(remainingBuffer);
73
77
  }
74
78
  }
75
- else if (isSpeechActive) {
76
- isSpeechActive = false;
77
- callback("SPEECH_END");
78
- // WARNING: I'm unsure if this has any effect on the model
79
- // but it seems to work fine to ensure the model works optimally
80
- silero.resetState();
81
- return processBuffer(remainingBuffer);
79
+ else {
80
+ consecutiveSpeechFrames = 0; // Reset speech counter
81
+ consecutiveNonSpeechFrames++;
82
+ if (consecutiveNonSpeechFrames >= debounceFrames &&
83
+ isSpeechActive &&
84
+ result.isSpeech < deactivationThreshold) {
85
+ isSpeechActive = false;
86
+ callback("SPEECH_END");
87
+ silero.resetState(); // Reset VAD state after speech ends
88
+ }
82
89
  }
83
90
  return processBuffer(remainingBuffer);
84
91
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fonoster/autopilot",
3
- "version": "0.7.17",
3
+ "version": "0.7.19",
4
4
  "description": "Voice AI for the Fonoster platform",
5
5
  "author": "Pedro Sanders <psanders@fonoster.com>",
6
6
  "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -35,10 +35,10 @@
35
35
  "url": "https://github.com/fonoster/fonoster/issues"
36
36
  },
37
37
  "dependencies": {
38
- "@fonoster/common": "^0.7.16",
39
- "@fonoster/logger": "^0.7.16",
40
- "@fonoster/types": "^0.7.16",
41
- "@fonoster/voice": "^0.7.16",
38
+ "@fonoster/common": "^0.7.18",
39
+ "@fonoster/logger": "^0.7.18",
40
+ "@fonoster/types": "^0.7.18",
41
+ "@fonoster/voice": "^0.7.18",
42
42
  "@langchain/community": "^0.2.31",
43
43
  "@langchain/core": "^0.2.32",
44
44
  "@langchain/groq": "^0.0.17",
@@ -56,5 +56,5 @@
56
56
  "devDependencies": {
57
57
  "typescript": "^5.5.4"
58
58
  },
59
- "gitHead": "4408dbb6ca8aafe086ac86ebd6b5daa255e8c99f"
59
+ "gitHead": "5250aa76f6c4b72a3b26beabe71ea7a7c227d7c1"
60
60
  }