@fonoster/autopilot 0.7.18 → 0.7.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,11 +32,33 @@ declare const conversationSettingsSchema: z.ZodObject<{
32
32
  timeout: number;
33
33
  maxTimeoutCount: number;
34
34
  }>>>;
35
+ vad: z.ZodObject<{
36
+ pathToModel: z.ZodOptional<z.ZodString>;
37
+ activationThreshold: z.ZodNumber;
38
+ deactivationThreshold: z.ZodNumber;
39
+ debounceFrames: z.ZodNumber;
40
+ }, "strip", z.ZodTypeAny, {
41
+ activationThreshold: number;
42
+ deactivationThreshold: number;
43
+ debounceFrames: number;
44
+ pathToModel?: string | undefined;
45
+ }, {
46
+ activationThreshold: number;
47
+ deactivationThreshold: number;
48
+ debounceFrames: number;
49
+ pathToModel?: string | undefined;
50
+ }>;
35
51
  }, "strip", z.ZodTypeAny, {
36
52
  firstMessage: string;
37
53
  systemTemplate: string;
38
54
  goodbyeMessage: string;
39
55
  systemErrorMessage: string;
56
+ vad: {
57
+ activationThreshold: number;
58
+ deactivationThreshold: number;
59
+ debounceFrames: number;
60
+ pathToModel?: string | undefined;
61
+ };
40
62
  initialDtmf?: string | null | undefined;
41
63
  transferOptions?: {
42
64
  message: string;
@@ -53,6 +75,12 @@ declare const conversationSettingsSchema: z.ZodObject<{
53
75
  systemTemplate: string;
54
76
  goodbyeMessage: string;
55
77
  systemErrorMessage: string;
78
+ vad: {
79
+ activationThreshold: number;
80
+ deactivationThreshold: number;
81
+ debounceFrames: number;
82
+ pathToModel?: string | undefined;
83
+ };
56
84
  initialDtmf?: string | null | undefined;
57
85
  transferOptions?: {
58
86
  message: string;
@@ -293,11 +321,33 @@ declare const assistantSchema: z.ZodObject<{
293
321
  timeout: number;
294
322
  maxTimeoutCount: number;
295
323
  }>>>;
324
+ vad: z.ZodObject<{
325
+ pathToModel: z.ZodOptional<z.ZodString>;
326
+ activationThreshold: z.ZodNumber;
327
+ deactivationThreshold: z.ZodNumber;
328
+ debounceFrames: z.ZodNumber;
329
+ }, "strip", z.ZodTypeAny, {
330
+ activationThreshold: number;
331
+ deactivationThreshold: number;
332
+ debounceFrames: number;
333
+ pathToModel?: string | undefined;
334
+ }, {
335
+ activationThreshold: number;
336
+ deactivationThreshold: number;
337
+ debounceFrames: number;
338
+ pathToModel?: string | undefined;
339
+ }>;
296
340
  }, "strip", z.ZodTypeAny, {
297
341
  firstMessage: string;
298
342
  systemTemplate: string;
299
343
  goodbyeMessage: string;
300
344
  systemErrorMessage: string;
345
+ vad: {
346
+ activationThreshold: number;
347
+ deactivationThreshold: number;
348
+ debounceFrames: number;
349
+ pathToModel?: string | undefined;
350
+ };
301
351
  initialDtmf?: string | null | undefined;
302
352
  transferOptions?: {
303
353
  message: string;
@@ -314,6 +364,12 @@ declare const assistantSchema: z.ZodObject<{
314
364
  systemTemplate: string;
315
365
  goodbyeMessage: string;
316
366
  systemErrorMessage: string;
367
+ vad: {
368
+ activationThreshold: number;
369
+ deactivationThreshold: number;
370
+ debounceFrames: number;
371
+ pathToModel?: string | undefined;
372
+ };
317
373
  initialDtmf?: string | null | undefined;
318
374
  transferOptions?: {
319
375
  message: string;
@@ -527,6 +583,12 @@ declare const assistantSchema: z.ZodObject<{
527
583
  systemTemplate: string;
528
584
  goodbyeMessage: string;
529
585
  systemErrorMessage: string;
586
+ vad: {
587
+ activationThreshold: number;
588
+ deactivationThreshold: number;
589
+ debounceFrames: number;
590
+ pathToModel?: string | undefined;
591
+ };
530
592
  initialDtmf?: string | null | undefined;
531
593
  transferOptions?: {
532
594
  message: string;
@@ -578,6 +640,12 @@ declare const assistantSchema: z.ZodObject<{
578
640
  systemTemplate: string;
579
641
  goodbyeMessage: string;
580
642
  systemErrorMessage: string;
643
+ vad: {
644
+ activationThreshold: number;
645
+ deactivationThreshold: number;
646
+ debounceFrames: number;
647
+ pathToModel?: string | undefined;
648
+ };
581
649
  initialDtmf?: string | null | undefined;
582
650
  transferOptions?: {
583
651
  message: string;
@@ -43,7 +43,13 @@ const conversationSettingsSchema = zod_1.z.object({
43
43
  maxTimeoutCount: zod_1.z.number()
44
44
  })
45
45
  .optional()
46
- .nullable()
46
+ .nullable(),
47
+ vad: zod_1.z.object({
48
+ pathToModel: zod_1.z.string().optional(),
49
+ activationThreshold: zod_1.z.number(),
50
+ deactivationThreshold: zod_1.z.number(),
51
+ debounceFrames: zod_1.z.number()
52
+ })
47
53
  });
48
54
  exports.conversationSettingsSchema = conversationSettingsSchema;
49
55
  const languageModelConfigSchema = zod_1.z.object({
@@ -60,7 +60,7 @@ async function handleVoiceRequest(req, res) {
60
60
  const assistantConfig = (0, loadAssistantConfig_1.loadAssistantConfig)();
61
61
  const knowledgeBase = await (0, loadKnowledgeBase_1.loadKnowledgeBase)();
62
62
  const voice = new _1.VoiceImpl(sessionRef, res);
63
- const vad = new _1.SileroVad();
63
+ const vad = new _1.SileroVad(assistantConfig.conversationSettings.vad);
64
64
  const languageModel = (0, createLanguageModel_1.createLanguageModel)({
65
65
  voice,
66
66
  assistantConfig,
@@ -1,7 +1,17 @@
1
1
  import { Vad } from "./types";
2
2
  declare class SileroVad implements Vad {
3
3
  private vad;
4
- constructor();
4
+ private params;
5
+ constructor(params: {
6
+ pathToModel?: string;
7
+ activationThreshold: number;
8
+ deactivationThreshold: number;
9
+ debounceFrames: number;
10
+ });
11
+ pathToModel?: string;
12
+ activationThreshold: number;
13
+ deactivationThreshold: number;
14
+ debounceFrames: number;
5
15
  private init;
6
16
  processChunk(data: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void): void;
7
17
  }
@@ -20,14 +20,18 @@ exports.SileroVad = void 0;
20
20
  * See the License for the specific language governing permissions and
21
21
  * limitations under the License.
22
22
  */
23
+ const logger_1 = require("@fonoster/logger");
23
24
  const makeVad_1 = require("./makeVad");
25
+ const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
24
26
  class SileroVad {
25
- constructor() {
27
+ constructor(params) {
28
+ logger.verbose("starting instance of silero vad", { ...params });
29
+ this.params = params;
26
30
  this.init();
27
31
  }
28
32
  async init() {
29
33
  // FIXME: It feels strange to do this in the constructor
30
- this.vad = await (0, makeVad_1.makeVad)();
34
+ this.vad = await (0, makeVad_1.makeVad)(this.params);
31
35
  }
32
36
  processChunk(data, callback) {
33
37
  if (!this.vad) {
@@ -1,2 +1,7 @@
1
- declare function makeVad(pathToModel?: string): Promise<(chunk: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void) => Promise<void>>;
1
+ declare function makeVad(params: {
2
+ pathToModel?: string;
3
+ activationThreshold: number;
4
+ deactivationThreshold: number;
5
+ debounceFrames: number;
6
+ }): Promise<(chunk: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void) => Promise<void>>;
2
7
  export { makeVad };
@@ -50,11 +50,14 @@ const chunkToFloat32Array_1 = require("./chunkToFloat32Array");
50
50
  const SileroVadModel_1 = require("./SileroVadModel");
51
51
  const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
52
52
  const BUFFER_SIZE = 16000;
53
- async function makeVad(pathToModel) {
53
+ async function makeVad(params) {
54
+ const { pathToModel, activationThreshold, deactivationThreshold, debounceFrames } = params;
54
55
  const effectivePath = pathToModel || (0, path_1.join)(__dirname, "..", "..", "silero_vad.onnx");
55
56
  const silero = await SileroVadModel_1.SileroVadModel.new(ort, effectivePath);
56
57
  let audioBuffer = [];
57
58
  let isSpeechActive = false;
59
+ let consecutiveSpeechFrames = 0;
60
+ let consecutiveNonSpeechFrames = 0;
58
61
  return async function process(chunk, callback) {
59
62
  const float32Array = (0, chunkToFloat32Array_1.chunkToFloat32Array)(chunk);
60
63
  audioBuffer.push(...float32Array);
@@ -65,20 +68,24 @@ async function makeVad(pathToModel) {
65
68
  const remainingBuffer = buffer.slice(BUFFER_SIZE);
66
69
  const result = await silero.process(new Float32Array(audioFrame));
67
70
  logger.silly("last vad result", { ...result });
68
- if (result.isSpeech > 0.5) {
69
- if (!isSpeechActive) {
71
+ if (result.isSpeech > activationThreshold) {
72
+ consecutiveNonSpeechFrames = 0; // Reset non-speech counter
73
+ consecutiveSpeechFrames++;
74
+ if (consecutiveSpeechFrames >= debounceFrames && !isSpeechActive) {
70
75
  isSpeechActive = true;
71
76
  callback("SPEECH_START");
72
- return processBuffer(remainingBuffer);
73
77
  }
74
78
  }
75
- else if (isSpeechActive) {
76
- isSpeechActive = false;
77
- callback("SPEECH_END");
78
- // WARNING: I'm unsure if this has any effect on the model
79
- // but it seems to work fine to ensure the model works optimally
80
- silero.resetState();
81
- return processBuffer(remainingBuffer);
79
+ else {
80
+ consecutiveSpeechFrames = 0; // Reset speech counter
81
+ consecutiveNonSpeechFrames++;
82
+ if (consecutiveNonSpeechFrames >= debounceFrames &&
83
+ isSpeechActive &&
84
+ result.isSpeech < deactivationThreshold) {
85
+ isSpeechActive = false;
86
+ callback("SPEECH_END");
87
+ silero.resetState(); // Reset VAD state after speech ends
88
+ }
82
89
  }
83
90
  return processBuffer(remainingBuffer);
84
91
  };
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@fonoster/autopilot",
3
- "version": "0.7.18",
3
+ "version": "0.7.19",
4
4
  "description": "Voice AI for the Fonoster platform",
5
5
  "author": "Pedro Sanders <psanders@fonoster.com>",
6
6
  "homepage": "https://github.com/fonoster/fonoster#readme",
@@ -56,5 +56,5 @@
56
56
  "devDependencies": {
57
57
  "typescript": "^5.5.4"
58
58
  },
59
- "gitHead": "4150dcb8086de182d0650df0c6d990ee76658058"
59
+ "gitHead": "5250aa76f6c4b72a3b26beabe71ea7a7c227d7c1"
60
60
  }