@fonoster/autopilot 0.7.18 → 0.7.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/assistants/AssistantSchema.d.ts +68 -0
- package/dist/assistants/AssistantSchema.js +7 -1
- package/dist/handleVoiceRequest.js +1 -1
- package/dist/vad/SileroVad.d.ts +11 -1
- package/dist/vad/SileroVad.js +6 -2
- package/dist/vad/makeVad.d.ts +6 -1
- package/dist/vad/makeVad.js +18 -11
- package/package.json +2 -2
|
@@ -32,11 +32,33 @@ declare const conversationSettingsSchema: z.ZodObject<{
|
|
|
32
32
|
timeout: number;
|
|
33
33
|
maxTimeoutCount: number;
|
|
34
34
|
}>>>;
|
|
35
|
+
vad: z.ZodObject<{
|
|
36
|
+
pathToModel: z.ZodOptional<z.ZodString>;
|
|
37
|
+
activationThreshold: z.ZodNumber;
|
|
38
|
+
deactivationThreshold: z.ZodNumber;
|
|
39
|
+
debounceFrames: z.ZodNumber;
|
|
40
|
+
}, "strip", z.ZodTypeAny, {
|
|
41
|
+
activationThreshold: number;
|
|
42
|
+
deactivationThreshold: number;
|
|
43
|
+
debounceFrames: number;
|
|
44
|
+
pathToModel?: string | undefined;
|
|
45
|
+
}, {
|
|
46
|
+
activationThreshold: number;
|
|
47
|
+
deactivationThreshold: number;
|
|
48
|
+
debounceFrames: number;
|
|
49
|
+
pathToModel?: string | undefined;
|
|
50
|
+
}>;
|
|
35
51
|
}, "strip", z.ZodTypeAny, {
|
|
36
52
|
firstMessage: string;
|
|
37
53
|
systemTemplate: string;
|
|
38
54
|
goodbyeMessage: string;
|
|
39
55
|
systemErrorMessage: string;
|
|
56
|
+
vad: {
|
|
57
|
+
activationThreshold: number;
|
|
58
|
+
deactivationThreshold: number;
|
|
59
|
+
debounceFrames: number;
|
|
60
|
+
pathToModel?: string | undefined;
|
|
61
|
+
};
|
|
40
62
|
initialDtmf?: string | null | undefined;
|
|
41
63
|
transferOptions?: {
|
|
42
64
|
message: string;
|
|
@@ -53,6 +75,12 @@ declare const conversationSettingsSchema: z.ZodObject<{
|
|
|
53
75
|
systemTemplate: string;
|
|
54
76
|
goodbyeMessage: string;
|
|
55
77
|
systemErrorMessage: string;
|
|
78
|
+
vad: {
|
|
79
|
+
activationThreshold: number;
|
|
80
|
+
deactivationThreshold: number;
|
|
81
|
+
debounceFrames: number;
|
|
82
|
+
pathToModel?: string | undefined;
|
|
83
|
+
};
|
|
56
84
|
initialDtmf?: string | null | undefined;
|
|
57
85
|
transferOptions?: {
|
|
58
86
|
message: string;
|
|
@@ -293,11 +321,33 @@ declare const assistantSchema: z.ZodObject<{
|
|
|
293
321
|
timeout: number;
|
|
294
322
|
maxTimeoutCount: number;
|
|
295
323
|
}>>>;
|
|
324
|
+
vad: z.ZodObject<{
|
|
325
|
+
pathToModel: z.ZodOptional<z.ZodString>;
|
|
326
|
+
activationThreshold: z.ZodNumber;
|
|
327
|
+
deactivationThreshold: z.ZodNumber;
|
|
328
|
+
debounceFrames: z.ZodNumber;
|
|
329
|
+
}, "strip", z.ZodTypeAny, {
|
|
330
|
+
activationThreshold: number;
|
|
331
|
+
deactivationThreshold: number;
|
|
332
|
+
debounceFrames: number;
|
|
333
|
+
pathToModel?: string | undefined;
|
|
334
|
+
}, {
|
|
335
|
+
activationThreshold: number;
|
|
336
|
+
deactivationThreshold: number;
|
|
337
|
+
debounceFrames: number;
|
|
338
|
+
pathToModel?: string | undefined;
|
|
339
|
+
}>;
|
|
296
340
|
}, "strip", z.ZodTypeAny, {
|
|
297
341
|
firstMessage: string;
|
|
298
342
|
systemTemplate: string;
|
|
299
343
|
goodbyeMessage: string;
|
|
300
344
|
systemErrorMessage: string;
|
|
345
|
+
vad: {
|
|
346
|
+
activationThreshold: number;
|
|
347
|
+
deactivationThreshold: number;
|
|
348
|
+
debounceFrames: number;
|
|
349
|
+
pathToModel?: string | undefined;
|
|
350
|
+
};
|
|
301
351
|
initialDtmf?: string | null | undefined;
|
|
302
352
|
transferOptions?: {
|
|
303
353
|
message: string;
|
|
@@ -314,6 +364,12 @@ declare const assistantSchema: z.ZodObject<{
|
|
|
314
364
|
systemTemplate: string;
|
|
315
365
|
goodbyeMessage: string;
|
|
316
366
|
systemErrorMessage: string;
|
|
367
|
+
vad: {
|
|
368
|
+
activationThreshold: number;
|
|
369
|
+
deactivationThreshold: number;
|
|
370
|
+
debounceFrames: number;
|
|
371
|
+
pathToModel?: string | undefined;
|
|
372
|
+
};
|
|
317
373
|
initialDtmf?: string | null | undefined;
|
|
318
374
|
transferOptions?: {
|
|
319
375
|
message: string;
|
|
@@ -527,6 +583,12 @@ declare const assistantSchema: z.ZodObject<{
|
|
|
527
583
|
systemTemplate: string;
|
|
528
584
|
goodbyeMessage: string;
|
|
529
585
|
systemErrorMessage: string;
|
|
586
|
+
vad: {
|
|
587
|
+
activationThreshold: number;
|
|
588
|
+
deactivationThreshold: number;
|
|
589
|
+
debounceFrames: number;
|
|
590
|
+
pathToModel?: string | undefined;
|
|
591
|
+
};
|
|
530
592
|
initialDtmf?: string | null | undefined;
|
|
531
593
|
transferOptions?: {
|
|
532
594
|
message: string;
|
|
@@ -578,6 +640,12 @@ declare const assistantSchema: z.ZodObject<{
|
|
|
578
640
|
systemTemplate: string;
|
|
579
641
|
goodbyeMessage: string;
|
|
580
642
|
systemErrorMessage: string;
|
|
643
|
+
vad: {
|
|
644
|
+
activationThreshold: number;
|
|
645
|
+
deactivationThreshold: number;
|
|
646
|
+
debounceFrames: number;
|
|
647
|
+
pathToModel?: string | undefined;
|
|
648
|
+
};
|
|
581
649
|
initialDtmf?: string | null | undefined;
|
|
582
650
|
transferOptions?: {
|
|
583
651
|
message: string;
|
|
@@ -43,7 +43,13 @@ const conversationSettingsSchema = zod_1.z.object({
|
|
|
43
43
|
maxTimeoutCount: zod_1.z.number()
|
|
44
44
|
})
|
|
45
45
|
.optional()
|
|
46
|
-
.nullable()
|
|
46
|
+
.nullable(),
|
|
47
|
+
vad: zod_1.z.object({
|
|
48
|
+
pathToModel: zod_1.z.string().optional(),
|
|
49
|
+
activationThreshold: zod_1.z.number(),
|
|
50
|
+
deactivationThreshold: zod_1.z.number(),
|
|
51
|
+
debounceFrames: zod_1.z.number()
|
|
52
|
+
})
|
|
47
53
|
});
|
|
48
54
|
exports.conversationSettingsSchema = conversationSettingsSchema;
|
|
49
55
|
const languageModelConfigSchema = zod_1.z.object({
|
|
@@ -60,7 +60,7 @@ async function handleVoiceRequest(req, res) {
|
|
|
60
60
|
const assistantConfig = (0, loadAssistantConfig_1.loadAssistantConfig)();
|
|
61
61
|
const knowledgeBase = await (0, loadKnowledgeBase_1.loadKnowledgeBase)();
|
|
62
62
|
const voice = new _1.VoiceImpl(sessionRef, res);
|
|
63
|
-
const vad = new _1.SileroVad();
|
|
63
|
+
const vad = new _1.SileroVad(assistantConfig.conversationSettings.vad);
|
|
64
64
|
const languageModel = (0, createLanguageModel_1.createLanguageModel)({
|
|
65
65
|
voice,
|
|
66
66
|
assistantConfig,
|
package/dist/vad/SileroVad.d.ts
CHANGED
|
@@ -1,7 +1,17 @@
|
|
|
1
1
|
import { Vad } from "./types";
|
|
2
2
|
declare class SileroVad implements Vad {
|
|
3
3
|
private vad;
|
|
4
|
-
|
|
4
|
+
private params;
|
|
5
|
+
constructor(params: {
|
|
6
|
+
pathToModel?: string;
|
|
7
|
+
activationThreshold: number;
|
|
8
|
+
deactivationThreshold: number;
|
|
9
|
+
debounceFrames: number;
|
|
10
|
+
});
|
|
11
|
+
pathToModel?: string;
|
|
12
|
+
activationThreshold: number;
|
|
13
|
+
deactivationThreshold: number;
|
|
14
|
+
debounceFrames: number;
|
|
5
15
|
private init;
|
|
6
16
|
processChunk(data: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void): void;
|
|
7
17
|
}
|
package/dist/vad/SileroVad.js
CHANGED
|
@@ -20,14 +20,18 @@ exports.SileroVad = void 0;
|
|
|
20
20
|
* See the License for the specific language governing permissions and
|
|
21
21
|
* limitations under the License.
|
|
22
22
|
*/
|
|
23
|
+
const logger_1 = require("@fonoster/logger");
|
|
23
24
|
const makeVad_1 = require("./makeVad");
|
|
25
|
+
const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
|
|
24
26
|
class SileroVad {
|
|
25
|
-
constructor() {
|
|
27
|
+
constructor(params) {
|
|
28
|
+
logger.verbose("starting instance of silero vad", { ...params });
|
|
29
|
+
this.params = params;
|
|
26
30
|
this.init();
|
|
27
31
|
}
|
|
28
32
|
async init() {
|
|
29
33
|
// FIXME: It feels strange to do this in the constructor
|
|
30
|
-
this.vad = await (0, makeVad_1.makeVad)();
|
|
34
|
+
this.vad = await (0, makeVad_1.makeVad)(this.params);
|
|
31
35
|
}
|
|
32
36
|
processChunk(data, callback) {
|
|
33
37
|
if (!this.vad) {
|
package/dist/vad/makeVad.d.ts
CHANGED
|
@@ -1,2 +1,7 @@
|
|
|
1
|
-
declare function makeVad(
|
|
1
|
+
declare function makeVad(params: {
|
|
2
|
+
pathToModel?: string;
|
|
3
|
+
activationThreshold: number;
|
|
4
|
+
deactivationThreshold: number;
|
|
5
|
+
debounceFrames: number;
|
|
6
|
+
}): Promise<(chunk: Uint8Array, callback: (event: "SPEECH_START" | "SPEECH_END") => void) => Promise<void>>;
|
|
2
7
|
export { makeVad };
|
package/dist/vad/makeVad.js
CHANGED
|
@@ -50,11 +50,14 @@ const chunkToFloat32Array_1 = require("./chunkToFloat32Array");
|
|
|
50
50
|
const SileroVadModel_1 = require("./SileroVadModel");
|
|
51
51
|
const logger = (0, logger_1.getLogger)({ service: "autopilot", filePath: __filename });
|
|
52
52
|
const BUFFER_SIZE = 16000;
|
|
53
|
-
async function makeVad(pathToModel) {
|
|
53
|
+
async function makeVad(params) {
|
|
54
|
+
const { pathToModel, activationThreshold, deactivationThreshold, debounceFrames } = params;
|
|
54
55
|
const effectivePath = pathToModel || (0, path_1.join)(__dirname, "..", "..", "silero_vad.onnx");
|
|
55
56
|
const silero = await SileroVadModel_1.SileroVadModel.new(ort, effectivePath);
|
|
56
57
|
let audioBuffer = [];
|
|
57
58
|
let isSpeechActive = false;
|
|
59
|
+
let consecutiveSpeechFrames = 0;
|
|
60
|
+
let consecutiveNonSpeechFrames = 0;
|
|
58
61
|
return async function process(chunk, callback) {
|
|
59
62
|
const float32Array = (0, chunkToFloat32Array_1.chunkToFloat32Array)(chunk);
|
|
60
63
|
audioBuffer.push(...float32Array);
|
|
@@ -65,20 +68,24 @@ async function makeVad(pathToModel) {
|
|
|
65
68
|
const remainingBuffer = buffer.slice(BUFFER_SIZE);
|
|
66
69
|
const result = await silero.process(new Float32Array(audioFrame));
|
|
67
70
|
logger.silly("last vad result", { ...result });
|
|
68
|
-
if (result.isSpeech >
|
|
69
|
-
|
|
71
|
+
if (result.isSpeech > activationThreshold) {
|
|
72
|
+
consecutiveNonSpeechFrames = 0; // Reset non-speech counter
|
|
73
|
+
consecutiveSpeechFrames++;
|
|
74
|
+
if (consecutiveSpeechFrames >= debounceFrames && !isSpeechActive) {
|
|
70
75
|
isSpeechActive = true;
|
|
71
76
|
callback("SPEECH_START");
|
|
72
|
-
return processBuffer(remainingBuffer);
|
|
73
77
|
}
|
|
74
78
|
}
|
|
75
|
-
else
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
79
|
+
else {
|
|
80
|
+
consecutiveSpeechFrames = 0; // Reset speech counter
|
|
81
|
+
consecutiveNonSpeechFrames++;
|
|
82
|
+
if (consecutiveNonSpeechFrames >= debounceFrames &&
|
|
83
|
+
isSpeechActive &&
|
|
84
|
+
result.isSpeech < deactivationThreshold) {
|
|
85
|
+
isSpeechActive = false;
|
|
86
|
+
callback("SPEECH_END");
|
|
87
|
+
silero.resetState(); // Reset VAD state after speech ends
|
|
88
|
+
}
|
|
82
89
|
}
|
|
83
90
|
return processBuffer(remainingBuffer);
|
|
84
91
|
};
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@fonoster/autopilot",
|
|
3
|
-
"version": "0.7.18",
|
|
3
|
+
"version": "0.7.19",
|
|
4
4
|
"description": "Voice AI for the Fonoster platform",
|
|
5
5
|
"author": "Pedro Sanders <psanders@fonoster.com>",
|
|
6
6
|
"homepage": "https://github.com/fonoster/fonoster#readme",
|
|
@@ -56,5 +56,5 @@
|
|
|
56
56
|
"devDependencies": {
|
|
57
57
|
"typescript": "^5.5.4"
|
|
58
58
|
},
|
|
59
|
-
"gitHead": "
|
|
59
|
+
"gitHead": "5250aa76f6c4b72a3b26beabe71ea7a7c227d7c1"
|
|
60
60
|
}
|