getpatter 0.5.4 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +1 -1
- package/README.md +5 -2
- package/dist/aec-PJJMUM5E.mjs +228 -0
- package/dist/{banner-3GNZ6VQK.mjs → banner-UYW6UM3J.mjs} +4 -1
- package/dist/barge-in-strategies-X6ARMGIQ.mjs +12 -0
- package/dist/{carrier-config-33HQ2W4V.mjs → carrier-config-4ZKVYAWV.mjs} +5 -2
- package/dist/{chunk-AFUYSNDH.mjs → chunk-6GR5MHHQ.mjs} +9 -0
- package/dist/chunk-CYLJVT5G.mjs +7031 -0
- package/dist/chunk-D4424JZR.mjs +71 -0
- package/dist/{chunk-VJVDG4V5.mjs → chunk-MVOQFAEO.mjs} +5 -0
- package/dist/chunk-N565J3CF.mjs +69 -0
- package/dist/chunk-RV7APPYE.mjs +397 -0
- package/dist/{chunk-FIFIWBL7.mjs → chunk-TEW3NAZJ.mjs} +6000 -3156
- package/dist/{chunk-SEMKNPCD.mjs → chunk-XS45BAQL.mjs} +5 -1
- package/dist/cli.js +304 -640
- package/dist/client-2GJVZT42.mjs +8935 -0
- package/dist/dashboard/ui.html +63 -0
- package/dist/{dist-YRCCJQ26.mjs → dist-RYMPCILF.mjs} +28 -2
- package/dist/index.d.mts +3548 -428
- package/dist/index.d.ts +3548 -428
- package/dist/index.js +34336 -9532
- package/dist/index.mjs +3642 -512
- package/dist/{node-cron-6PRPSBG5.mjs → node-cron-JFWQQRBU.mjs} +23 -2
- package/dist/persistence-LVIAHESK.mjs +7 -0
- package/dist/silero-vad-NSEXI4XS.mjs +7 -0
- package/dist/streamableHttp-WKNGHDVO.mjs +1496 -0
- package/dist/test-mode-WEKKNBLD.mjs +8 -0
- package/dist/tunnel-43CHWPVQ.mjs +8 -0
- package/package.json +7 -7
- package/src/dashboard/ui.html +63 -0
- package/dist/chunk-QHHBUCMT.mjs +0 -25
- package/dist/persistence-LQBYQPQQ.mjs +0 -7
- package/dist/test-mode-MVJ3SKG4.mjs +0 -8
- package/dist/tunnel-UVR3PPAU.mjs +0 -8
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import {
|
|
2
|
+
getLogger
|
|
3
|
+
} from "./chunk-MVOQFAEO.mjs";
|
|
4
|
+
import {
|
|
5
|
+
init_esm_shims
|
|
6
|
+
} from "./chunk-N565J3CF.mjs";
|
|
7
|
+
|
|
8
|
+
// src/services/barge-in-strategies.ts
|
|
9
|
+
init_esm_shims();
|
|
10
|
+
/**
 * Barge-in strategy that confirms an interruption once the transcript holds
 * enough words.
 *
 * While the agent is speaking the caller must say at least `minWords` words;
 * when the agent is silent a single word is enough.
 */
var MinWordsStrategy = class {
  minWords;
  useInterim;

  /**
   * @param {{ minWords: number, useInterim?: boolean }} options
   *   `minWords` must be a finite number >= 1 (fractions are floored);
   *   `useInterim` defaults to `true`.
   * @throws {Error} When `minWords` is not a finite number >= 1.
   */
  constructor(options) {
    const { minWords, useInterim } = options;
    const isValid = Number.isFinite(minWords) && minWords >= 1;
    if (!isValid) {
      throw new Error(`minWords must be >= 1 (got ${String(minWords)})`);
    }
    this.minWords = Math.floor(minWords);
    this.useInterim = useInterim ?? true;
  }

  /**
   * Decide whether the transcript is long enough to count as a barge-in.
   *
   * @param {{ transcript?: string, isInterim?: boolean, agentSpeaking?: boolean }} ctx
   * @returns {boolean} `true` when the word count reaches the active threshold.
   */
  evaluate(ctx) {
    // Interim transcripts are ignored entirely when the strategy opted out.
    if (ctx.isInterim && !this.useInterim) {
      return false;
    }
    const words = (ctx.transcript ?? "").match(/\S+/g) ?? [];
    const required = ctx.agentSpeaking ? this.minWords : 1;
    return words.length >= required;
  }

  /** No per-turn state to clear; present so the strategy exposes `reset()`. */
  async reset() {
  }
};
|
|
33
|
+
/**
 * Run barge-in strategies in order and report whether any confirms the
 * interruption.
 *
 * Strategies are awaited one at a time and evaluation stops at the first one
 * that returns exactly `true`. A strategy that throws (including a null or
 * otherwise malformed entry) is logged as a warning and treated as
 * "do not confirm"; it never rejects the returned promise.
 *
 * @param {Array<{ evaluate: Function }> | null | undefined} strategies
 * @param {{ transcript?: string, isInterim?: boolean, agentSpeaking?: boolean }} ctx
 * @returns {Promise<boolean>} `true` if some strategy confirmed, else `false`.
 */
async function evaluateStrategies(strategies, ctx) {
  if (!strategies || strategies.length === 0) {
    return false;
  }
  // Strategies always receive a string transcript, never null/undefined.
  const safeCtx = {
    transcript: ctx.transcript ?? "",
    isInterim: ctx.isInterim,
    agentSpeaking: ctx.agentSpeaking
  };
  for (const strategy of strategies) {
    try {
      const result = await strategy.evaluate(safeCtx);
      if (result === true) return true;
    } catch (err) {
      // `strategy` itself may be null/undefined (that is one way to land
      // here), so the name lookup must not dereference it unguarded.
      getLogger().warn(
        `BargeInStrategy ${strategy?.constructor?.name ?? "unknown"} threw; treating as 'do not confirm': ${String(err)}`
      );
    }
  }
  return false;
}
|
|
54
|
+
/**
 * Call `reset()` on every strategy that provides one.
 *
 * Resets are awaited sequentially. A missing list, a null entry, or an entry
 * without a `reset` function is skipped; a `reset()` that throws is logged at
 * debug level and does not stop the remaining strategies from being reset.
 *
 * @param {Array<{ reset?: Function }> | null | undefined} strategies
 * @returns {Promise<void>}
 */
async function resetStrategies(strategies) {
  // Mirror evaluateStrategies: an absent strategy list is a no-op, not a crash.
  if (!strategies) {
    return;
  }
  for (const strategy of strategies) {
    if (typeof strategy?.reset !== "function") continue;
    try {
      await strategy.reset();
    } catch (err) {
      getLogger().debug(
        `BargeInStrategy ${strategy.constructor?.name ?? "unknown"}.reset() threw: ${String(err)}`
      );
    }
  }
}
|
|
66
|
+
|
|
67
|
+
export {
|
|
68
|
+
MinWordsStrategy,
|
|
69
|
+
evaluateStrategies,
|
|
70
|
+
resetStrategies
|
|
71
|
+
};
|
|
@@ -1,4 +1,9 @@
|
|
|
1
|
+
import {
|
|
2
|
+
init_esm_shims
|
|
3
|
+
} from "./chunk-N565J3CF.mjs";
|
|
4
|
+
|
|
1
5
|
// src/logger.ts
|
|
6
|
+
init_esm_shims();
|
|
2
7
|
var defaultLogger = {
|
|
3
8
|
info: (msg, ...args) => console.info(`[PATTER] ${msg}`, ...args),
|
|
4
9
|
warn: (msg, ...args) => console.warn(`[PATTER] WARNING: ${msg}`, ...args),
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
var __create = Object.create;
|
|
2
|
+
var __defProp = Object.defineProperty;
|
|
3
|
+
var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
|
|
4
|
+
var __getOwnPropNames = Object.getOwnPropertyNames;
|
|
5
|
+
var __getProtoOf = Object.getPrototypeOf;
|
|
6
|
+
var __hasOwnProp = Object.prototype.hasOwnProperty;
|
|
7
|
+
var __require = /* @__PURE__ */ ((x) => typeof require !== "undefined" ? require : typeof Proxy !== "undefined" ? new Proxy(x, {
|
|
8
|
+
get: (a, b) => (typeof require !== "undefined" ? require : a)[b]
|
|
9
|
+
}) : x)(function(x) {
|
|
10
|
+
if (typeof require !== "undefined") return require.apply(this, arguments);
|
|
11
|
+
throw Error('Dynamic require of "' + x + '" is not supported');
|
|
12
|
+
});
|
|
13
|
+
/**
 * Build a lookup function over a map of bundled module loaders.
 *
 * @param {Record<string, Function>} map Path -> zero-argument loader.
 * @returns {(path2: string) => any} Calls the loader registered for the path;
 *   throws when the path has no (truthy) loader.
 */
var __glob = (map) => (path2) => {
  const loader = map[path2];
  if (!loader) {
    throw new Error("Module not found in bundle: " + path2);
  }
  return loader();
};
|
|
18
|
+
/**
 * Wrap a lazily-initialised ESM module body so it runs at most once.
 *
 * `fn` is an object whose first own property is the module initializer. The
 * returned `__init` runs that initializer on its first call, caches the
 * result in `res`, and returns the cached value on every later call.
 */
var __esm = (fn, res) => function __init() {
  if (fn) {
    const initializer = fn[__getOwnPropNames(fn)[0]];
    // Clear `fn` before running so a re-entrant or repeated call (even after
    // a throw) never executes the initializer a second time.
    fn = 0;
    res = initializer(0);
  }
  return res;
};
|
|
21
|
+
/**
 * Wrap a CommonJS module factory so it is evaluated lazily and only once.
 *
 * `cb` is an object whose first own property is the factory
 * `(exports, module) => void`. The returned `__require2` creates the module
 * record before calling the factory and always returns `module.exports`.
 */
var __commonJS = (cb, mod) => function __require2() {
  if (!mod) {
    const factory = cb[__getOwnPropNames(cb)[0]];
    // The record is assigned before the factory runs, so a nested call made
    // during evaluation sees the partially-populated exports.
    mod = { exports: {} };
    factory(mod.exports, mod);
  }
  return mod.exports;
};
|
|
24
|
+
/**
 * Define live, enumerable getter-backed exports on `target`.
 *
 * @param {object} target Object that receives the properties.
 * @param {Record<string, () => any>} all Export name -> getter function.
 */
var __export = (target, all) => {
  for (const exportName in all) {
    const getter = all[exportName];
    __defProp(target, exportName, { enumerable: true, get: getter });
  }
};
|
|
28
|
+
/**
 * Mirror the own properties of `from` onto `to` as live getters.
 *
 * Keys already present on `to`, and the key equal to `except`, are skipped.
 * Each copied property keeps the enumerability of its source descriptor
 * (enumerable when no descriptor is found). Non-object, non-function sources
 * are ignored.
 *
 * @returns {object} `to`, for chaining.
 */
var __copyProps = (to, from, except, desc) => {
  const copyable = from && typeof from === "object" || typeof from === "function";
  if (!copyable) {
    return to;
  }
  for (const key of __getOwnPropNames(from)) {
    if (__hasOwnProp.call(to, key) || key === except) continue;
    desc = __getOwnPropDesc(from, key);
    __defProp(to, key, {
      get: () => from[key],
      enumerable: !desc || desc.enumerable
    });
  }
  return to;
};
|
|
36
|
+
/**
 * Present a CommonJS module value as an ESM-style namespace object.
 *
 * The namespace is created with the same prototype as `mod` (or as a plain
 * object for null/undefined) and receives live getters for the properties of
 * `mod`. A `default` property pointing at `mod` itself is added when the
 * importer is in node-compatibility mode, or when `mod` is falsy or was not
 * marked `__esModule` (i.e. it is not a Babel-style transpiled ES module).
 */
var __toESM = (mod, isNodeMode, target) => {
  target = mod != null ? __create(__getProtoOf(mod)) : {};
  const needsDefault = isNodeMode || !mod || !mod.__esModule;
  if (needsDefault) {
    // Defined first so __copyProps will not overwrite it with mod.default.
    __defProp(target, "default", { value: mod, enumerable: true });
  }
  return __copyProps(target, mod);
};
|
|
44
|
+
|
|
45
|
+
// node_modules/tsup/assets/esm_shims.js
|
|
46
|
+
import path from "path";
|
|
47
|
+
import { fileURLToPath } from "url";
|
|
48
|
+
// Module-level slots filled in by init_esm_shims(); they stay undefined
// until the initializer has run once.
var getFilename, getDirname, __dirname, __filename;

/**
 * One-shot initializer (wrapped by __esm) that gives this ES module
 * CommonJS-style `__filename` / `__dirname` values derived from
 * `import.meta.url`.
 */
var init_esm_shims = __esm({
  "node_modules/tsup/assets/esm_shims.js"() {
    "use strict";
    getFilename = () => fileURLToPath(import.meta.url);
    getDirname = () => path.dirname(getFilename());
    __filename = /* @__PURE__ */ getFilename();
    __dirname = /* @__PURE__ */ getDirname();
  }
});
|
|
58
|
+
|
|
59
|
+
export {
|
|
60
|
+
__require,
|
|
61
|
+
__glob,
|
|
62
|
+
__esm,
|
|
63
|
+
__commonJS,
|
|
64
|
+
__export,
|
|
65
|
+
__toESM,
|
|
66
|
+
__dirname,
|
|
67
|
+
__filename,
|
|
68
|
+
init_esm_shims
|
|
69
|
+
};
|
|
@@ -0,0 +1,397 @@
|
|
|
1
|
+
import {
|
|
2
|
+
init_esm_shims
|
|
3
|
+
} from "./chunk-N565J3CF.mjs";
|
|
4
|
+
|
|
5
|
+
// src/providers/silero-vad.ts
|
|
6
|
+
init_esm_shims();
|
|
7
|
+
import { createRequire } from "module";
|
|
8
|
+
import * as fs from "fs";
|
|
9
|
+
import * as path from "path";
|
|
10
|
+
import { fileURLToPath } from "url";
|
|
11
|
+
var SUPPORTED_SAMPLE_RATES = [8e3, 16e3];
|
|
12
|
+
/**
 * Collect candidate directories in which the bundled model file may live.
 *
 * Probes are tried in a fixed order and each one is best-effort: a probe that
 * throws or yields no directory is skipped. `process.cwd()` is always
 * appended last, so the result is never empty.
 *
 * @returns {string[]} Candidate directories, most specific first (may contain
 *   duplicates).
 */
function resolveModuleDirs() {
  const dirs = [];
  const probe = (locate) => {
    try {
      const dir = locate();
      if (typeof dir === "string") dirs.push(dir);
    } catch {
      // Best-effort probe: an unavailable source is simply not a candidate.
    }
  };

  // 1. CommonJS-style __dirname, read through an indirect Function so the
  //    lookup happens in global scope rather than this module's scope.
  probe(() => new Function("return typeof __dirname !== 'undefined' ? __dirname : null")());

  // 2. Directory containing this ES module.
  probe(() => {
    const url = import.meta.url;
    return url ? path.dirname(fileURLToPath(url)) : null;
  });

  // 3. Installed getpatter package root, resolved relative to this module.
  probe(() => {
    const url = import.meta.url;
    if (!url) return null;
    return path.dirname(createRequire(url).resolve("getpatter/package.json"));
  });

  // 4. Installed getpatter package root, resolved relative to the working directory.
  probe(() => {
    const req = createRequire(path.join(process.cwd(), "package.json"));
    return path.dirname(req.resolve("getpatter/package.json"));
  });

  dirs.push(process.cwd());
  return dirs;
}
|
|
40
|
+
var MODULE_DIRS = resolveModuleDirs();
|
|
41
|
+
/**
 * Locate `silero_vad.onnx` under the candidate module directories.
 *
 * For each directory in `MODULE_DIRS` (in order) three layouts are checked:
 * `<dir>/resources`, `<dir>/../resources` and `<dir>/dist/resources`. The
 * first path that exists on disk wins.
 *
 * @returns {string} The first existing model path, or, when none exists,
 *   `<first candidate dir or cwd>/resources/silero_vad.onnx` (which may not
 *   exist).
 */
function resolveDefaultModelPath() {
  const modelFile = "silero_vad.onnx";
  const layouts = [
    ["resources"],
    ["..", "resources"],
    ["dist", "resources"]
  ];
  for (const dir of MODULE_DIRS) {
    const found = layouts
      .map((segments) => path.join(dir, ...segments, modelFile))
      .find((candidate) => fs.existsSync(candidate));
    if (found !== undefined) return found;
  }
  return path.join(MODULE_DIRS[0] ?? process.cwd(), "resources", modelFile);
}
|
|
52
|
+
var DEFAULT_MODEL_PATH = resolveDefaultModelPath();
|
|
53
|
+
/**
 * Classify a failure to load `onnxruntime-node` from its error message.
 *
 * @param {unknown} err The thrown value (its `message` is used when present,
 *   otherwise its string form).
 * @returns {"missing" | "binding" | "api-drift" | "unknown"}
 *   - `"missing"`   — the package itself could not be resolved;
 *   - `"binding"`   — the message mentions the native binding file or an
 *                     `napi-vN` directory;
 *   - `"api-drift"` — the message mentions `listSupportedBackends` or a
 *                     `backend_N` symbol;
 *   - `"unknown"`   — anything else.
 */
function classifyOnnxError(err) {
  const msg = err?.message ?? String(err);
  // Node words a missing package differently per loader:
  //   require():  Cannot find module 'onnxruntime-node'            (end of line)
  //   import():   Cannot find package 'onnxruntime-node' imported from <file>
  // Both must count as "missing"; anchoring after the name keeps longer
  // package names and binding file paths out of this class.
  if (/Cannot find (?:module|package) ['"]?onnxruntime-node['"]?(?:$| imported from )/m.test(msg)) return "missing";
  if (/onnxruntime_binding\.node|napi-v\d/.test(msg)) return "binding";
  if (/listSupportedBackends|backend_\d/.test(msg)) return "api-drift";
  return "unknown";
}
|
|
60
|
+
/**
 * Load the ONNX runtime module used by SileroVAD.
 *
 * Two strategies are tried in order: a dynamic `import()` of the bundled
 * runtime chunk, then a `require("onnxruntime-node")` resolved from the
 * current working directory. If both fail, a single Error is thrown whose
 * message names the most likely cause (package missing, unsupported API,
 * missing native binding, or unresolved) together with both underlying
 * failure messages.
 *
 * @returns {Promise<object>} The loaded runtime module.
 * @throws {Error} When neither strategy succeeds; `cause` is the second
 *   failure (or the first if the second is nullish).
 */
async function loadOnnxRuntime() {
  let firstErr;
  try {
    return await import("./dist-RYMPCILF.mjs");
  } catch (e) {
    firstErr = e;
  }

  let secondErr;
  try {
    const req = createRequire(path.join(process.cwd(), "package.json"));
    return req("onnxruntime-node");
  } catch (e) {
    secondErr = e;
  }

  const importClass = classifyOnnxError(firstErr);
  const requireClass = classifyOnnxError(secondErr);
  const original = firstErr?.message ?? String(firstErr);
  const detail = secondErr?.message ?? String(secondErr);

  // Pick the diagnosis. "missing" needs both loaders to agree; the other two
  // specific diagnoses apply if either loader reported them.
  const classes = [importClass, requireClass];
  let diagnosis;
  if (importClass === "missing" && requireClass === "missing") {
    diagnosis = "missing";
  } else if (classes.includes("api-drift")) {
    diagnosis = "api-drift";
  } else if (classes.includes("binding")) {
    diagnosis = "binding";
  } else {
    diagnosis = "unresolved";
  }

  const guidance = {
    "missing": {
      header: 'SileroVAD requires the "onnxruntime-node" package \u2014 it is not installed.',
      remedy: " Install: npm install onnxruntime-node@~1.18.0\n\n (~210 MB. Only needed when you actually use SileroVAD in pipeline mode.)"
    },
    "api-drift": {
      header: "SileroVAD found onnxruntime-node but the installed version uses an API the SDK does not support.",
      remedy: " Patter is currently tested against onnxruntime-node 1.18.x.\n\n Fix: npm install onnxruntime-node@~1.18.0\n\n Versions 1.24+ removed `listSupportedBackends` from the public surface \u2014 track\n https://github.com/PatterAI/Patter/issues for the SDK update that targets 1.24."
    },
    "binding": {
      header: "SileroVAD found onnxruntime-node but the native binding for this platform is missing.",
      remedy: " Common cause on macOS x86_64: the prebuilt bin/ layout drifted between releases.\n\n Fix: npm install onnxruntime-node@~1.18.0\n\n Or rebuild from source: npm rebuild onnxruntime-node"
    },
    "unresolved": {
      header: 'SileroVAD requires the "onnxruntime-node" package, which could not be resolved.',
      remedy: " Install: npm install onnxruntime-node@~1.18.0\n\n This is an optional peer dependency of getpatter (~210 MB)."
    }
  };
  const { header, remedy } = guidance[diagnosis];

  const err = new Error(
    `
${header}

${remedy}

import() failed: ${original}
cwd-require failed: ${detail}
`
  );
  err.cause = secondErr ?? firstErr;
  throw err;
}
|
|
105
|
+
/**
 * Exponential smoothing filter.
 *
 * The first sample after construction or `reset()` passes through unchanged;
 * each later sample is blended with the previous output using the weight
 * `alpha ** exp` on the previous value.
 */
var ExpFilter = class {
  alpha;
  filtered = null;

  /**
   * @param {number} alpha Smoothing factor in the half-open range (0, 1].
   * @throws {Error} When `alpha` is outside (0, 1] (including NaN).
   */
  constructor(alpha) {
    this.alpha = alpha;
    const inRange = alpha > 0 && alpha <= 1;
    if (!inRange) {
      throw new Error("alpha must be in (0, 1].");
    }
  }

  /**
   * Feed one sample through the filter.
   *
   * @param {number} exp Exponent applied to `alpha` for this step.
   * @param {number} sample New raw value.
   * @returns {number} The updated filtered value.
   */
  apply(exp, sample) {
    if (this.filtered !== null) {
      const keep = Math.pow(this.alpha, exp);
      this.filtered = keep * this.filtered + (1 - keep) * sample;
    } else {
      this.filtered = sample;
    }
    return this.filtered;
  }

  /** Forget the running value so the next sample passes through unchanged. */
  reset() {
    this.filtered = null;
  }
};
|
|
127
|
+
/**
 * Stateful wrapper around one ONNX inference session for the Silero VAD
 * model. It owns the recurrent state and the rolling audio context that are
 * carried from one inference window to the next.
 */
var OnnxModel = class {
  runtime;
  session;
  sampleRate;
  windowSizeSamples;
  contextSize;
  context;
  rnnState;
  inputBuffer;
  sampleRateTensor;

  /**
   * @param {object} runtime Runtime module; its `Tensor` constructor is used.
   * @param {object} session Inference session exposing `run(feeds)`.
   * @param {number} sampleRate 8000 or 16000.
   * @throws {Error} When the sample rate is not supported.
   */
  constructor(runtime, session, sampleRate) {
    this.runtime = runtime;
    this.session = session;
    if (!SUPPORTED_SAMPLE_RATES.includes(sampleRate)) {
      throw new Error("Silero VAD only supports 8KHz and 16KHz sample rates");
    }
    const narrowband = sampleRate === 8e3;
    this.sampleRate = sampleRate;
    this.windowSizeSamples = narrowband ? 256 : 512;
    this.contextSize = narrowband ? 32 : 64;
    this.context = new Float32Array(this.contextSize);
    this.rnnState = new Float32Array(2 * 1 * 128);
    this.inputBuffer = new Float32Array(this.contextSize + this.windowSizeSamples);
    this.sampleRateTensor = BigInt64Array.from([BigInt(sampleRate)]);
  }

  /**
   * Run one inference over a single audio window.
   *
   * @param {Float32Array} window Exactly `windowSizeSamples` samples.
   * @returns {Promise<number>} First element of the model's output tensor
   *   (0 when that element is nullish).
   * @throws {Error} When `window` has the wrong length.
   */
  async run(window) {
    const expected = this.windowSizeSamples;
    if (window.length !== expected) {
      throw new Error(
        `window must have exactly ${expected} samples, got ${window.length}`
      );
    }

    // Model input = previous context followed by the new window.
    const frame = this.inputBuffer;
    frame.set(this.context, 0);
    frame.set(window, this.contextSize);

    const { Tensor } = this.runtime;
    const results = await this.session.run({
      input: new Tensor("float32", frame, [1, frame.length]),
      state: new Tensor("float32", this.rnnState, [2, 1, 128]),
      sr: new Tensor("int64", this.sampleRateTensor, [])
    });

    // The state output is identified by the name "stateN" when present;
    // otherwise it is whichever output is not the probability output.
    const names = Object.keys(results);
    const outputKey = names.find((k) => k !== "stateN") ?? "output";
    const stateKey = "stateN" in results ? "stateN" : names.find((k) => k !== outputKey);
    const out = results[outputKey];
    const nextState = stateKey ? results[stateKey] : void 0;
    if (nextState && nextState.data instanceof Float32Array) {
      // Copy so the carried state does not alias the session's output buffer.
      this.rnnState = Float32Array.from(nextState.data);
    }

    // The tail of this frame becomes the context for the next window.
    this.context = frame.slice(-this.contextSize);
    return out.data[0] ?? 0;
  }

  /** Reset the RNN hidden state + rolling context to a fresh inference. */
  reset() {
    this.context = new Float32Array(this.contextSize);
    this.rnnState = new Float32Array(2 * 1 * 128);
  }
};
|
|
183
|
+
/**
 * Streaming voice-activity detector built on the Silero VAD ONNX model.
 *
 * PCM16 audio is fed in through `processFrame`; the detector buffers it into
 * fixed-size windows, smooths the per-window speech probability, and reports
 * `speech_start` / `speech_end` transitions using hysteresis thresholds and
 * minimum speech / silence durations.
 */
var SileroVAD = class _SileroVAD {
  model;
  opts;
  pending = new Float32Array(0);
  expFilter = new ExpFilter(0.35);
  pubSpeaking = false;
  speechThresholdDuration = 0;
  silenceThresholdDuration = 0;
  closed = false;

  /**
   * @param {object} model Model wrapper exposing `windowSizeSamples`,
   *   `run(window)` and `reset()`.
   * @param {object} opts Fully-resolved detector options.
   */
  constructor(model, opts) {
    this.model = model;
    this.opts = opts;
  }

  /**
   * Load the Silero VAD model and build a detector.
   *
   * Defaults: `sampleRate` 16000, `activationThreshold` 0.5,
   * `deactivationThreshold` `max(activationThreshold - 0.15, 0.01)`,
   * `minSpeechDuration` 0.25 s, `minSilenceDuration` 0.4 s,
   * `prefixPaddingDuration` 0.03 s. The session runs on the CPU execution
   * provider unless `forceCpu` is explicitly `false`.
   *
   * @param {object} [options]
   * @returns {Promise<SileroVAD>}
   * @throws {Error} On an unsupported sample rate, a non-positive
   *   `deactivationThreshold`, or when `onnxruntime-node` cannot be loaded.
   */
  static async load(options = {}) {
    const sampleRate = options.sampleRate ?? 16e3;
    if (!SUPPORTED_SAMPLE_RATES.includes(sampleRate)) {
      throw new Error("Silero VAD only supports 8KHz and 16KHz sample rates");
    }
    const activationThreshold = options.activationThreshold ?? 0.5;
    const deactivationThreshold = options.deactivationThreshold ?? Math.max(activationThreshold - 0.15, 0.01);
    if (deactivationThreshold <= 0) {
      throw new Error("deactivationThreshold must be greater than 0");
    }

    const runtime = await loadOnnxRuntime();
    const executionProviders = options.forceCpu === false ? void 0 : ["cpu"];
    const session = await runtime.InferenceSession.create(options.onnxFilePath ?? DEFAULT_MODEL_PATH, {
      interOpNumThreads: 1,
      intraOpNumThreads: 1,
      executionMode: "sequential",
      executionProviders
    });

    return _SileroVAD.fromOnnxModel(runtime, session, {
      minSpeechDuration: options.minSpeechDuration ?? 0.25,
      // 0.4 s rather than the earlier 0.1 s: with the shorter value
      // speech_end fired on natural inter-sentence pauses under 250 ms and
      // caused double-talk dispatch. 400 ms is the telephony default and
      // matches the inter_utterance_gap_ms debounce in stream-handler.ts.
      minSilenceDuration: options.minSilenceDuration ?? 0.4,
      prefixPaddingDuration: options.prefixPaddingDuration ?? 0.03,
      activationThreshold,
      deactivationThreshold,
      sampleRate
    });
  }

  /**
   * Convenience factory for telephony pipelines.
   *
   * Equivalent to {@link SileroVAD.load} with `sampleRate` defaulting to
   * 16000 Hz — the rate Patter's pipeline-mode audio bus uses (8 kHz mulaw
   * from Twilio is upsampled to 16 kHz PCM before reaching the VAD). Any
   * field in `options`, including `sampleRate`, overrides that default.
   *
   * The remaining defaults follow upstream `snakers4/silero-vad`:
   * `activationThreshold` 0.5, `deactivationThreshold` 0.35
   * (threshold - 0.15), `minSpeechDuration` 0.25 s, `prefixPaddingDuration`
   * 0.03 s; `minSilenceDuration` is the 0.4 s telephony default. Deployments
   * that see truncation on natural pauses can raise `minSilenceDuration`
   * (e.g. 0.5–1.0 s) per call site.
   *
   * @example
   * ```ts
   * const vad = await SileroVAD.forPhoneCall();
   * // or, if natural-pause truncation is observed:
   * const vad = await SileroVAD.forPhoneCall({ minSilenceDuration: 0.5 });
   * ```
   */
  static forPhoneCall(options = {}) {
    const telephonyDefaults = { sampleRate: 16e3 };
    return _SileroVAD.load({ ...telephonyDefaults, ...options });
  }

  /**
   * Internal factory used by tests — bypasses onnxruntime-node loading.
   * @internal
   */
  static fromOnnxModel(runtime, session, options) {
    return new _SileroVAD(new OnnxModel(runtime, session, options.sampleRate), options);
  }

  /** Sample rate (Hz) the detector was configured with. */
  get sampleRate() {
    return this.opts.sampleRate;
  }

  /**
   * Number of int16 PCM samples that make up one inference window:
   * 512 at 16 000 Hz, 256 at 8 000 Hz (32 ms either way).
   *
   * `processFrame` buffers partial windows internally, so smaller chunks are
   * safe; passing exactly one window per call avoids carrying a remainder.
   */
  numFramesRequired() {
    return this.opts.sampleRate === 8e3 ? 256 : 512;
  }

  /**
   * Run VAD on a PCM16 (little-endian) chunk.
   *
   * @param {Buffer} pcmChunk Raw int16 samples; a trailing odd byte is ignored.
   * @param {number} sampleRate Must equal the detector's sample rate.
   * @returns {Promise<object | null>} The last transition event produced by
   *   the windows completed in this call, or `null` if there was none.
   * @throws {Error} When the detector is closed or the sample rate differs.
   */
  async processFrame(pcmChunk, sampleRate) {
    if (this.closed) {
      throw new Error("SileroVAD is closed");
    }
    const modelRate = this.opts.sampleRate;
    if (sampleRate !== modelRate) {
      throw new Error(
        `input sampleRate ${sampleRate} does not match model sampleRate ${modelRate}; resampling is not implemented in the Patter port`
      );
    }

    // Covers both an empty chunk and a lone byte.
    const sampleCount = Math.floor(pcmChunk.length / 2);
    if (sampleCount === 0) {
      return null;
    }

    // int16 -> float, then append to whatever was left over from earlier calls.
    const samples = Float32Array.from(
      { length: sampleCount },
      (_, i) => pcmChunk.readInt16LE(i * 2) / 32767
    );
    const carried = this.pending;
    const merged = new Float32Array(carried.length + samples.length);
    merged.set(carried, 0);
    merged.set(samples, carried.length);
    this.pending = merged;

    const windowSize = this.model.windowSizeSamples;
    const windowDuration = windowSize / this.opts.sampleRate;
    let latest = null;
    while (this.pending.length >= windowSize) {
      // Detach the window from `pending` before awaiting the model.
      const window = this.pending.slice(0, windowSize);
      this.pending = this.pending.slice(windowSize);
      const rawP = await this.model.run(window);
      const smoothed = this.expFilter.apply(1, rawP);
      const transition = this.advanceState(smoothed, windowDuration);
      if (transition !== null) {
        latest = transition;
      }
    }
    return latest;
  }

  /**
   * Advance the speech/silence state machine by one window.
   *
   * @param {number} p Smoothed speech probability for the window.
   * @param {number} windowDuration Window length in seconds.
   * @returns {object | null} A `speech_start` / `speech_end` event, or `null`.
   */
  advanceState(p, windowDuration) {
    const {
      activationThreshold,
      deactivationThreshold,
      minSpeechDuration,
      minSilenceDuration
    } = this.opts;

    // Hysteresis: starting speech needs the activation threshold; once
    // speaking, staying above the lower deactivation threshold is enough.
    const voiced = p >= activationThreshold || this.pubSpeaking && p > deactivationThreshold;

    if (voiced) {
      this.speechThresholdDuration += windowDuration;
      this.silenceThresholdDuration = 0;
      if (this.pubSpeaking || !(this.speechThresholdDuration >= minSpeechDuration)) {
        return null;
      }
      this.pubSpeaking = true;
      return {
        type: "speech_start",
        confidence: p,
        durationMs: this.speechThresholdDuration * 1e3
      };
    }

    this.silenceThresholdDuration += windowDuration;
    this.speechThresholdDuration = 0;
    if (!this.pubSpeaking || !(this.silenceThresholdDuration >= minSilenceDuration)) {
      return null;
    }
    this.pubSpeaking = false;
    return {
      type: "speech_end",
      confidence: p,
      durationMs: this.silenceThresholdDuration * 1e3
    };
  }

  /** Mark the VAD as closed; subsequent processFrame calls throw. */
  async close() {
    if (!this.closed) {
      this.closed = true;
    }
  }

  /**
   * Clear all per-utterance state so the next `processFrame` starts from
   * silence: buffered samples, the speaking flag, both duration counters,
   * the smoothing filter and the model's recurrent state.
   *
   * Intended to be called between agent turns so that echo which held the
   * probability above `deactivationThreshold` for a whole agent turn cannot
   * leave the detector stuck in the speaking state (which would stop any
   * further `speech_start` from firing).
   *
   * Safe to call any time including on a closed instance (no-op).
   */
  reset() {
    if (this.closed) return;
    this.pending = new Float32Array(0);
    this.pubSpeaking = false;
    this.speechThresholdDuration = 0;
    this.silenceThresholdDuration = 0;
    this.expFilter.reset();
    this.model.reset();
  }
};
|
|
394
|
+
|
|
395
|
+
export {
|
|
396
|
+
SileroVAD
|
|
397
|
+
};
|