getpatter 0.6.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/barge-in-strategies-X6ARMGIQ.mjs +12 -0
- package/dist/chunk-D4424JZR.mjs +71 -0
- package/dist/{chunk-X3364LSI.mjs → chunk-RV7APPYE.mjs} +36 -2
- package/dist/{chunk-JUQ5WQTQ.mjs → chunk-TEW3NAZJ.mjs} +3244 -1674
- package/dist/cli.js +277 -24
- package/dist/dashboard/ui.html +13 -13
- package/dist/index.d.mts +1525 -364
- package/dist/index.d.ts +1525 -364
- package/dist/index.js +3921 -986
- package/dist/index.mjs +1310 -70
- package/dist/{silero-vad-YLCXT5GQ.mjs → silero-vad-NSEXI4XS.mjs} +1 -1
- package/dist/{test-mode-Y7YG5LFZ.mjs → test-mode-WEKKNBLD.mjs} +1 -1
- package/package.json +1 -1
- package/src/dashboard/ui.html +13 -13
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import {
|
|
2
|
+
getLogger
|
|
3
|
+
} from "./chunk-MVOQFAEO.mjs";
|
|
4
|
+
import {
|
|
5
|
+
init_esm_shims
|
|
6
|
+
} from "./chunk-N565J3CF.mjs";
|
|
7
|
+
|
|
8
|
+
// src/services/barge-in-strategies.ts
|
|
9
|
+
init_esm_shims();
|
|
10
|
+
/**
 * Barge-in strategy that confirms an interruption based on how many words
 * the transcript contains.
 *
 * While the agent is speaking the transcript must contain at least
 * `minWords` whitespace-separated words; when the agent is not speaking a
 * single word is enough.
 */
var MinWordsStrategy = class {
  /** Word threshold applied while the agent is speaking (integer >= 1). */
  minWords;
  /** When false, interim transcripts are never confirmed. */
  useInterim;
  /**
   * @param {{ minWords: number, useInterim?: boolean }} options
   *   `minWords` is floored to an integer; `useInterim` defaults to true.
   * @throws {Error} If `options` is missing or `minWords` is not a finite
   *   number >= 1.
   */
  constructor(options) {
    // Read through `?.` so a missing options object is reported by the
    // validation error below rather than an opaque property-access TypeError.
    const requestedMinWords = options?.minWords;
    if (!Number.isFinite(requestedMinWords) || requestedMinWords < 1) {
      throw new Error(
        `minWords must be >= 1 (got ${String(requestedMinWords)})`
      );
    }
    this.minWords = Math.floor(requestedMinWords);
    this.useInterim = options.useInterim ?? true;
  }
  /**
   * Decide whether the transcript in `ctx` is long enough to confirm a
   * barge-in.
   *
   * @param {{ transcript?: string, isInterim: boolean, agentSpeaking: boolean }} ctx
   * @returns {boolean} true when the word count reaches the threshold.
   */
  evaluate(ctx) {
    if (ctx.isInterim && !this.useInterim) {
      return false;
    }
    // The configured threshold only applies while the agent is talking.
    const threshold = ctx.agentSpeaking ? this.minWords : 1;
    const wordCount = (ctx.transcript ?? "").trim().split(/\s+/).filter(Boolean).length;
    return wordCount >= threshold;
  }
  /** No per-utterance state is kept, so there is nothing to reset. */
  async reset() {
  }
};
|
|
33
|
+
/**
 * Ask each barge-in strategy, in order, whether the interruption is
 * confirmed, stopping at the first one that returns exactly `true`.
 *
 * A strategy that throws (or an unusable entry such as `null`) is logged as
 * a warning and treated as "do not confirm"; evaluation continues with the
 * next strategy.
 *
 * @param {Array<{ evaluate: Function }> | null | undefined} strategies
 * @param {{ transcript?: string, isInterim: boolean, agentSpeaking: boolean }} ctx
 * @returns {Promise<boolean>} true if any strategy confirmed; false for an
 *   empty or missing strategy list.
 */
async function evaluateStrategies(strategies, ctx) {
  if (!strategies || strategies.length === 0) {
    return false;
  }
  // Hand strategies a normalized copy so `transcript` is never null/undefined.
  const safeCtx = {
    transcript: ctx.transcript ?? "",
    isInterim: ctx.isInterim,
    agentSpeaking: ctx.agentSpeaking
  };
  for (const strategy of strategies) {
    try {
      const result = await strategy.evaluate(safeCtx);
      // Only a strict `true` confirms; other truthy values are ignored.
      if (result === true) return true;
    } catch (err) {
      // A null/undefined entry is one way to land here, so the name lookup
      // must not dereference `strategy` unguarded or the handler itself
      // would throw and reject the whole evaluation.
      getLogger().warn(
        `BargeInStrategy ${strategy?.constructor?.name ?? "unknown"} threw; treating as 'do not confirm': ${String(err)}`
      );
    }
  }
  return false;
}
|
|
54
|
+
/**
 * Call `reset()` on every strategy that provides one, sequentially and in
 * order.
 *
 * Best-effort: a `reset()` that throws or rejects is logged at debug level
 * and the remaining strategies are still reset. A missing strategy list and
 * null/undefined entries are skipped, mirroring how `evaluateStrategies`
 * tolerates a missing list.
 *
 * @param {Array<{ reset?: Function }> | null | undefined} strategies
 * @returns {Promise<void>}
 */
async function resetStrategies(strategies) {
  // Iterating a null/undefined list would throw a TypeError.
  if (!strategies) {
    return;
  }
  for (const strategy of strategies) {
    // `reset` is optional; `?.` also skips null/undefined entries.
    if (typeof strategy?.reset !== "function") continue;
    try {
      await strategy.reset();
    } catch (err) {
      getLogger().debug(
        `BargeInStrategy ${strategy.constructor?.name ?? "unknown"}.reset() threw: ${String(err)}`
      );
    }
  }
}
|
|
66
|
+
|
|
67
|
+
export {
|
|
68
|
+
MinWordsStrategy,
|
|
69
|
+
evaluateStrategies,
|
|
70
|
+
resetStrategies
|
|
71
|
+
};
|
|
@@ -174,6 +174,11 @@ var OnnxModel = class {
|
|
|
174
174
|
const data = out.data;
|
|
175
175
|
return data[0] ?? 0;
|
|
176
176
|
}
|
|
177
|
+
/** Reset the RNN hidden state + rolling context to a fresh inference. */
|
|
178
|
+
reset() {
|
|
179
|
+
this.context = new Float32Array(this.contextSize);
|
|
180
|
+
this.rnnState = new Float32Array(2 * 1 * 128);
|
|
181
|
+
}
|
|
177
182
|
};
|
|
178
183
|
var SileroVAD = class _SileroVAD {
|
|
179
184
|
constructor(model, opts) {
|
|
@@ -213,7 +218,11 @@ var SileroVAD = class _SileroVAD {
|
|
|
213
218
|
const model = new OnnxModel(runtime, session, sampleRate);
|
|
214
219
|
return new _SileroVAD(model, {
|
|
215
220
|
minSpeechDuration: options.minSpeechDuration ?? 0.25,
|
|
216
|
-
|
|
221
|
+
// Bumped 0.1 -> 0.4s after round 10f confirmed VAD speech_end fired on
|
|
222
|
+
// natural inter-sentence pauses < 250ms, causing double-talk dispatch.
|
|
223
|
+
// 400ms is the industry default for telephony and matches the new
|
|
224
|
+
// inter_utterance_gap_ms debounce in stream-handler.ts.
|
|
225
|
+
minSilenceDuration: options.minSilenceDuration ?? 0.4,
|
|
217
226
|
prefixPaddingDuration: options.prefixPaddingDuration ?? 0.03,
|
|
218
227
|
activationThreshold,
|
|
219
228
|
deactivationThreshold,
|
|
@@ -233,7 +242,10 @@ var SileroVAD = class _SileroVAD {
|
|
|
233
242
|
* - `activationThreshold = 0.5` — upstream `threshold`
|
|
234
243
|
* - `deactivationThreshold = 0.35` — upstream `neg_threshold = threshold - 0.15`
|
|
235
244
|
* - `minSpeechDuration = 0.25` — upstream `min_speech_duration_ms = 250`
|
|
236
|
-
* - `minSilenceDuration = 0.
|
|
245
|
+
* - `minSilenceDuration = 0.4` — telephony default (was 0.1, bumped after
|
|
246
|
+
* round 10f found speech_end firing on inter-sentence pauses < 250 ms,
|
|
247
|
+
* causing double-talk dispatch). 400 ms matches the industry telephony
|
|
248
|
+
* default and the inter_utterance_gap_ms debounce in stream-handler.ts.
|
|
237
249
|
* - `prefixPaddingDuration = 0.03` — upstream `speech_pad_ms = 30`
|
|
238
250
|
*
|
|
239
251
|
* Override any field by passing `options`. Deployments that experience
|
|
@@ -356,6 +368,28 @@ var SileroVAD = class _SileroVAD {
|
|
|
356
368
|
if (this.closed) return;
|
|
357
369
|
this.closed = true;
|
|
358
370
|
}
|
|
371
|
+
/**
|
|
372
|
+
* Reset all per-utterance state so the next ``processFrame`` starts from
|
|
373
|
+
* a clean SILENCE state.
|
|
374
|
+
*
|
|
375
|
+
* Called by the stream handler between agent turns to prevent a "stuck
|
|
376
|
+
* SPEECH" condition where PSTN echo / loopback kept the detector's
|
|
377
|
+
* probability above ``deactivationThreshold`` for the entire agent turn.
|
|
378
|
+
* Without this reset the next user utterance would never trigger a
|
|
379
|
+
* SILENCE→SPEECH transition and barge-in would feel "one-shot" (works
|
|
380
|
+
* once, then never again until the call ends).
|
|
381
|
+
*
|
|
382
|
+
* Safe to call any time including on a closed instance (no-op).
|
|
383
|
+
*/
|
|
384
|
+
reset() {
|
|
385
|
+
if (this.closed) return;
|
|
386
|
+
this.pending = new Float32Array(0);
|
|
387
|
+
this.pubSpeaking = false;
|
|
388
|
+
this.speechThresholdDuration = 0;
|
|
389
|
+
this.silenceThresholdDuration = 0;
|
|
390
|
+
this.expFilter.reset();
|
|
391
|
+
this.model.reset();
|
|
392
|
+
}
|
|
359
393
|
};
|
|
360
394
|
|
|
361
395
|
export {
|