getpatter 0.6.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ import {
2
+ MinWordsStrategy,
3
+ evaluateStrategies,
4
+ resetStrategies
5
+ } from "./chunk-D4424JZR.mjs";
6
+ import "./chunk-MVOQFAEO.mjs";
7
+ import "./chunk-N565J3CF.mjs";
8
+ export {
9
+ MinWordsStrategy,
10
+ evaluateStrategies,
11
+ resetStrategies
12
+ };
@@ -0,0 +1,71 @@
1
+ import {
2
+ getLogger
3
+ } from "./chunk-MVOQFAEO.mjs";
4
+ import {
5
+ init_esm_shims
6
+ } from "./chunk-N565J3CF.mjs";
7
+
8
+ // src/services/barge-in-strategies.ts
9
+ init_esm_shims();
10
/**
 * Barge-in strategy that confirms an interruption once the transcript
 * contains enough whitespace-separated words.
 *
 * While the agent is speaking the transcript must reach `minWords`; when the
 * agent is not speaking a single word is enough.
 */
var MinWordsStrategy = class {
  /** Word count required while the agent is speaking (floored to an integer). */
  minWords;
  /** Whether interim transcripts are allowed to confirm a barge-in. */
  useInterim;

  /**
   * @param {{ minWords: number, useInterim?: boolean }} options
   *   `useInterim` defaults to `true` when omitted or nullish.
   * @throws {Error} If `minWords` is missing, not a finite number, or < 1.
   */
  constructor(options) {
    // Optional chaining: a missing/null options object must surface the same
    // validation error as a bad value, not an opaque TypeError.
    const requested = options?.minWords;
    if (!Number.isFinite(requested) || requested < 1) {
      throw new Error(
        `minWords must be >= 1 (got ${String(requested)})`
      );
    }
    this.minWords = Math.floor(requested);
    this.useInterim = options.useInterim ?? true;
  }

  /**
   * Decide whether the transcript is long enough to confirm a barge-in.
   *
   * @param {{ transcript?: string, isInterim?: boolean, agentSpeaking?: boolean }} ctx
   * @returns {boolean} `true` when the word count reaches the threshold.
   */
  evaluate(ctx) {
    if (ctx.isInterim && !this.useInterim) {
      return false;
    }
    // The configured minimum only applies while the agent is talking.
    const threshold = ctx.agentSpeaking ? this.minWords : 1;
    // filter(Boolean) drops the single "" that split() yields for an empty
    // (or whitespace-only) transcript, so it counts as zero words.
    const wordCount = (ctx.transcript ?? "").trim().split(/\s+/).filter(Boolean).length;
    return wordCount >= threshold;
  }

  /** No per-utterance state to clear; present so callers can reset uniformly. */
  async reset() {
  }
};
33
/**
 * Run barge-in strategies in order and report whether any confirms.
 *
 * Strategies are awaited one at a time; the first one that resolves to
 * exactly `true` short-circuits the rest. A strategy that throws (or a
 * null/undefined entry in the list) is logged as a warning and treated as
 * "do not confirm".
 *
 * @param {Array<{ evaluate: Function }> | null | undefined} strategies
 * @param {{ transcript?: string, isInterim?: boolean, agentSpeaking?: boolean }} ctx
 * @returns {Promise<boolean>} `true` if some strategy confirmed, else `false`
 *   (also `false` for a missing or empty strategy list).
 */
async function evaluateStrategies(strategies, ctx) {
  if (!strategies || strategies.length === 0) {
    return false;
  }
  // Hand strategies a fresh object whose transcript is never nullish.
  const safeCtx = {
    transcript: ctx.transcript ?? "",
    isInterim: ctx.isInterim,
    agentSpeaking: ctx.agentSpeaking
  };
  for (const strategy of strategies) {
    try {
      const result = await strategy.evaluate(safeCtx);
      if (result === true) return true;
    } catch (err) {
      // `strategy?.` matters: a null/undefined entry lands here via the
      // TypeError above, and the handler itself must not throw.
      getLogger().warn(
        `BargeInStrategy ${strategy?.constructor?.name ?? "unknown"} threw; treating as 'do not confirm': ${String(err)}`
      );
    }
  }
  return false;
}
54
/**
 * Call `reset()` on every strategy that provides one, in list order.
 *
 * Best-effort: a missing strategy list, null/undefined entries and entries
 * without a `reset` function are skipped, and a `reset()` that throws or
 * rejects is logged at debug level without stopping the remaining resets.
 *
 * @param {Array<{ reset?: Function }> | null | undefined} strategies
 * @returns {Promise<void>}
 */
async function resetStrategies(strategies) {
  // Same tolerance for a missing list as evaluateStrategies has.
  if (!strategies) return;
  for (const strategy of strategies) {
    // `?.` keeps a null/undefined entry from throwing outside the try block.
    if (typeof strategy?.reset !== "function") continue;
    try {
      await strategy.reset();
    } catch (err) {
      getLogger().debug(
        `BargeInStrategy ${strategy.constructor?.name ?? "unknown"}.reset() threw: ${String(err)}`
      );
    }
  }
}
66
+
67
+ export {
68
+ MinWordsStrategy,
69
+ evaluateStrategies,
70
+ resetStrategies
71
+ };
@@ -174,6 +174,11 @@ var OnnxModel = class {
174
174
  const data = out.data;
175
175
  return data[0] ?? 0;
176
176
  }
177
+ /** Reset the RNN hidden state + rolling context to a fresh inference. */
178
+ reset() {
179
+ this.context = new Float32Array(this.contextSize);
180
+ this.rnnState = new Float32Array(2 * 1 * 128);
181
+ }
177
182
  };
178
183
  var SileroVAD = class _SileroVAD {
179
184
  constructor(model, opts) {
@@ -213,7 +218,11 @@ var SileroVAD = class _SileroVAD {
213
218
  const model = new OnnxModel(runtime, session, sampleRate);
214
219
  return new _SileroVAD(model, {
215
220
  minSpeechDuration: options.minSpeechDuration ?? 0.25,
216
- minSilenceDuration: options.minSilenceDuration ?? 0.1,
221
+ // Bumped 0.1 -> 0.4s after round 10f confirmed VAD speech_end fired on
222
+ // natural inter-sentence pauses < 250ms, causing double-talk dispatch.
223
+ // 400ms is the industry default for telephony and matches the new
224
+ // inter_utterance_gap_ms debounce in stream-handler.ts.
225
+ minSilenceDuration: options.minSilenceDuration ?? 0.4,
217
226
  prefixPaddingDuration: options.prefixPaddingDuration ?? 0.03,
218
227
  activationThreshold,
219
228
  deactivationThreshold,
@@ -233,7 +242,10 @@ var SileroVAD = class _SileroVAD {
233
242
  * - `activationThreshold = 0.5` — upstream `threshold`
234
243
  * - `deactivationThreshold = 0.35` — upstream `neg_threshold = threshold - 0.15`
235
244
  * - `minSpeechDuration = 0.25` — upstream `min_speech_duration_ms = 250`
236
- * - `minSilenceDuration = 0.1` — upstream `min_silence_duration_ms = 100`
245
+ * - `minSilenceDuration = 0.4` — telephony default (was 0.1, bumped after
246
+ * round 10f found speech_end firing on inter-sentence pauses < 250 ms,
247
+ * causing double-talk dispatch). 400 ms matches the industry telephony
248
+ * default and the inter_utterance_gap_ms debounce in stream-handler.ts.
237
249
  * - `prefixPaddingDuration = 0.03` — upstream `speech_pad_ms = 30`
238
250
  *
239
251
  * Override any field by passing `options`. Deployments that experience
@@ -356,6 +368,28 @@ var SileroVAD = class _SileroVAD {
356
368
  if (this.closed) return;
357
369
  this.closed = true;
358
370
  }
371
+ /**
372
+ * Reset all per-utterance state so the next ``processFrame`` starts from
373
+ * a clean SILENCE state.
374
+ *
375
+ * Called by the stream handler between agent turns to prevent a "stuck
376
+ * SPEECH" condition where PSTN echo / loopback kept the detector's
377
+ * probability above ``deactivationThreshold`` for the entire agent turn.
378
+ * Without this reset the next user utterance would never trigger a
379
+ * SILENCE→SPEECH transition and barge-in would feel "one-shot" (works
380
+ * once, then never again until the call ends).
381
+ *
382
+ * Safe to call any time including on a closed instance (no-op).
383
+ */
384
+ reset() {
385
+ if (this.closed) return;
386
+ this.pending = new Float32Array(0);
387
+ this.pubSpeaking = false;
388
+ this.speechThresholdDuration = 0;
389
+ this.silenceThresholdDuration = 0;
390
+ this.expFilter.reset();
391
+ this.model.reset();
392
+ }
359
393
  };
360
394
 
361
395
  export {