npm - @framers/agentos - Versions diffs - 0.1.108 → 0.1.110 - Mend

@framers/agentos 0.1.108 → 0.1.110

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

package/dist/voice-pipeline/AcousticEndpointDetector.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"AcousticEndpointDetector.js","sourceRoot":"","sources":["../../src/voice-pipeline/AcousticEndpointDetector.ts"],"names":[],"mappings":"AAAA~~;;;;;;;;;;;;GAYG~~;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,eAAe,EAA8B,MAAM,kCAAkC,CAAC;~~AAgC~~/F,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E~~;;;;;;;;;;;;;GAaG~~;AACH,MAAM,OAAO,wBAAyB,SAAQ,YAAY;~~IAmBxD~~,8EAA8E;IAE9E;;;;OAIG;IACH,YAAY,SAAyC,EAAE;QACrD,KAAK,EAAE,CAAC;~~QA1BV,kBAAkB~~;~~QACF~~,SAAI,GAAG,UAAmB,CAAC;~~QAK3C;;;WAGG~~;QACK,sBAAiB,GAAkB,IAAI,CAAC;QAEhD~~;;;WAGG~~;QACK,oBAAe,GAAkB,IAAI,CAAC;~~QAY5C~~,MAAM,QAAQ,GAA0B;YACtC,2BAA2B,EAAE,MAAM,CAAC,2BAA2B,IAAI,IAAI;YACvE,uBAAuB,EAAE,MAAM,CAAC,uBAAuB,IAAI,IAAI;SAChE,CAAC;QAEF,IAAI,CAAC,eAAe,GAAG,IAAI,eAAe,CAAC,QAAQ,CAAC,CAAC;QAErD,~~4EAA4E~~;~~QAC5E~~,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC,wBAAwB,EAAE,CAAC,kBAA0B,EAAE,EAAE;YAC/E,MAAM,UAAU,GACd,IAAI,CAAC,iBAAiB,KAAK,IAAI,IAAI,IAAI,CAAC,eAAe,KAAK,IAAI;gBAC9D,CAAC,CAAC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,iBAAiB;gBAC/C,CAAC,CAAC,CAAC,CAAC;YAER,MAAM,KAAK,GAAsB;gBAC/B,UAAU,EAAE,EAAE,~~EAAI,yCAAyC~~;~~gBAC3D~~,UAAU,EAAE,CAAC;gBACb,UAAU;gBACV,MAAM,EAAE,iBAAiB;aAC1B,CAAC;YAEF,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC;QACpC,CAAC,CAAC,CAAC;IACL,CAAC;IAED,8EAA8E;IAC9E,~~oBAAoB~~;~~IACpB~~,8EAA8E;IAE9E~~;;;;;;;;OAQG~~;IACI,YAAY,CAAC,KAAe;QACjC,~~2EAA2E~~;~~QAC3E~~,~~2DAA2D~~;~~QAC3D~~,MAAM,aAAa,GAAG,EAAE,SAAS,EAAE,KAAK,CAAC,SAAS,EAAW,CAAC;QAE9D,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,cAAc;gBACjB,IAAI,CAAC,iBAAiB,GAAG,KAAK,CAAC,SAAS,CAAC;gBACzC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;gBAC5B,IAAI,CAAC,eAAe,CAAC,iBAAiB,CAAC,aAAa,CAAC,CAAC;gBACtD,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAC1B,MAAM;YAER,KAAK,YAAY;gBACf,IAAI,CAAC,eAAe,GAAG,KAAK,CAAC,SAAS,CAAC;gBACvC,IAAI,CAAC,eAAe,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;gBACvD,MAAM;YAER,KAAK,SAAS;gBACZ,~~2DAA2D~~;~~gBAC3D~~,IAAI,CAAC,eAAe,CAAC,qBAAqB,CAAC,aAAa,CAAC,CAAC;gBAC1D,MAAM;QACV,CAAC;IACH,CAAC;IAED~~;;;;OAIG~~;IACI,cAAc,CAAC,MAAuB;QAC3C~~,+DAA+D~~;~~IACjE~~,CAAC;IAED~~;;OAE
G~~;IACI,KAAK;QACV,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC;QAC9B,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC5B,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;CACF"}
1	+ {"version":3,"file":"AcousticEndpointDetector.js","sourceRoot":"","sources":["../../src/voice-pipeline/AcousticEndpointDetector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,eAAe,EAA8B,MAAM,kCAAkC,CAAC;AA8C/F,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,OAAO,wBAAyB,SAAQ,YAAY;IA+BxD,8EAA8E;IAC9E,cAAc;IACd,8EAA8E;IAE9E;;;;OAIG;IACH,YAAY,SAAyC,EAAE;QACrD,KAAK,EAAE,CAAC;QAxCV;;;WAGG;QACa,SAAI,GAAG,UAAmB,CAAC;QAQ3C;;;;;;WAMG;QACK,sBAAiB,GAAkB,IAAI,CAAC;QAEhD;;;;;;WAMG;QACK,oBAAe,GAAkB,IAAI,CAAC;QAc5C,uEAAuE;QACvE,MAAM,QAAQ,GAA0B;YACtC,2BAA2B,EAAE,MAAM,CAAC,2BAA2B,IAAI,IAAI;YACvE,uBAAuB,EAAE,MAAM,CAAC,uBAAuB,IAAI,IAAI;SAChE,CAAC;QAEF,IAAI,CAAC,eAAe,GAAG,IAAI,eAAe,CAAC,QAAQ,CAAC,CAAC;QAErD,yEAAyE;QACzE,qEAAqE;QACrE,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC,wBAAwB,EAAE,CAAC,kBAA0B,EAAE,EAAE;YAC/E,iEAAiE;YACjE,yDAAyD;YACzD,MAAM,UAAU,GACd,IAAI,CAAC,iBAAiB,KAAK,IAAI,IAAI,IAAI,CAAC,eAAe,KAAK,IAAI;gBAC9D,CAAC,CAAC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,iBAAiB;gBAC/C,CAAC,CAAC,CAAC,CAAC;YAER,MAAM,KAAK,GAAsB;gBAC/B,kEAAkE;gBAClE,sEAAsE;gBACtE,UAAU,EAAE,EAAE;gBACd,wDAAwD;gBACxD,UAAU,EAAE,CAAC;gBACb,UAAU;gBACV,MAAM,EAAE,iBAAiB;aAC1B,CAAC;YAEF,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC;QACpC,CAAC,CAAC,CAAC;IACL,CAAC;IAED,8EAA8E;IAC9E,oCAAoC;IACpC,8EAA8E;IAE9E;;;;;;;;;;OAUG;IACI,YAAY,CAAC,KAAe;QACjC,wEAAwE;QACxE,sEAAsE;QACtE,mEAAmE;QACnE,uDAAuD;QACvD,MAAM,aAAa,GAAG,EAAE,SAAS,EAAE,KAAK,CAAC,SAAS,EAAW,CAAC;QAE9D,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,cAAc;gBACjB,oDAAoD;gBACpD,IAAI,CAAC,iBAAiB,GAAG,KAAK,CAAC,SAAS,CAAC;gBACzC,mEAAmE;gBACnE,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;gBAC5B,6DAA6D;gBAC7D,IAAI,CAAC,eAAe,CAAC,iBAAiB,CAAC,aAAa,CAAC,CAAC;gBACtD,2DAA2D;gBAC3D,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAC1B,MAAM;YAER,KAAK,YAAY;gBACf,oDAAoD;gBACpD,IAAI,CAAC,eAAe,GAAG,KAAK,CAAC,SAAS,CAAC;gBACvC,wDAAwD;gBACxD,yEAAyE;gBACzE,0EAA0E;gBAC1E,IAAI,CAAC,eAAe,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;gBA
CvD,MAAM;YAER,KAAK,SAAS;gBACZ,mEAAmE;gBACnE,0DAA0D;gBAC1D,IAAI,CAAC,eAAe,CAAC,qBAAqB,CAAC,aAAa,CAAC,CAAC;gBAC1D,MAAM;QACV,CAAC;IACH,CAAC;IAED,8EAA8E;IAC9E,sCAAsC;IACtC,8EAA8E;IAE9E;;;;;;;OAOG;IACI,cAAc,CAAC,MAAuB;QAC3C,mEAAmE;QACnE,mEAAmE;QACnE,iCAAiC;IACnC,CAAC;IAED,8EAA8E;IAC9E,6BAA6B;IAC7B,8EAA8E;IAE9E;;;;;;OAMG;IACI,KAAK;QACV,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC;QAC9B,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC5B,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;CACF"}

package/dist/voice-pipeline/HardCutBargeinHandler.d.ts CHANGED Viewed

@@ -2,21 +2,59 @@
  * @module voice-pipeline/HardCutBargeinHandler
  *
  * Implements a hard-cut barge-in policy: when the user speaks over TTS output
- * for at least `minSpeechMs` milliseconds, playback is stopped immediately with
- * no fade-out. Short detections below the threshold are treated as accidental
- * noise and ignored.
+ * for at least {@link HardCutBargeinHandlerOptions.minSpeechMs} milliseconds,
+ * playback is stopped immediately with no fade-out. Short detections below the
+ * threshold are treated as accidental noise and ignored.
+ *
+ * ## Why 300 ms default threshold?
+ *
+ * The 300 ms threshold was chosen to filter out common non-speech audio events
+ * that trigger false barge-in detections:
+ *
+ * - **Lip smacks**: Typically 50-150 ms of energy.
+ * - **Breaths/sighs**: Typically 100-250 ms of energy.
+ * - **Coughs/sneezes**: Short bursts of 100-200 ms, though some may exceed the threshold.
+ * - **Background noise spikes**: Door closing, keyboard typing -- usually < 200 ms.
+ *
+ * At 300 ms, a detection almost certainly represents intentional speech rather
+ * than ambient noise. Lowering to < 200 ms increases false positives significantly
+ * in noisy environments. Raising to > 500 ms adds noticeable delay before the
+ * agent acknowledges the interruption.
+ *
+ * ## When to use hard-cut vs soft-fade
+ *
+ * Use hard-cut when:
+ * - The conversation style is fast-paced (e.g. customer support).
+ * - Users expect immediate response to interruption.
+ * - Audio quality is high (fewer false positives).
+ *
+ * Use {@link SoftFadeBargeinHandler} when:
+ * - The conversation is more measured (e.g. storytelling, education).
+ * - Users may accidentally trigger barge-in (noisy environment).
+ * - A smoother audio experience is preferred.
+ *
+ * @see {@link SoftFadeBargeinHandler} for the three-tier soft-fade alternative.
+ * @see {@link IBargeinHandler} for the interface contract.
  */
 import type { BargeinAction, BargeinContext, IBargeinHandler } from './types.js';
 /**
  * Construction options for {@link HardCutBargeinHandler}.
+ *
+ * @example
+ * ```typescript
+ * const handler = new HardCutBargeinHandler({ minSpeechMs: 250 });
+ * ```
  */
 export interface HardCutBargeinHandlerOptions {
     /**
      * Minimum confirmed speech duration (in milliseconds) required before a
      * barge-in is treated as intentional. Detections shorter than this value are
-     * returned as `{ type: 'ignore' }` to avoid reacting to background noise.
+     * returned as `{ type: 'ignore' }` to avoid reacting to background noise,
+     * lip smacks, breaths, or other brief non-speech audio events.
      *
      * @defaultValue 300
+     *
+     * @see Module-level documentation for rationale behind the 300 ms default.
      */
     minSpeechMs?: number;
 }
@@ -24,26 +62,38 @@ export interface HardCutBargeinHandlerOptions {
  * Barge-in handler that applies a hard-cut strategy.
  *
  * When the user speaks over an active TTS stream, this handler immediately
- * cancels playback if the detected speech exceeds `minSpeechMs`. Below that
- * threshold the interruption is considered noise and playback continues
+ * cancels playback once the detected speech reaches {@link minSpeechMs}. Below
+ * that threshold the interruption is considered noise and playback continues
  * uninterrupted.
  *
+ * The handler is stateless -- each {@link handleBargein} call is evaluated
+ * independently with no memory of previous barge-in events.
+ *
+ * @see {@link IBargeinHandler} for the interface contract.
+ * @see {@link SoftFadeBargeinHandler} for the three-tier alternative.
+ *
  * @example
- * ```ts
+ * ```typescript
  * const handler = new HardCutBargeinHandler({ minSpeechMs: 250 });
- * const action = handler.handleBargein({ speechDurationMs: 400, ... });
- * // action.type === 'cancel'
+ *
+ * // Short noise -> ignored
+ * handler.handleBargein({ speechDurationMs: 100, interruptedText: '...', playedDurationMs: 500 });
+ * // -> { type: 'ignore' }
+ *
+ * // Intentional speech -> cancel
+ * handler.handleBargein({ speechDurationMs: 400, interruptedText: '...', playedDurationMs: 500 });
+ * // -> { type: 'cancel', injectMarker: '[interrupted]' }
  * ```
  */
 export declare class HardCutBargeinHandler implements IBargeinHandler {
     /**
      * The interruption strategy implemented by this handler.
-     * Always `'hard-cut'`.
+     * Always `'hard-cut'` -- playback is stopped instantly with no fade.
      */
     readonly mode: "hard-cut";
     /**
      * Minimum speech duration in milliseconds before the interruption is
-     * considered intentional.
+     * considered intentional. Set once at construction and never changed.
      */
     private readonly minSpeechMs;
     /**
@@ -55,12 +105,13 @@ export declare class HardCutBargeinHandler implements IBargeinHandler {
     /**
      * Evaluate the barge-in context and return the action the pipeline should take.
      *
-     * - If `context.speechDurationMs >= minSpeechMs`, returns
-     *   `{ type: 'cancel', injectMarker: '[interrupted]' }` to immediately halt TTS.
-     * - Otherwise returns `{ type: 'ignore' }` to continue playback.
+     * Decision logic (binary threshold):
+     * - `speechDurationMs >= minSpeechMs` -> Cancel TTS immediately and inject
+     *   an `'[interrupted]'` marker into the conversation context.
+     * - `speechDurationMs < minSpeechMs` -> Ignore the detection as noise.
      *
      * @param context - Snapshot of the barge-in state at the moment of detection.
-     * @returns The pipeline action to execute.
+     * @returns The pipeline action to execute. Always synchronous (no Promise).
      */
     handleBargein(context: BargeinContext): BargeinAction;
 }

package/dist/voice-pipeline/HardCutBargeinHandler.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"HardCutBargeinHandler.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/HardCutBargeinHandler.ts"],"names":[],"mappings":"AAAA~~;;;;;;;GAOG~~;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;~~AAEjF;;GAEG~~;AACH,MAAM,WAAW,4BAA4B;IAC3C~~;;;;;;OAMG~~;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;~~AAED;;;;;;;;;;;;;;GAcG~~;AACH,qBAAa,qBAAsB,YAAW,eAAe;IAC3D;;;OAGG;IACH,QAAQ,CAAC,IAAI,EAAG,UAAU,CAAU;IAEpC;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IAErC;;;;OAIG;gBACS,OAAO,GAAE,4BAAiC;IAItD~~;;;;;;;;;OASG~~;IACH,aAAa,CAAC,OAAO,EAAE,cAAc,GAAG,aAAa;~~CAMtD~~"}
1	+ {"version":3,"file":"HardCutBargeinHandler.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/HardCutBargeinHandler.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAMjF;;;;;;;GAOG;AACH,MAAM,WAAW,4BAA4B;IAC3C;;;;;;;;;OASG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAMD;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,qBAAsB,YAAW,eAAe;IAC3D;;;OAGG;IACH,QAAQ,CAAC,IAAI,EAAG,UAAU,CAAU;IAEpC;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IAErC;;;;OAIG;gBACS,OAAO,GAAE,4BAAiC;IAItD;;;;;;;;;;OAUG;IACH,aAAa,CAAC,OAAO,EAAE,cAAc,GAAG,aAAa;CAatD"}

package/dist/voice-pipeline/HardCutBargeinHandler.js CHANGED Viewed

@@ -2,23 +2,68 @@
  * @module voice-pipeline/HardCutBargeinHandler
  *
  * Implements a hard-cut barge-in policy: when the user speaks over TTS output
- * for at least `minSpeechMs` milliseconds, playback is stopped immediately with
- * no fade-out. Short detections below the threshold are treated as accidental
- * noise and ignored.
+ * for at least {@link HardCutBargeinHandlerOptions.minSpeechMs} milliseconds,
+ * playback is stopped immediately with no fade-out. Short detections below the
+ * threshold are treated as accidental noise and ignored.
+ *
+ * ## Why 300 ms default threshold?
+ *
+ * The 300 ms threshold was chosen to filter out common non-speech audio events
+ * that trigger false barge-in detections:
+ *
+ * - **Lip smacks**: Typically 50-150 ms of energy.
+ * - **Breaths/sighs**: Typically 100-250 ms of energy.
+ * - **Coughs/sneezes**: Short bursts of 100-200 ms, though some may exceed the threshold.
+ * - **Background noise spikes**: Door closing, keyboard typing -- usually < 200 ms.
+ *
+ * At 300 ms, a detection almost certainly represents intentional speech rather
+ * than ambient noise. Lowering to < 200 ms increases false positives significantly
+ * in noisy environments. Raising to > 500 ms adds noticeable delay before the
+ * agent acknowledges the interruption.
+ *
+ * ## When to use hard-cut vs soft-fade
+ *
+ * Use hard-cut when:
+ * - The conversation style is fast-paced (e.g. customer support).
+ * - Users expect immediate response to interruption.
+ * - Audio quality is high (fewer false positives).
+ *
+ * Use {@link SoftFadeBargeinHandler} when:
+ * - The conversation is more measured (e.g. storytelling, education).
+ * - Users may accidentally trigger barge-in (noisy environment).
+ * - A smoother audio experience is preferred.
+ *
+ * @see {@link SoftFadeBargeinHandler} for the three-tier soft-fade alternative.
+ * @see {@link IBargeinHandler} for the interface contract.
  */
+// ---------------------------------------------------------------------------
+// Implementation
+// ---------------------------------------------------------------------------
 /**
  * Barge-in handler that applies a hard-cut strategy.
  *
  * When the user speaks over an active TTS stream, this handler immediately
- * cancels playback if the detected speech exceeds `minSpeechMs`. Below that
- * threshold the interruption is considered noise and playback continues
+ * cancels playback once the detected speech reaches {@link minSpeechMs}. Below
+ * that threshold the interruption is considered noise and playback continues
  * uninterrupted.
  *
+ * The handler is stateless -- each {@link handleBargein} call is evaluated
+ * independently with no memory of previous barge-in events.
+ *
+ * @see {@link IBargeinHandler} for the interface contract.
+ * @see {@link SoftFadeBargeinHandler} for the three-tier alternative.
+ *
  * @example
- * ```ts
+ * ```typescript
  * const handler = new HardCutBargeinHandler({ minSpeechMs: 250 });
- * const action = handler.handleBargein({ speechDurationMs: 400, ... });
- * // action.type === 'cancel'
+ *
+ * // Short noise -> ignored
+ * handler.handleBargein({ speechDurationMs: 100, interruptedText: '...', playedDurationMs: 500 });
+ * // -> { type: 'ignore' }
+ *
+ * // Intentional speech -> cancel
+ * handler.handleBargein({ speechDurationMs: 400, interruptedText: '...', playedDurationMs: 500 });
+ * // -> { type: 'cancel', injectMarker: '[interrupted]' }
  * ```
  */
 export class HardCutBargeinHandler {
@@ -30,7 +75,7 @@ export class HardCutBargeinHandler {
     constructor(options = {}) {
         /**
          * The interruption strategy implemented by this handler.
-         * Always `'hard-cut'`.
+         * Always `'hard-cut'` -- playback is stopped instantly with no fade.
          */
         this.mode = 'hard-cut';
         this.minSpeechMs = options.minSpeechMs ?? 300;
@@ -38,17 +83,24 @@ export class HardCutBargeinHandler {
     /**
      * Evaluate the barge-in context and return the action the pipeline should take.
      *
-     * - If `context.speechDurationMs >= minSpeechMs`, returns
-     *   `{ type: 'cancel', injectMarker: '[interrupted]' }` to immediately halt TTS.
-     * - Otherwise returns `{ type: 'ignore' }` to continue playback.
+     * Decision logic (binary threshold):
+     * - `speechDurationMs >= minSpeechMs` -> Cancel TTS immediately and inject
+     *   an `'[interrupted]'` marker into the conversation context.
+     * - `speechDurationMs < minSpeechMs` -> Ignore the detection as noise.
      *
      * @param context - Snapshot of the barge-in state at the moment of detection.
-     * @returns The pipeline action to execute.
+     * @returns The pipeline action to execute. Always synchronous (no Promise).
      */
     handleBargein(context) {
         if (context.speechDurationMs >= this.minSpeechMs) {
+            // Speech duration meets the threshold -> intentional interruption.
+            // The '[interrupted]' marker is injected into the conversation history
+            // so the agent knows its previous response was cut short and can avoid
+            // repeating the interrupted content.
             return { type: 'cancel', injectMarker: '[interrupted]' };
         }
+        // Below threshold -> likely noise, lip smack, or breath.
+        // Continue TTS playback as if nothing happened.
         return { type: 'ignore' };
     }
 }

package/dist/voice-pipeline/HardCutBargeinHandler.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"HardCutBargeinHandler.js","sourceRoot":"","sources":["../../src/voice-pipeline/HardCutBargeinHandler.ts"],"names":[],"mappings":"AAAA~~;;;;;;;GAOG~~;~~AAkBH;;;;;;;;;;;;;;GAcG~~;AACH,MAAM,OAAO,qBAAqB;IAahC;;;;OAIG;IACH,YAAY,UAAwC,EAAE;QAjBtD;;;WAGG;QACM,SAAI,GAAG,UAAmB,CAAC;QAclC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,GAAG,CAAC;IAChD,CAAC;IAED~~;;;;;;;;;OASG~~;IACH,aAAa,CAAC,OAAuB;QACnC,IAAI,OAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACjD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,eAAe,EAAE,CAAC;QAC3D,CAAC;~~QACD~~,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IAC5B,CAAC;CACF"}
1	+ {"version":3,"file":"HardCutBargeinHandler.js","sourceRoot":"","sources":["../../src/voice-pipeline/HardCutBargeinHandler.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AA8BH,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,MAAM,OAAO,qBAAqB;IAahC;;;;OAIG;IACH,YAAY,UAAwC,EAAE;QAjBtD;;;WAGG;QACM,SAAI,GAAG,UAAmB,CAAC;QAclC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,GAAG,CAAC;IAChD,CAAC;IAED;;;;;;;;;;OAUG;IACH,aAAa,CAAC,OAAuB;QACnC,IAAI,OAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACjD,mEAAmE;YACnE,uEAAuE;YACvE,uEAAuE;YACvE,qCAAqC;YACrC,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,eAAe,EAAE,CAAC;QAC3D,CAAC;QAED,yDAAyD;QACzD,gDAAgD;QAChD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IAC5B,CAAC;CACF"}

package/dist/voice-pipeline/HeuristicEndpointDetector.d.ts CHANGED Viewed

@@ -6,24 +6,46 @@
  * finished speaking. Suitable for low-latency deployments where an LLM-based
  * semantic detector would add unacceptable round-trip overhead.
  *
- * Detection strategy:
- *   1. On `speech_end`, if the accumulated final transcript ends with `.`, `?`, or `!`,
- *      fire `turn_complete` immediately with reason `'punctuation'`.
- *   2. Otherwise, start a silence timer (default 1 500 ms). If speech does not
- *      resume before the timer fires, emit `turn_complete` with reason `'silence_timeout'`.
- *   3. Backchannel phrases (e.g. "uh huh", "yeah") are recognised, suppressed from
- *      accumulation, and re-emitted as `'backchannel_detected'` events so the
- *      pipeline can decide whether to suppress an agent response.
+ * ## Detection strategy
+ *
+ * 1. On `speech_end`, if the accumulated final transcript ends with `.`, `?`,
+ *    or `!`, fire `turn_complete` immediately with reason `'punctuation'`.
+ *    This provides the lowest-latency turn handoff for well-punctuated speech.
+ *
+ * 2. Otherwise, start a silence timer (default 1,500 ms). If speech does not
+ *    resume before the timer fires, emit `turn_complete` with reason
+ *    `'silence_timeout'`. The timeout acts as a safety net for STT providers
+ *    that don't produce terminal punctuation reliably.
+ *
+ * 3. Backchannel phrases (e.g. "uh huh", "yeah") are recognised, suppressed
+ *    from accumulation, and re-emitted as `'backchannel_detected'` events so
+ *    the pipeline can decide whether to suppress an agent response.
+ *
+ * ## Why heuristic over acoustic-only?
+ *
+ * Pure silence timeout adds up to 1.5 s of unnecessary latency on every turn
+ * when the user ends a sentence cleanly. By checking for terminal punctuation,
+ * this detector can fire turn_complete immediately, cutting perceived latency
+ * by more than half for typical conversational speech.
+ *
+ * @see {@link AcousticEndpointDetector} for the purely acoustic alternative.
+ * @see {@link IEndpointDetector} for the interface contract.
  */
 import { EventEmitter } from 'node:events';
 import type { IEndpointDetector, TranscriptEvent, VadEvent } from './types.js';
 /**
  * Constructor options for {@link HeuristicEndpointDetector}.
+ *
+ * @example
+ * ```typescript
+ * const detector = new HeuristicEndpointDetector({ silenceTimeoutMs: 1000 });
+ * ```
  */
 export interface HeuristicEndpointDetectorOptions {
     /**
      * How long (ms) to wait after `speech_end` before emitting `turn_complete`
-     * when no terminal punctuation is detected.
+     * when no terminal punctuation is detected. Lower values reduce latency
+     * but risk firing mid-sentence during natural pauses.
      * @defaultValue 1500
      */
     silenceTimeoutMs?: number;
@@ -32,96 +54,148 @@ export interface HeuristicEndpointDetectorOptions {
  * Heuristic endpoint detector that uses terminal punctuation and a silence
  * timeout to decide when the user's turn is complete.
  *
- * Emits:
- * - `'turn_complete'` ({@link TurnCompleteEvent}) — user turn has ended.
- * - `'backchannel_detected'` (`{ text: string }`) — a backchannel phrase was
- *   recognised; accumulation is suppressed for this utterance.
+ * ## Events emitted
+ *
+ * | Event                    | Payload                  | Description                        |
+ * |--------------------------|--------------------------|------------------------------------|
+ * | `'turn_complete'`        | {@link TurnCompleteEvent}| User turn has ended.               |
+ * | `'backchannel_detected'` | `{ text: string }`       | Backchannel phrase was recognised.  |
+ *
+ * @see {@link IEndpointDetector} for the interface contract.
+ * @see {@link AcousticEndpointDetector} for the purely acoustic alternative.
  *
  * @example
  * ```typescript
  * const detector = new HeuristicEndpointDetector({ silenceTimeoutMs: 1000 });
  * detector.on('turn_complete', (event) => console.log('Turn done:', event));
+ *
+ * // Simulate a punctuated sentence followed by speech_end
  * detector.pushTranscript({ text: 'Hello there.', isFinal: true, confidence: 0.95, words: [] });
  * detector.pushVadEvent({ type: 'speech_end', timestamp: Date.now(), source: 'vad' });
- * // → 'turn_complete' fires immediately with reason 'punctuation'
+ * // -> 'turn_complete' fires immediately with reason 'punctuation'
  * ```
  */
 export declare class HeuristicEndpointDetector extends EventEmitter implements IEndpointDetector {
     /**
      * Active detection strategy label.
-     * Typed as `'hybrid'` to satisfy {@link IEndpointDetector.mode}; consumers
-     * that need to distinguish heuristic detectors may inspect `instanceof`.
+     * Always `'heuristic'` for this implementation.
+     *
+     * @see {@link IEndpointDetector.mode}
      */
     readonly mode: IEndpointDetector['mode'];
     /** Resolved silence timeout in milliseconds. */
     private readonly silenceTimeoutMs;
-    /** The latest final transcript text accumulated for the current turn. */
+    /**
+     * The latest final transcript text accumulated for the current turn.
+     * Only updated by final (non-interim) transcript events.
+     * Reset to empty string after each `turn_complete` emission.
+     */
     private accumulatedText;
-    /** Whether the VAD currently reports active speech. */
+    /**
+     * Whether the VAD currently reports active speech. Set to `true` on
+     * `speech_start` and `false` on `speech_end`. Used to prevent the
+     * silence timer from starting while the user is still speaking.
+     */
     private speechActive;
-    /** Handle to a pending silence timeout, or `null` if none is running. */
+    /**
+     * Handle to a pending silence timeout, or `null` if none is running.
+     * Cleared when speech resumes or when the detector is reset.
+     */
     private silenceTimer;
-    /** Wall-clock timestamp (ms) when the current turn's speech started. */
+    /**
+     * Wall-clock timestamp (ms) when the current turn's speech started.
+     * Used to compute `durationMs` in the emitted {@link TurnCompleteEvent}.
+     * `null` when no speech has been detected in the current turn.
+     */
     private turnStartMs;
-    /** Confidence of the most recent final transcript. */
+    /**
+     * Confidence of the most recent final transcript. Forwarded into the
+     * emitted {@link TurnCompleteEvent}. Defaults to 1 (perfect confidence)
+     * and is updated with each final transcript event.
+     */
     private lastConfidence;
     /**
      * Create a new {@link HeuristicEndpointDetector}.
      *
-     * @param options — Optional configuration overrides.
+     * @param options - Optional configuration overrides.
      */
     constructor(options?: HeuristicEndpointDetectorOptions);
     /**
      * Ingest a transcript event from the upstream STT session.
      *
      * Only final events (`isFinal: true`) affect internal state. Interim results
-     * are silently ignored — they may arrive very frequently and their text is
-     * unstable.
+     * are silently ignored because:
+     * 1. They arrive very frequently (10-50 per second) and would trigger
+     *    excessive punctuation checks.
+     * 2. Their text is unstable -- a word ending with "." may be revised in
+     *    the next interim result, causing false turn-completion signals.
      *
-     * If the final text is a recognised backchannel phrase the detector emits
-     * `'backchannel_detected'` and returns without accumulating the text, so that
-     * a subsequent `speech_end` event does not trigger `turn_complete`.
+     * If the final text is a recognised backchannel phrase, the detector emits
+     * `'backchannel_detected'` and returns WITHOUT accumulating the text. This
+     * prevents a subsequent `speech_end` event from triggering `turn_complete`
+     * for what was merely an acknowledgement, not a real conversational turn.
      *
-     * @param transcript — Transcript event from the STT session.
+     * @param transcript - Transcript event from the STT session.
      */
     pushTranscript(transcript: TranscriptEvent): void;
     /**
      * Ingest a VAD (voice activity detection) event.
      *
-     * - `speech_start`: marks the turn as active and cancels any pending silence
-     *   timer (the user resumed speaking before the timeout elapsed).
-     * - `speech_end`: if accumulated text is available, either fires
-     *   `turn_complete` immediately (punctuation) or starts the silence timer.
-     * - `silence`: heartbeat events are ignored; only explicit `speech_end`
-     *   drives the timeout logic.
+     * Event handling by type:
+     *
+     * - **`speech_start`**: Marks the turn as active and cancels any pending
+     *   silence timer (the user resumed speaking before the timeout elapsed).
+     *   This is critical for avoiding false turn-completion when the user
+     *   takes a brief pause mid-sentence.
+     *
+     * - **`speech_end`**: If accumulated text is available, either fires
+     *   `turn_complete` immediately (when text ends with terminal punctuation)
+     *   or starts the silence timer (when no punctuation is detected).
+     *
+     * - **`silence`**: Periodic heartbeat events are ignored. The silence timer
+     *   (started on `speech_end`) already handles delayed turn-completion
+     *   independently of heartbeat cadence.
      *
-     * @param event — VAD transition event.
+     * @param event - VAD transition event.
      */
     pushVadEvent(event: VadEvent): void;
     /**
      * Reset all internal state, cancel pending timers, and prepare the detector
-     * for the next user turn. Should be called by the pipeline after each
-     * `turn_complete` event before audio for the next turn begins to arrive.
+     * for the next user turn.
+     *
+     * Called by the pipeline after each `turn_complete` event (both internally
+     * and by the orchestrator's flush_complete handler) to ensure clean state
+     * before audio for the next turn begins to arrive.
      */
     reset(): void;
     /**
      * Emit `turn_complete` with the currently accumulated transcript, resetting
      * internal state so the detector is ready for the next turn.
      *
-     * @param reason — The semantic reason driving this completion.
-     * @param speechEndTimestamp — Unix epoch ms timestamp of the `speech_end` event,
-     *   used to compute `durationMs`.
+     * The reset happens BEFORE the emit to ensure that any re-entrant listeners
+     * (e.g. an endpoint detector handler that immediately calls pushVadEvent)
+     * see clean state.
+     *
+     * @param reason - The semantic reason driving this completion.
+     * @param speechEndTimestamp - Unix epoch ms timestamp of the `speech_end` event,
+     *   used to compute `durationMs` as `speechEndTimestamp - turnStartMs`.
      */
     private _emitTurnComplete;
     /**
      * Start the silence-timeout timer. If the user does not resume speaking
-     * within {@link silenceTimeoutMs} ms the detector fires `turn_complete`.
+     * within {@link silenceTimeoutMs} ms, the detector fires `turn_complete`
+     * with reason `'silence_timeout'`.
+     *
+     * Any previously running silence timer is cleared first to prevent
+     * double-fires from rapid speech_end -> speech_start -> speech_end sequences.
      *
-     * @param speechEndTimestamp — Timestamp passed through to `_emitTurnComplete`.
+     * @param speechEndTimestamp - Timestamp passed through to {@link _emitTurnComplete}
+     *   for duration calculation.
      */
     private _startSilenceTimer;
     /**
      * Cancel a pending silence timer without any side effects.
+     * Safe to call when no timer is active (no-op).
      */
     private _clearSilenceTimer;
 }

package/dist/voice-pipeline/HeuristicEndpointDetector.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"HeuristicEndpointDetector.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/HeuristicEndpointDetector.ts"],"names":[],"mappings":"AAAA~~;;;;;;;;;;;;;;;;GAgBG~~;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,KAAK,EACV,iBAAiB,EACjB,eAAe,EACf,QAAQ,EAET,MAAM,YAAY,CAAC;~~AAwCpB;;GAEG~~;AACH,MAAM,WAAW,gCAAgC;IAC/C~~;;;;OAIG~~;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAMD~~;;;;;;;;;;;;;;;;;GAiBG~~;AACH,qBAAa,yBACX,SAAQ,YACR,YAAW,iBAAiB;IAE5B~~;;;;OAIG~~;IACH,QAAQ,CAAC,IAAI,EAAE,iBAAiB,CAAC,MAAM,CAAC,CAAe;IAEvD,gDAAgD;IAChD,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAS;IAE1C~~,yEAAyE~~;~~IACzE~~,OAAO,CAAC,eAAe,CAAM;IAE7B~~,uDAAuD~~;~~IACvD~~,OAAO,CAAC,YAAY,CAAS;IAE7B~~,yEAAyE~~;~~IACzE~~,OAAO,CAAC,YAAY,CAA8C;IAElE~~,wEAAwE~~;~~IACxE~~,OAAO,CAAC,WAAW,CAAuB;IAE1C~~,sDAAsD~~;~~IACtD~~,OAAO,CAAC,cAAc,CAAK;IAM3B;;;;OAIG;gBACS,OAAO,GAAE,gCAAqC;IAS1D~~;;;;;;;;;;;;OAYG~~;IACH,cAAc,CAAC,UAAU,EAAE,eAAe,GAAG,IAAI;~~IAwBjD;;;;;;;;;;;OAWG~~;IACH,YAAY,CAAC,KAAK,EAAE,QAAQ,GAAG,IAAI;~~IAyCnC;;;;OAIG~~;IACH,KAAK,IAAI,IAAI;IAYb~~;;;;;;;OAOG~~;IACH,OAAO,CAAC,iBAAiB;~~IAoBzB;;;;;OAKG~~;IACH,OAAO,CAAC,kBAAkB;IAQ1B~~;;OAEG~~;IACH,OAAO,CAAC,kBAAkB;CAM3B"}
1	+ {"version":3,"file":"HeuristicEndpointDetector.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/HeuristicEndpointDetector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,KAAK,EACV,iBAAiB,EACjB,eAAe,EACf,QAAQ,EAET,MAAM,YAAY,CAAC;AAkEpB;;;;;;;GAOG;AACH,MAAM,WAAW,gCAAgC;IAC/C;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAMD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,qBAAa,yBACX,SAAQ,YACR,YAAW,iBAAiB;IAE5B;;;;;OAKG;IACH,QAAQ,CAAC,IAAI,EAAE,iBAAiB,CAAC,MAAM,CAAC,CAAe;IAEvD,gDAAgD;IAChD,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAS;IAE1C;;;;OAIG;IACH,OAAO,CAAC,eAAe,CAAM;IAE7B;;;;OAIG;IACH,OAAO,CAAC,YAAY,CAAS;IAE7B;;;OAGG;IACH,OAAO,CAAC,YAAY,CAA8C;IAElE;;;;OAIG;IACH,OAAO,CAAC,WAAW,CAAuB;IAE1C;;;;OAIG;IACH,OAAO,CAAC,cAAc,CAAK;IAM3B;;;;OAIG;gBACS,OAAO,GAAE,gCAAqC;IAS1D;;;;;;;;;;;;;;;;OAgBG;IACH,cAAc,CAAC,UAAU,EAAE,eAAe,GAAG,IAAI;IA4BjD;;;;;;;;;;;;;;;;;;;OAmBG;IACH,YAAY,CAAC,KAAK,EAAE,QAAQ,GAAG,IAAI;IAgDnC;;;;;;;OAOG;IACH,KAAK,IAAI,IAAI;IAYb;;;;;;;;;;;OAWG;IACH,OAAO,CAAC,iBAAiB;IAsBzB;;;;;;;;;;OAUG;IACH,OAAO,CAAC,kBAAkB;IAQ1B;;;OAGG;IACH,OAAO,CAAC,kBAAkB;CAM3B"}