npm - @framers/agentos - Versions diffs - 0.1.108 → 0.1.109 - Mend

@framers/agentos 0.1.108 → 0.1.109

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/dist/voice-pipeline/AcousticEndpointDetector.d.ts CHANGED Viewed

@@ -6,27 +6,74 @@
  * and relies solely on the duration of post-speech silence to decide when the user
  * has finished speaking.
  *
- * Emits:
- * - `'turn_complete'` ({@link TurnCompleteEvent}) — silence exceeded the configured
- *   `utteranceEndThresholdMs` after the most recent `speech_end` VAD event.
- * - `'speech_start'` () — re-emitted when a `speech_start` VAD event is received.
+ * ## How it works
+ *
+ * This detector delegates all silence timing to a {@link SilenceDetector} instance
+ * (from `core/audio/`). The SilenceDetector maintains an internal timer that
+ * starts when `handleSpeechEnd()` is called and fires `'utterance_end_detected'`
+ * when silence exceeds the configured `utteranceEndThresholdMs`. A
+ * `handleSpeechStart()` call cancels the timer.
+ *
+ * ## Energy threshold adaptation
+ *
+ * The SilenceDetector internally uses adaptive energy thresholds from the
+ * {@link AdaptiveVAD}. The VAD continuously recalibrates its speech/silence
+ * boundary based on ambient noise levels, so the effective silence threshold
+ * adapts to the environment (e.g. coffee shop vs quiet room). This detector
+ * does not perform its own energy analysis -- it trusts the upstream VAD's
+ * speech_start/speech_end decisions.
+ *
+ * ## When to use
+ *
+ * Use this detector when:
+ * - The STT provider does not produce reliable punctuation.
+ * - You want the simplest possible endpoint detection with no linguistic analysis.
+ * - Latency tolerance is higher (the full `utteranceEndThresholdMs` is always
+ *   consumed, unlike the {@link HeuristicEndpointDetector} which can fire
+ *   immediately on terminal punctuation).
+ *
+ * @see {@link HeuristicEndpointDetector} for the rule-based alternative with
+ *   punctuation-triggered fast path.
+ * @see {@link IEndpointDetector} for the interface contract.
+ * @see {@link SilenceDetector} for the underlying silence timing logic.
+ *
+ * ## Events emitted
+ *
+ * | Event             | Payload                  | Description                                 |
+ * |-------------------|--------------------------|---------------------------------------------|
+ * | `'turn_complete'` | {@link TurnCompleteEvent} | Silence exceeded `utteranceEndThresholdMs`. |
+ * | `'speech_start'`  | *(none)*                 | Re-emitted from incoming VAD event.         |
  */
 import { EventEmitter } from 'node:events';
 import type { IEndpointDetector, VadEvent, TranscriptEvent } from './types.js';
 /**
  * Constructor options for {@link AcousticEndpointDetector}.
+ *
+ * @example
+ * ```typescript
+ * const detector = new AcousticEndpointDetector({
+ *   significantPauseThresholdMs: 1000,
+ *   utteranceEndThresholdMs: 2000,
+ * });
+ * ```
  */
 export interface AcousticEndpointDetectorConfig {
     /**
      * Silence duration after speech (ms) that triggers a "significant pause"
      * notification on the underlying {@link SilenceDetector}. Does not directly
-     * cause `turn_complete` to fire, but is forwarded to the SilenceDetector.
+     * cause `turn_complete` to fire, but can be used by other pipeline components
+     * to show a "thinking" indicator.
      * @defaultValue 1500
      */
     significantPauseThresholdMs?: number;
     /**
      * Silence duration after speech (ms) that triggers `turn_complete` with
-     * `reason: 'silence_timeout'`.
+     * `reason: 'silence_timeout'`. This is the primary tuning knob for how
+     * long the pipeline waits after the user stops speaking.
+     *
+     * - Lower values (1000-2000 ms): Faster response, but may fire during natural pauses.
+     * - Higher values (3000-5000 ms): More tolerant of pauses, but feels sluggish.
+     *
      * @defaultValue 3000
      */
     utteranceEndThresholdMs?: number;
@@ -38,26 +85,45 @@ export interface AcousticEndpointDetectorConfig {
  * `speech_end` events start the silence clock; `speech_start` events cancel
  * any pending turn-complete emission. Transcript content is completely ignored.
  *
+ * @see {@link IEndpointDetector} for the interface contract.
+ * @see {@link HeuristicEndpointDetector} for the heuristic alternative.
+ *
  * @example
- * ```ts
+ * ```typescript
  * const detector = new AcousticEndpointDetector({ utteranceEndThresholdMs: 2000 });
- * detector.on('turn_complete', (event) => console.log('Turn done:', event));
- * detector.pushVadEvent({ type: 'speech_end', timestamp: Date.now() });
+ * detector.on('turn_complete', (event) => {
+ *   console.log(`Turn done after ${event.durationMs}ms of speech`);
+ * });
+ * detector.pushVadEvent({ type: 'speech_start', timestamp: Date.now() });
+ * detector.pushVadEvent({ type: 'speech_end', timestamp: Date.now() + 500 });
+ * // -> After 2000ms of silence, 'turn_complete' fires with reason 'silence_timeout'
  * ```
  */
 export declare class AcousticEndpointDetector extends EventEmitter implements IEndpointDetector {
-    /** @inheritdoc */
+    /**
+     * Detection mode identifier. Always `'acoustic'` for this implementation.
+     * @see {@link IEndpointDetector.mode}
+     */
     readonly mode: "acoustic";
-    /** Underlying silence-duration tracker. */
+    /**
+     * Underlying silence-duration tracker from `core/audio/`.
+     * Handles the actual timer management and threshold comparison.
+     */
     private readonly silenceDetector;
     /**
-     * Timestamp (ms) when the current speech segment began. Tracked so that
-     * `durationMs` in the emitted {@link TurnCompleteEvent} can be computed.
+     * Timestamp (ms) when the current speech segment began. Used to compute
+     * `durationMs` in the emitted {@link TurnCompleteEvent} as:
+     * `speechEndTimeMs - speechStartTimeMs`.
+     *
+     * Reset to `null` on each {@link reset} call.
      */
     private speechStartTimeMs;
     /**
      * Timestamp (ms) when the most recent `speech_end` VAD event was received.
-     * Used to calculate `durationMs` for the turn-complete event.
+     * Used together with {@link speechStartTimeMs} to calculate `durationMs`
+     * for the turn-complete event.
+     *
+     * Reset to `null` on each {@link reset} call.
      */
     private speechEndTimeMs;
     /**
@@ -69,21 +135,30 @@ export declare class AcousticEndpointDetector extends EventEmitter implements IE
     /**
      * Converts a {@link VadEvent} into the SilenceDetector's expected API calls.
      *
-     * - `speech_start` — resets silence state and re-emits `'speech_start'` on self.
-     * - `speech_end` — starts the silence clock.
-     * - `silence` — treated as ongoing non-speech frames.
+     * - **`speech_start`**: Resets silence state (cancels pending timers) and
+     *   re-emits `'speech_start'` on this detector for pipeline consumption.
+     * - **`speech_end`**: Records the timestamp and starts the silence clock.
+     * - **`silence`**: Treated as ongoing non-speech frames, advancing the
+     *   SilenceDetector's internal timer.
      *
-     * @param event - Incoming VAD event.
+     * @param event - Incoming VAD event from the upstream voice activity detector.
      */
     pushVadEvent(event: VadEvent): void;
     /**
-     * No-op — this detector is purely acoustic and does not use transcript content.
+     * No-op -- this detector is purely acoustic and does not use transcript content.
+     *
+     * The method exists solely to satisfy the {@link IEndpointDetector} interface.
+     * Calling it has no effect and does not throw.
      *
      * @param _event - Ignored transcript event.
      */
     pushTranscript(_event: TranscriptEvent): void;
     /**
-     * Resets all internal state and timers. Call at the start of each new turn.
+     * Resets all internal state and cancels pending timers.
+     *
+     * Should be called at the start of each new turn to ensure clean state.
+     * This also resets the underlying SilenceDetector, cancelling any pending
+     * utterance_end_detected timer.
      */
     reset(): void;
 }

package/dist/voice-pipeline/AcousticEndpointDetector.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"AcousticEndpointDetector.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/AcousticEndpointDetector.ts"],"names":[],"mappings":"AAAA~~;;;;;;;;;;;;GAYG~~;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,KAAK,EACV,iBAAiB,EACjB,QAAQ,EACR,eAAe,EAEhB,MAAM,YAAY,CAAC;AAMpB~~;;GAEG~~;AACH,MAAM,WAAW,8BAA8B;IAC7C~~;;;;;OAKG~~;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IAErC~~;;;;OAIG~~;IACH,uBAAuB,CAAC,EAAE,MAAM,CAAC;CAClC;AAMD~~;;;;;;;;;;;;;GAaG~~;AACH,qBAAa,wBAAyB,SAAQ,YAAa,YAAW,iBAAiB;IACrF~~,kBAAkB~~;~~IAClB~~,SAAgB,IAAI,EAAG,UAAU,CAAU;IAE3C~~,2CAA2C~~;~~IAC3C~~,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAElD~~;;;OAGG~~;IACH,OAAO,CAAC,iBAAiB,CAAuB;IAEhD~~;;;OAGG~~;IACH,OAAO,CAAC,eAAe,CAAuB;~~IAI9C~~;;;;OAIG;gBACS,MAAM,GAAE,8BAAmC;~~IAgCvD;;;;;;;;OAQG~~;IACI,YAAY,CAAC,KAAK,EAAE,QAAQ,GAAG,IAAI;~~IAyB1C;;;;OAIG~~;IACI,cAAc,CAAC,MAAM,EAAE,eAAe,GAAG,IAAI;~~IAIpD;;OAEG~~;IACI,KAAK,IAAI,IAAI;CAKrB"}
1	+ {"version":3,"file":"AcousticEndpointDetector.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/AcousticEndpointDetector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,KAAK,EACV,iBAAiB,EACjB,QAAQ,EACR,eAAe,EAEhB,MAAM,YAAY,CAAC;AAMpB;;;;;;;;;;GAUG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;;;;;OAMG;IACH,2BAA2B,CAAC,EAAE,MAAM,CAAC;IAErC;;;;;;;;;OASG;IACH,uBAAuB,CAAC,EAAE,MAAM,CAAC;CAClC;AAMD;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,qBAAa,wBAAyB,SAAQ,YAAa,YAAW,iBAAiB;IACrF;;;OAGG;IACH,SAAgB,IAAI,EAAG,UAAU,CAAU;IAE3C;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAElD;;;;;;OAMG;IACH,OAAO,CAAC,iBAAiB,CAAuB;IAEhD;;;;;;OAMG;IACH,OAAO,CAAC,eAAe,CAAuB;IAM9C;;;;OAIG;gBACS,MAAM,GAAE,8BAAmC;IAuCvD;;;;;;;;;;OAUG;IACI,YAAY,CAAC,KAAK,EAAE,QAAQ,GAAG,IAAI;IAwC1C;;;;;;;OAOG;IACI,cAAc,CAAC,MAAM,EAAE,eAAe,GAAG,IAAI;IAUpD;;;;;;OAMG;IACI,KAAK,IAAI,IAAI;CAKrB"}

package/dist/voice-pipeline/AcousticEndpointDetector.js CHANGED Viewed

@@ -6,10 +6,43 @@
  * and relies solely on the duration of post-speech silence to decide when the user
  * has finished speaking.
  *
- * Emits:
- * - `'turn_complete'` ({@link TurnCompleteEvent}) — silence exceeded the configured
- *   `utteranceEndThresholdMs` after the most recent `speech_end` VAD event.
- * - `'speech_start'` () — re-emitted when a `speech_start` VAD event is received.
+ * ## How it works
+ *
+ * This detector delegates all silence timing to a {@link SilenceDetector} instance
+ * (from `core/audio/`). The SilenceDetector maintains an internal timer that
+ * starts when `handleSpeechEnd()` is called and fires `'utterance_end_detected'`
+ * when silence exceeds the configured `utteranceEndThresholdMs`. A
+ * `handleSpeechStart()` call cancels the timer.
+ *
+ * ## Energy threshold adaptation
+ *
+ * The SilenceDetector internally uses adaptive energy thresholds from the
+ * {@link AdaptiveVAD}. The VAD continuously recalibrates its speech/silence
+ * boundary based on ambient noise levels, so the effective silence threshold
+ * adapts to the environment (e.g. coffee shop vs quiet room). This detector
+ * does not perform its own energy analysis -- it trusts the upstream VAD's
+ * speech_start/speech_end decisions.
+ *
+ * ## When to use
+ *
+ * Use this detector when:
+ * - The STT provider does not produce reliable punctuation.
+ * - You want the simplest possible endpoint detection with no linguistic analysis.
+ * - Latency tolerance is higher (the full `utteranceEndThresholdMs` is always
+ *   consumed, unlike the {@link HeuristicEndpointDetector} which can fire
+ *   immediately on terminal punctuation).
+ *
+ * @see {@link HeuristicEndpointDetector} for the rule-based alternative with
+ *   punctuation-triggered fast path.
+ * @see {@link IEndpointDetector} for the interface contract.
+ * @see {@link SilenceDetector} for the underlying silence timing logic.
+ *
+ * ## Events emitted
+ *
+ * | Event             | Payload                  | Description                                 |
+ * |-------------------|--------------------------|---------------------------------------------|
+ * | `'turn_complete'` | {@link TurnCompleteEvent} | Silence exceeded `utteranceEndThresholdMs`. |
+ * | `'speech_start'`  | *(none)*                 | Re-emitted from incoming VAD event.         |
  */
 import { EventEmitter } from 'node:events';
 import { SilenceDetector } from '../core/audio/SilenceDetector.js';
@@ -23,14 +56,23 @@ import { SilenceDetector } from '../core/audio/SilenceDetector.js';
  * `speech_end` events start the silence clock; `speech_start` events cancel
  * any pending turn-complete emission. Transcript content is completely ignored.
  *
+ * @see {@link IEndpointDetector} for the interface contract.
+ * @see {@link HeuristicEndpointDetector} for the heuristic alternative.
+ *
  * @example
- * ```ts
+ * ```typescript
  * const detector = new AcousticEndpointDetector({ utteranceEndThresholdMs: 2000 });
- * detector.on('turn_complete', (event) => console.log('Turn done:', event));
- * detector.pushVadEvent({ type: 'speech_end', timestamp: Date.now() });
+ * detector.on('turn_complete', (event) => {
+ *   console.log(`Turn done after ${event.durationMs}ms of speech`);
+ * });
+ * detector.pushVadEvent({ type: 'speech_start', timestamp: Date.now() });
+ * detector.pushVadEvent({ type: 'speech_end', timestamp: Date.now() + 500 });
+ * // -> After 2000ms of silence, 'turn_complete' fires with reason 'silence_timeout'
  * ```
  */
 export class AcousticEndpointDetector extends EventEmitter {
+    // ---------------------------------------------------------------------------
+    // Constructor
     // ---------------------------------------------------------------------------
     /**
      * Creates a new AcousticEndpointDetector.
@@ -39,30 +81,46 @@ export class AcousticEndpointDetector extends EventEmitter {
      */
     constructor(config = {}) {
         super();
-        /** @inheritdoc */
+        /**
+         * Detection mode identifier. Always `'acoustic'` for this implementation.
+         * @see {@link IEndpointDetector.mode}
+         */
         this.mode = 'acoustic';
         /**
-         * Timestamp (ms) when the current speech segment began. Tracked so that
-         * `durationMs` in the emitted {@link TurnCompleteEvent} can be computed.
+         * Timestamp (ms) when the current speech segment began. Used to compute
+         * `durationMs` in the emitted {@link TurnCompleteEvent} as:
+         * `speechEndTimeMs - speechStartTimeMs`.
+         *
+         * Reset to `null` on each {@link reset} call.
          */
         this.speechStartTimeMs = null;
         /**
          * Timestamp (ms) when the most recent `speech_end` VAD event was received.
-         * Used to calculate `durationMs` for the turn-complete event.
+         * Used together with {@link speechStartTimeMs} to calculate `durationMs`
+         * for the turn-complete event.
+         *
+         * Reset to `null` on each {@link reset} call.
          */
         this.speechEndTimeMs = null;
+        // Build SilenceDetector config from our options with sensible defaults
         const sdConfig = {
             significantPauseThresholdMs: config.significantPauseThresholdMs ?? 1500,
             utteranceEndThresholdMs: config.utteranceEndThresholdMs ?? 3000,
         };
         this.silenceDetector = new SilenceDetector(sdConfig);
-        // When SilenceDetector decides the utterance has ended, fire turn_complete.
+        // When SilenceDetector decides the utterance has ended (silence exceeded
+        // utteranceEndThresholdMs), translate that into a TurnCompleteEvent.
         this.silenceDetector.on('utterance_end_detected', (_silenceDurationMs) => {
+            // Compute the duration of actual speech (not including silence).
+            // Falls back to 0 if timestamps are missing (defensive).
             const durationMs = this.speechStartTimeMs !== null && this.speechEndTimeMs !== null
                 ? this.speechEndTimeMs - this.speechStartTimeMs
                 : 0;
             const event = {
-                transcript: '', // Acoustic mode has no transcript access
+                // Acoustic mode has no transcript access -- the orchestrator will
+                // use whatever transcript the STT session has accumulated separately.
+                transcript: '',
+                // Confidence is 0 because we have no STT data to score.
                 confidence: 0,
                 durationMs,
                 reason: 'silence_timeout',
@@ -71,48 +129,76 @@ export class AcousticEndpointDetector extends EventEmitter {
         });
     }
     // ---------------------------------------------------------------------------
-    // IEndpointDetector
+    // IEndpointDetector -- pushVadEvent
     // ---------------------------------------------------------------------------
     /**
      * Converts a {@link VadEvent} into the SilenceDetector's expected API calls.
      *
-     * - `speech_start` — resets silence state and re-emits `'speech_start'` on self.
-     * - `speech_end` — starts the silence clock.
-     * - `silence` — treated as ongoing non-speech frames.
+     * - **`speech_start`**: Resets silence state (cancels pending timers) and
+     *   re-emits `'speech_start'` on this detector for pipeline consumption.
+     * - **`speech_end`**: Records the timestamp and starts the silence clock.
+     * - **`silence`**: Treated as ongoing non-speech frames, advancing the
+     *   SilenceDetector's internal timer.
      *
-     * @param event - Incoming VAD event.
+     * @param event - Incoming VAD event from the upstream voice activity detector.
      */
     pushVadEvent(event) {
-        // Minimal VADResult stub — SilenceDetector's public methods only use it as
-        // a pass-through parameter and don't inspect its contents.
+        // The SilenceDetector's API requires a VADResult parameter, but it only
+        // uses it as a pass-through and doesn't inspect its contents. We pass
+        // a minimal stub typed as `never` to satisfy the signature without
+        // introducing a dependency on the full VADResult type.
         const vadResultStub = { timestamp: event.timestamp };
         switch (event.type) {
             case 'speech_start':
+                // Record when speech began for duration calculation
                 this.speechStartTimeMs = event.timestamp;
+                // Clear the previous speech_end since a new speech segment started
                 this.speechEndTimeMs = null;
+                // Notify SilenceDetector to cancel any pending silence timer
                 this.silenceDetector.handleSpeechStart(vadResultStub);
+                // Re-emit for pipeline consumers (e.g. barge-in detection)
                 this.emit('speech_start');
                 break;
             case 'speech_end':
+                // Record when speech ended for duration calculation
                 this.speechEndTimeMs = event.timestamp;
+                // Start the silence clock -- if silence persists beyond
+                // utteranceEndThresholdMs, SilenceDetector fires utterance_end_detected.
+                // The second argument (0) is the energy level -- not used in our context.
                 this.silenceDetector.handleSpeechEnd(vadResultStub, 0);
                 break;
             case 'silence':
-                // Periodic silence heartbeat — pass as a non-speech frame.
+                // Periodic silence heartbeat -- advance SilenceDetector's internal
+                // timer by notifying it of continued non-speech activity.
                 this.silenceDetector.handleNoVoiceActivity(vadResultStub);
                 break;
         }
     }
+    // ---------------------------------------------------------------------------
+    // IEndpointDetector -- pushTranscript
+    // ---------------------------------------------------------------------------
     /**
-     * No-op — this detector is purely acoustic and does not use transcript content.
+     * No-op -- this detector is purely acoustic and does not use transcript content.
+     *
+     * The method exists solely to satisfy the {@link IEndpointDetector} interface.
+     * Calling it has no effect and does not throw.
      *
      * @param _event - Ignored transcript event.
      */
     pushTranscript(_event) {
-        // Intentional no-op: acoustic mode ignores linguistic content.
+        // Intentional no-op: acoustic mode ignores all linguistic content.
+        // The HeuristicEndpointDetector should be used if transcript-based
+        // endpoint detection is desired.
     }
+    // ---------------------------------------------------------------------------
+    // IEndpointDetector -- reset
+    // ---------------------------------------------------------------------------
     /**
-     * Resets all internal state and timers. Call at the start of each new turn.
+     * Resets all internal state and cancels pending timers.
+     *
+     * Should be called at the start of each new turn to ensure clean state.
+     * This also resets the underlying SilenceDetector, cancelling any pending
+     * utterance_end_detected timer.
      */
     reset() {
         this.speechStartTimeMs = null;

package/dist/voice-pipeline/AcousticEndpointDetector.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"AcousticEndpointDetector.js","sourceRoot":"","sources":["../../src/voice-pipeline/AcousticEndpointDetector.ts"],"names":[],"mappings":"AAAA~~;;;;;;;;;;;;GAYG~~;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,eAAe,EAA8B,MAAM,kCAAkC,CAAC;~~AAgC~~/F,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E~~;;;;;;;;;;;;;GAaG~~;AACH,MAAM,OAAO,wBAAyB,SAAQ,YAAY;~~IAmBxD~~,8EAA8E;IAE9E;;;;OAIG;IACH,YAAY,SAAyC,EAAE;QACrD,KAAK,EAAE,CAAC;~~QA1BV,kBAAkB~~;~~QACF~~,SAAI,GAAG,UAAmB,CAAC;~~QAK3C;;;WAGG~~;QACK,sBAAiB,GAAkB,IAAI,CAAC;QAEhD~~;;;WAGG~~;QACK,oBAAe,GAAkB,IAAI,CAAC;~~QAY5C~~,MAAM,QAAQ,GAA0B;YACtC,2BAA2B,EAAE,MAAM,CAAC,2BAA2B,IAAI,IAAI;YACvE,uBAAuB,EAAE,MAAM,CAAC,uBAAuB,IAAI,IAAI;SAChE,CAAC;QAEF,IAAI,CAAC,eAAe,GAAG,IAAI,eAAe,CAAC,QAAQ,CAAC,CAAC;QAErD,~~4EAA4E~~;~~QAC5E~~,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC,wBAAwB,EAAE,CAAC,kBAA0B,EAAE,EAAE;YAC/E,MAAM,UAAU,GACd,IAAI,CAAC,iBAAiB,KAAK,IAAI,IAAI,IAAI,CAAC,eAAe,KAAK,IAAI;gBAC9D,CAAC,CAAC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,iBAAiB;gBAC/C,CAAC,CAAC,CAAC,CAAC;YAER,MAAM,KAAK,GAAsB;gBAC/B,UAAU,EAAE,EAAE,~~EAAI,yCAAyC~~;~~gBAC3D~~,UAAU,EAAE,CAAC;gBACb,UAAU;gBACV,MAAM,EAAE,iBAAiB;aAC1B,CAAC;YAEF,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC;QACpC,CAAC,CAAC,CAAC;IACL,CAAC;IAED,8EAA8E;IAC9E,~~oBAAoB~~;~~IACpB~~,8EAA8E;IAE9E~~;;;;;;;;OAQG~~;IACI,YAAY,CAAC,KAAe;QACjC,~~2EAA2E~~;~~QAC3E~~,~~2DAA2D~~;~~QAC3D~~,MAAM,aAAa,GAAG,EAAE,SAAS,EAAE,KAAK,CAAC,SAAS,EAAW,CAAC;QAE9D,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,cAAc;gBACjB,IAAI,CAAC,iBAAiB,GAAG,KAAK,CAAC,SAAS,CAAC;gBACzC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;gBAC5B,IAAI,CAAC,eAAe,CAAC,iBAAiB,CAAC,aAAa,CAAC,CAAC;gBACtD,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAC1B,MAAM;YAER,KAAK,YAAY;gBACf,IAAI,CAAC,eAAe,GAAG,KAAK,CAAC,SAAS,CAAC;gBACvC,IAAI,CAAC,eAAe,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;gBACvD,MAAM;YAER,KAAK,SAAS;gBACZ,~~2DAA2D~~;~~gBAC3D~~,IAAI,CAAC,eAAe,CAAC,qBAAqB,CAAC,aAAa,CAAC,CAAC;gBAC1D,MAAM;QACV,CAAC;IACH,CAAC;IAED~~;;;;OAIG~~;IACI,cAAc,CAAC,MAAuB;QAC3C~~,+DAA+D~~;~~IACjE~~,CAAC;IAED~~;;OAE
G~~;IACI,KAAK;QACV,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC;QAC9B,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC5B,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;CACF"}
1	+ {"version":3,"file":"AcousticEndpointDetector.js","sourceRoot":"","sources":["../../src/voice-pipeline/AcousticEndpointDetector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6CG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,eAAe,EAA8B,MAAM,kCAAkC,CAAC;AA8C/F,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;GAoBG;AACH,MAAM,OAAO,wBAAyB,SAAQ,YAAY;IA+BxD,8EAA8E;IAC9E,cAAc;IACd,8EAA8E;IAE9E;;;;OAIG;IACH,YAAY,SAAyC,EAAE;QACrD,KAAK,EAAE,CAAC;QAxCV;;;WAGG;QACa,SAAI,GAAG,UAAmB,CAAC;QAQ3C;;;;;;WAMG;QACK,sBAAiB,GAAkB,IAAI,CAAC;QAEhD;;;;;;WAMG;QACK,oBAAe,GAAkB,IAAI,CAAC;QAc5C,uEAAuE;QACvE,MAAM,QAAQ,GAA0B;YACtC,2BAA2B,EAAE,MAAM,CAAC,2BAA2B,IAAI,IAAI;YACvE,uBAAuB,EAAE,MAAM,CAAC,uBAAuB,IAAI,IAAI;SAChE,CAAC;QAEF,IAAI,CAAC,eAAe,GAAG,IAAI,eAAe,CAAC,QAAQ,CAAC,CAAC;QAErD,yEAAyE;QACzE,qEAAqE;QACrE,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC,wBAAwB,EAAE,CAAC,kBAA0B,EAAE,EAAE;YAC/E,iEAAiE;YACjE,yDAAyD;YACzD,MAAM,UAAU,GACd,IAAI,CAAC,iBAAiB,KAAK,IAAI,IAAI,IAAI,CAAC,eAAe,KAAK,IAAI;gBAC9D,CAAC,CAAC,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC,iBAAiB;gBAC/C,CAAC,CAAC,CAAC,CAAC;YAER,MAAM,KAAK,GAAsB;gBAC/B,kEAAkE;gBAClE,sEAAsE;gBACtE,UAAU,EAAE,EAAE;gBACd,wDAAwD;gBACxD,UAAU,EAAE,CAAC;gBACb,UAAU;gBACV,MAAM,EAAE,iBAAiB;aAC1B,CAAC;YAEF,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC;QACpC,CAAC,CAAC,CAAC;IACL,CAAC;IAED,8EAA8E;IAC9E,oCAAoC;IACpC,8EAA8E;IAE9E;;;;;;;;;;OAUG;IACI,YAAY,CAAC,KAAe;QACjC,wEAAwE;QACxE,sEAAsE;QACtE,mEAAmE;QACnE,uDAAuD;QACvD,MAAM,aAAa,GAAG,EAAE,SAAS,EAAE,KAAK,CAAC,SAAS,EAAW,CAAC;QAE9D,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,cAAc;gBACjB,oDAAoD;gBACpD,IAAI,CAAC,iBAAiB,GAAG,KAAK,CAAC,SAAS,CAAC;gBACzC,mEAAmE;gBACnE,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;gBAC5B,6DAA6D;gBAC7D,IAAI,CAAC,eAAe,CAAC,iBAAiB,CAAC,aAAa,CAAC,CAAC;gBACtD,2DAA2D;gBAC3D,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;gBAC1B,MAAM;YAER,KAAK,YAAY;gBACf,oDAAoD;gBACpD,IAAI,CAAC,eAAe,GAAG,KAAK,CAAC,SAAS,CAAC;gBACvC,wDAAwD;gBACxD,yEAAyE;gBACzE,0EAA0E;gBAC1E,IAAI,CAAC,eAAe,CAAC,eAAe,CAAC,aAAa,EAAE,CAAC,CAAC,CAAC;gBA
CvD,MAAM;YAER,KAAK,SAAS;gBACZ,mEAAmE;gBACnE,0DAA0D;gBAC1D,IAAI,CAAC,eAAe,CAAC,qBAAqB,CAAC,aAAa,CAAC,CAAC;gBAC1D,MAAM;QACV,CAAC;IACH,CAAC;IAED,8EAA8E;IAC9E,sCAAsC;IACtC,8EAA8E;IAE9E;;;;;;;OAOG;IACI,cAAc,CAAC,MAAuB;QAC3C,mEAAmE;QACnE,mEAAmE;QACnE,iCAAiC;IACnC,CAAC;IAED,8EAA8E;IAC9E,6BAA6B;IAC7B,8EAA8E;IAE9E;;;;;;OAMG;IACI,KAAK;QACV,IAAI,CAAC,iBAAiB,GAAG,IAAI,CAAC;QAC9B,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC5B,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,CAAC;IAC/B,CAAC;CACF"}

package/dist/voice-pipeline/HardCutBargeinHandler.d.ts CHANGED Viewed

@@ -2,21 +2,59 @@
  * @module voice-pipeline/HardCutBargeinHandler
  *
  * Implements a hard-cut barge-in policy: when the user speaks over TTS output
- * for at least `minSpeechMs` milliseconds, playback is stopped immediately with
- * no fade-out. Short detections below the threshold are treated as accidental
- * noise and ignored.
+ * for at least {@link HardCutBargeinHandlerOptions.minSpeechMs} milliseconds,
+ * playback is stopped immediately with no fade-out. Short detections below the
+ * threshold are treated as accidental noise and ignored.
+ *
+ * ## Why 300 ms default threshold?
+ *
+ * The 300 ms threshold was chosen to filter out common non-speech audio events
+ * that trigger false barge-in detections:
+ *
+ * - **Lip smacks**: Typically 50-150 ms of energy.
+ * - **Breaths/sighs**: Typically 100-250 ms of energy.
+ * - **Coughs/sneezes**: Typically short bursts of 100-200 ms, though longer ones may exceed the threshold.
+ * - **Background noise spikes**: Door closing, keyboard typing -- usually < 200 ms.
+ *
+ * At 300 ms, a detection almost certainly represents intentional speech rather
+ * than ambient noise. Lowering to < 200 ms increases false positives significantly
+ * in noisy environments. Raising to > 500 ms adds noticeable delay before the
+ * agent acknowledges the interruption.
+ *
+ * ## When to use hard-cut vs soft-fade
+ *
+ * Use hard-cut when:
+ * - The conversation style is fast-paced (e.g. customer support).
+ * - Users expect immediate response to interruption.
+ * - Audio quality is high (fewer false positives).
+ *
+ * Use {@link SoftFadeBargeinHandler} when:
+ * - The conversation is more measured (e.g. storytelling, education).
+ * - Users may accidentally trigger barge-in (noisy environment).
+ * - A smoother audio experience is preferred.
+ *
+ * @see {@link SoftFadeBargeinHandler} for the three-tier soft-fade alternative.
+ * @see {@link IBargeinHandler} for the interface contract.
  */
 import type { BargeinAction, BargeinContext, IBargeinHandler } from './types.js';
 /**
  * Construction options for {@link HardCutBargeinHandler}.
+ *
+ * @example
+ * ```typescript
+ * const handler = new HardCutBargeinHandler({ minSpeechMs: 250 });
+ * ```
  */
 export interface HardCutBargeinHandlerOptions {
     /**
      * Minimum confirmed speech duration (in milliseconds) required before a
      * barge-in is treated as intentional. Detections shorter than this value are
-     * returned as `{ type: 'ignore' }` to avoid reacting to background noise.
+     * returned as `{ type: 'ignore' }` to avoid reacting to background noise,
+     * lip smacks, breaths, or other brief non-speech audio events.
      *
      * @defaultValue 300
+     *
+     * @see Module-level documentation for rationale behind the 300 ms default.
      */
     minSpeechMs?: number;
 }
@@ -24,26 +62,38 @@ export interface HardCutBargeinHandlerOptions {
  * Barge-in handler that applies a hard-cut strategy.
  *
  * When the user speaks over an active TTS stream, this handler immediately
- * cancels playback if the detected speech exceeds `minSpeechMs`. Below that
- * threshold the interruption is considered noise and playback continues
+ * cancels playback once the detected speech lasts at least {@link minSpeechMs}. Below
+ * that threshold the interruption is considered noise and playback continues
  * uninterrupted.
  *
+ * The handler is stateless -- each {@link handleBargein} call is evaluated
+ * independently with no memory of previous barge-in events.
+ *
+ * @see {@link IBargeinHandler} for the interface contract.
+ * @see {@link SoftFadeBargeinHandler} for the three-tier alternative.
+ *
  * @example
- * ```ts
+ * ```typescript
  * const handler = new HardCutBargeinHandler({ minSpeechMs: 250 });
- * const action = handler.handleBargein({ speechDurationMs: 400, ... });
- * // action.type === 'cancel'
+ *
+ * // Short noise -> ignored
+ * handler.handleBargein({ speechDurationMs: 100, interruptedText: '...', playedDurationMs: 500 });
+ * // -> { type: 'ignore' }
+ *
+ * // Intentional speech -> cancel
+ * handler.handleBargein({ speechDurationMs: 400, interruptedText: '...', playedDurationMs: 500 });
+ * // -> { type: 'cancel', injectMarker: '[interrupted]' }
  * ```
  */
 export declare class HardCutBargeinHandler implements IBargeinHandler {
     /**
      * The interruption strategy implemented by this handler.
-     * Always `'hard-cut'`.
+     * Always `'hard-cut'` -- playback is stopped instantly with no fade.
      */
     readonly mode: "hard-cut";
     /**
      * Minimum speech duration in milliseconds before the interruption is
-     * considered intentional.
+     * considered intentional. Set once at construction and never changed.
      */
     private readonly minSpeechMs;
     /**
@@ -55,12 +105,13 @@ export declare class HardCutBargeinHandler implements IBargeinHandler {
     /**
      * Evaluate the barge-in context and return the action the pipeline should take.
      *
-     * - If `context.speechDurationMs >= minSpeechMs`, returns
-     *   `{ type: 'cancel', injectMarker: '[interrupted]' }` to immediately halt TTS.
-     * - Otherwise returns `{ type: 'ignore' }` to continue playback.
+     * Decision logic (binary threshold):
+     * - `speechDurationMs >= minSpeechMs` -> Cancel TTS immediately and inject
+     *   an `'[interrupted]'` marker into the conversation context.
+     * - `speechDurationMs < minSpeechMs` -> Ignore the detection as noise.
      *
      * @param context - Snapshot of the barge-in state at the moment of detection.
-     * @returns The pipeline action to execute.
+     * @returns The pipeline action to execute. Always synchronous (no Promise).
      */
     handleBargein(context: BargeinContext): BargeinAction;
 }

package/dist/voice-pipeline/HardCutBargeinHandler.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"HardCutBargeinHandler.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/HardCutBargeinHandler.ts"],"names":[],"mappings":"AAAA~~;;;;;;;GAOG~~;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;~~AAEjF;;GAEG~~;AACH,MAAM,WAAW,4BAA4B;IAC3C~~;;;;;;OAMG~~;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;~~AAED;;;;;;;;;;;;;;GAcG~~;AACH,qBAAa,qBAAsB,YAAW,eAAe;IAC3D;;;OAGG;IACH,QAAQ,CAAC,IAAI,EAAG,UAAU,CAAU;IAEpC;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IAErC;;;;OAIG;gBACS,OAAO,GAAE,4BAAiC;IAItD~~;;;;;;;;;OASG~~;IACH,aAAa,CAAC,OAAO,EAAE,cAAc,GAAG,aAAa;~~CAMtD~~"}
1	+ {"version":3,"file":"HardCutBargeinHandler.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/HardCutBargeinHandler.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAMjF;;;;;;;GAOG;AACH,MAAM,WAAW,4BAA4B;IAC3C;;;;;;;;;OASG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAMD;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,qBAAsB,YAAW,eAAe;IAC3D;;;OAGG;IACH,QAAQ,CAAC,IAAI,EAAG,UAAU,CAAU;IAEpC;;;OAGG;IACH,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAS;IAErC;;;;OAIG;gBACS,OAAO,GAAE,4BAAiC;IAItD;;;;;;;;;;OAUG;IACH,aAAa,CAAC,OAAO,EAAE,cAAc,GAAG,aAAa;CAatD"}