@framers/agentos 0.1.107 → 0.1.109
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/memory/ingestion/ChunkingEngine.d.ts.map +1 -1
- package/dist/memory/ingestion/ChunkingEngine.js +5 -1
- package/dist/memory/ingestion/ChunkingEngine.js.map +1 -1
- package/dist/memory/ingestion/DocxLoader.d.ts.map +1 -1
- package/dist/memory/ingestion/DocxLoader.js +2 -1
- package/dist/memory/ingestion/DocxLoader.js.map +1 -1
- package/dist/memory/ingestion/FolderScanner.d.ts.map +1 -1
- package/dist/memory/ingestion/FolderScanner.js +6 -3
- package/dist/memory/ingestion/FolderScanner.js.map +1 -1
- package/dist/memory/ingestion/HtmlLoader.d.ts.map +1 -1
- package/dist/memory/ingestion/HtmlLoader.js +2 -1
- package/dist/memory/ingestion/HtmlLoader.js.map +1 -1
- package/dist/memory/ingestion/MarkdownLoader.d.ts.map +1 -1
- package/dist/memory/ingestion/MarkdownLoader.js +2 -1
- package/dist/memory/ingestion/MarkdownLoader.js.map +1 -1
- package/dist/memory/ingestion/PdfLoader.d.ts.map +1 -1
- package/dist/memory/ingestion/PdfLoader.js +2 -1
- package/dist/memory/ingestion/PdfLoader.js.map +1 -1
- package/dist/memory/ingestion/TextLoader.d.ts.map +1 -1
- package/dist/memory/ingestion/TextLoader.js +3 -2
- package/dist/memory/ingestion/TextLoader.js.map +1 -1
- package/dist/memory/ingestion/pathUtils.d.ts +40 -0
- package/dist/memory/ingestion/pathUtils.d.ts.map +1 -0
- package/dist/memory/ingestion/pathUtils.js +62 -0
- package/dist/memory/ingestion/pathUtils.js.map +1 -0
- package/dist/voice-pipeline/AcousticEndpointDetector.d.ts +95 -20
- package/dist/voice-pipeline/AcousticEndpointDetector.d.ts.map +1 -1
- package/dist/voice-pipeline/AcousticEndpointDetector.js +110 -24
- package/dist/voice-pipeline/AcousticEndpointDetector.js.map +1 -1
- package/dist/voice-pipeline/HardCutBargeinHandler.d.ts +66 -15
- package/dist/voice-pipeline/HardCutBargeinHandler.d.ts.map +1 -1
- package/dist/voice-pipeline/HardCutBargeinHandler.js +65 -13
- package/dist/voice-pipeline/HardCutBargeinHandler.js.map +1 -1
- package/dist/voice-pipeline/HeuristicEndpointDetector.d.ts +116 -42
- package/dist/voice-pipeline/HeuristicEndpointDetector.d.ts.map +1 -1
- package/dist/voice-pipeline/HeuristicEndpointDetector.js +159 -52
- package/dist/voice-pipeline/HeuristicEndpointDetector.js.map +1 -1
- package/dist/voice-pipeline/SoftFadeBargeinHandler.d.ts +89 -24
- package/dist/voice-pipeline/SoftFadeBargeinHandler.d.ts.map +1 -1
- package/dist/voice-pipeline/SoftFadeBargeinHandler.js +74 -20
- package/dist/voice-pipeline/SoftFadeBargeinHandler.js.map +1 -1
- package/dist/voice-pipeline/VoiceInterruptError.d.ts +68 -10
- package/dist/voice-pipeline/VoiceInterruptError.d.ts.map +1 -1
- package/dist/voice-pipeline/VoiceInterruptError.js +53 -6
- package/dist/voice-pipeline/VoiceInterruptError.js.map +1 -1
- package/dist/voice-pipeline/VoicePipelineOrchestrator.d.ts +190 -39
- package/dist/voice-pipeline/VoicePipelineOrchestrator.d.ts.map +1 -1
- package/dist/voice-pipeline/VoicePipelineOrchestrator.js +266 -53
- package/dist/voice-pipeline/VoicePipelineOrchestrator.js.map +1 -1
- package/dist/voice-pipeline/WebSocketStreamTransport.d.ts +135 -43
- package/dist/voice-pipeline/WebSocketStreamTransport.d.ts.map +1 -1
- package/dist/voice-pipeline/WebSocketStreamTransport.js +109 -47
- package/dist/voice-pipeline/WebSocketStreamTransport.js.map +1 -1
- package/dist/voice-pipeline/index.d.ts +34 -1
- package/dist/voice-pipeline/index.d.ts.map +1 -1
- package/dist/voice-pipeline/index.js +41 -1
- package/dist/voice-pipeline/index.js.map +1 -1
- package/dist/voice-pipeline/types.d.ts +432 -106
- package/dist/voice-pipeline/types.d.ts.map +1 -1
- package/dist/voice-pipeline/types.js +21 -9
- package/dist/voice-pipeline/types.js.map +1 -1
- package/package.json +1 -1
|
@@ -2,23 +2,68 @@
|
|
|
2
2
|
* @module voice-pipeline/HardCutBargeinHandler
|
|
3
3
|
*
|
|
4
4
|
* Implements a hard-cut barge-in policy: when the user speaks over TTS output
|
|
5
|
-
* for at least
|
|
6
|
-
* no fade-out. Short detections below the
|
|
7
|
-
* noise and ignored.
|
|
5
|
+
* for at least {@link HardCutBargeinHandlerOptions.minSpeechMs} milliseconds,
|
|
6
|
+
* playback is stopped immediately with no fade-out. Short detections below the
|
|
7
|
+
* threshold are treated as accidental noise and ignored.
|
|
8
|
+
*
|
|
9
|
+
* ## Why 300 ms default threshold?
|
|
10
|
+
*
|
|
11
|
+
* The 300 ms threshold was chosen to filter out common non-speech audio events
|
|
12
|
+
* that trigger false barge-in detections:
|
|
13
|
+
*
|
|
14
|
+
* - **Lip smacks**: Typically 50-150 ms of energy.
|
|
15
|
+
* - **Breaths/sighs**: Typically 100-250 ms of energy.
|
|
16
|
+
* - **Coughs/sneezes**: Short burst 100-200 ms, but may exceed threshold.
|
|
17
|
+
* - **Background noise spikes**: Door closing, keyboard typing -- usually < 200 ms.
|
|
18
|
+
*
|
|
19
|
+
* At 300 ms, a detection almost certainly represents intentional speech rather
|
|
20
|
+
* than ambient noise. Lowering to < 200 ms increases false positives significantly
|
|
21
|
+
* in noisy environments. Raising to > 500 ms adds noticeable delay before the
|
|
22
|
+
* agent acknowledges the interruption.
|
|
23
|
+
*
|
|
24
|
+
* ## When to use hard-cut vs soft-fade
|
|
25
|
+
*
|
|
26
|
+
* Use hard-cut when:
|
|
27
|
+
* - The conversation style is fast-paced (e.g. customer support).
|
|
28
|
+
* - Users expect immediate response to interruption.
|
|
29
|
+
* - Audio quality is high (fewer false positives).
|
|
30
|
+
*
|
|
31
|
+
* Use {@link SoftFadeBargeinHandler} when:
|
|
32
|
+
* - The conversation is more measured (e.g. storytelling, education).
|
|
33
|
+
* - Users may accidentally trigger barge-in (noisy environment).
|
|
34
|
+
* - A smoother audio experience is preferred.
|
|
35
|
+
*
|
|
36
|
+
* @see {@link SoftFadeBargeinHandler} for the three-tier soft-fade alternative.
|
|
37
|
+
* @see {@link IBargeinHandler} for the interface contract.
|
|
8
38
|
*/
|
|
39
|
+
// ---------------------------------------------------------------------------
|
|
40
|
+
// Implementation
|
|
41
|
+
// ---------------------------------------------------------------------------
|
|
9
42
|
/**
|
|
10
43
|
* Barge-in handler that applies a hard-cut strategy.
|
|
11
44
|
*
|
|
12
45
|
* When the user speaks over an active TTS stream, this handler immediately
|
|
13
|
-
* cancels playback if the detected speech exceeds
|
|
14
|
-
* threshold the interruption is considered noise and playback continues
|
|
46
|
+
* cancels playback if the detected speech exceeds {@link minSpeechMs}. Below
|
|
47
|
+
* that threshold the interruption is considered noise and playback continues
|
|
15
48
|
* uninterrupted.
|
|
16
49
|
*
|
|
50
|
+
* The handler is stateless -- each {@link handleBargein} call is evaluated
|
|
51
|
+
* independently with no memory of previous barge-in events.
|
|
52
|
+
*
|
|
53
|
+
* @see {@link IBargeinHandler} for the interface contract.
|
|
54
|
+
* @see {@link SoftFadeBargeinHandler} for the three-tier alternative.
|
|
55
|
+
*
|
|
17
56
|
* @example
|
|
18
|
-
* ```
|
|
57
|
+
* ```typescript
|
|
19
58
|
* const handler = new HardCutBargeinHandler({ minSpeechMs: 250 });
|
|
20
|
-
*
|
|
21
|
-
* //
|
|
59
|
+
*
|
|
60
|
+
* // Short noise -> ignored
|
|
61
|
+
* handler.handleBargein({ speechDurationMs: 100, interruptedText: '...', playedDurationMs: 500 });
|
|
62
|
+
* // -> { type: 'ignore' }
|
|
63
|
+
*
|
|
64
|
+
* // Intentional speech -> cancel
|
|
65
|
+
* handler.handleBargein({ speechDurationMs: 400, interruptedText: '...', playedDurationMs: 500 });
|
|
66
|
+
* // -> { type: 'cancel', injectMarker: '[interrupted]' }
|
|
22
67
|
* ```
|
|
23
68
|
*/
|
|
24
69
|
export class HardCutBargeinHandler {
|
|
@@ -30,7 +75,7 @@ export class HardCutBargeinHandler {
|
|
|
30
75
|
constructor(options = {}) {
|
|
31
76
|
/**
|
|
32
77
|
* The interruption strategy implemented by this handler.
|
|
33
|
-
* Always `'hard-cut'
|
|
78
|
+
* Always `'hard-cut'` -- playback is stopped instantly with no fade.
|
|
34
79
|
*/
|
|
35
80
|
this.mode = 'hard-cut';
|
|
36
81
|
this.minSpeechMs = options.minSpeechMs ?? 300;
|
|
@@ -38,17 +83,24 @@ export class HardCutBargeinHandler {
|
|
|
38
83
|
/**
|
|
39
84
|
* Evaluate the barge-in context and return the action the pipeline should take.
|
|
40
85
|
*
|
|
41
|
-
*
|
|
42
|
-
*
|
|
43
|
-
*
|
|
86
|
+
* Decision logic (binary threshold):
|
|
87
|
+
* - `speechDurationMs >= minSpeechMs` -> Cancel TTS immediately and inject
|
|
88
|
+
* an `'[interrupted]'` marker into the conversation context.
|
|
89
|
+
* - `speechDurationMs < minSpeechMs` -> Ignore the detection as noise.
|
|
44
90
|
*
|
|
45
91
|
* @param context - Snapshot of the barge-in state at the moment of detection.
|
|
46
|
-
* @returns The pipeline action to execute.
|
|
92
|
+
* @returns The pipeline action to execute. Always synchronous (no Promise).
|
|
47
93
|
*/
|
|
48
94
|
handleBargein(context) {
|
|
49
95
|
if (context.speechDurationMs >= this.minSpeechMs) {
|
|
96
|
+
// Speech duration meets the threshold -> intentional interruption.
|
|
97
|
+
// The '[interrupted]' marker is injected into the conversation history
|
|
98
|
+
// so the agent knows its previous response was cut short and can avoid
|
|
99
|
+
// repeating the interrupted content.
|
|
50
100
|
return { type: 'cancel', injectMarker: '[interrupted]' };
|
|
51
101
|
}
|
|
102
|
+
// Below threshold -> likely noise, lip smack, or breath.
|
|
103
|
+
// Continue TTS playback as if nothing happened.
|
|
52
104
|
return { type: 'ignore' };
|
|
53
105
|
}
|
|
54
106
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"HardCutBargeinHandler.js","sourceRoot":"","sources":["../../src/voice-pipeline/HardCutBargeinHandler.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"HardCutBargeinHandler.js","sourceRoot":"","sources":["../../src/voice-pipeline/HardCutBargeinHandler.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqCG;AA8BH,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,MAAM,OAAO,qBAAqB;IAahC;;;;OAIG;IACH,YAAY,UAAwC,EAAE;QAjBtD;;;WAGG;QACM,SAAI,GAAG,UAAmB,CAAC;QAclC,IAAI,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW,IAAI,GAAG,CAAC;IAChD,CAAC;IAED;;;;;;;;;;OAUG;IACH,aAAa,CAAC,OAAuB;QACnC,IAAI,OAAO,CAAC,gBAAgB,IAAI,IAAI,CAAC,WAAW,EAAE,CAAC;YACjD,mEAAmE;YACnE,uEAAuE;YACvE,uEAAuE;YACvE,qCAAqC;YACrC,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,eAAe,EAAE,CAAC;QAC3D,CAAC;QAED,yDAAyD;QACzD,gDAAgD;QAChD,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;IAC5B,CAAC;CACF"}
|
|
@@ -6,24 +6,46 @@
|
|
|
6
6
|
* finished speaking. Suitable for low-latency deployments where an LLM-based
|
|
7
7
|
* semantic detector would add unacceptable round-trip overhead.
|
|
8
8
|
*
|
|
9
|
-
* Detection strategy
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
9
|
+
* ## Detection strategy
|
|
10
|
+
*
|
|
11
|
+
* 1. On `speech_end`, if the accumulated final transcript ends with `.`, `?`,
|
|
12
|
+
* or `!`, fire `turn_complete` immediately with reason `'punctuation'`.
|
|
13
|
+
* This provides the lowest-latency turn handoff for well-punctuated speech.
|
|
14
|
+
*
|
|
15
|
+
* 2. Otherwise, start a silence timer (default 1,500 ms). If speech does not
|
|
16
|
+
* resume before the timer fires, emit `turn_complete` with reason
|
|
17
|
+
* `'silence_timeout'`. The timeout acts as a safety net for STT providers
|
|
18
|
+
* that don't produce terminal punctuation reliably.
|
|
19
|
+
*
|
|
20
|
+
* 3. Backchannel phrases (e.g. "uh huh", "yeah") are recognised, suppressed
|
|
21
|
+
* from accumulation, and re-emitted as `'backchannel_detected'` events so
|
|
22
|
+
* the pipeline can decide whether to suppress an agent response.
|
|
23
|
+
*
|
|
24
|
+
* ## Why heuristic over acoustic-only?
|
|
25
|
+
*
|
|
26
|
+
* Pure silence timeout adds up to 1.5 s of unnecessary latency on every turn
|
|
27
|
+
* when the user ends a sentence cleanly. By checking for terminal punctuation,
|
|
28
|
+
* this detector can fire turn_complete immediately, cutting perceived latency
|
|
29
|
+
* by more than half for typical conversational speech.
|
|
30
|
+
*
|
|
31
|
+
* @see {@link AcousticEndpointDetector} for the purely acoustic alternative.
|
|
32
|
+
* @see {@link IEndpointDetector} for the interface contract.
|
|
17
33
|
*/
|
|
18
34
|
import { EventEmitter } from 'node:events';
|
|
19
35
|
import type { IEndpointDetector, TranscriptEvent, VadEvent } from './types.js';
|
|
20
36
|
/**
|
|
21
37
|
* Constructor options for {@link HeuristicEndpointDetector}.
|
|
38
|
+
*
|
|
39
|
+
* @example
|
|
40
|
+
* ```typescript
|
|
41
|
+
* const detector = new HeuristicEndpointDetector({ silenceTimeoutMs: 1000 });
|
|
42
|
+
* ```
|
|
22
43
|
*/
|
|
23
44
|
export interface HeuristicEndpointDetectorOptions {
|
|
24
45
|
/**
|
|
25
46
|
* How long (ms) to wait after `speech_end` before emitting `turn_complete`
|
|
26
|
-
* when no terminal punctuation is detected.
|
|
47
|
+
* when no terminal punctuation is detected. Lower values reduce latency
|
|
48
|
+
* but risk firing mid-sentence during natural pauses.
|
|
27
49
|
* @defaultValue 1500
|
|
28
50
|
*/
|
|
29
51
|
silenceTimeoutMs?: number;
|
|
@@ -32,96 +54,148 @@ export interface HeuristicEndpointDetectorOptions {
|
|
|
32
54
|
* Heuristic endpoint detector that uses terminal punctuation and a silence
|
|
33
55
|
* timeout to decide when the user's turn is complete.
|
|
34
56
|
*
|
|
35
|
-
*
|
|
36
|
-
*
|
|
37
|
-
*
|
|
38
|
-
*
|
|
57
|
+
* ## Events emitted
|
|
58
|
+
*
|
|
59
|
+
* | Event | Payload | Description |
|
|
60
|
+
* |--------------------------|--------------------------|------------------------------------|
|
|
61
|
+
* | `'turn_complete'` | {@link TurnCompleteEvent}| User turn has ended. |
|
|
62
|
+
* | `'backchannel_detected'` | `{ text: string }` | Backchannel phrase was recognised. |
|
|
63
|
+
*
|
|
64
|
+
* @see {@link IEndpointDetector} for the interface contract.
|
|
65
|
+
* @see {@link AcousticEndpointDetector} for the purely acoustic alternative.
|
|
39
66
|
*
|
|
40
67
|
* @example
|
|
41
68
|
* ```typescript
|
|
42
69
|
* const detector = new HeuristicEndpointDetector({ silenceTimeoutMs: 1000 });
|
|
43
70
|
* detector.on('turn_complete', (event) => console.log('Turn done:', event));
|
|
71
|
+
*
|
|
72
|
+
* // Simulate a punctuated sentence followed by speech_end
|
|
44
73
|
* detector.pushTranscript({ text: 'Hello there.', isFinal: true, confidence: 0.95, words: [] });
|
|
45
74
|
* detector.pushVadEvent({ type: 'speech_end', timestamp: Date.now(), source: 'vad' });
|
|
46
|
-
* //
|
|
75
|
+
* // -> 'turn_complete' fires immediately with reason 'punctuation'
|
|
47
76
|
* ```
|
|
48
77
|
*/
|
|
49
78
|
export declare class HeuristicEndpointDetector extends EventEmitter implements IEndpointDetector {
|
|
50
79
|
/**
|
|
51
80
|
* Active detection strategy label.
|
|
52
|
-
*
|
|
53
|
-
*
|
|
81
|
+
* Always `'heuristic'` for this implementation.
|
|
82
|
+
*
|
|
83
|
+
* @see {@link IEndpointDetector.mode}
|
|
54
84
|
*/
|
|
55
85
|
readonly mode: IEndpointDetector['mode'];
|
|
56
86
|
/** Resolved silence timeout in milliseconds. */
|
|
57
87
|
private readonly silenceTimeoutMs;
|
|
58
|
-
/**
|
|
88
|
+
/**
|
|
89
|
+
* The latest final transcript text accumulated for the current turn.
|
|
90
|
+
* Only updated by final (non-interim) transcript events.
|
|
91
|
+
* Reset to empty string after each `turn_complete` emission.
|
|
92
|
+
*/
|
|
59
93
|
private accumulatedText;
|
|
60
|
-
/**
|
|
94
|
+
/**
|
|
95
|
+
* Whether the VAD currently reports active speech. Set to `true` on
|
|
96
|
+
* `speech_start` and `false` on `speech_end`. Used to prevent the
|
|
97
|
+
* silence timer from starting while the user is still speaking.
|
|
98
|
+
*/
|
|
61
99
|
private speechActive;
|
|
62
|
-
/**
|
|
100
|
+
/**
|
|
101
|
+
* Handle to a pending silence timeout, or `null` if none is running.
|
|
102
|
+
* Cleared when speech resumes or when the detector is reset.
|
|
103
|
+
*/
|
|
63
104
|
private silenceTimer;
|
|
64
|
-
/**
|
|
105
|
+
/**
|
|
106
|
+
* Wall-clock timestamp (ms) when the current turn's speech started.
|
|
107
|
+
* Used to compute `durationMs` in the emitted {@link TurnCompleteEvent}.
|
|
108
|
+
* `null` when no speech has been detected in the current turn.
|
|
109
|
+
*/
|
|
65
110
|
private turnStartMs;
|
|
66
|
-
/**
|
|
111
|
+
/**
|
|
112
|
+
* Confidence of the most recent final transcript. Forwarded into the
|
|
113
|
+
* emitted {@link TurnCompleteEvent}. Defaults to 1 (perfect confidence)
|
|
114
|
+
* and is updated with each final transcript event.
|
|
115
|
+
*/
|
|
67
116
|
private lastConfidence;
|
|
68
117
|
/**
|
|
69
118
|
* Create a new {@link HeuristicEndpointDetector}.
|
|
70
119
|
*
|
|
71
|
-
* @param options
|
|
120
|
+
* @param options - Optional configuration overrides.
|
|
72
121
|
*/
|
|
73
122
|
constructor(options?: HeuristicEndpointDetectorOptions);
|
|
74
123
|
/**
|
|
75
124
|
* Ingest a transcript event from the upstream STT session.
|
|
76
125
|
*
|
|
77
126
|
* Only final events (`isFinal: true`) affect internal state. Interim results
|
|
78
|
-
* are silently ignored
|
|
79
|
-
*
|
|
127
|
+
* are silently ignored because:
|
|
128
|
+
* 1. They arrive very frequently (10-50 per second) and would trigger
|
|
129
|
+
* excessive punctuation checks.
|
|
130
|
+
* 2. Their text is unstable -- a word ending with "." may be revised in
|
|
131
|
+
* the next interim result, causing false turn-completion signals.
|
|
80
132
|
*
|
|
81
|
-
* If the final text is a recognised backchannel phrase the detector emits
|
|
82
|
-
* `'backchannel_detected'` and returns
|
|
83
|
-
* a subsequent `speech_end` event
|
|
133
|
+
* If the final text is a recognised backchannel phrase, the detector emits
|
|
134
|
+
* `'backchannel_detected'` and returns WITHOUT accumulating the text. This
|
|
135
|
+
* prevents a subsequent `speech_end` event from triggering `turn_complete`
|
|
136
|
+
* for what was merely an acknowledgement, not a real conversational turn.
|
|
84
137
|
*
|
|
85
|
-
* @param transcript
|
|
138
|
+
* @param transcript - Transcript event from the STT session.
|
|
86
139
|
*/
|
|
87
140
|
pushTranscript(transcript: TranscriptEvent): void;
|
|
88
141
|
/**
|
|
89
142
|
* Ingest a VAD (voice activity detection) event.
|
|
90
143
|
*
|
|
91
|
-
*
|
|
92
|
-
*
|
|
93
|
-
* -
|
|
94
|
-
*
|
|
95
|
-
*
|
|
96
|
-
*
|
|
144
|
+
* Event handling by type:
|
|
145
|
+
*
|
|
146
|
+
* - **`speech_start`**: Marks the turn as active and cancels any pending
|
|
147
|
+
* silence timer (the user resumed speaking before the timeout elapsed).
|
|
148
|
+
* This is critical for avoiding false turn-completion when the user
|
|
149
|
+
* takes a brief pause mid-sentence.
|
|
150
|
+
*
|
|
151
|
+
* - **`speech_end`**: If accumulated text is available, either fires
|
|
152
|
+
* `turn_complete` immediately (when text ends with terminal punctuation)
|
|
153
|
+
* or starts the silence timer (when no punctuation is detected).
|
|
154
|
+
*
|
|
155
|
+
* - **`silence`**: Periodic heartbeat events are ignored. The silence timer
|
|
156
|
+
* (started on `speech_end`) already handles delayed turn-completion
|
|
157
|
+
* independently of heartbeat cadence.
|
|
97
158
|
*
|
|
98
|
-
* @param event
|
|
159
|
+
* @param event - VAD transition event.
|
|
99
160
|
*/
|
|
100
161
|
pushVadEvent(event: VadEvent): void;
|
|
101
162
|
/**
|
|
102
163
|
* Reset all internal state, cancel pending timers, and prepare the detector
|
|
103
|
-
* for the next user turn.
|
|
104
|
-
*
|
|
164
|
+
* for the next user turn.
|
|
165
|
+
*
|
|
166
|
+
* Called by the pipeline after each `turn_complete` event (both internally
|
|
167
|
+
* and by the orchestrator's flush_complete handler) to ensure clean state
|
|
168
|
+
* before audio for the next turn begins to arrive.
|
|
105
169
|
*/
|
|
106
170
|
reset(): void;
|
|
107
171
|
/**
|
|
108
172
|
* Emit `turn_complete` with the currently accumulated transcript and then
|
|
109
173
|
* reset internal state so the detector is ready for the next turn.
|
|
110
174
|
*
|
|
111
|
-
*
|
|
112
|
-
*
|
|
113
|
-
*
|
|
175
|
+
* The reset happens BEFORE the emit to ensure that any re-entrant listeners
|
|
176
|
+
* (e.g. an endpoint detector handler that immediately calls pushVadEvent)
|
|
177
|
+
* see clean state.
|
|
178
|
+
*
|
|
179
|
+
* @param reason - The semantic reason driving this completion.
|
|
180
|
+
* @param speechEndTimestamp - Unix epoch ms timestamp of the `speech_end` event,
|
|
181
|
+
* used to compute `durationMs` as `speechEndTimestamp - turnStartMs`.
|
|
114
182
|
*/
|
|
115
183
|
private _emitTurnComplete;
|
|
116
184
|
/**
|
|
117
185
|
* Start the silence-timeout timer. If the user does not resume speaking
|
|
118
|
-
* within {@link silenceTimeoutMs} ms the detector fires `turn_complete
|
|
186
|
+
* within {@link silenceTimeoutMs} ms, the detector fires `turn_complete`
|
|
187
|
+
* with reason `'silence_timeout'`.
|
|
188
|
+
*
|
|
189
|
+
* Any previously running silence timer is cleared first to prevent
|
|
190
|
+
* double-fires from rapid speech_end -> speech_start -> speech_end sequences.
|
|
119
191
|
*
|
|
120
|
-
* @param speechEndTimestamp
|
|
192
|
+
* @param speechEndTimestamp - Timestamp passed through to {@link _emitTurnComplete}
|
|
193
|
+
* for duration calculation.
|
|
121
194
|
*/
|
|
122
195
|
private _startSilenceTimer;
|
|
123
196
|
/**
|
|
124
197
|
* Cancel a pending silence timer without any side effects.
|
|
198
|
+
* Safe to call when no timer is active (no-op).
|
|
125
199
|
*/
|
|
126
200
|
private _clearSilenceTimer;
|
|
127
201
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"HeuristicEndpointDetector.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/HeuristicEndpointDetector.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"HeuristicEndpointDetector.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/HeuristicEndpointDetector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgCG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,KAAK,EACV,iBAAiB,EACjB,eAAe,EACf,QAAQ,EAET,MAAM,YAAY,CAAC;AAkEpB;;;;;;;GAOG;AACH,MAAM,WAAW,gCAAgC;IAC/C;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,MAAM,CAAC;CAC3B;AAMD;;;;;;;;;;;;;;;;;;;;;;;;GAwBG;AACH,qBAAa,yBACX,SAAQ,YACR,YAAW,iBAAiB;IAE5B;;;;;OAKG;IACH,QAAQ,CAAC,IAAI,EAAE,iBAAiB,CAAC,MAAM,CAAC,CAAe;IAEvD,gDAAgD;IAChD,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAS;IAE1C;;;;OAIG;IACH,OAAO,CAAC,eAAe,CAAM;IAE7B;;;;OAIG;IACH,OAAO,CAAC,YAAY,CAAS;IAE7B;;;OAGG;IACH,OAAO,CAAC,YAAY,CAA8C;IAElE;;;;OAIG;IACH,OAAO,CAAC,WAAW,CAAuB;IAE1C;;;;OAIG;IACH,OAAO,CAAC,cAAc,CAAK;IAM3B;;;;OAIG;gBACS,OAAO,GAAE,gCAAqC;IAS1D;;;;;;;;;;;;;;;;OAgBG;IACH,cAAc,CAAC,UAAU,EAAE,eAAe,GAAG,IAAI;IA4BjD;;;;;;;;;;;;;;;;;;;OAmBG;IACH,YAAY,CAAC,KAAK,EAAE,QAAQ,GAAG,IAAI;IAgDnC;;;;;;;OAOG;IACH,KAAK,IAAI,IAAI;IAYb;;;;;;;;;;;OAWG;IACH,OAAO,CAAC,iBAAiB;IAsBzB;;;;;;;;;;OAUG;IACH,OAAO,CAAC,kBAAkB;IAQ1B;;;OAGG;IACH,OAAO,CAAC,kBAAkB;CAM3B"}
|