@framers/agentos 0.1.74 → 0.1.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +139 -34
- package/dist/core/agency/AgentCommunicationBus.d.ts +1 -0
- package/dist/core/agency/AgentCommunicationBus.d.ts.map +1 -1
- package/dist/core/agency/AgentCommunicationBus.js +62 -8
- package/dist/core/agency/AgentCommunicationBus.js.map +1 -1
- package/dist/core/agency/IAgentCommunicationBus.d.ts +1 -1
- package/dist/core/agency/IAgentCommunicationBus.d.ts.map +1 -1
- package/dist/orchestration/builders/index.d.ts +1 -1
- package/dist/orchestration/builders/index.d.ts.map +1 -1
- package/dist/orchestration/builders/index.js +1 -1
- package/dist/orchestration/builders/index.js.map +1 -1
- package/dist/orchestration/builders/nodes.d.ts +15 -0
- package/dist/orchestration/builders/nodes.d.ts.map +1 -1
- package/dist/orchestration/builders/nodes.js +33 -0
- package/dist/orchestration/builders/nodes.js.map +1 -1
- package/dist/orchestration/runtime/LoopController.d.ts +10 -10
- package/dist/orchestration/runtime/LoopController.d.ts.map +1 -1
- package/dist/orchestration/runtime/LoopController.js +1 -1
- package/dist/orchestration/runtime/LoopController.js.map +1 -1
- package/dist/orchestration/runtime/index.d.ts +1 -1
- package/dist/orchestration/runtime/index.d.ts.map +1 -1
- package/dist/orchestration/runtime/index.js.map +1 -1
- package/dist/speech/FallbackProxy.d.ts +104 -0
- package/dist/speech/FallbackProxy.d.ts.map +1 -0
- package/dist/speech/FallbackProxy.js +151 -0
- package/dist/speech/FallbackProxy.js.map +1 -0
- package/dist/speech/SpeechProviderResolver.d.ts +103 -0
- package/dist/speech/SpeechProviderResolver.d.ts.map +1 -0
- package/dist/speech/SpeechProviderResolver.js +256 -0
- package/dist/speech/SpeechProviderResolver.js.map +1 -0
- package/dist/speech/SpeechRuntime.d.ts +23 -1
- package/dist/speech/SpeechRuntime.d.ts.map +1 -1
- package/dist/speech/SpeechRuntime.js +82 -8
- package/dist/speech/SpeechRuntime.js.map +1 -1
- package/dist/speech/index.d.ts +6 -0
- package/dist/speech/index.d.ts.map +1 -1
- package/dist/speech/index.js +6 -0
- package/dist/speech/index.js.map +1 -1
- package/dist/speech/providerCatalog.d.ts.map +1 -1
- package/dist/speech/providerCatalog.js +15 -1
- package/dist/speech/providerCatalog.js.map +1 -1
- package/dist/speech/providers/AssemblyAISTTProvider.d.ts +49 -0
- package/dist/speech/providers/AssemblyAISTTProvider.d.ts.map +1 -0
- package/dist/speech/providers/AssemblyAISTTProvider.js +151 -0
- package/dist/speech/providers/AssemblyAISTTProvider.js.map +1 -0
- package/dist/speech/providers/AzureSpeechSTTProvider.d.ts +48 -0
- package/dist/speech/providers/AzureSpeechSTTProvider.d.ts.map +1 -0
- package/dist/speech/providers/AzureSpeechSTTProvider.js +90 -0
- package/dist/speech/providers/AzureSpeechSTTProvider.js.map +1 -0
- package/dist/speech/providers/AzureSpeechTTSProvider.d.ts +60 -0
- package/dist/speech/providers/AzureSpeechTTSProvider.d.ts.map +1 -0
- package/dist/speech/providers/AzureSpeechTTSProvider.js +127 -0
- package/dist/speech/providers/AzureSpeechTTSProvider.js.map +1 -0
- package/dist/speech/providers/DeepgramBatchSTTProvider.d.ts +55 -0
- package/dist/speech/providers/DeepgramBatchSTTProvider.d.ts.map +1 -0
- package/dist/speech/providers/DeepgramBatchSTTProvider.js +102 -0
- package/dist/speech/providers/DeepgramBatchSTTProvider.js.map +1 -0
- package/dist/speech/types.d.ts +35 -0
- package/dist/speech/types.d.ts.map +1 -1
- package/dist/voice/CallManager.d.ts +1 -1
- package/dist/voice/CallManager.d.ts.map +1 -1
- package/dist/voice/CallManager.js +9 -0
- package/dist/voice/CallManager.js.map +1 -1
- package/dist/voice/MediaStreamParser.d.ts +83 -0
- package/dist/voice/MediaStreamParser.d.ts.map +1 -0
- package/dist/voice/MediaStreamParser.js +2 -0
- package/dist/voice/MediaStreamParser.js.map +1 -0
- package/dist/voice/TelephonyStreamTransport.d.ts +112 -0
- package/dist/voice/TelephonyStreamTransport.d.ts.map +1 -0
- package/dist/voice/TelephonyStreamTransport.js +208 -0
- package/dist/voice/TelephonyStreamTransport.js.map +1 -0
- package/dist/voice/index.d.ts +10 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +11 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/parsers/PlivoMediaStreamParser.d.ts +43 -0
- package/dist/voice/parsers/PlivoMediaStreamParser.d.ts.map +1 -0
- package/dist/voice/parsers/PlivoMediaStreamParser.js +92 -0
- package/dist/voice/parsers/PlivoMediaStreamParser.js.map +1 -0
- package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts +51 -0
- package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts.map +1 -0
- package/dist/voice/parsers/TelnyxMediaStreamParser.js +103 -0
- package/dist/voice/parsers/TelnyxMediaStreamParser.js.map +1 -0
- package/dist/voice/parsers/TwilioMediaStreamParser.d.ts +50 -0
- package/dist/voice/parsers/TwilioMediaStreamParser.d.ts.map +1 -0
- package/dist/voice/parsers/TwilioMediaStreamParser.js +144 -0
- package/dist/voice/parsers/TwilioMediaStreamParser.js.map +1 -0
- package/dist/voice/providers/plivo.d.ts +77 -0
- package/dist/voice/providers/plivo.d.ts.map +1 -0
- package/dist/voice/providers/plivo.js +180 -0
- package/dist/voice/providers/plivo.js.map +1 -0
- package/dist/voice/providers/telnyx.d.ts +93 -0
- package/dist/voice/providers/telnyx.d.ts.map +1 -0
- package/dist/voice/providers/telnyx.js +193 -0
- package/dist/voice/providers/telnyx.js.map +1 -0
- package/dist/voice/providers/twilio.d.ts +79 -0
- package/dist/voice/providers/twilio.d.ts.map +1 -0
- package/dist/voice/providers/twilio.js +191 -0
- package/dist/voice/providers/twilio.js.map +1 -0
- package/dist/voice/twiml.d.ts +69 -0
- package/dist/voice/twiml.d.ts.map +1 -0
- package/dist/voice/twiml.js +92 -0
- package/dist/voice/twiml.js.map +1 -0
- package/dist/voice/types.d.ts +9 -1
- package/dist/voice/types.d.ts.map +1 -1
- package/dist/voice-pipeline/AcousticEndpointDetector.d.ts +90 -0
- package/dist/voice-pipeline/AcousticEndpointDetector.d.ts.map +1 -0
- package/dist/voice-pipeline/AcousticEndpointDetector.js +123 -0
- package/dist/voice-pipeline/AcousticEndpointDetector.js.map +1 -0
- package/dist/voice-pipeline/HardCutBargeinHandler.d.ts +67 -0
- package/dist/voice-pipeline/HardCutBargeinHandler.d.ts.map +1 -0
- package/dist/voice-pipeline/HardCutBargeinHandler.js +55 -0
- package/dist/voice-pipeline/HardCutBargeinHandler.js.map +1 -0
- package/dist/voice-pipeline/HeuristicEndpointDetector.d.ts +128 -0
- package/dist/voice-pipeline/HeuristicEndpointDetector.d.ts.map +1 -0
- package/dist/voice-pipeline/HeuristicEndpointDetector.js +240 -0
- package/dist/voice-pipeline/HeuristicEndpointDetector.js.map +1 -0
- package/dist/voice-pipeline/SoftFadeBargeinHandler.d.ts +96 -0
- package/dist/voice-pipeline/SoftFadeBargeinHandler.d.ts.map +1 -0
- package/dist/voice-pipeline/SoftFadeBargeinHandler.js +69 -0
- package/dist/voice-pipeline/SoftFadeBargeinHandler.js.map +1 -0
- package/dist/voice-pipeline/VoicePipelineOrchestrator.d.ts +122 -0
- package/dist/voice-pipeline/VoicePipelineOrchestrator.d.ts.map +1 -0
- package/dist/voice-pipeline/VoicePipelineOrchestrator.js +317 -0
- package/dist/voice-pipeline/VoicePipelineOrchestrator.js.map +1 -0
- package/dist/voice-pipeline/WebSocketStreamTransport.d.ts +148 -0
- package/dist/voice-pipeline/WebSocketStreamTransport.d.ts.map +1 -0
- package/dist/voice-pipeline/WebSocketStreamTransport.js +207 -0
- package/dist/voice-pipeline/WebSocketStreamTransport.js.map +1 -0
- package/dist/voice-pipeline/index.d.ts +13 -0
- package/dist/voice-pipeline/index.d.ts.map +1 -0
- package/dist/voice-pipeline/index.js +13 -0
- package/dist/voice-pipeline/index.js.map +1 -0
- package/dist/voice-pipeline/types.d.ts +905 -0
- package/dist/voice-pipeline/types.d.ts.map +1 -0
- package/dist/voice-pipeline/types.js +23 -0
- package/dist/voice-pipeline/types.js.map +1 -0
- package/package.json +6 -1
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module voice-pipeline/HeuristicEndpointDetector
|
|
3
|
+
*
|
|
4
|
+
* A lightweight, rule-based endpoint detector that combines terminal punctuation
|
|
5
|
+
* analysis with a configurable silence timeout to determine when the user has
|
|
6
|
+
* finished speaking. Suitable for low-latency deployments where an LLM-based
|
|
7
|
+
* semantic detector would add unacceptable round-trip overhead.
|
|
8
|
+
*
|
|
9
|
+
* Detection strategy:
|
|
10
|
+
* 1. On `speech_end`, if the accumulated final transcript ends with `.`, `?`, or `!`,
|
|
11
|
+
* fire `turn_complete` immediately with reason `'punctuation'`.
|
|
12
|
+
* 2. Otherwise, start a silence timer (default 1500 ms). If speech does not
|
|
13
|
+
* resume before the timer fires, emit `turn_complete` with reason `'silence_timeout'`.
|
|
14
|
+
* 3. Backchannel phrases (e.g. "uh huh", "yeah") are recognised, suppressed from
|
|
15
|
+
* accumulation, and re-emitted as `'backchannel_detected'` events so the
|
|
16
|
+
* pipeline can decide whether to suppress an agent response.
|
|
17
|
+
*/
|
|
18
|
+
import { EventEmitter } from 'node:events';
|
|
19
|
+
// ---------------------------------------------------------------------------
|
|
20
|
+
// Constants
|
|
21
|
+
// ---------------------------------------------------------------------------
|
|
22
|
+
/**
 * Default silence duration (ms) after speech stops before firing `turn_complete`.
 */
const DEFAULT_SILENCE_TIMEOUT_MS = 1500;

/**
 * Matches text whose last meaningful character is sentence-terminal
 * punctuation (`.`, `?` or `!`).
 *
 * Closing quotes/brackets and trailing whitespace after the punctuation are
 * tolerated, so `"Hello. "` and `He said "stop."` still count as terminated.
 */
const TERMINAL_PUNCTUATION = /[.?!]["')\]]*\s*$/;

/**
 * Punctuation that is replaced by a space before a transcript is compared
 * against {@link BACKCHANNEL_PHRASES}. Hyphens are deliberately kept because
 * some phrases (e.g. `'mm-hmm'`) contain one.
 */
const BACKCHANNEL_PUNCTUATION = /[.,!?;:…]+/g;

/**
 * Normalised backchannel phrases that indicate the listener is acknowledging
 * but not taking a full conversational turn. Compared against the output of
 * {@link normaliseBackchannelCandidate}.
 */
const BACKCHANNEL_PHRASES = new Set([
  'uh huh',
  'uh-huh',
  'yeah',
  'okay',
  'ok',
  'mm hmm',
  'mmhmm',
  'mhm',
  'mm-hmm',
  'right',
  'sure',
  'yep',
  'yup',
  'gotcha',
]);

/**
 * Reduce a transcript to the canonical form used for backchannel lookup:
 * lower-cased, sentence punctuation removed, whitespace collapsed and trimmed.
 *
 * @param {string} text — Raw transcript text.
 * @returns {string} The canonical form, e.g. `'Uh,  huh.'` → `'uh huh'`.
 */
function normaliseBackchannelCandidate(text) {
  return text
    .toLowerCase()
    .replace(BACKCHANNEL_PUNCTUATION, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}

// ---------------------------------------------------------------------------
// Implementation
// ---------------------------------------------------------------------------

/**
 * Heuristic endpoint detector that uses terminal punctuation and a silence
 * timeout to decide when the user's turn is complete.
 *
 * Emits:
 * - `'turn_complete'` ({@link TurnCompleteEvent}) — user turn has ended.
 * - `'backchannel_detected'` (`{ text: string }`) — a backchannel phrase was
 *   recognised; accumulation is suppressed for this utterance.
 *
 * @example
 * ```typescript
 * const detector = new HeuristicEndpointDetector({ silenceTimeoutMs: 1000 });
 * detector.on('turn_complete', (event) => console.log('Turn done:', event));
 * detector.pushTranscript({ text: 'Hello there.', isFinal: true, confidence: 0.95, words: [] });
 * detector.pushVadEvent({ type: 'speech_end', timestamp: Date.now(), source: 'vad' });
 * // → 'turn_complete' fires immediately with reason 'punctuation'
 * ```
 */
export class HeuristicEndpointDetector extends EventEmitter {
  // ---------------------------------------------------------------------------
  // Constructor
  // ---------------------------------------------------------------------------

  /**
   * Create a new {@link HeuristicEndpointDetector}.
   *
   * @param options — Optional configuration overrides. `silenceTimeoutMs`
   *   replaces the default silence timeout of 1500 ms.
   */
  constructor(options = {}) {
    super();

    /**
     * Active detection strategy label; always `'heuristic'` at runtime.
     *
     * NOTE(review): the upstream comment described this field as typed
     * `'hybrid'` to satisfy `IEndpointDetector.mode` — confirm which label
     * consumers actually expect.
     */
    this.mode = 'heuristic';

    /** The latest final transcript text kept for the current turn. */
    this.accumulatedText = '';

    /** Whether the VAD currently reports active speech. */
    this.speechActive = false;

    /** Handle to a pending silence timeout, or `null` if none is running. */
    this.silenceTimer = null;

    /** Timestamp (ms) of the `speech_start` event that opened the current turn. */
    this.turnStartMs = null;

    /** Confidence of the most recent final transcript. */
    this.lastConfidence = 1;

    this.silenceTimeoutMs = options.silenceTimeoutMs ?? DEFAULT_SILENCE_TIMEOUT_MS;
  }

  // ---------------------------------------------------------------------------
  // IEndpointDetector — pushTranscript
  // ---------------------------------------------------------------------------

  /**
   * Ingest a transcript event from the upstream STT session.
   *
   * Only final events (`isFinal: true`) affect internal state; interim results
   * are ignored. Final events whose text is missing, empty, or whitespace-only
   * are also ignored so they cannot erase text already stored for the turn or
   * produce a blank `turn_complete`.
   *
   * If the final text is a recognised backchannel phrase (compared ignoring
   * case, sentence punctuation and extra whitespace) the detector emits
   * `'backchannel_detected'` with the original text and returns without
   * storing it.
   *
   * NOTE(review): this method only stores state. A final transcript that
   * arrives after `speech_end` does not by itself start the silence timer or
   * emit `turn_complete`; confirm the STT integration delivers finals first.
   *
   * @param transcript — Transcript event from the STT session.
   */
  pushTranscript(transcript) {
    if (!transcript.isFinal) {
      // Ignore partial/interim hypotheses — they will be superseded.
      return;
    }

    const text = typeof transcript.text === 'string' ? transcript.text : '';
    if (text.trim() === '') {
      // Blank finals carry no content; keep whatever the turn already holds.
      return;
    }

    // Detect backchannel acknowledgements before storing the text.
    if (BACKCHANNEL_PHRASES.has(normaliseBackchannelCandidate(text))) {
      this.emit('backchannel_detected', { text });
      return;
    }

    // Keep the final transcript and its confidence; fall back to the same
    // neutral value used by the constructor when no confidence is supplied.
    this.accumulatedText = text;
    this.lastConfidence = transcript.confidence ?? 1;
  }

  // ---------------------------------------------------------------------------
  // IEndpointDetector — pushVadEvent
  // ---------------------------------------------------------------------------

  /**
   * Ingest a VAD (voice activity detection) event.
   *
   * - `speech_start`: marks speech as active, cancels any pending silence
   *   timer, and records the turn start timestamp if none is set yet.
   * - `speech_end`: if text is stored for the turn, either fires
   *   `turn_complete` immediately (terminal punctuation) or starts the
   *   silence timer. With no stored text nothing is emitted.
   * - `silence`: ignored; only `speech_end` drives the timeout logic.
   *
   * @param event — VAD transition event.
   */
  pushVadEvent(event) {
    switch (event.type) {
      case 'speech_start': {
        this.speechActive = true;
        this._clearSilenceTimer();
        if (this.turnStartMs === null) {
          this.turnStartMs = event.timestamp;
        }
        break;
      }
      case 'speech_end': {
        this.speechActive = false;
        if (!this.accumulatedText) {
          // Nothing to flush — no transcript arrived yet.
          break;
        }
        if (TERMINAL_PUNCTUATION.test(this.accumulatedText)) {
          // Sentence-terminal punctuation → fire immediately.
          this._emitTurnComplete('punctuation', event.timestamp);
        } else {
          // No punctuation → wait for silence timeout.
          this._startSilenceTimer(event.timestamp);
        }
        break;
      }
      case 'silence': {
        // Periodic heartbeat — no action required; the silence timer already
        // handles the delayed fire if one is pending.
        break;
      }
    }
  }

  // ---------------------------------------------------------------------------
  // IEndpointDetector — reset
  // ---------------------------------------------------------------------------

  /**
   * Reset all internal state and cancel any pending silence timer so the
   * detector is ready for the next user turn.
   */
  reset() {
    this._clearSilenceTimer();
    this.accumulatedText = '';
    this.speechActive = false;
    this.turnStartMs = null;
    this.lastConfidence = 1;
  }

  // ---------------------------------------------------------------------------
  // Private helpers
  // ---------------------------------------------------------------------------

  /**
   * Emit `turn_complete` with the currently stored transcript and reset
   * internal state so the detector is ready for the next turn.
   *
   * @param reason — Why the turn ended (`'punctuation'` or `'silence_timeout'`).
   * @param speechEndTimestamp — Timestamp of the `speech_end` event; the turn
   *   start timestamp is subtracted from it to compute `durationMs` (0 when no
   *   `speech_start` was seen).
   */
  _emitTurnComplete(reason, speechEndTimestamp) {
    const durationMs = this.turnStartMs !== null ? speechEndTimestamp - this.turnStartMs : 0;
    const event = {
      transcript: this.accumulatedText,
      confidence: this.lastConfidence,
      durationMs,
      reason,
    };
    // Reset before emitting so that any re-entrant listeners see clean state.
    this.reset();
    this.emit('turn_complete', event);
  }

  /**
   * Start the silence-timeout timer, replacing any timer already pending. If
   * it is not cancelled within `silenceTimeoutMs` ms the detector fires
   * `turn_complete` with reason `'silence_timeout'`.
   *
   * @param speechEndTimestamp — Timestamp passed through to `_emitTurnComplete`.
   */
  _startSilenceTimer(speechEndTimestamp) {
    this._clearSilenceTimer();
    this.silenceTimer = setTimeout(() => {
      this.silenceTimer = null;
      this._emitTurnComplete('silence_timeout', speechEndTimestamp);
    }, this.silenceTimeoutMs);
  }

  /**
   * Cancel a pending silence timer, if any, and clear its handle.
   */
  _clearSilenceTimer() {
    if (this.silenceTimer !== null) {
      clearTimeout(this.silenceTimer);
      this.silenceTimer = null;
    }
  }
}
|
|
240
|
+
//# sourceMappingURL=HeuristicEndpointDetector.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"HeuristicEndpointDetector.js","sourceRoot":"","sources":["../../src/voice-pipeline/HeuristicEndpointDetector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAQ3C,8EAA8E;AAC9E,YAAY;AACZ,8EAA8E;AAE9E;;GAEG;AACH,MAAM,0BAA0B,GAAG,IAAK,CAAC;AAEzC;;GAEG;AACH,MAAM,oBAAoB,GAAG,QAAQ,CAAC;AAEtC;;;GAGG;AACH,MAAM,mBAAmB,GAAG,IAAI,GAAG,CAAC;IAClC,QAAQ;IACR,MAAM;IACN,MAAM;IACN,IAAI;IACJ,QAAQ;IACR,OAAO;IACP,KAAK;IACL,QAAQ;IACR,OAAO;IACP,MAAM;IACN,KAAK;IACL,KAAK;IACL,QAAQ;CACT,CAAC,CAAC;AAkBH,8EAA8E;AAC9E,iBAAiB;AACjB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;GAiBG;AACH,MAAM,OAAO,yBACX,SAAQ,YAAY;IA4BpB,8EAA8E;IAC9E,cAAc;IACd,8EAA8E;IAE9E;;;;OAIG;IACH,YAAY,UAA4C,EAAE;QACxD,KAAK,EAAE,CAAC;QAnCV;;;;WAIG;QACM,SAAI,GAA8B,WAAW,CAAC;QAKvD,yEAAyE;QACjE,oBAAe,GAAG,EAAE,CAAC;QAE7B,uDAAuD;QAC/C,iBAAY,GAAG,KAAK,CAAC;QAE7B,yEAAyE;QACjE,iBAAY,GAAyC,IAAI,CAAC;QAElE,wEAAwE;QAChE,gBAAW,GAAkB,IAAI,CAAC;QAE1C,sDAAsD;QAC9C,mBAAc,GAAG,CAAC,CAAC;QAazB,IAAI,CAAC,gBAAgB,GAAG,OAAO,CAAC,gBAAgB,IAAI,0BAA0B,CAAC;IACjF,CAAC;IAED,8EAA8E;IAC9E,qCAAqC;IACrC,8EAA8E;IAE9E;;;;;;;;;;;;OAYG;IACH,cAAc,CAAC,UAA2B;QACxC,IAAI,CAAC,UAAU,CAAC,OAAO,EAAE,CAAC;YACxB,+DAA+D;YAC/D,OAAO;QACT,CAAC;QAED,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,CAAC;QAC7B,MAAM,UAAU,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC;QAE7C,2DAA2D;QAC3D,IAAI,mBAAmB,CAAC,GAAG,CAAC,UAAU,CAAC,EAAE,CAAC;YACxC,IAAI,CAAC,IAAI,CAAC,sBAAsB,EAAE,EAAE,IAAI,EAAE,CAAC,CAAC;YAC5C,OAAO;QACT,CAAC;QAED,kEAAkE;QAClE,IAAI,CAAC,eAAe,GAAG,IAAI,CAAC;QAC5B,IAAI,CAAC,cAAc,GAAG,UAAU,CAAC,UAAU,CAAC;IAC9C,CAAC;IAED,8EAA8E;IAC9E,mCAAmC;IACnC,8EAA8E;IAE9E;;;;;;;;;;;OAWG;IACH,YAAY,CAAC,KAAe;QAC1B,QAAQ,KAAK,CAAC,IAAI,EAAE,CAAC;YACnB,KAAK,cAAc,CAAC,CAAC,CAAC;gBACpB,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;gBACzB,IAAI,CAAC,kBAAkB,EAAE,CAAC;gBAC1B,IAAI,IAAI,CAAC,WAAW,KAAK,IAAI,EAAE,CAAC;oBAC9B,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC,SAAS,CAAC;gBACrC,CAAC;gBACD,MAAM;YACR,CAAC;YAED,KAAK,YAAY,CAAC,CAAC,CAAC;gBAClB,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;gBAE1B,IAAI,CAAC,IAAI,CAAC,
eAAe,EAAE,CAAC;oBAC1B,gDAAgD;oBAChD,MAAM;gBACR,CAAC;gBAED,IAAI,oBAAoB,CAAC,IAAI,CAAC,IAAI,CAAC,eAAe,CAAC,EAAE,CAAC;oBACpD,oDAAoD;oBACpD,IAAI,CAAC,iBAAiB,CAAC,aAAa,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;gBACzD,CAAC;qBAAM,CAAC;oBACN,6CAA6C;oBAC7C,IAAI,CAAC,kBAAkB,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;gBAC3C,CAAC;gBACD,MAAM;YACR,CAAC;YAED,KAAK,SAAS,CAAC,CAAC,CAAC;gBACf,qEAAqE;gBACrE,8CAA8C;gBAC9C,MAAM;YACR,CAAC;QACH,CAAC;IACH,CAAC;IAED,8EAA8E;IAC9E,4BAA4B;IAC5B,8EAA8E;IAE9E;;;;OAIG;IACH,KAAK;QACH,IAAI,CAAC,kBAAkB,EAAE,CAAC;QAC1B,IAAI,CAAC,eAAe,GAAG,EAAE,CAAC;QAC1B,IAAI,CAAC,YAAY,GAAG,KAAK,CAAC;QAC1B,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QACxB,IAAI,CAAC,cAAc,GAAG,CAAC,CAAC;IAC1B,CAAC;IAED,8EAA8E;IAC9E,kBAAkB;IAClB,8EAA8E;IAE9E;;;;;;;OAOG;IACK,iBAAiB,CACvB,MAAmC,EACnC,kBAA0B;QAE1B,MAAM,UAAU,GACd,IAAI,CAAC,WAAW,KAAK,IAAI,CAAC,CAAC,CAAC,kBAAkB,GAAG,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,CAAC,CAAC;QAExE,MAAM,KAAK,GAAsB;YAC/B,UAAU,EAAE,IAAI,CAAC,eAAe;YAChC,UAAU,EAAE,IAAI,CAAC,cAAc;YAC/B,UAAU;YACV,MAAM;SACP,CAAC;QAEF,0EAA0E;QAC1E,IAAI,CAAC,KAAK,EAAE,CAAC;QAEb,IAAI,CAAC,IAAI,CAAC,eAAe,EAAE,KAAK,CAAC,CAAC;IACpC,CAAC;IAED;;;;;OAKG;IACK,kBAAkB,CAAC,kBAA0B;QACnD,IAAI,CAAC,kBAAkB,EAAE,CAAC;QAC1B,IAAI,CAAC,YAAY,GAAG,UAAU,CAAC,GAAG,EAAE;YAClC,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;YACzB,IAAI,CAAC,iBAAiB,CAAC,iBAAiB,EAAE,kBAAkB,CAAC,CAAC;QAChE,CAAC,EAAE,IAAI,CAAC,gBAAgB,CAAC,CAAC;IAC5B,CAAC;IAED;;OAEG;IACK,kBAAkB;QACxB,IAAI,IAAI,CAAC,YAAY,KAAK,IAAI,EAAE,CAAC;YAC/B,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;YAChC,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;QAC3B,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module voice-pipeline/SoftFadeBargeinHandler
|
|
3
|
+
*
|
|
4
|
+
* Implements a three-tier soft-fade barge-in policy.
|
|
5
|
+
*
|
|
6
|
+
* Very short speech detections (< `ignoreMs`) are dismissed as noise.
|
|
7
|
+
* Medium-length detections trigger a fade-out pause so the user can speak
|
|
8
|
+
* without an abrupt cut. Long detections (>= `cancelMs`) stop playback
|
|
9
|
+
* outright and inject a conversation marker.
|
|
10
|
+
*/
|
|
11
|
+
import type { BargeinAction, BargeinContext, IBargeinHandler } from './types.js';
|
|
12
|
+
/**
|
|
13
|
+
* Construction options for {@link SoftFadeBargeinHandler}.
|
|
14
|
+
*/
|
|
15
|
+
export interface SoftFadeBargeinHandlerOptions {
|
|
16
|
+
/**
|
|
17
|
+
* Speech duration threshold in milliseconds below which the barge-in is
|
|
18
|
+
* treated as accidental noise and ignored.
|
|
19
|
+
*
|
|
20
|
+
* @defaultValue 100
|
|
21
|
+
*/
|
|
22
|
+
ignoreMs?: number;
|
|
23
|
+
/**
|
|
24
|
+
* Speech duration threshold in milliseconds at or above which the barge-in
|
|
25
|
+
* triggers an immediate cancel rather than a fade-out pause. Must be greater
|
|
26
|
+
* than `ignoreMs` for the fade region to exist.
|
|
27
|
+
*
|
|
28
|
+
* @defaultValue 2000
|
|
29
|
+
*/
|
|
30
|
+
cancelMs?: number;
|
|
31
|
+
/**
|
|
32
|
+
* Duration of the TTS fade-out in milliseconds applied when the speech
|
|
33
|
+
* duration falls in the range `[ignoreMs, cancelMs)`.
|
|
34
|
+
*
|
|
35
|
+
* @defaultValue 200
|
|
36
|
+
*/
|
|
37
|
+
fadeMs?: number;
|
|
38
|
+
}
|
|
39
|
+
/**
|
|
40
|
+
* Barge-in handler that applies a three-tier soft-fade strategy.
|
|
41
|
+
*
|
|
42
|
+
* The handler maps the confirmed speech duration to one of three actions:
|
|
43
|
+
*
|
|
44
|
+
* | Speech duration | Action |
|
|
45
|
+
* |--------------------------|---------------------------------------------|
|
|
46
|
+
* | `< ignoreMs` | `ignore` — noise, continue TTS uninterrupted |
|
|
47
|
+
* | `>= ignoreMs` and `< cancelMs` | `pause` with `fadeMs` fade-out |
|
|
48
|
+
* | `>= cancelMs` | `cancel` with `'[interrupted]'` marker |
|
|
49
|
+
*
|
|
50
|
+
* @example
|
|
51
|
+
* ```ts
|
|
52
|
+
* const handler = new SoftFadeBargeinHandler({ ignoreMs: 80, cancelMs: 1500, fadeMs: 150 });
|
|
53
|
+
* handler.handleBargein({ speechDurationMs: 500, ... }); // { type: 'pause', fadeMs: 150 }
|
|
54
|
+
* handler.handleBargein({ speechDurationMs: 1600, ... }); // { type: 'cancel', injectMarker: '[interrupted]' }
|
|
55
|
+
* handler.handleBargein({ speechDurationMs: 30, ... }); // { type: 'ignore' }
|
|
56
|
+
* ```
|
|
57
|
+
*/
|
|
58
|
+
export declare class SoftFadeBargeinHandler implements IBargeinHandler {
|
|
59
|
+
/**
|
|
60
|
+
* The interruption strategy implemented by this handler.
|
|
61
|
+
* Always `'soft-fade'`.
|
|
62
|
+
*/
|
|
63
|
+
readonly mode: "soft-fade";
|
|
64
|
+
/**
|
|
65
|
+
* Speech duration below which the barge-in is dismissed as noise.
|
|
66
|
+
*/
|
|
67
|
+
private readonly ignoreMs;
|
|
68
|
+
/**
|
|
69
|
+
* Speech duration at or above which the barge-in escalates to a full cancel.
|
|
70
|
+
*/
|
|
71
|
+
private readonly cancelMs;
|
|
72
|
+
/**
|
|
73
|
+
* Duration of the TTS audio fade-out applied during a `'pause'` action.
|
|
74
|
+
*/
|
|
75
|
+
private readonly fadeMs;
|
|
76
|
+
/**
|
|
77
|
+
* Constructs a new {@link SoftFadeBargeinHandler}.
|
|
78
|
+
*
|
|
79
|
+
* @param options - Optional configuration. Defaults to
|
|
80
|
+
* `{ ignoreMs: 100, cancelMs: 2000, fadeMs: 200 }`.
|
|
81
|
+
*/
|
|
82
|
+
constructor(options?: SoftFadeBargeinHandlerOptions);
|
|
83
|
+
/**
|
|
84
|
+
* Evaluate the barge-in context and return the pipeline action.
|
|
85
|
+
*
|
|
86
|
+
* Decision tree (evaluated in order):
|
|
87
|
+
* 1. `speechDurationMs < ignoreMs` → `{ type: 'ignore' }`
|
|
88
|
+
* 2. `speechDurationMs >= cancelMs` → `{ type: 'cancel', injectMarker: '[interrupted]' }`
|
|
89
|
+
* 3. Otherwise → `{ type: 'pause', fadeMs }`
|
|
90
|
+
*
|
|
91
|
+
* @param context - Snapshot of the barge-in state at the moment of detection.
|
|
92
|
+
* @returns The pipeline action to execute.
|
|
93
|
+
*/
|
|
94
|
+
handleBargein(context: BargeinContext): BargeinAction;
|
|
95
|
+
}
|
|
96
|
+
//# sourceMappingURL=SoftFadeBargeinHandler.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SoftFadeBargeinHandler.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/SoftFadeBargeinHandler.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAEH,OAAO,KAAK,EAAE,aAAa,EAAE,cAAc,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAEjF;;GAEG;AACH,MAAM,WAAW,6BAA6B;IAC5C;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;;;;OAMG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC;IAElB;;;;;OAKG;IACH,MAAM,CAAC,EAAE,MAAM,CAAC;CACjB;AAED;;;;;;;;;;;;;;;;;;GAkBG;AACH,qBAAa,sBAAuB,YAAW,eAAe;IAC5D;;;OAGG;IACH,QAAQ,CAAC,IAAI,EAAG,WAAW,CAAU;IAErC;;OAEG;IACH,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAElC;;OAEG;IACH,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAElC;;OAEG;IACH,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAEhC;;;;;OAKG;gBACS,OAAO,GAAE,6BAAkC;IAMvD;;;;;;;;;;OAUG;IACH,aAAa,CAAC,OAAO,EAAE,cAAc,GAAG,aAAa;CAatD"}
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module voice-pipeline/SoftFadeBargeinHandler
|
|
3
|
+
*
|
|
4
|
+
* Implements a three-tier soft-fade barge-in policy.
|
|
5
|
+
*
|
|
6
|
+
* Very short speech detections (< `ignoreMs`) are dismissed as noise.
|
|
7
|
+
* Medium-length detections trigger a fade-out pause so the user can speak
|
|
8
|
+
* without an abrupt cut. Long detections (>= `cancelMs`) stop playback
|
|
9
|
+
* outright and inject a conversation marker.
|
|
10
|
+
*/
|
|
11
|
+
/**
 * Barge-in handler that applies a three-tier soft-fade strategy.
 *
 * The handler maps the confirmed speech duration to one of three actions:
 *
 * | Speech duration                  | Action                                       |
 * |----------------------------------|----------------------------------------------|
 * | `< ignoreMs` (or not a number)   | `ignore` — noise, continue TTS uninterrupted |
 * | `>= ignoreMs` and `< cancelMs`   | `pause` with `fadeMs` fade-out               |
 * | `>= cancelMs`                    | `cancel` with `'[interrupted]'` marker       |
 *
 * @example
 * ```ts
 * const handler = new SoftFadeBargeinHandler({ ignoreMs: 80, cancelMs: 1500, fadeMs: 150 });
 * handler.handleBargein({ speechDurationMs: 500, ... });  // { type: 'pause', fadeMs: 150 }
 * handler.handleBargein({ speechDurationMs: 1600, ... }); // { type: 'cancel', injectMarker: '[interrupted]' }
 * handler.handleBargein({ speechDurationMs: 30, ... });   // { type: 'ignore' }
 * ```
 */
export class SoftFadeBargeinHandler {
  /**
   * Constructs a new {@link SoftFadeBargeinHandler}.
   *
   * @param options - Optional configuration. Defaults to
   *   `{ ignoreMs: 100, cancelMs: 2000, fadeMs: 200 }`.
   */
  constructor(options = {}) {
    /**
     * The interruption strategy implemented by this handler.
     * Always `'soft-fade'`.
     */
    this.mode = 'soft-fade';

    /** Speech duration (ms) below which the barge-in is dismissed as noise. */
    this.ignoreMs = options.ignoreMs ?? 100;

    /** Speech duration (ms) at or above which the barge-in becomes a cancel. */
    this.cancelMs = options.cancelMs ?? 2000;

    /** Fade-out duration (ms) returned with a `'pause'` action. */
    this.fadeMs = options.fadeMs ?? 200;
  }

  /**
   * Evaluate the barge-in context and return the pipeline action.
   *
   * Decision tree (evaluated in order):
   * 1. `speechDurationMs` missing / not a number / NaN → `{ type: 'ignore' }`
   * 2. `speechDurationMs < ignoreMs` → `{ type: 'ignore' }`
   * 3. `speechDurationMs >= cancelMs` → `{ type: 'cancel', injectMarker: '[interrupted]' }`
   * 4. Otherwise → `{ type: 'pause', fadeMs }`
   *
   * @param context - Snapshot of the barge-in state at the moment of detection;
   *   only `speechDurationMs` is read.
   * @returns The pipeline action to execute.
   */
  handleBargein(context) {
    const { speechDurationMs } = context;

    // Without a usable duration both threshold comparisons would be false and
    // the call would fall through to 'pause'; an unconfirmed detection must
    // not interrupt playback, so treat it like noise.
    if (typeof speechDurationMs !== 'number' || Number.isNaN(speechDurationMs)) {
      return { type: 'ignore' };
    }

    if (speechDurationMs < this.ignoreMs) {
      return { type: 'ignore' };
    }

    if (speechDurationMs >= this.cancelMs) {
      return { type: 'cancel', injectMarker: '[interrupted]' };
    }

    return { type: 'pause', fadeMs: this.fadeMs };
  }
}
|
|
69
|
+
//# sourceMappingURL=SoftFadeBargeinHandler.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SoftFadeBargeinHandler.js","sourceRoot":"","sources":["../../src/voice-pipeline/SoftFadeBargeinHandler.ts"],"names":[],"mappings":"AAAA;;;;;;;;;GASG;AAkCH;;;;;;;;;;;;;;;;;;GAkBG;AACH,MAAM,OAAO,sBAAsB;IAsBjC;;;;;OAKG;IACH,YAAY,UAAyC,EAAE;QA3BvD;;;WAGG;QACM,SAAI,GAAG,WAAoB,CAAC;QAwBnC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,GAAG,CAAC;QACxC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,IAAI,CAAC;QACzC,IAAI,CAAC,MAAM,GAAG,OAAO,CAAC,MAAM,IAAI,GAAG,CAAC;IACtC,CAAC;IAED;;;;;;;;;;OAUG;IACH,aAAa,CAAC,OAAuB;QACnC,MAAM,EAAE,gBAAgB,EAAE,GAAG,OAAO,CAAC;QAErC,IAAI,gBAAgB,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;YACrC,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,CAAC;QAC5B,CAAC;QAED,IAAI,gBAAgB,IAAI,IAAI,CAAC,QAAQ,EAAE,CAAC;YACtC,OAAO,EAAE,IAAI,EAAE,QAAQ,EAAE,YAAY,EAAE,eAAe,EAAE,CAAC;QAC3D,CAAC;QAED,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,MAAM,EAAE,IAAI,CAAC,MAAM,EAAE,CAAC;IAChD,CAAC;CACF"}
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module voice-pipeline/VoicePipelineOrchestrator
|
|
3
|
+
*
|
|
4
|
+
* Central state machine that wires together transport, STT, endpoint detection,
|
|
5
|
+
* TTS, barge-in handling, and the agent session into a coordinated real-time
|
|
6
|
+
* voice conversation loop.
|
|
7
|
+
*
|
|
8
|
+
* State transitions:
|
|
9
|
+
* ```
|
|
10
|
+
* IDLE → startSession() → LISTENING
|
|
11
|
+
* LISTENING → turn_complete → PROCESSING
|
|
12
|
+
* PROCESSING → LLM tokens start → SPEAKING
|
|
13
|
+
* SPEAKING → TTS complete → LISTENING
|
|
14
|
+
* SPEAKING → barge-in → INTERRUPTING → LISTENING
|
|
15
|
+
* ANY → transport disconnect → CLOSED
|
|
16
|
+
* ```
|
|
17
|
+
*/
|
|
18
|
+
import { EventEmitter } from 'node:events';
|
|
19
|
+
import type { IBargeinHandler, IDiarizationEngine, IEndpointDetector, IStreamTransport, IStreamingSTT, IStreamingTTS, IVoicePipelineAgentSession, PipelineState, VoicePipelineConfig, VoicePipelineSession } from './types.js';
|
|
20
|
+
/**
|
|
21
|
+
* Overrides for injecting pre-built components (primarily for testing).
|
|
22
|
+
* In production, components would be resolved from ExtensionManager.
|
|
23
|
+
*/
|
|
24
|
+
export interface VoicePipelineOverrides { // every member is optional
|
|
25
|
+
/** Optional pre-built streaming STT provider. */
|
|
26
|
+
streamingSTT?: IStreamingSTT;
|
|
27
|
+
/** Optional pre-built streaming TTS provider. */
|
|
28
|
+
streamingTTS?: IStreamingTTS;
|
|
29
|
+
/** Optional pre-built endpoint detector. */
|
|
30
|
+
endpointDetector?: IEndpointDetector;
|
|
31
|
+
/** Optional pre-built barge-in handler. */
|
|
32
|
+
bargeinHandler?: IBargeinHandler;
|
|
33
|
+
/** Optional pre-built diarization engine. NOTE(review): the orchestrator class declares no diarization field in this file — confirm where it is consumed. */
|
|
34
|
+
diarizationEngine?: IDiarizationEngine;
|
|
35
|
+
}
|
|
36
|
+
/**
|
|
37
|
+
* VoicePipelineOrchestrator is the central state machine for the AgentOS
|
|
38
|
+
* streaming voice pipeline. It coordinates audio capture, speech recognition,
|
|
39
|
+
* endpoint detection, agent inference, text-to-speech synthesis, and barge-in
|
|
40
|
+
* handling into a seamless real-time conversation loop.
|
|
41
|
+
*
|
|
42
|
+
* Emits:
|
|
43
|
+
* - `'state_changed'` ({ from: PipelineState, to: PipelineState })
|
|
44
|
+
*/
|
|
45
|
+
export declare class VoicePipelineOrchestrator extends EventEmitter { // declaration only: private members are listed by name, without types or signatures
|
|
46
|
+
private readonly config; // presumably the VoicePipelineConfig given to the constructor — confirm against the implementation
|
|
47
|
+
/** Current pipeline state. */
|
|
48
|
+
private _state;
|
|
49
|
+
/** Active sub-sessions and components, set during startSession. */
|
|
50
|
+
private _sttSession;
|
|
51
|
+
private _ttsSession;
|
|
52
|
+
private _endpointDetector;
|
|
53
|
+
private _bargeinHandler;
|
|
54
|
+
private _transport;
|
|
55
|
+
private _agentSession;
|
|
56
|
+
/** Watchdog timer ID for max turn duration. */
|
|
57
|
+
private _watchdogTimer;
|
|
58
|
+
/** Tracks cumulative TTS text for barge-in context. */
|
|
59
|
+
private _currentTTSText;
|
|
60
|
+
/** Tracks cumulative played duration for barge-in context. */
|
|
61
|
+
private _currentPlayedMs;
|
|
62
|
+
/** Current pipeline state (read-only: a getter with no setter declared). */
|
|
63
|
+
get state(): PipelineState;
|
|
64
|
+
constructor(config: VoicePipelineConfig); // sole argument is the pipeline configuration
|
|
65
|
+
/**
|
|
66
|
+
* Start a voice session. Accepts pre-built components via overrides for testing.
|
|
67
|
+
* Resolving components from ExtensionManager in production is noted as a future task.
|
|
68
|
+
*
|
|
69
|
+
* @param transport - The bidirectional audio/text stream transport.
|
|
70
|
+
* @param agentSession - The agent session adapter for turn-based conversation.
|
|
71
|
+
* @param overrides - Optional pre-built components (for testing or manual wiring).
|
|
72
|
+
* @returns A promise for the live VoicePipelineSession object.
|
|
73
|
+
*/
|
|
74
|
+
startSession(transport: IStreamTransport, agentSession: IVoicePipelineAgentSession, overrides?: VoicePipelineOverrides): Promise<VoicePipelineSession>;
|
|
75
|
+
/**
|
|
76
|
+
* Stop the current session, tearing down all sub-sessions and timers; returns a promise that carries no value.
|
|
77
|
+
*
|
|
78
|
+
* @param reason - Optional human-readable reason for diagnostics.
|
|
79
|
+
*/
|
|
80
|
+
stopSession(reason?: string): Promise<void>;
|
|
81
|
+
/**
|
|
82
|
+
* Forward audio frames from transport to STT session.
|
|
83
|
+
*/
|
|
84
|
+
private _wireTransportToSTT;
|
|
85
|
+
/**
|
|
86
|
+
* Forward STT transcript events to endpoint detector and transport.
|
|
87
|
+
*/
|
|
88
|
+
private _wireSTTToEndpoint;
|
|
89
|
+
/**
|
|
90
|
+
* Handle turn_complete from endpoint detector — transition through
|
|
91
|
+
* PROCESSING → SPEAKING → LISTENING.
|
|
92
|
+
*/
|
|
93
|
+
private _wireTurnComplete;
|
|
94
|
+
/**
|
|
95
|
+
* Forward TTS audio chunks to the transport and handle utterance completion.
|
|
96
|
+
*/
|
|
97
|
+
private _wireTTSToTransport;
|
|
98
|
+
/**
|
|
99
|
+
* Wire barge-in detection: when speech is detected during SPEAKING state,
|
|
100
|
+
* consult the barge-in handler and act accordingly.
|
|
101
|
+
*/
|
|
102
|
+
private _wireBargein;
|
|
103
|
+
/**
|
|
104
|
+
* Handle transport disconnect — close everything.
|
|
105
|
+
*/
|
|
106
|
+
private _wireDisconnect;
|
|
107
|
+
/**
|
|
108
|
+
* Transition to a new pipeline state, emitting a `state_changed` event.
|
|
109
|
+
*/
|
|
110
|
+
private _setState;
|
|
111
|
+
/**
|
|
112
|
+
* Reset the watchdog timer for max turn duration. If the pipeline stays in
|
|
113
|
+
* LISTENING for longer than `maxTurnDurationMs` (default 30s) without a
|
|
114
|
+
* turn_complete, force a timeout.
|
|
115
|
+
*/
|
|
116
|
+
private _resetWatchdog;
|
|
117
|
+
/**
|
|
118
|
+
* Clear the watchdog timer if active.
|
|
119
|
+
*/
|
|
120
|
+
private _clearWatchdog;
|
|
121
|
+
}
|
|
122
|
+
//# sourceMappingURL=VoicePipelineOrchestrator.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"VoicePipelineOrchestrator.d.ts","sourceRoot":"","sources":["../../src/voice-pipeline/VoicePipelineOrchestrator.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAG3C,OAAO,KAAK,EAGV,eAAe,EACf,kBAAkB,EAClB,iBAAiB,EACjB,gBAAgB,EAChB,aAAa,EACb,aAAa,EACb,0BAA0B,EAC1B,aAAa,EAKb,mBAAmB,EACnB,oBAAoB,EAErB,MAAM,YAAY,CAAC;AAEpB;;;GAGG;AACH,MAAM,WAAW,sBAAsB;IACrC,wCAAwC;IACxC,YAAY,CAAC,EAAE,aAAa,CAAC;IAC7B,wCAAwC;IACxC,YAAY,CAAC,EAAE,aAAa,CAAC;IAC7B,mCAAmC;IACnC,gBAAgB,CAAC,EAAE,iBAAiB,CAAC;IACrC,kCAAkC;IAClC,cAAc,CAAC,EAAE,eAAe,CAAC;IACjC,oCAAoC;IACpC,iBAAiB,CAAC,EAAE,kBAAkB,CAAC;CACxC;AAED;;;;;;;;GAQG;AACH,qBAAa,yBAA0B,SAAQ,YAAY;IA0B7C,OAAO,CAAC,QAAQ,CAAC,MAAM;IAzBnC,8BAA8B;IAC9B,OAAO,CAAC,MAAM,CAAyB;IAEvC,mEAAmE;IACnE,OAAO,CAAC,WAAW,CAAoC;IACvD,OAAO,CAAC,WAAW,CAAoC;IACvD,OAAO,CAAC,iBAAiB,CAAkC;IAC3D,OAAO,CAAC,eAAe,CAAgC;IACvD,OAAO,CAAC,UAAU,CAAiC;IACnD,OAAO,CAAC,aAAa,CAA2C;IAEhE,+CAA+C;IAC/C,OAAO,CAAC,cAAc,CAA8C;IAEpE,uDAAuD;IACvD,OAAO,CAAC,eAAe,CAAM;IAE7B,8DAA8D;IAC9D,OAAO,CAAC,gBAAgB,CAAK;IAE7B,0CAA0C;IAC1C,IAAI,KAAK,IAAI,aAAa,CAEzB;gBAE4B,MAAM,EAAE,mBAAmB;IAIxD;;;;;;;;OAQG;IACG,YAAY,CAChB,SAAS,EAAE,gBAAgB,EAC3B,YAAY,EAAE,0BAA0B,EACxC,SAAS,CAAC,EAAE,sBAAsB,GACjC,OAAO,CAAC,oBAAoB,CAAC;IAgEhC;;;;OAIG;IACG,WAAW,CAAC,MAAM,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,IAAI,CAAC;IAsBjD;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAS3B;;OAEG;IACH,OAAO,CAAC,kBAAkB;IAgB1B;;;OAGG;IACH,OAAO,CAAC,iBAAiB;IAuCzB;;OAEG;IACH,OAAO,CAAC,mBAAmB;IAyB3B;;;OAGG;IACH,OAAO,CAAC,YAAY;IA8CpB;;OAEG;IACH,OAAO,CAAC,eAAe;IAiBvB;;OAEG;IACH,OAAO,CAAC,SAAS;IAWjB;;;;OAIG;IACH,OAAO,CAAC,cAAc;IActB;;OAEG;IACH,OAAO,CAAC,cAAc;CAMvB"}
|