@framers/agentos 0.1.111 → 0.1.113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/strategies/debate.d.ts +12 -1
- package/dist/api/strategies/debate.d.ts.map +1 -1
- package/dist/api/strategies/debate.js +41 -5
- package/dist/api/strategies/debate.js.map +1 -1
- package/dist/api/strategies/hierarchical.d.ts +15 -1
- package/dist/api/strategies/hierarchical.d.ts.map +1 -1
- package/dist/api/strategies/hierarchical.js +51 -7
- package/dist/api/strategies/hierarchical.js.map +1 -1
- package/dist/api/strategies/index.d.ts +26 -4
- package/dist/api/strategies/index.d.ts.map +1 -1
- package/dist/api/strategies/index.js +26 -4
- package/dist/api/strategies/index.js.map +1 -1
- package/dist/api/strategies/parallel.d.ts +15 -4
- package/dist/api/strategies/parallel.d.ts.map +1 -1
- package/dist/api/strategies/parallel.js +53 -16
- package/dist/api/strategies/parallel.js.map +1 -1
- package/dist/api/strategies/review-loop.d.ts +15 -1
- package/dist/api/strategies/review-loop.d.ts.map +1 -1
- package/dist/api/strategies/review-loop.js +36 -10
- package/dist/api/strategies/review-loop.js.map +1 -1
- package/dist/api/strategies/sequential.d.ts +11 -1
- package/dist/api/strategies/sequential.d.ts.map +1 -1
- package/dist/api/strategies/sequential.js +39 -8
- package/dist/api/strategies/sequential.js.map +1 -1
- package/dist/api/strategies/shared.d.ts +71 -7
- package/dist/api/strategies/shared.d.ts.map +1 -1
- package/dist/api/strategies/shared.js +89 -10
- package/dist/api/strategies/shared.js.map +1 -1
- package/dist/api/types.d.ts +54 -1
- package/dist/api/types.d.ts.map +1 -1
- package/dist/api/types.js.map +1 -1
- package/dist/memory/facade/Memory.d.ts.map +1 -1
- package/dist/memory/facade/Memory.js +8 -0
- package/dist/memory/facade/Memory.js.map +1 -1
- package/dist/memory/facade/types.d.ts +10 -0
- package/dist/memory/facade/types.d.ts.map +1 -1
- package/dist/memory/index.d.ts +6 -0
- package/dist/memory/index.d.ts.map +1 -1
- package/dist/memory/index.js +5 -0
- package/dist/memory/index.js.map +1 -1
- package/dist/memory/observation/MemoryObserver.d.ts +63 -1
- package/dist/memory/observation/MemoryObserver.d.ts.map +1 -1
- package/dist/memory/observation/MemoryObserver.js +115 -4
- package/dist/memory/observation/MemoryObserver.js.map +1 -1
- package/dist/memory/observation/ObservationCompressor.d.ts +88 -0
- package/dist/memory/observation/ObservationCompressor.d.ts.map +1 -0
- package/dist/memory/observation/ObservationCompressor.js +207 -0
- package/dist/memory/observation/ObservationCompressor.js.map +1 -0
- package/dist/memory/observation/ObservationReflector.d.ts +82 -0
- package/dist/memory/observation/ObservationReflector.d.ts.map +1 -0
- package/dist/memory/observation/ObservationReflector.js +212 -0
- package/dist/memory/observation/ObservationReflector.js.map +1 -0
- package/dist/memory/observation/temporal.d.ts +54 -0
- package/dist/memory/observation/temporal.d.ts.map +1 -0
- package/dist/memory/observation/temporal.js +115 -0
- package/dist/memory/observation/temporal.js.map +1 -0
- package/dist/orchestration/builders/VoiceNodeBuilder.d.ts +82 -25
- package/dist/orchestration/builders/VoiceNodeBuilder.d.ts.map +1 -1
- package/dist/orchestration/builders/VoiceNodeBuilder.js +86 -26
- package/dist/orchestration/builders/VoiceNodeBuilder.js.map +1 -1
- package/dist/orchestration/events/GraphEvent.d.ts +67 -5
- package/dist/orchestration/events/GraphEvent.d.ts.map +1 -1
- package/dist/orchestration/events/GraphEvent.js.map +1 -1
- package/dist/orchestration/runtime/VoiceNodeExecutor.d.ts +102 -25
- package/dist/orchestration/runtime/VoiceNodeExecutor.d.ts.map +1 -1
- package/dist/orchestration/runtime/VoiceNodeExecutor.js +133 -38
- package/dist/orchestration/runtime/VoiceNodeExecutor.js.map +1 -1
- package/dist/orchestration/runtime/VoiceTransportAdapter.d.ts +94 -32
- package/dist/orchestration/runtime/VoiceTransportAdapter.d.ts.map +1 -1
- package/dist/orchestration/runtime/VoiceTransportAdapter.js +82 -28
- package/dist/orchestration/runtime/VoiceTransportAdapter.js.map +1 -1
- package/dist/orchestration/runtime/VoiceTurnCollector.d.ts +73 -20
- package/dist/orchestration/runtime/VoiceTurnCollector.d.ts.map +1 -1
- package/dist/orchestration/runtime/VoiceTurnCollector.js +84 -23
- package/dist/orchestration/runtime/VoiceTurnCollector.js.map +1 -1
- package/dist/voice/CallManager.d.ts.map +1 -1
- package/dist/voice/CallManager.js +9 -1
- package/dist/voice/CallManager.js.map +1 -1
- package/dist/voice/MediaStreamParser.d.ts +115 -6
- package/dist/voice/MediaStreamParser.d.ts.map +1 -1
- package/dist/voice/MediaStreamParser.js +44 -0
- package/dist/voice/MediaStreamParser.js.map +1 -1
- package/dist/voice/TelephonyStreamTransport.d.ts +112 -20
- package/dist/voice/TelephonyStreamTransport.d.ts.map +1 -1
- package/dist/voice/TelephonyStreamTransport.js +136 -30
- package/dist/voice/TelephonyStreamTransport.js.map +1 -1
- package/dist/voice/parsers/PlivoMediaStreamParser.d.ts +64 -6
- package/dist/voice/parsers/PlivoMediaStreamParser.d.ts.map +1 -1
- package/dist/voice/parsers/PlivoMediaStreamParser.js +67 -6
- package/dist/voice/parsers/PlivoMediaStreamParser.js.map +1 -1
- package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts +55 -8
- package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts.map +1 -1
- package/dist/voice/parsers/TelnyxMediaStreamParser.js +60 -9
- package/dist/voice/parsers/TelnyxMediaStreamParser.js.map +1 -1
- package/dist/voice/parsers/TwilioMediaStreamParser.d.ts +73 -11
- package/dist/voice/parsers/TwilioMediaStreamParser.d.ts.map +1 -1
- package/dist/voice/parsers/TwilioMediaStreamParser.js +81 -12
- package/dist/voice/parsers/TwilioMediaStreamParser.js.map +1 -1
- package/dist/voice/providers/plivo.d.ts +108 -12
- package/dist/voice/providers/plivo.d.ts.map +1 -1
- package/dist/voice/providers/plivo.js +106 -9
- package/dist/voice/providers/plivo.js.map +1 -1
- package/dist/voice/providers/telnyx.d.ts +110 -20
- package/dist/voice/providers/telnyx.d.ts.map +1 -1
- package/dist/voice/providers/telnyx.js +111 -20
- package/dist/voice/providers/telnyx.js.map +1 -1
- package/dist/voice/providers/twilio.d.ts +91 -13
- package/dist/voice/providers/twilio.d.ts.map +1 -1
- package/dist/voice/providers/twilio.js +94 -14
- package/dist/voice/providers/twilio.js.map +1 -1
- package/dist/voice/twiml.d.ts +70 -12
- package/dist/voice/twiml.d.ts.map +1 -1
- package/dist/voice/twiml.js +70 -12
- package/dist/voice/twiml.js.map +1 -1
- package/dist/voice/types.d.ts +142 -15
- package/dist/voice/types.d.ts.map +1 -1
- package/dist/voice/types.js +34 -3
- package/dist/voice/types.js.map +1 -1
- package/package.json +1 -1
|
@@ -3,16 +3,28 @@
|
|
|
3
3
|
* @description Bridges graph I/O to the voice pipeline when a workflow runs in
|
|
4
4
|
* voice transport mode.
|
|
5
5
|
*
|
|
6
|
+
* ## Purpose
|
|
7
|
+
*
|
|
6
8
|
* `VoiceTransportAdapter` wraps a graph's input/output cycle so that:
|
|
7
9
|
* - **Node input** is obtained by waiting for the user's next speech turn
|
|
8
10
|
* (`waitForUserTurn()` on the underlying `VoicePipelineOrchestrator`).
|
|
9
11
|
* - **Node output** is delivered to the TTS engine (`pushToTTS()` on the
|
|
10
12
|
* underlying `VoicePipelineOrchestrator`).
|
|
11
13
|
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
14
|
+
* ## `getNodeInput` / `deliverNodeOutput` contract
|
|
15
|
+
*
|
|
16
|
+
* - `getNodeInput(nodeId)` blocks until the transport emits a `turn_complete`
|
|
17
|
+
* event, then resolves with the transcript string. It also emits a
|
|
18
|
+
* `voice_turn_complete` GraphEvent so the runtime event bus stays in sync.
|
|
19
|
+
* - `deliverNodeOutput(nodeId, output)` sends text (or a streaming async
|
|
20
|
+
* iterable) to TTS and emits a `voice_audio` outbound GraphEvent.
|
|
21
|
+
* - Both methods throw if called before `init()` or after `dispose()`.
|
|
22
|
+
*
|
|
23
|
+
* ## Lazy initialisation
|
|
24
|
+
*
|
|
25
|
+
* The adapter is lazy -- it does not create a `VoicePipelineOrchestrator` until
|
|
26
|
+
* `init()` is called. The pipeline reference is `any` typed to avoid a hard
|
|
27
|
+
* import cycle with the voice subsystem; callers that want stronger types may cast.
|
|
16
28
|
*
|
|
17
29
|
* @example
|
|
18
30
|
* ```typescript
|
|
@@ -27,6 +39,9 @@
|
|
|
27
39
|
* await adapter.deliverNodeOutput('greet', 'Hello, how can I help you today?');
|
|
28
40
|
* await adapter.dispose();
|
|
29
41
|
* ```
|
|
42
|
+
*
|
|
43
|
+
* @see {@link VoiceNodeExecutor} -- the executor that consumes this adapter's events.
|
|
44
|
+
* @see {@link VoiceTransportConfig} -- configuration knobs forwarded to the pipeline.
|
|
30
45
|
*/
|
|
31
46
|
import type { GraphState } from '../ir/types.js';
|
|
32
47
|
import type { GraphEvent } from '../events/GraphEvent.js';
|
|
@@ -34,8 +49,21 @@ import type { GraphEvent } from '../events/GraphEvent.js';
|
|
|
34
49
|
* Configuration knobs forwarded to the voice pipeline when the adapter
|
|
35
50
|
* initialises its internal `VoicePipelineOrchestrator` instance.
|
|
36
51
|
*
|
|
37
|
-
* All fields are optional
|
|
38
|
-
* sensible library defaults.
|
|
52
|
+
* All fields are optional -- defaults are resolved from agent.config.json or
|
|
53
|
+
* sensible library defaults within the voice pipeline itself.
|
|
54
|
+
*
|
|
55
|
+
* @example
|
|
56
|
+
* ```ts
|
|
57
|
+
* const config: VoiceTransportConfig = {
|
|
58
|
+
* stt: 'deepgram',
|
|
59
|
+
* tts: 'elevenlabs',
|
|
60
|
+
* voice: 'rachel',
|
|
61
|
+
* bargeIn: 'hard-cut',
|
|
62
|
+
* endpointing: 'semantic',
|
|
63
|
+
* diarization: true,
|
|
64
|
+
* language: 'en-US',
|
|
65
|
+
* };
|
|
66
|
+
* ```
|
|
39
67
|
*/
|
|
40
68
|
export interface VoiceTransportConfig {
|
|
41
69
|
/** STT provider identifier (e.g. `'deepgram'`, `'openai'`). */
|
|
@@ -46,16 +74,16 @@ export interface VoiceTransportConfig {
|
|
|
46
74
|
voice?: string;
|
|
47
75
|
/**
|
|
48
76
|
* Barge-in handling strategy.
|
|
49
|
-
* - `'hard-cut'`
|
|
50
|
-
* - `'soft-fade'`
|
|
51
|
-
* - `'disabled'`
|
|
77
|
+
* - `'hard-cut'` -- interrupt TTS immediately when speech is detected.
|
|
78
|
+
* - `'soft-fade'` -- ramp TTS volume down before cutting.
|
|
79
|
+
* - `'disabled'` -- ignore user speech while the agent is speaking.
|
|
52
80
|
*/
|
|
53
81
|
bargeIn?: string;
|
|
54
82
|
/**
|
|
55
83
|
* Endpoint detection mode used to decide when the user has finished speaking.
|
|
56
|
-
* - `'acoustic'`
|
|
57
|
-
* - `'heuristic'`
|
|
58
|
-
* - `'semantic'`
|
|
84
|
+
* - `'acoustic'` -- energy/VAD-based detection.
|
|
85
|
+
* - `'heuristic'` -- punctuation + silence heuristics.
|
|
86
|
+
* - `'semantic'` -- LLM-assisted turn boundary detection.
|
|
59
87
|
*/
|
|
60
88
|
endpointing?: string;
|
|
61
89
|
/** Whether to enable speaker diarization for multi-speaker sessions. */
|
|
@@ -66,13 +94,26 @@ export interface VoiceTransportConfig {
|
|
|
66
94
|
/**
|
|
67
95
|
* Adapts a compiled graph's I/O cycle to the real-time voice pipeline.
|
|
68
96
|
*
|
|
69
|
-
* Lifecycle
|
|
97
|
+
* ## Lifecycle
|
|
98
|
+
*
|
|
70
99
|
* 1. Construct with {@link VoiceTransportConfig}, an `IStreamTransport`, and an
|
|
71
100
|
* event sink callback.
|
|
72
|
-
* 2. Call `init()` once before the graph starts running.
|
|
101
|
+
* 2. Call `init()` once before the graph starts running. This injects the
|
|
102
|
+
* transport into `state.scratch.voiceTransport` and emits a `voice_session`
|
|
103
|
+
* started event.
|
|
73
104
|
* 3. Use `getNodeInput()` to obtain the user's transcribed speech for a node.
|
|
105
|
+
* Blocks until the transport emits a `turn_complete` event.
|
|
74
106
|
* 4. Use `deliverNodeOutput()` to send the node's response to TTS.
|
|
75
107
|
* 5. Call `dispose()` to clean up resources when the session ends.
|
|
108
|
+
*
|
|
109
|
+
* ## Error handling
|
|
110
|
+
*
|
|
111
|
+
* Both `getNodeInput()` and `deliverNodeOutput()` throw `Error` if called
|
|
112
|
+
* before `init()`. After `dispose()`, the adapter is marked as uninitialised
|
|
113
|
+
* so subsequent calls also throw.
|
|
114
|
+
*
|
|
115
|
+
* @see {@link VoiceTransportConfig} -- the config shape forwarded to the pipeline.
|
|
116
|
+
* @see {@link VoiceNodeExecutor} -- the executor that interacts with the transport.
|
|
76
117
|
*/
|
|
77
118
|
export declare class VoiceTransportAdapter {
|
|
78
119
|
private readonly config;
|
|
@@ -84,24 +125,35 @@ export declare class VoiceTransportAdapter {
|
|
|
84
125
|
* In a full implementation this would be `VoicePipelineOrchestrator | null`.
|
|
85
126
|
*/
|
|
86
127
|
private pipeline;
|
|
87
|
-
/**
|
|
128
|
+
/**
|
|
129
|
+
* Tracks whether `init()` has been called successfully.
|
|
130
|
+
* Set to `false` by `dispose()` to prevent use-after-teardown.
|
|
131
|
+
*/
|
|
88
132
|
private initialized;
|
|
89
133
|
/**
|
|
90
|
-
*
|
|
91
|
-
*
|
|
92
|
-
* @param
|
|
134
|
+
* Creates a new VoiceTransportAdapter.
|
|
135
|
+
*
|
|
136
|
+
* @param config - Voice pipeline configuration knobs. Forwarded to the
|
|
137
|
+
* pipeline when it is initialised.
|
|
138
|
+
* @param transport - Bidirectional audio/control stream transport
|
|
139
|
+
* (`IStreamTransport`). Must be an EventEmitter that
|
|
140
|
+
* emits `turn_complete` events for `getNodeInput()`.
|
|
141
|
+
* @param eventSink - Callback receiving all `GraphEvent` values emitted by
|
|
142
|
+
* this adapter. Must not throw.
|
|
93
143
|
*/
|
|
94
144
|
constructor(config: VoiceTransportConfig, transport: any, // IStreamTransport
|
|
95
145
|
eventSink: (event: GraphEvent) => void);
|
|
96
146
|
/**
|
|
97
147
|
* Initialise the adapter.
|
|
98
148
|
*
|
|
99
|
-
* Injects the `IStreamTransport` instance into `state.scratch.voiceTransport`
|
|
100
|
-
* that graph nodes
|
|
101
|
-
*
|
|
149
|
+
* Injects the `IStreamTransport` instance into `state.scratch.voiceTransport`
|
|
150
|
+
* so that voice graph nodes (specifically {@link VoiceNodeExecutor}) can access
|
|
151
|
+
* the transport for session event subscription. Then emits a `voice_session`
|
|
152
|
+
* started event to signal that the voice session is live.
|
|
102
153
|
*
|
|
103
154
|
* Must be called before {@link getNodeInput} or
|
|
104
|
-
* {@link deliverNodeOutput}.
|
|
155
|
+
* {@link deliverNodeOutput}. Calling `init()` multiple times is safe but
|
|
156
|
+
* redundant -- the transport reference is simply overwritten.
|
|
105
157
|
*
|
|
106
158
|
* @param state - Mutable `GraphState` (or partial) for the current run.
|
|
107
159
|
* `state.scratch` is created lazily if absent.
|
|
@@ -111,39 +163,46 @@ export declare class VoiceTransportAdapter {
|
|
|
111
163
|
* Wait for the user's next speech turn and return the transcript text.
|
|
112
164
|
*
|
|
113
165
|
* In a full production implementation this delegates to
|
|
114
|
-
* `VoicePipelineOrchestrator.waitForUserTurn()`.
|
|
166
|
+
* `VoicePipelineOrchestrator.waitForUserTurn()`. In the current implementation
|
|
115
167
|
* it listens for a single `'turn_complete'` event from the underlying transport
|
|
116
168
|
* and resolves with the transcript text.
|
|
117
169
|
*
|
|
118
170
|
* Also emits a {@link GraphEvent} of type `voice_turn_complete` so that the
|
|
119
|
-
* runtime event bus stays in sync.
|
|
171
|
+
* runtime event bus stays in sync with the transport-level turn lifecycle.
|
|
120
172
|
*
|
|
121
|
-
* @param nodeId - The id of the graph node requesting input; used to tag the
|
|
173
|
+
* @param nodeId - The id of the graph node requesting input; used to tag the
|
|
174
|
+
* emitted event for downstream filtering.
|
|
122
175
|
* @returns Resolved transcript string from the user's speech turn.
|
|
123
|
-
* @throws {Error} If called before `init()`.
|
|
176
|
+
* @throws {Error} If called before `init()` or after `dispose()`.
|
|
124
177
|
*/
|
|
125
178
|
getNodeInput(nodeId: string): Promise<string>;
|
|
126
179
|
/**
|
|
127
180
|
* Deliver a node's text output to the TTS engine.
|
|
128
181
|
*
|
|
129
182
|
* Accepts either a plain `string` or an `AsyncIterable<string>` of token
|
|
130
|
-
* chunks (e.g. a streaming LLM response).
|
|
183
|
+
* chunks (e.g. a streaming LLM response). In a full production implementation
|
|
131
184
|
* this delegates to `VoicePipelineOrchestrator.pushToTTS(output)`.
|
|
132
185
|
*
|
|
133
186
|
* Emits a {@link GraphEvent} of type `voice_audio` (direction `'outbound'`)
|
|
134
187
|
* so that the runtime event bus records the TTS delivery.
|
|
135
188
|
*
|
|
136
|
-
* @param nodeId - The id of the graph node delivering the output
|
|
137
|
-
*
|
|
138
|
-
* @
|
|
189
|
+
* @param nodeId - The id of the graph node delivering the output; tags the
|
|
190
|
+
* emitted event for downstream filtering.
|
|
191
|
+
* @param _output - Text or async token stream to synthesise as speech.
|
|
192
|
+
* The underscore prefix indicates it is not yet consumed
|
|
193
|
+
* in the v1 stub implementation.
|
|
194
|
+
* @throws {Error} If called before `init()` or after `dispose()`.
|
|
139
195
|
*/
|
|
140
196
|
deliverNodeOutput(nodeId: string, _output: string | AsyncIterable<string>): Promise<void>;
|
|
141
197
|
/**
|
|
142
198
|
* Handle a user barge-in at the transport level.
|
|
143
199
|
*
|
|
144
200
|
* Should be called by the runtime or transport layer when the user starts
|
|
145
|
-
* speaking while the agent is mid-utterance.
|
|
146
|
-
* so that graph event consumers can react (e.g. cancel pending tool calls
|
|
201
|
+
* speaking while the agent is mid-utterance. Emits a `voice_barge_in` event
|
|
202
|
+
* so that graph event consumers can react (e.g. cancel pending tool calls,
|
|
203
|
+
* stop TTS playback, or reroute the graph).
|
|
204
|
+
*
|
|
205
|
+
* @see {@link VoiceInterruptError} -- the structured error used inside the graph executor.
|
|
147
206
|
*/
|
|
148
207
|
handleBargeIn(): void;
|
|
149
208
|
/**
|
|
@@ -151,6 +210,9 @@ export declare class VoiceTransportAdapter {
|
|
|
151
210
|
*
|
|
152
211
|
* Marks the adapter as uninitialised so subsequent calls to `getNodeInput()`
|
|
153
212
|
* or `deliverNodeOutput()` will throw, preventing accidental use after teardown.
|
|
213
|
+
*
|
|
214
|
+
* This method is safe to call repeatedly -- each additional call simply re-emits the
|
|
215
|
+
* ended event and re-sets the initialised flag.
|
|
154
216
|
*/
|
|
155
217
|
dispose(): Promise<void>;
|
|
156
218
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"VoiceTransportAdapter.d.ts","sourceRoot":"","sources":["../../../src/orchestration/runtime/VoiceTransportAdapter.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"VoiceTransportAdapter.d.ts","sourceRoot":"","sources":["../../../src/orchestration/runtime/VoiceTransportAdapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4CG;AAEH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,gBAAgB,CAAC;AACjD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAM1D;;;;;;;;;;;;;;;;;;;GAmBG;AACH,MAAM,WAAW,oBAAoB;IACnC,+DAA+D;IAC/D,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,iEAAiE;IACjE,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,0DAA0D;IAC1D,KAAK,CAAC,EAAE,MAAM,CAAC;IAEf;;;;;OAKG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IAErB,wEAAwE;IACxE,WAAW,CAAC,EAAE,OAAO,CAAC;IAEtB,6DAA6D;IAC7D,QAAQ,CAAC,EAAE,MAAM,CAAC;CACnB;AAMD;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,qBAAa,qBAAqB;IA0B9B,OAAO,CAAC,QAAQ,CAAC,MAAM;IACvB,OAAO,CAAC,QAAQ,CAAC,SAAS;IAC1B,OAAO,CAAC,QAAQ,CAAC,SAAS;IA3B5B;;;;OAIG;IACH,OAAO,CAAC,QAAQ,CAAoB;IAEpC;;;OAGG;IACH,OAAO,CAAC,WAAW,CAAS;IAE5B;;;;;;;;;;OAUG;gBAEgB,MAAM,EAAE,oBAAoB,EAC5B,SAAS,EAAE,GAAG,EAAE,mBAAmB;IACnC,SAAS,EAAE,CAAC,KAAK,EAAE,UAAU,KAAK,IAAI;IAOzD;;;;;;;;;;;;;;OAcG;IACG,IAAI,CAAC,KAAK,EAAE,OAAO,CAAC,UAAU,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAcrD;;;;;;;;;;;;;;;OAeG;IACG,YAAY,CAAC,MAAM,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC;IA2BnD;;;;;;;;;;;;;;;;OAgBG;IACG,iBAAiB,CAAC,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,GAAG,aAAa,CAAC,MAAM,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAqB/F;;;;;;;;;OASG;IACH,aAAa,IAAI,IAAI;IAarB;;;;;;;;OAQG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAS/B"}
|
|
@@ -3,16 +3,28 @@
|
|
|
3
3
|
* @description Bridges graph I/O to the voice pipeline when a workflow runs in
|
|
4
4
|
* voice transport mode.
|
|
5
5
|
*
|
|
6
|
+
* ## Purpose
|
|
7
|
+
*
|
|
6
8
|
* `VoiceTransportAdapter` wraps a graph's input/output cycle so that:
|
|
7
9
|
* - **Node input** is obtained by waiting for the user's next speech turn
|
|
8
10
|
* (`waitForUserTurn()` on the underlying `VoicePipelineOrchestrator`).
|
|
9
11
|
* - **Node output** is delivered to the TTS engine (`pushToTTS()` on the
|
|
10
12
|
* underlying `VoicePipelineOrchestrator`).
|
|
11
13
|
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
14
|
+
* ## `getNodeInput` / `deliverNodeOutput` contract
|
|
15
|
+
*
|
|
16
|
+
* - `getNodeInput(nodeId)` blocks until the transport emits a `turn_complete`
|
|
17
|
+
* event, then resolves with the transcript string. It also emits a
|
|
18
|
+
* `voice_turn_complete` GraphEvent so the runtime event bus stays in sync.
|
|
19
|
+
* - `deliverNodeOutput(nodeId, output)` sends text (or a streaming async
|
|
20
|
+
* iterable) to TTS and emits a `voice_audio` outbound GraphEvent.
|
|
21
|
+
* - Both methods throw if called before `init()` or after `dispose()`.
|
|
22
|
+
*
|
|
23
|
+
* ## Lazy initialisation
|
|
24
|
+
*
|
|
25
|
+
* The adapter is lazy -- it does not create a `VoicePipelineOrchestrator` until
|
|
26
|
+
* `init()` is called. The pipeline reference is `any` typed to avoid a hard
|
|
27
|
+
* import cycle with the voice subsystem; callers that want stronger types may cast.
|
|
16
28
|
*
|
|
17
29
|
* @example
|
|
18
30
|
* ```typescript
|
|
@@ -27,6 +39,9 @@
|
|
|
27
39
|
* await adapter.deliverNodeOutput('greet', 'Hello, how can I help you today?');
|
|
28
40
|
* await adapter.dispose();
|
|
29
41
|
* ```
|
|
42
|
+
*
|
|
43
|
+
* @see {@link VoiceNodeExecutor} -- the executor that consumes this adapter's events.
|
|
44
|
+
* @see {@link VoiceTransportConfig} -- configuration knobs forwarded to the pipeline.
|
|
30
45
|
*/
|
|
31
46
|
// ---------------------------------------------------------------------------
|
|
32
47
|
// VoiceTransportAdapter
|
|
@@ -34,19 +49,38 @@
|
|
|
34
49
|
/**
|
|
35
50
|
* Adapts a compiled graph's I/O cycle to the real-time voice pipeline.
|
|
36
51
|
*
|
|
37
|
-
* Lifecycle
|
|
52
|
+
* ## Lifecycle
|
|
53
|
+
*
|
|
38
54
|
* 1. Construct with {@link VoiceTransportConfig}, an `IStreamTransport`, and an
|
|
39
55
|
* event sink callback.
|
|
40
|
-
* 2. Call `init()` once before the graph starts running.
|
|
56
|
+
* 2. Call `init()` once before the graph starts running. This injects the
|
|
57
|
+
* transport into `state.scratch.voiceTransport` and emits a `voice_session`
|
|
58
|
+
* started event.
|
|
41
59
|
* 3. Use `getNodeInput()` to obtain the user's transcribed speech for a node.
|
|
60
|
+
* Blocks until the transport emits a `turn_complete` event.
|
|
42
61
|
* 4. Use `deliverNodeOutput()` to send the node's response to TTS.
|
|
43
62
|
* 5. Call `dispose()` to clean up resources when the session ends.
|
|
63
|
+
*
|
|
64
|
+
* ## Error handling
|
|
65
|
+
*
|
|
66
|
+
* Both `getNodeInput()` and `deliverNodeOutput()` throw `Error` if called
|
|
67
|
+
* before `init()`. After `dispose()`, the adapter is marked as uninitialised
|
|
68
|
+
* so subsequent calls also throw.
|
|
69
|
+
*
|
|
70
|
+
* @see {@link VoiceTransportConfig} -- the config shape forwarded to the pipeline.
|
|
71
|
+
* @see {@link VoiceNodeExecutor} -- the executor that interacts with the transport.
|
|
44
72
|
*/
|
|
45
73
|
export class VoiceTransportAdapter {
|
|
46
74
|
/**
|
|
47
|
-
*
|
|
48
|
-
*
|
|
49
|
-
* @param
|
|
75
|
+
* Creates a new VoiceTransportAdapter.
|
|
76
|
+
*
|
|
77
|
+
* @param config - Voice pipeline configuration knobs. Forwarded to the
|
|
78
|
+
* pipeline when it is initialised.
|
|
79
|
+
* @param transport - Bidirectional audio/control stream transport
|
|
80
|
+
* (`IStreamTransport`). Must be an EventEmitter that
|
|
81
|
+
* emits `turn_complete` events for `getNodeInput()`.
|
|
82
|
+
* @param eventSink - Callback receiving all `GraphEvent` values emitted by
|
|
83
|
+
* this adapter. Must not throw.
|
|
50
84
|
*/
|
|
51
85
|
constructor(config, transport, // IStreamTransport
|
|
52
86
|
eventSink) {
|
|
@@ -59,7 +93,10 @@ export class VoiceTransportAdapter {
|
|
|
59
93
|
* In a full implementation this would be `VoicePipelineOrchestrator | null`.
|
|
60
94
|
*/
|
|
61
95
|
this.pipeline = null; // VoicePipelineOrchestrator (lazy)
|
|
62
|
-
/**
|
|
96
|
+
/**
|
|
97
|
+
* Tracks whether `init()` has been called successfully.
|
|
98
|
+
* Set to `false` by `dispose()` to prevent use-after-teardown.
|
|
99
|
+
*/
|
|
63
100
|
this.initialized = false;
|
|
64
101
|
}
|
|
65
102
|
// -------------------------------------------------------------------------
|
|
@@ -68,19 +105,22 @@ export class VoiceTransportAdapter {
|
|
|
68
105
|
/**
|
|
69
106
|
* Initialise the adapter.
|
|
70
107
|
*
|
|
71
|
-
* Injects the `IStreamTransport` instance into `state.scratch.voiceTransport`
|
|
72
|
-
* that graph nodes
|
|
73
|
-
*
|
|
108
|
+
* Injects the `IStreamTransport` instance into `state.scratch.voiceTransport`
|
|
109
|
+
* so that voice graph nodes (specifically {@link VoiceNodeExecutor}) can access
|
|
110
|
+
* the transport for session event subscription. Then emits a `voice_session`
|
|
111
|
+
* started event to signal that the voice session is live.
|
|
74
112
|
*
|
|
75
113
|
* Must be called before {@link getNodeInput} or
|
|
76
|
-
* {@link deliverNodeOutput}.
|
|
114
|
+
* {@link deliverNodeOutput}. Calling `init()` multiple times is safe but
|
|
115
|
+
* redundant -- the transport reference is simply overwritten.
|
|
77
116
|
*
|
|
78
117
|
* @param state - Mutable `GraphState` (or partial) for the current run.
|
|
79
118
|
* `state.scratch` is created lazily if absent.
|
|
80
119
|
*/
|
|
81
120
|
async init(state) {
|
|
82
121
|
var _a;
|
|
83
|
-
// Lazily create the scratch bag if the caller passed a partial state
|
|
122
|
+
// Lazily create the scratch bag if the caller passed a partial state
|
|
123
|
+
// without a pre-existing scratch object.
|
|
84
124
|
const scratch = ((_a = state).scratch ?? (_a.scratch = {}));
|
|
85
125
|
scratch.voiceTransport = this.transport;
|
|
86
126
|
this.initialized = true;
|
|
@@ -94,25 +134,29 @@ export class VoiceTransportAdapter {
|
|
|
94
134
|
* Wait for the user's next speech turn and return the transcript text.
|
|
95
135
|
*
|
|
96
136
|
* In a full production implementation this delegates to
|
|
97
|
-
* `VoicePipelineOrchestrator.waitForUserTurn()`.
|
|
137
|
+
* `VoicePipelineOrchestrator.waitForUserTurn()`. In the current implementation
|
|
98
138
|
* it listens for a single `'turn_complete'` event from the underlying transport
|
|
99
139
|
* and resolves with the transcript text.
|
|
100
140
|
*
|
|
101
141
|
* Also emits a {@link GraphEvent} of type `voice_turn_complete` so that the
|
|
102
|
-
* runtime event bus stays in sync.
|
|
142
|
+
* runtime event bus stays in sync with the transport-level turn lifecycle.
|
|
103
143
|
*
|
|
104
|
-
* @param nodeId - The id of the graph node requesting input; used to tag the
|
|
144
|
+
* @param nodeId - The id of the graph node requesting input; used to tag the
|
|
145
|
+
* emitted event for downstream filtering.
|
|
105
146
|
* @returns Resolved transcript string from the user's speech turn.
|
|
106
|
-
* @throws {Error} If called before `init()`.
|
|
147
|
+
* @throws {Error} If called before `init()` or after `dispose()`.
|
|
107
148
|
*/
|
|
108
149
|
async getNodeInput(nodeId) {
|
|
109
150
|
if (!this.initialized) {
|
|
110
151
|
throw new Error('VoiceTransportAdapter not initialized');
|
|
111
152
|
}
|
|
112
|
-
// In
|
|
113
|
-
//
|
|
153
|
+
// In the full implementation this would delegate to:
|
|
154
|
+
// this.pipeline.waitForUserTurn()
|
|
155
|
+
// For now, listen directly to transport events for the next turn.
|
|
114
156
|
return new Promise((resolve) => {
|
|
115
157
|
this.transport.once('turn_complete', (evt) => {
|
|
158
|
+
// Accept both `transcript` and `text` fields for compatibility
|
|
159
|
+
// with different transport implementations.
|
|
116
160
|
const transcript = evt?.transcript ?? evt?.text ?? '';
|
|
117
161
|
this.eventSink({
|
|
118
162
|
type: 'voice_turn_complete',
|
|
@@ -129,21 +173,25 @@ export class VoiceTransportAdapter {
|
|
|
129
173
|
* Deliver a node's text output to the TTS engine.
|
|
130
174
|
*
|
|
131
175
|
* Accepts either a plain `string` or an `AsyncIterable<string>` of token
|
|
132
|
-
* chunks (e.g. a streaming LLM response).
|
|
176
|
+
* chunks (e.g. a streaming LLM response). In a full production implementation
|
|
133
177
|
* this delegates to `VoicePipelineOrchestrator.pushToTTS(output)`.
|
|
134
178
|
*
|
|
135
179
|
* Emits a {@link GraphEvent} of type `voice_audio` (direction `'outbound'`)
|
|
136
180
|
* so that the runtime event bus records the TTS delivery.
|
|
137
181
|
*
|
|
138
|
-
* @param nodeId - The id of the graph node delivering the output
|
|
139
|
-
*
|
|
140
|
-
* @
|
|
182
|
+
* @param nodeId - The id of the graph node delivering the output; tags the
|
|
183
|
+
* emitted event for downstream filtering.
|
|
184
|
+
* @param _output - Text or async token stream to synthesise as speech.
|
|
185
|
+
* The underscore prefix indicates it is not yet consumed
|
|
186
|
+
* in the v1 stub implementation.
|
|
187
|
+
* @throws {Error} If called before `init()` or after `dispose()`.
|
|
141
188
|
*/
|
|
142
189
|
async deliverNodeOutput(nodeId, _output) {
|
|
143
190
|
if (!this.initialized) {
|
|
144
191
|
throw new Error('VoiceTransportAdapter not initialized');
|
|
145
192
|
}
|
|
146
|
-
// In
|
|
193
|
+
// In the full implementation this would delegate to:
|
|
194
|
+
// this.pipeline.pushToTTS(output)
|
|
147
195
|
// For now, emit the event to signal delivery.
|
|
148
196
|
this.eventSink({
|
|
149
197
|
type: 'voice_audio',
|
|
@@ -160,8 +208,11 @@ export class VoiceTransportAdapter {
|
|
|
160
208
|
* Handle a user barge-in at the transport level.
|
|
161
209
|
*
|
|
162
210
|
* Should be called by the runtime or transport layer when the user starts
|
|
163
|
-
* speaking while the agent is mid-utterance.
|
|
164
|
-
* so that graph event consumers can react (e.g. cancel pending tool calls
|
|
211
|
+
* speaking while the agent is mid-utterance. Emits a `voice_barge_in` event
|
|
212
|
+
* so that graph event consumers can react (e.g. cancel pending tool calls,
|
|
213
|
+
* stop TTS playback, or reroute the graph).
|
|
214
|
+
*
|
|
215
|
+
* @see {@link VoiceInterruptError} -- the structured error used inside the graph executor.
|
|
165
216
|
*/
|
|
166
217
|
handleBargeIn() {
|
|
167
218
|
this.eventSink({
|
|
@@ -179,6 +230,9 @@ export class VoiceTransportAdapter {
|
|
|
179
230
|
*
|
|
180
231
|
* Marks the adapter as uninitialised so subsequent calls to `getNodeInput()`
|
|
181
232
|
* or `deliverNodeOutput()` will throw, preventing accidental use after teardown.
|
|
233
|
+
*
|
|
234
|
+
* This method is safe to call repeatedly -- each additional call simply re-emits the
|
|
235
|
+
* ended event and re-sets the initialised flag.
|
|
182
236
|
*/
|
|
183
237
|
async dispose() {
|
|
184
238
|
this.eventSink({
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"VoiceTransportAdapter.js","sourceRoot":"","sources":["../../../src/orchestration/runtime/VoiceTransportAdapter.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"VoiceTransportAdapter.js","sourceRoot":"","sources":["../../../src/orchestration/runtime/VoiceTransportAdapter.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA4CG;AA8DH,8EAA8E;AAC9E,wBAAwB;AACxB,8EAA8E;AAE9E;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,MAAM,OAAO,qBAAqB;IAchC;;;;;;;;;;OAUG;IACH,YACmB,MAA4B,EAC5B,SAAc,EAAE,mBAAmB;IACnC,SAAsC;QAFtC,WAAM,GAAN,MAAM,CAAsB;QAC5B,cAAS,GAAT,SAAS,CAAK;QACd,cAAS,GAAT,SAAS,CAA6B;QA3BzD;;;;WAIG;QACK,aAAQ,GAAe,IAAI,CAAC,CAAC,mCAAmC;QAExE;;;WAGG;QACK,gBAAW,GAAG,KAAK,CAAC;IAiBzB,CAAC;IAEJ,4EAA4E;IAC5E,YAAY;IACZ,4EAA4E;IAE5E;;;;;;;;;;;;;;OAcG;IACH,KAAK,CAAC,IAAI,CAAC,KAA0B;;QACnC,qEAAqE;QACrE,yCAAyC;QACzC,MAAM,OAAO,GAAG,OAAE,KAAa,EAAC,OAAO,QAAP,OAAO,GAAK,EAAE,EAAC,CAAC;QAChD,OAAO,CAAC,cAAc,GAAG,IAAI,CAAC,SAAS,CAAC;QACxC,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;QAExB,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,eAAe;YACrB,MAAM,EAAE,eAAe;YACvB,MAAM,EAAE,SAAS;SAClB,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;;;OAeG;IACH,KAAK,CAAC,YAAY,CAAC,MAAc;QAC/B,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAC3D,CAAC;QAED,qDAAqD;QACrD,oCAAoC;QACpC,kEAAkE;QAClE,OAAO,IAAI,OAAO,CAAS,CAAC,OAAO,EAAE,EAAE;YACrC,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,eAAe,EAAE,CAAC,GAAQ,EAAE,EAAE;gBAChD,+DAA+D;gBAC/D,4CAA4C;gBAC5C,MAAM,UAAU,GAAW,GAAG,EAAE,UAAU,IAAI,GAAG,EAAE,IAAI,IAAI,EAAE,CAAC;gBAE9D,IAAI,CAAC,SAAS,CAAC;oBACb,IAAI,EAAE,qBAAqB;oBAC3B,MAAM;oBACN,UAAU;oBACV,SAAS,EAAE,CAAC;oBACZ,cAAc,EAAE,GAAG,EAAE,MAAM,IAAI,SAAS;iBACzC,CAAC,CAAC;gBAEH,OAAO,CAAC,UAAU,CAAC,CAAC;YACtB,CAAC,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;IAED;;;;;;;;;;;;;;;;OAgBG;IACH,KAAK,CAAC,iBAAiB,CAAC,MAAc,EAAE,OAAuC;QAC7E,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,CAAC;YACtB,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;QAC3D,CAAC;QAED,qDAAqD;QACrD,oCAAoC;QACpC,8CAA8C;QAC9C,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,aAAa;YACnB,MAAM;YACN,SAAS,EAAE,UAAU;YACrB,MAAM,EAAE,KAAK;YACb,UAAU,EAAE,CAAC;SACd,CAAC,CAAC;IACL,CAAC;IAED,4EAA4E;IAC5E,WAAW;IACX,4EAA4E;IAE5E;;;;;;;;;OASG;IACH,aAAa;QACX,IAAI,C
AAC,SAAS,CAAC;YACb,IAAI,EAAE,gBAAgB;YACtB,MAAM,EAAE,eAAe;YACvB,eAAe,EAAE,EAAE;YACnB,UAAU,EAAE,EAAE;SACf,CAAC,CAAC;IACL,CAAC;IAED,4EAA4E;IAC5E,WAAW;IACX,4EAA4E;IAE5E;;;;;;;;OAQG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,CAAC,SAAS,CAAC;YACb,IAAI,EAAE,eAAe;YACrB,MAAM,EAAE,eAAe;YACvB,MAAM,EAAE,OAAO;YACf,UAAU,EAAE,oBAAoB;SACjC,CAAC,CAAC;QACH,IAAI,CAAC,WAAW,GAAG,KAAK,CAAC;IAC3B,CAAC;CACF"}
|
|
@@ -3,39 +3,61 @@
|
|
|
3
3
|
* @description Subscribes to voice pipeline session events and maintains a
|
|
4
4
|
* running transcript buffer, turn counter, and last-speaker tracker.
|
|
5
5
|
*
|
|
6
|
+
* ## Event bridging strategy
|
|
7
|
+
*
|
|
6
8
|
* The collector bridges the raw EventEmitter-based voice pipeline session into
|
|
7
9
|
* the typed `GraphEvent` stream consumed by the graph runtime. Four session
|
|
8
10
|
* events are handled:
|
|
9
11
|
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
*
|
|
17
|
-
*
|
|
12
|
+
* | Session event | GraphEvent emitted | Buffered? | Why |
|
|
13
|
+
* |----------------------|--------------------------|-----------|--------------------------------------------------------|
|
|
14
|
+
* | `interim_transcript` | `voice_transcript` | No | Partials are noisy and would duplicate final entries. |
|
|
15
|
+
* | `final_transcript` | `voice_transcript` | Yes | Confirmed utterances form the canonical transcript. |
|
|
16
|
+
* | `turn_complete` | `voice_turn_complete` | N/A | Marks endpoint detection; advances the turn counter. |
|
|
17
|
+
* | `barge_in` | `voice_barge_in` | N/A | Signals user interruption for downstream handlers. |
|
|
18
|
+
*
|
|
19
|
+
* ## Checkpoint restore
|
|
18
20
|
*
|
|
19
21
|
* The `initialTurnCount` constructor parameter enables checkpoint restore:
|
|
20
22
|
* pass the previously persisted count so that `turnIndex` values continue
|
|
21
|
-
* from where the session left off rather than resetting to zero.
|
|
23
|
+
* from where the session left off rather than resetting to zero. This is
|
|
24
|
+
* critical for `maxTurns` enforcement across graph suspensions.
|
|
25
|
+
*
|
|
26
|
+
* @see {@link VoiceNodeExecutor} -- creates and owns the collector during voice node execution.
|
|
27
|
+
* @see {@link VoiceNodeCheckpoint} -- persists `turnIndex` and `transcript` across suspensions.
|
|
22
28
|
*/
|
|
23
29
|
import { EventEmitter } from 'events';
|
|
24
30
|
import type { GraphEvent } from '../events/GraphEvent.js';
|
|
25
31
|
/**
|
|
26
32
|
* A single confirmed (final) utterance captured from the voice pipeline.
|
|
27
33
|
*
|
|
28
|
-
* Only `final_transcript` events populate
|
|
29
|
-
* discarded to keep the transcript clean and avoid duplicate
|
|
34
|
+
* Only `final_transcript` events populate the transcript buffer -- interim
|
|
35
|
+
* partials are discarded to keep the transcript clean and avoid duplicate
|
|
36
|
+
* entries that would corrupt downstream summarisation.
|
|
37
|
+
*
|
|
38
|
+
* @example
|
|
39
|
+
* ```ts
|
|
40
|
+
* const entry: TranscriptEntry = {
|
|
41
|
+
* speaker: 'Speaker_0',
|
|
42
|
+
* text: 'Hello, how can I help you?',
|
|
43
|
+
* timestamp: Date.now(),
|
|
44
|
+
* };
|
|
45
|
+
* ```
|
|
30
46
|
*/
|
|
31
47
|
export interface TranscriptEntry {
|
|
32
|
-
/**
|
|
48
|
+
/**
|
|
49
|
+
* Speaker identifier as reported by the STT service (e.g. `"Speaker_0"`).
|
|
50
|
+
* Defaults to `"user"` when the STT service does not provide diarization labels.
|
|
51
|
+
*/
|
|
33
52
|
speaker: string;
|
|
34
53
|
/** Recognised text for this utterance. */
|
|
35
54
|
text: string;
|
|
36
55
|
/**
|
|
37
56
|
* Wall-clock timestamp (milliseconds since Unix epoch) recorded at the
|
|
38
|
-
* moment the `final_transcript` event was processed.
|
|
57
|
+
* moment the `final_transcript` event was processed by the collector.
|
|
58
|
+
*
|
|
59
|
+
* This is the collector's receive time, not the STT service's recognition
|
|
60
|
+
* time, so it includes any event loop latency between STT and the collector.
|
|
39
61
|
*/
|
|
40
62
|
timestamp: number;
|
|
41
63
|
}
|
|
@@ -43,6 +65,11 @@ export interface TranscriptEntry {
|
|
|
43
65
|
* Stateful collector that subscribes to a voice pipeline session and routes
|
|
44
66
|
* session events into the AgentOS `GraphEvent` stream.
|
|
45
67
|
*
|
|
68
|
+
* The collector is designed to be short-lived -- created at the start of a
|
|
69
|
+
* voice node execution and discarded when the node completes. Its state
|
|
70
|
+
* (transcript, turn count, last speaker) is captured into a
|
|
71
|
+
* {@link VoiceNodeCheckpoint} by the executor before disposal.
|
|
72
|
+
*
|
|
46
73
|
* @example
|
|
47
74
|
* ```ts
|
|
48
75
|
* const collector = new VoiceTurnCollector(
|
|
@@ -56,39 +83,65 @@ export interface TranscriptEntry {
|
|
|
56
83
|
* console.log(collector.getTranscript()); // full buffered transcript
|
|
57
84
|
* console.log(collector.getLastSpeaker()); // last identified speaker
|
|
58
85
|
* ```
|
|
86
|
+
*
|
|
87
|
+
* @see {@link TranscriptEntry} -- shape of each buffered transcript entry.
|
|
88
|
+
* @see {@link VoiceNodeExecutor} -- the executor that creates and queries the collector.
|
|
59
89
|
*/
|
|
60
90
|
export declare class VoiceTurnCollector {
|
|
61
91
|
private readonly eventSink;
|
|
62
92
|
private readonly nodeId;
|
|
63
|
-
/** Buffered confirmed utterances in chronological order. */
|
|
93
|
+
/** Buffered confirmed utterances in chronological order. Append-only. */
|
|
64
94
|
private transcript;
|
|
65
95
|
/** Running count of completed turns (endpoint-detected). */
|
|
66
96
|
private turnCount;
|
|
67
|
-
/**
|
|
97
|
+
/**
|
|
98
|
+
* Speaker identifier from the most recent `final_transcript` event.
|
|
99
|
+
* Empty string until the first final transcript arrives.
|
|
100
|
+
*/
|
|
68
101
|
private lastSpeaker;
|
|
69
102
|
/**
|
|
103
|
+
* Creates a new VoiceTurnCollector and immediately subscribes to session events.
|
|
104
|
+
*
|
|
105
|
+
* Subscription is performed in the constructor (rather than a separate `init()`
|
|
106
|
+
* method) because the collector has no meaningful state before subscription and
|
|
107
|
+
* there is no cleanup/unsubscribe lifecycle -- the session EventEmitter is
|
|
108
|
+
* short-lived and garbage-collected with the collector.
|
|
109
|
+
*
|
|
70
110
|
* @param session - The voice pipeline `EventEmitter` to subscribe to.
|
|
71
|
-
*
|
|
72
|
-
*
|
|
73
|
-
* @param
|
|
74
|
-
*
|
|
111
|
+
* Must emit `interim_transcript`, `final_transcript`,
|
|
112
|
+
* `turn_complete`, and `barge_in` events.
|
|
113
|
+
* @param eventSink - Callback invoked synchronously for every emitted
|
|
114
|
+
* `GraphEvent`. Must not throw -- exceptions would
|
|
115
|
+
* propagate into the session event loop.
|
|
116
|
+
* @param nodeId - Identifies the owning graph node in every emitted
|
|
117
|
+
* event, enabling consumers to filter events by node.
|
|
118
|
+
* @param initialTurnCount - Seed value for `turnCount`; pass a persisted value
|
|
119
|
+
* to resume from a checkpoint rather than starting at
|
|
120
|
+
* zero. Defaults to `0`.
|
|
75
121
|
*/
|
|
76
122
|
constructor(session: EventEmitter, eventSink: (event: GraphEvent) => void, nodeId: string, initialTurnCount?: number);
|
|
77
123
|
/**
|
|
78
124
|
* Returns the total number of completed turns since construction (or since the
|
|
79
125
|
* provided `initialTurnCount` when restoring from a checkpoint).
|
|
126
|
+
*
|
|
127
|
+
* @returns The current turn count. Always >= `initialTurnCount`.
|
|
80
128
|
*/
|
|
81
129
|
getTurnCount(): number;
|
|
82
130
|
/**
|
|
83
131
|
* Returns a shallow copy of the buffered transcript entries.
|
|
84
132
|
*
|
|
85
133
|
* A copy is returned to prevent external callers from mutating the internal
|
|
86
|
-
* buffer
|
|
134
|
+
* buffer -- entries are append-only and must remain in chronological order
|
|
135
|
+
* for correct checkpoint persistence.
|
|
136
|
+
*
|
|
137
|
+
* @returns A new array containing all confirmed transcript entries in order.
|
|
87
138
|
*/
|
|
88
139
|
getTranscript(): TranscriptEntry[];
|
|
89
140
|
/**
|
|
90
141
|
* Returns the speaker identifier from the most recent `final_transcript` event,
|
|
91
142
|
* or an empty string if no final transcript has been received yet.
|
|
143
|
+
*
|
|
144
|
+
* @returns The last speaker label, or `''` if none.
|
|
92
145
|
*/
|
|
93
146
|
getLastSpeaker(): string;
|
|
94
147
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"VoiceTurnCollector.d.ts","sourceRoot":"","sources":["../../../src/orchestration/runtime/VoiceTurnCollector.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"VoiceTurnCollector.d.ts","sourceRoot":"","sources":["../../../src/orchestration/runtime/VoiceTurnCollector.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;GA2BG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,QAAQ,CAAC;AACtC,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAM1D;;;;;;;;;;;;;;;GAeG;AACH,MAAM,WAAW,eAAe;IAC9B;;;OAGG;IACH,OAAO,EAAE,MAAM,CAAC;IAEhB,0CAA0C;IAC1C,IAAI,EAAE,MAAM,CAAC;IAEb;;;;;;OAMG;IACH,SAAS,EAAE,MAAM,CAAC;CACnB;AAMD;;;;;;;;;;;;;;;;;;;;;;;;;GAyBG;AACH,qBAAa,kBAAkB;IAmC3B,OAAO,CAAC,QAAQ,CAAC,SAAS;IAC1B,OAAO,CAAC,QAAQ,CAAC,MAAM;IAnCzB,yEAAyE;IACzE,OAAO,CAAC,UAAU,CAAyB;IAE3C,4DAA4D;IAC5D,OAAO,CAAC,SAAS,CAAS;IAE1B;;;OAGG;IACH,OAAO,CAAC,WAAW,CAAM;IAEzB;;;;;;;;;;;;;;;;;;;OAmBG;gBAED,OAAO,EAAE,YAAY,EACJ,SAAS,EAAE,CAAC,KAAK,EAAE,UAAU,KAAK,IAAI,EACtC,MAAM,EAAE,MAAM,EAC/B,gBAAgB,SAAI;IAqGtB;;;;;OAKG;IACH,YAAY,IAAI,MAAM;IAItB;;;;;;;;OAQG;IACH,aAAa,IAAI,eAAe,EAAE;IAIlC;;;;;OAKG;IACH,cAAc,IAAI,MAAM;CAGzB"}
|