@alexkroman1/aai 1.4.5 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +9 -9
- package/CHANGELOG.md +13 -0
- package/dist/assemblyai-C969QGi4.js +35 -0
- package/dist/cartesia-BfQPOQ7Y.js +37 -0
- package/dist/host/_pipeline-test-fakes.d.ts +3 -1
- package/dist/host/providers/stt/deepgram.d.ts +28 -0
- package/dist/host/providers/tts/cartesia.d.ts +1 -1
- package/dist/host/providers/tts/rime.d.ts +44 -0
- package/dist/host/runtime-barrel.d.ts +4 -2
- package/dist/host/runtime-barrel.js +1432 -1208
- package/dist/host/runtime.d.ts +2 -2
- package/dist/host/s2s.d.ts +16 -16
- package/dist/host/session-core.d.ts +37 -0
- package/dist/host/transports/pipeline-transport.d.ts +48 -0
- package/dist/host/transports/s2s-transport.d.ts +19 -0
- package/dist/host/transports/types.d.ts +45 -0
- package/dist/host/ws-handler.d.ts +14 -10
- package/dist/sdk/protocol.d.ts +6 -5
- package/dist/sdk/providers/llm-barrel.js +1 -1
- package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
- package/dist/sdk/providers/stt-barrel.d.ts +1 -0
- package/dist/sdk/providers/stt-barrel.js +2 -2
- package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
- package/dist/sdk/providers/tts/rime.d.ts +42 -0
- package/dist/sdk/providers/tts-barrel.d.ts +1 -0
- package/dist/sdk/providers/tts-barrel.js +2 -2
- package/host/_pipeline-test-fakes.ts +6 -3
- package/host/_test-utils.ts +209 -128
- package/host/cleanup.test.ts +25 -298
- package/host/integration/pipeline-reference.integration.test.ts +30 -35
- package/host/providers/resolve.ts +10 -2
- package/host/providers/stt/deepgram.test.ts +229 -0
- package/host/providers/stt/deepgram.ts +172 -0
- package/host/providers/tts/cartesia.ts +7 -3
- package/host/providers/tts/rime.test.ts +251 -0
- package/host/providers/tts/rime.ts +322 -0
- package/host/runtime-barrel.ts +4 -2
- package/host/runtime.test.ts +13 -46
- package/host/runtime.ts +131 -23
- package/host/s2s.test.ts +122 -131
- package/host/s2s.ts +44 -52
- package/host/session-core.test.ts +257 -0
- package/host/session-core.ts +262 -0
- package/host/transports/pipeline-transport.test.ts +651 -0
- package/host/transports/pipeline-transport.ts +532 -0
- package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
- package/host/transports/s2s-transport.test.ts +56 -0
- package/host/transports/s2s-transport.ts +116 -0
- package/host/transports/types.test.ts +22 -0
- package/host/transports/types.ts +51 -0
- package/host/ws-handler.test.ts +324 -242
- package/host/ws-handler.ts +56 -59
- package/package.json +2 -1
- package/sdk/__snapshots__/exports.test.ts.snap +3 -3
- package/sdk/protocol-compat.test.ts +8 -0
- package/sdk/protocol.ts +6 -5
- package/sdk/providers/stt/deepgram.ts +43 -0
- package/sdk/providers/stt-barrel.ts +2 -0
- package/sdk/providers/tts/cartesia.ts +15 -5
- package/sdk/providers/tts/rime.ts +52 -0
- package/sdk/providers/tts-barrel.ts +2 -0
- package/dist/assemblyai-Cxg9eobY.js +0 -18
- package/dist/cartesia-DwDk2tEu.js +0 -10
- package/dist/host/pipeline-session-ctx.d.ts +0 -24
- package/dist/host/pipeline-session.d.ts +0 -52
- package/dist/host/session-ctx.d.ts +0 -73
- package/dist/host/session.d.ts +0 -62
- package/host/pipeline-session-ctx.test.ts +0 -31
- package/host/pipeline-session-ctx.ts +0 -36
- package/host/pipeline-session.test.ts +0 -672
- package/host/pipeline-session.ts +0 -533
- package/host/s2s-fixtures.test.ts +0 -237
- package/host/session-ctx.test.ts +0 -387
- package/host/session-ctx.ts +0 -134
- package/host/session-fixture-replay.test.ts +0 -128
- package/host/session.test.ts +0 -634
- package/host/session.ts +0 -412
- /package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
|
@@ -0,0 +1,532 @@
|
|
|
1
|
+
// Copyright 2026 the AAI authors. MIT license.
|
|
2
|
+
// Pipeline transport — STT → LLM → TTS orchestration behind the Transport interface.
|
|
3
|
+
|
|
4
|
+
// Pipeline mode executes tools inline via streamText's `tools.execute`.
|
|
5
|
+
// `callbacks.onToolCall` is observability-only; runtime.ts routes it to
|
|
6
|
+
// `client.toolCall` directly (bypassing SessionCore's tool-dispatch path,
|
|
7
|
+
// which is S2S-only). `sendToolResult` is a no-op because results are
|
|
8
|
+
// already handled by streamText.
|
|
9
|
+
//
|
|
10
|
+
// `conversationMessages` below is transport-local and is trimmed to
|
|
11
|
+
// DEFAULT_MAX_HISTORY by `pushMessages`. SessionCore's `maxHistory` does not
|
|
12
|
+
// yet feed through, so a per-session history limit is not honored here.
|
|
13
|
+
|
|
14
|
+
import type { LanguageModel, ModelMessage } from "ai";
|
|
15
|
+
import { stepCountIs, streamText } from "ai";
|
|
16
|
+
import type { ExecuteTool, ToolSchema } from "../../sdk/_internal-types.ts";
|
|
17
|
+
import {
|
|
18
|
+
DEFAULT_MAX_HISTORY,
|
|
19
|
+
DEFAULT_STT_SAMPLE_RATE,
|
|
20
|
+
DEFAULT_TTS_SAMPLE_RATE,
|
|
21
|
+
PIPELINE_FLUSH_TIMEOUT_MS,
|
|
22
|
+
} from "../../sdk/constants.ts";
|
|
23
|
+
import type { SessionErrorCode } from "../../sdk/protocol.ts";
|
|
24
|
+
import type {
|
|
25
|
+
SttError,
|
|
26
|
+
SttOpener,
|
|
27
|
+
SttSession,
|
|
28
|
+
TtsError,
|
|
29
|
+
TtsOpener,
|
|
30
|
+
TtsSession,
|
|
31
|
+
Unsubscribe,
|
|
32
|
+
} from "../../sdk/providers.ts";
|
|
33
|
+
import type { Message, ToolChoice } from "../../sdk/types.ts";
|
|
34
|
+
import { errorMessage } from "../../sdk/utils.ts";
|
|
35
|
+
import { consoleLogger, type Logger } from "../runtime-config.ts";
|
|
36
|
+
import { toVercelTools } from "../to-vercel-tools.ts";
|
|
37
|
+
import type { Transport, TransportCallbacks, TransportSessionConfig } from "./types.ts";
|
|
38
|
+
|
|
39
|
+
/**
 * Options accepted by {@link createPipelineTransport}.
 *
 * The required members identify the session and supply the three pipeline
 * stages (STT, LLM, TTS) together with the callbacks and session config.
 * The optional members tune audio rates, tool calling, logging and greeting.
 */
export interface PipelineTransportOptions {
  // ---- Identity ----

  /** Session id; attached to log entries and handed to `onSessionReady`. */
  sid: string;
  /** Slug of the agent. */
  agent: string;

  // ---- Pipeline stages ----

  /** Opener for the speech-to-text session (resolved from an SttProvider descriptor). */
  stt: SttOpener;
  /** Language model (Vercel AI SDK `LanguageModel`) given to `streamText`. */
  llm: LanguageModel;
  /** Opener for the text-to-speech session (resolved from a TtsProvider descriptor). */
  tts: TtsOpener;

  // ---- Session wiring ----

  /** Callbacks through which the transport reports events to SessionCore. */
  callbacks: TransportCallbacks;
  /** Session config: systemPrompt, greeting, tools, history. */
  sessionConfig: TransportSessionConfig;
  /** JSON Schema tool definitions bound as Vercel AI tools. No tools when omitted. */
  toolSchemas?: readonly ToolSchema[];
  /** Function that runs a tool. When omitted, every tool execution rejects. */
  executeTool?: ExecuteTool;
  /** API keys handed to the STT and TTS openers. */
  providerKeys: { stt: string; tts: string };

  // ---- Tuning ----

  /** Sample rate (PCM16, Hz) of audio sent to STT. Defaults to DEFAULT_STT_SAMPLE_RATE. */
  sttSampleRate?: number | undefined;
  /** Sample rate (PCM16, Hz) of audio produced by TTS. Defaults to DEFAULT_TTS_SAMPLE_RATE. */
  ttsSampleRate?: number | undefined;
  /** Optional prompt forwarded to the STT opener as `sttPrompt`. */
  sttPrompt?: string | undefined;
  /** Upper bound on LLM steps per turn. Defaults to 5. */
  maxSteps?: number | undefined;
  /** Tool selection policy for `streamText`. Defaults to `"auto"`. */
  toolChoice?: ToolChoice | undefined;
  /** Logger for diagnostics. Defaults to consoleLogger. */
  logger?: Logger | undefined;
  /** When true, no greeting is spoken once audio is ready (used for session resume). */
  skipGreeting?: boolean | undefined;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Convert a stored conversation message into the shape `streamText` expects.
 * A `"user"` role is kept as-is; any other role is emitted as `"assistant"`.
 */
function toModelMessage(m: Message): ModelMessage {
  return m.role === "user"
    ? { role: "user", content: m.content }
    : { role: "assistant", content: m.content };
}
|
|
84
|
+
|
|
85
|
+
/** Create a pipeline-mode Transport (STT → LLM → TTS). */
|
|
86
|
+
export function createPipelineTransport(opts: PipelineTransportOptions): Transport {
|
|
87
|
+
// ---- Resolved options -------------------------------------------------------
const { callbacks, sessionConfig } = opts;
const log = opts.logger ?? consoleLogger;
const sttSampleRate = opts.sttSampleRate ?? DEFAULT_STT_SAMPLE_RATE;
const ttsSampleRate = opts.ttsSampleRate ?? DEFAULT_TTS_SAMPLE_RATE;
const maxSteps = opts.maxSteps ?? 5;
const toolChoice = opts.toolChoice ?? "auto";
const toolSchemas = opts.toolSchemas ?? [];
// Fallback executor: rejects every call when the caller supplied no executeTool.
const executeTool: ExecuteTool =
  opts.executeTool ??
  (async () => {
    throw new Error("No executeTool provided");
  });

// The caller builds the final system prompt; it is handed to streamText as-is.
const systemPrompt = sessionConfig.systemPrompt;

// ---- State ----------------------------------------------------------------
// Session-wide abort signal; passed to both provider openers.
const sessionAbort = new AbortController();
// Set once by onAudioReady; user audio is dropped until then.
let audioReady = false;
// Set once by terminate(); event handlers become no-ops afterwards.
let terminated = false;
let sttSession: SttSession | null = null;
let ttsSession: TtsSession | null = null;
// Controller of the turn (or greeting) currently in flight, if any.
let turnController: AbortController | null = null;
// Counter used to build reply ids.
let nextReplyId = 0;
// Conversation history, copied from sessionConfig.history when present. The
// transport keeps its own copy because it builds the LLM messages array for
// every turn.
const conversationMessages: Message[] = [...(sessionConfig.history ?? [])];
// Chain of started turns; stop() awaits it.
let turnPromise: Promise<void> | null = null;
// Unsubscribe handles for provider event listeners, released by stop().
const sttSubs: Unsubscribe[] = [];
const ttsSubs: Unsubscribe[] = [];
|
|
123
|
+
|
|
124
|
+
// ---- History helpers ------------------------------------------------------
|
|
125
|
+
/**
 * Append messages to the transport-local history, then drop the oldest
 * entries so that at most DEFAULT_MAX_HISTORY messages remain.
 */
function pushMessages(...msgs: Message[]): void {
  for (const msg of msgs) conversationMessages.push(msg);
  const overflow = conversationMessages.length - DEFAULT_MAX_HISTORY;
  if (overflow > 0) conversationMessages.splice(0, overflow);
}
|
|
131
|
+
|
|
132
|
+
/**
 * Record an already-started turn on the session's turn chain. The chained
 * promise resolves only after every earlier entry and `p` have resolved;
 * stop() awaits it before closing the providers.
 */
function chainTurn(p: Promise<void>): void {
  const previous = turnPromise ?? Promise.resolve();
  turnPromise = previous.then(() => p);
}
|
|
135
|
+
|
|
136
|
+
// ---- Error helpers --------------------------------------------------------
|
|
137
|
+
/**
 * Report a failure through `callbacks.onError`.
 *
 * @param code - Which stage failed (callers here pass "stt", "tts" or "llm").
 * @param message - Human-readable description of the failure.
 */
function emitError(code: SessionErrorCode, message: string): void {
  callbacks.onError(code, message);
}
|
|
140
|
+
|
|
141
|
+
// ---- Termination ----------------------------------------------------------
|
|
142
|
+
/**
 * Shut the transport down after a provider failure that cannot be recovered.
 *
 * In order: marks the transport terminated, aborts the in-flight turn (if
 * any), cancels TTS, reports the cancellation via `callbacks.onCancelled`, and
 * aborts the session-wide signal that was handed to both provider openers.
 * Only the first call does anything; later calls return immediately.
 */
function terminate(): void {
  if (terminated) return;
  terminated = true;

  turnController?.abort();
  turnController = null;

  ttsSession?.cancel();
  callbacks.onCancelled();
  sessionAbort.abort();
}
|
|
157
|
+
|
|
158
|
+
// ---- STT event handlers ---------------------------------------------------
|
|
159
|
+
/**
 * STT `partial` handler, used purely as a barge-in trigger: the partial text
 * itself is ignored. When a turn is in flight it is aborted, TTS is cancelled
 * and `callbacks.onCancelled` fires. With no turn in flight this is a no-op.
 */
function onSttPartial(_text: string): void {
  const active = turnController;
  if (terminated || active === null) return;

  log.info("Pipeline barge-in", { sid: opts.sid });
  active.abort();
  turnController = null;
  ttsSession?.cancel();
  callbacks.onCancelled();
}
|
|
168
|
+
|
|
169
|
+
/**
 * STT `final` handler: starts a new turn for a finished user utterance.
 *
 * Whitespace-only transcripts are ignored. If a turn is still running (for
 * example a duplicate or late final), it is cancelled first, exactly as a
 * barge-in would. The client is sent the provider's text unmodified, while
 * the turn itself runs on the trimmed text.
 */
function onSttFinal(text: string): void {
  if (terminated) return;

  const trimmed = text.trim();
  if (trimmed.length === 0) return;

  const running = turnController;
  if (running !== null) {
    log.info("Pipeline replacing in-flight turn", { sid: opts.sid });
    running.abort();
    turnController = null;
    ttsSession?.cancel();
    callbacks.onCancelled();
  }

  callbacks.onUserTranscript(text);

  // runTurn starts immediately; failures are logged here so the chained
  // promise never rejects.
  chainTurn(
    runTurn(trimmed).catch((err: unknown) => {
      log.error("Pipeline turn crashed", { error: errorMessage(err), sid: opts.sid });
    }),
  );
}
|
|
187
|
+
|
|
188
|
+
/** STT `error` handler: logs, reports an "stt" error, then terminates the transport. */
function onSttError(err: SttError): void {
  if (terminated) return;
  const { code, message } = err;
  log.error("STT error", { code, message, sid: opts.sid });
  emitError("stt", message);
  terminate();
}
|
|
194
|
+
|
|
195
|
+
// ---- TTS event handlers ---------------------------------------------------
|
|
196
|
+
/** TTS `error` handler: logs, reports a "tts" error, then terminates the transport. */
function onTtsError(err: TtsError): void {
  if (terminated) return;
  const { code, message } = err;
  log.error("TTS error", { code, message, sid: opts.sid });
  emitError("tts", message);
  terminate();
}
|
|
202
|
+
|
|
203
|
+
// ---- LLM streaming --------------------------------------------------------
|
|
204
|
+
/**
 * Run one `streamText` call and feed every stream part to handleStreamPart.
 *
 * Never rejects: a failure while the turn is still live is logged and reported
 * as an "llm" error, and a failure after the turn was aborted is ignored.
 *
 * @param ctl - Controller of the current turn; its signal cancels the request.
 * @param messages - Conversation so far, in model-message form.
 * @param tools - Vercel AI tool bindings for this turn.
 * @param onDelta - Receives every non-empty text delta.
 */
async function consumeLlmStream(
  ctl: AbortController,
  messages: ModelMessage[],
  tools: ReturnType<typeof toVercelTools>,
  onDelta: (delta: string) => void,
): Promise<void> {
  const { signal } = ctl;
  try {
    const { fullStream } = streamText({
      model: opts.llm,
      system: systemPrompt,
      messages,
      tools,
      toolChoice,
      stopWhen: stepCountIs(maxSteps),
      abortSignal: signal,
    });
    for await (const part of fullStream) {
      // Stop consuming as soon as the turn has been cancelled.
      if (signal.aborted) break;
      handleStreamPart(part, ctl, onDelta);
    }
  } catch (err: unknown) {
    // Errors caused by our own abort are expected and not reported.
    if (signal.aborted) return;
    const msg = errorMessage(err);
    log.error("LLM streamText failed", { error: msg, sid: opts.sid });
    emitError("llm", msg);
  }
}
|
|
232
|
+
|
|
233
|
+
/**
 * Dispatch a single part of the LLM stream.
 *
 * - `text-delta`: non-empty text goes to `onDelta` and to the TTS session.
 * - `tool-call`: reported through `callbacks.onToolCall` for observability
 *   only; the tool itself is executed inline via the Vercel tool bindings.
 * - `error`: logged and reported as an "llm" error.
 * - any other part type is ignored.
 *
 * @param part - Stream part; only the fields listed in the type are read.
 * @param _ctl - Unused; kept for signature stability.
 * @param onDelta - Receives every non-empty text delta.
 */
function handleStreamPart(
  part: {
    readonly type: string;
    readonly text?: string;
    readonly input?: unknown;
    readonly output?: unknown;
    readonly toolCallId?: string;
    readonly toolName?: string;
    readonly error?: unknown;
  },
  _ctl: AbortController,
  onDelta: (delta: string) => void,
): void {
  if (part.type === "text-delta") {
    const delta = part.text ?? "";
    if (delta.length > 0) {
      onDelta(delta);
      ttsSession?.sendText(delta);
    }
    return;
  }

  if (part.type === "tool-call") {
    const input = (part.input ?? {}) as Record<string, unknown>;
    callbacks.onToolCall(part.toolCallId ?? "", part.toolName ?? "", input);
    return;
  }

  if (part.type === "error") {
    const msg = errorMessage(part.error);
    log.error("LLM stream error", { message: msg, sid: opts.sid });
    emitError("llm", msg);
  }
}
|
|
271
|
+
|
|
272
|
+
// ---- TTS flush ------------------------------------------------------------
|
|
273
|
+
/**
 * Flush TTS and wait for it to drain. Resolves on whichever happens first:
 *   - TTS emits `done`
 *   - `signal` aborts (barge-in / provider error / session stop)
 *   - PIPELINE_FLUSH_TIMEOUT_MS elapses (a warning is logged)
 * Resolves immediately when there is no TTS session or `signal` is already
 * aborted.
 *
 * Rejects only if `tts.flush()` throws synchronously. In that case the timer
 * and both listeners are released before the rejection is delivered, so
 * nothing lingers until the timeout.
 *
 * @param signal - Abort signal of the turn being flushed.
 */
function flushTtsAndWait(signal: AbortSignal): Promise<void> {
  const tts = ttsSession;
  if (!tts || signal.aborted) return Promise.resolve();

  return new Promise<void>((resolve, reject) => {
    let off: Unsubscribe | null = null;
    let timer: ReturnType<typeof setTimeout> | null = null;

    // Release the `done` listener, the timeout and the abort listener.
    // Safe to call more than once.
    const cleanup = () => {
      if (off) {
        off();
        off = null;
      }
      if (timer) {
        clearTimeout(timer);
        timer = null;
      }
      signal.removeEventListener("abort", onAbort);
    };
    const finish = () => {
      cleanup();
      resolve();
    };
    const onAbort = () => finish();

    signal.addEventListener("abort", onAbort, { once: true });
    off = tts.on("done", finish);
    timer = setTimeout(() => {
      log.warn("TTS flush timeout", {
        sid: opts.sid,
        timeoutMs: PIPELINE_FLUSH_TIMEOUT_MS,
      });
      finish();
    }, PIPELINE_FLUSH_TIMEOUT_MS);

    try {
      tts.flush();
    } catch (err: unknown) {
      // Without this, the timer and listeners would stay registered after the
      // promise has already rejected.
      cleanup();
      reject(err);
    }
  });
}
|
|
318
|
+
|
|
319
|
+
// ---- Turn orchestration ---------------------------------------------------
|
|
320
|
+
/**
 * Run one user turn: LLM streaming, then TTS flush, then reply completion.
 *
 * Announces the reply, records the user message, streams the LLM response
 * (text deltas are forwarded to TTS as they arrive), emits the complete agent
 * transcript, waits for TTS to drain and finally signals `onReplyDone`. If the
 * turn is aborted at any point it returns without emitting the remaining
 * events.
 *
 * `turnController` is released in a `finally` block, so an exception thrown
 * mid-turn (for example by a callback) cannot leave a dead controller behind.
 * A leftover controller would make later STT partials look like barge-in.
 *
 * @param userText - Trimmed final transcript of the user's utterance.
 */
async function runTurn(userText: string): Promise<void> {
  const replyId = `pipeline-${++nextReplyId}`;
  callbacks.onReplyStarted(replyId);
  pushMessages({ role: "user", content: userText });

  const ctl = new AbortController();
  turnController = ctl;

  try {
    const tools = toVercelTools(toolSchemas, {
      executeTool,
      sessionId: opts.sid,
      messages: () => conversationMessages,
      signal: ctl.signal,
    });

    const messages: ModelMessage[] = conversationMessages.map(toModelMessage);
    let accumulated = "";
    await consumeLlmStream(ctl, messages, tools, (delta) => {
      accumulated += delta;
    });

    if (ctl.signal.aborted) return;

    // Emit the complete transcript once the LLM finishes streaming.
    if (accumulated.length > 0) {
      callbacks.onAgentTranscript(accumulated, false);
      pushMessages({ role: "assistant", content: accumulated });
    }

    await flushTtsAndWait(ctl.signal);

    if (ctl.signal.aborted) return;

    // Do NOT call callbacks.onAudioDone() here — session-core's flushReply
    // (triggered by onReplyDone) emits audioDone + replyDone together, matching
    // the S2S transport contract. Calling it here would double-fire audio_done.
    callbacks.onReplyDone();
  } finally {
    // Only clear the slot if a newer turn has not already taken it over.
    if (turnController === ctl) turnController = null;
  }
}
|
|
365
|
+
|
|
366
|
+
/**
 * Speak the configured greeting as its own reply, without calling the LLM.
 *
 * Announces the reply, emits the greeting as the agent transcript, records it
 * in history, sends it to TTS, waits for TTS to drain and then signals
 * `onReplyDone`. If the greeting is aborted (for example by barge-in) it
 * returns without signalling completion.
 *
 * `turnController` is released in a `finally` block, so an exception thrown
 * mid-greeting cannot leave a dead controller behind. A leftover controller
 * would make later STT partials look like barge-in.
 *
 * @param text - Greeting text to speak.
 */
async function runGreeting(text: string): Promise<void> {
  const replyId = `pipeline-greeting-${++nextReplyId}`;
  callbacks.onReplyStarted(replyId);

  const ctl = new AbortController();
  turnController = ctl;

  try {
    callbacks.onAgentTranscript(text, false);
    pushMessages({ role: "assistant", content: text });
    ttsSession?.sendText(text);

    await flushTtsAndWait(ctl.signal);

    if (ctl.signal.aborted) return;

    // Do NOT call callbacks.onAudioDone() here — session-core's flushReply
    // (triggered by onReplyDone) emits audioDone + replyDone together, matching
    // the S2S transport contract. Calling it here would double-fire audio_done.
    callbacks.onReplyDone();
  } finally {
    // Only clear the slot if a newer turn has not already taken it over.
    if (turnController === ctl) turnController = null;
  }
}
|
|
390
|
+
|
|
391
|
+
// ---- Provider lifecycle ---------------------------------------------------
|
|
392
|
+
/**
 * Log a provider that failed to open and report the failure to the client.
 *
 * @param which - The provider that failed; also used as the error code.
 * @param reason - Rejection reason from the opener.
 */
function reportOpenRejection(which: "stt" | "tts", reason: unknown): void {
  const label = which === "stt" ? "STT" : "TTS";
  const msg = errorMessage(reason);
  log.error(`${label} open failed`, { error: msg, sid: opts.sid });
  emitError(which, msg);
}
|
|
400
|
+
|
|
401
|
+
/**
 * Take ownership of a freshly opened STT session.
 *
 * @param session - The opened STT session.
 * @param teardown - When true the session is not wanted (startup was aborted
 *   or the other provider failed): it is closed, close errors are ignored, and
 *   nothing is subscribed. Otherwise it becomes the active session and its
 *   `partial`, `final` and `error` events are wired to the handlers.
 */
async function adoptStt(session: SttSession, teardown: boolean): Promise<void> {
  if (!teardown) {
    sttSession = session;
    sttSubs.push(session.on("partial", onSttPartial));
    sttSubs.push(session.on("final", onSttFinal));
    sttSubs.push(session.on("error", onSttError));
    return;
  }
  await session.close().catch(() => undefined);
}
|
|
411
|
+
|
|
412
|
+
/**
 * Take ownership of a freshly opened TTS session.
 *
 * @param session - The opened TTS session.
 * @param teardown - When true the session is not wanted (startup was aborted
 *   or the other provider failed): it is closed, close errors are ignored, and
 *   nothing is subscribed. Otherwise it becomes the active session and its
 *   `audio` and `error` events are wired up.
 */
async function adoptTts(session: TtsSession, teardown: boolean): Promise<void> {
  if (!teardown) {
    ttsSession = session;
    // Forward synthesized audio as raw bytes over the same underlying buffer.
    ttsSubs.push(
      session.on("audio", (pcm) => {
        const bytes = new Uint8Array(pcm.buffer, pcm.byteOffset, pcm.byteLength);
        callbacks.onAudioChunk(bytes);
      }),
    );
    // `done` is deliberately NOT subscribed here: flushTtsAndWait() attaches a
    // one-shot listener for each flush so it knows when synthesis has drained.
    // runTurn / runGreeting then signal completion through onReplyDone.
    ttsSubs.push(session.on("error", onTtsError));
    return;
  }
  await session.close().catch(() => undefined);
}
|
|
429
|
+
|
|
430
|
+
/**
 * Open the STT and TTS sessions concurrently and adopt the results.
 *
 * Each open failure is logged and reported. If either provider failed, or the
 * session was aborted while opening, any session that did open is closed
 * again instead of being adopted. An open failure on a session that was not
 * aborted terminates the transport.
 */
async function openProviders(): Promise<void> {
  const { signal } = sessionAbort;
  const sttOpening = opts.stt.open({
    sampleRate: sttSampleRate,
    apiKey: opts.providerKeys.stt,
    sttPrompt: opts.sttPrompt,
    signal,
  });
  const ttsOpening = opts.tts.open({
    sampleRate: ttsSampleRate,
    apiKey: opts.providerKeys.tts,
    signal,
  });
  const [sttResult, ttsResult] = await Promise.allSettled([sttOpening, ttsOpening]);

  if (sttResult.status === "rejected") reportOpenRejection("stt", sttResult.reason);
  if (ttsResult.status === "rejected") reportOpenRejection("tts", ttsResult.reason);

  // Read the abort flag only after reporting: an error callback may have
  // stopped the session in the meantime.
  const aborted = signal.aborted;
  const anyFailed = sttResult.status === "rejected" || ttsResult.status === "rejected";
  const teardown = aborted || anyFailed;

  if (sttResult.status === "fulfilled") await adoptStt(sttResult.value, teardown);
  if (ttsResult.status === "fulfilled") await adoptTts(ttsResult.value, teardown);

  if (anyFailed && !aborted) terminate();
}
|
|
458
|
+
|
|
459
|
+
// ---- Greeting on audio ready ----------------------------------------------
|
|
460
|
+
/**
 * Mark the transport ready for user audio and, unless the greeting is skipped
 * or absent, start speaking it. Runs at most once, and does nothing once the
 * transport has terminated.
 */
function onAudioReady(): void {
  if (terminated || audioReady) return;
  audioReady = true;

  const greeting = opts.skipGreeting ? undefined : sessionConfig.greeting;
  if (!greeting) return;

  // The greeting starts immediately; failures are logged here so the chained
  // promise never rejects.
  chainTurn(
    runGreeting(greeting).catch((err: unknown) => {
      log.error("Pipeline greeting failed", { error: errorMessage(err), sid: opts.sid });
    }),
  );
}
|
|
471
|
+
|
|
472
|
+
// ---- Transport interface --------------------------------------------------
|
|
473
|
+
return {
|
|
474
|
+
/**
 * Open both providers, announce the session as ready, then allow user audio
 * and trigger the greeting.
 */
async start(): Promise<void> {
  await openProviders();
  // S2S mode fires onSessionReady when the provider acknowledges the session.
  // Pipeline mode has no such acknowledgement, so "providers opened" serves as
  // the equivalent ready signal.
  // NOTE(review): this also fires when opening failed and the transport has
  // already terminated — confirm that is what SessionCore expects.
  callbacks.onSessionReady?.(opts.sid);
  onAudioReady();
},
|
|
482
|
+
|
|
483
|
+
/**
 * Stop the transport and release everything it holds.
 *
 * Aborts the session signal and any in-flight turn, removes all provider
 * event subscriptions, waits for started turns to settle, then closes the
 * STT and TTS sessions (close errors are ignored).
 *
 * Cleanup runs even when terminate() has already aborted the session signal.
 * Otherwise a stop() after a provider error would skip unsubscribing,
 * awaiting the in-flight turn and closing the sessions. The call is
 * idempotent: the session references are detached before the first await, so
 * repeated or overlapping calls close each session at most once.
 */
async stop(): Promise<void> {
  sessionAbort.abort();
  turnController?.abort();

  for (const off of sttSubs) off();
  for (const off of ttsSubs) off();
  sttSubs.length = 0;
  ttsSubs.length = 0;

  // Detach synchronously so a second stop() finds nothing left to close.
  const stt = sttSession;
  const tts = ttsSession;
  sttSession = null;
  ttsSession = null;

  if (turnPromise !== null) await turnPromise;

  await stt?.close().catch(() => {
    /* already closed */
  });
  await tts?.close().catch(() => {
    /* already closed */
  });
},
|
|
499
|
+
|
|
500
|
+
/**
 * Forward a chunk of user audio bytes to the STT session as 16-bit samples.
 *
 * Audio is dropped until the transport is ready and after it has terminated.
 * When the chunk is 2-byte aligned and even-sized, the samples are a view over
 * the caller's buffer (no copy). Otherwise the bytes are copied into a fresh
 * buffer and a trailing odd byte is discarded.
 */
sendUserAudio(bytes: Uint8Array): void {
  if (!audioReady || terminated) return;

  const { byteOffset, byteLength } = bytes;
  const aligned = byteOffset % 2 === 0 && byteLength % 2 === 0;

  let pcm: Int16Array;
  if (aligned) {
    pcm = new Int16Array(bytes.buffer, byteOffset, byteLength / 2);
  } else {
    // Int16Array needs an even offset and length: re-pack into a new buffer.
    const evenLength = byteLength - (byteLength % 2);
    const copy = new Uint8Array(evenLength);
    copy.set(bytes.subarray(0, evenLength));
    pcm = new Int16Array(copy.buffer);
  }

  sttSession?.sendAudio(pcm);
},
|
|
514
|
+
|
|
515
|
+
// Pipeline mode runs tools inline through toVercelTools/streamText, so there
// is never an externally produced tool result to deliver. This method exists
// only to satisfy the Transport interface and deliberately does nothing.
// biome-ignore lint/suspicious/noEmptyBlockStatements: intentional no-op for pipeline mode
sendToolResult(_callId: string, _result: string): void {},
|
|
519
|
+
|
|
520
|
+
/**
 * Cancel the reply in progress on behalf of the client: aborts the in-flight
 * turn (if any) and cancels TTS. Does nothing once the transport has
 * terminated.
 */
cancelReply(): void {
  if (terminated) return;

  const active = turnController;
  if (active !== null) active.abort();
  turnController = null;
  ttsSession?.cancel();

  // callbacks.onCancelled() is intentionally NOT fired here. This method is
  // called from session-core.onCancel (client-initiated cancel), which already
  // calls client.cancelled() itself — firing it again would double-cancel.
  // Barge-in is different: that cancel originates inside the transport, so
  // onSttPartial fires callbacks.onCancelled() directly.
},
|
|
531
|
+
};
|
|
532
|
+
}
|