@alexkroman1/aai 1.4.5 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +10 -10
- package/CHANGELOG.md +19 -0
- package/dist/{_internal-types-3p3OJZPb.js → _internal-types-DFL07G3f.js} +2 -0
- package/dist/assemblyai-C969QGi4.js +35 -0
- package/dist/cartesia-BfQPOQ7Y.js +37 -0
- package/dist/host/_pipeline-test-fakes.d.ts +3 -1
- package/dist/host/providers/stt/deepgram.d.ts +28 -0
- package/dist/host/providers/tts/cartesia.d.ts +1 -1
- package/dist/host/providers/tts/rime.d.ts +44 -0
- package/dist/host/runtime-barrel.d.ts +4 -2
- package/dist/host/runtime-barrel.js +1434 -1209
- package/dist/host/runtime.d.ts +2 -2
- package/dist/host/s2s.d.ts +16 -16
- package/dist/host/session-core.d.ts +37 -0
- package/dist/host/transports/pipeline-transport.d.ts +48 -0
- package/dist/host/transports/s2s-transport.d.ts +19 -0
- package/dist/host/transports/types.d.ts +45 -0
- package/dist/host/ws-handler.d.ts +14 -10
- package/dist/sdk/_internal-types.d.ts +2 -0
- package/dist/sdk/manifest-barrel.js +1 -1
- package/dist/sdk/protocol.d.ts +6 -5
- package/dist/sdk/providers/llm-barrel.js +1 -1
- package/dist/sdk/providers/stt/deepgram.d.ts +35 -0
- package/dist/sdk/providers/stt-barrel.d.ts +1 -0
- package/dist/sdk/providers/stt-barrel.js +2 -2
- package/dist/sdk/providers/tts/cartesia.d.ts +12 -4
- package/dist/sdk/providers/tts/rime.d.ts +42 -0
- package/dist/sdk/providers/tts-barrel.d.ts +1 -0
- package/dist/sdk/providers/tts-barrel.js +2 -2
- package/host/_pipeline-test-fakes.ts +6 -3
- package/host/_test-utils.ts +209 -128
- package/host/builtin-tools.ts +1 -0
- package/host/cleanup.test.ts +25 -298
- package/host/integration/pipeline-reference.integration.test.ts +30 -35
- package/host/providers/resolve.ts +10 -2
- package/host/providers/stt/deepgram.test.ts +229 -0
- package/host/providers/stt/deepgram.ts +172 -0
- package/host/providers/tts/cartesia.ts +7 -3
- package/host/providers/tts/rime.test.ts +251 -0
- package/host/providers/tts/rime.ts +322 -0
- package/host/runtime-barrel.ts +4 -2
- package/host/runtime.test.ts +16 -47
- package/host/runtime.ts +131 -23
- package/host/s2s.test.ts +122 -131
- package/host/s2s.ts +44 -52
- package/host/session-core.test.ts +257 -0
- package/host/session-core.ts +262 -0
- package/host/to-vercel-tools.test.ts +9 -1
- package/host/transports/pipeline-transport.test.ts +653 -0
- package/host/transports/pipeline-transport.ts +532 -0
- package/host/{fixture-replay.test.ts → transports/s2s-transport-fixtures.test.ts} +76 -106
- package/host/transports/s2s-transport.test.ts +56 -0
- package/host/transports/s2s-transport.ts +116 -0
- package/host/transports/types.test.ts +22 -0
- package/host/transports/types.ts +51 -0
- package/host/ws-handler.test.ts +324 -242
- package/host/ws-handler.ts +56 -59
- package/package.json +2 -1
- package/sdk/__snapshots__/exports.test.ts.snap +3 -3
- package/sdk/__snapshots__/schema-shapes.test.ts.snap +1 -0
- package/sdk/_internal-types.ts +3 -0
- package/sdk/protocol-compat.test.ts +8 -0
- package/sdk/protocol.ts +6 -5
- package/sdk/providers/stt/deepgram.ts +43 -0
- package/sdk/providers/stt-barrel.ts +2 -0
- package/sdk/providers/tts/cartesia.ts +15 -5
- package/sdk/providers/tts/rime.ts +52 -0
- package/sdk/providers/tts-barrel.ts +2 -0
- package/sdk/schema-alignment.test.ts +18 -6
- package/dist/assemblyai-Cxg9eobY.js +0 -18
- package/dist/cartesia-DwDk2tEu.js +0 -10
- package/dist/host/pipeline-session-ctx.d.ts +0 -24
- package/dist/host/pipeline-session.d.ts +0 -52
- package/dist/host/session-ctx.d.ts +0 -73
- package/dist/host/session.d.ts +0 -62
- package/host/pipeline-session-ctx.test.ts +0 -31
- package/host/pipeline-session-ctx.ts +0 -36
- package/host/pipeline-session.test.ts +0 -672
- package/host/pipeline-session.ts +0 -533
- package/host/s2s-fixtures.test.ts +0 -237
- package/host/session-ctx.test.ts +0 -387
- package/host/session-ctx.ts +0 -134
- package/host/session-fixture-replay.test.ts +0 -128
- package/host/session.test.ts +0 -634
- package/host/session.ts +0 -412
- /package/dist/{anthropic-BrUCPKUc.js → anthropic-CcLZygAr.js} +0 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
// Copyright 2026 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* Rime TTS opener (host-only).
|
|
4
|
+
*
|
|
5
|
+
* The user-facing descriptor factory (`rime(...)`) lives in
|
|
6
|
+
* `sdk/providers/tts/rime.ts`. This module is the host-side
|
|
7
|
+
* counterpart: it takes the descriptor options + an API key and
|
|
8
|
+
* returns a {@link TtsOpener} that the pipeline session drives.
|
|
9
|
+
*
|
|
10
|
+
* **Protocol.** Connects to Rime's `ws2` JSON WebSocket endpoint
|
|
11
|
+
* (`wss://users-ws.rime.ai/ws2`). Client-to-server messages are JSON:
|
|
12
|
+
* - `{ "text": "..." }` — append text to the synthesis buffer
|
|
13
|
+
* - `{ "operation": "clear" }` — drop buffered text (barge-in)
|
|
14
|
+
* - `{ "operation": "eos" }` — drain buffer, close connection (NOT used
|
|
15
|
+
* during a session: it would tear down the WS, forcing reconnect per
|
|
16
|
+
* turn). We force end-of-turn synthesis with a trailing `"."` instead.
|
|
17
|
+
* The server responds with JSON frames:
|
|
18
|
+
* - `{ type: "chunk", data: <base64 PCM16 LE>, contextId: string | null }`
|
|
19
|
+
* - `{ type: "timestamps", ... }` (ignored)
|
|
20
|
+
* - `{ type: "error", message: string }` (surfaced as `tts_stream_error`)
|
|
21
|
+
*
|
|
22
|
+
* **Single long-lived connection per session.** Rime buffers text until it
|
|
23
|
+
* sees terminal punctuation (`.`, `?`, `!`), so we use one WebSocket per
|
|
24
|
+
* `open()` call and reuse it across turns. `clear` resets the buffer
|
|
25
|
+
* between cancellations.
|
|
26
|
+
*
|
|
27
|
+
* **Done detection.** After `flush()` sends a trailing `"."` to force the
|
|
28
|
+
* server to synthesize any half-buffered text, we arm a quiescence timer
|
|
29
|
+
* that fires 500 ms after the last received audio chunk. When it fires,
|
|
30
|
+
* `done` is emitted.
|
|
31
|
+
*
|
|
32
|
+
* **Audio format.** The URL requests `audioFormat=pcm` at the negotiated
|
|
33
|
+
* `sampleRate`, which returns raw PCM16 little-endian. We decode the base64
|
|
34
|
+
* payload and construct a zero-copy `Int16Array` view over the decoded bytes.
|
|
35
|
+
*/
|
|
36
|
+
|
|
37
|
+
import { createNanoEvents, type Emitter } from "nanoevents";
|
|
38
|
+
import WebSocket from "ws";
|
|
39
|
+
import { RIME_DEFAULT_VOICE, type RimeOptions } from "../../../sdk/providers/tts/rime.ts";
|
|
40
|
+
import {
|
|
41
|
+
makeTtsError,
|
|
42
|
+
type TtsEvents,
|
|
43
|
+
type TtsOpener,
|
|
44
|
+
type TtsOpenOptions,
|
|
45
|
+
type TtsSession,
|
|
46
|
+
} from "../../../sdk/providers.ts";
|
|
47
|
+
|
|
48
|
+
/**
 * Internal session shape returned by the Rime opener: a regular
 * {@link TtsSession} plus a handle to the socket it drives.
 */
export interface RimeSession extends TtsSession {
  /** @internal Test-only: the raw WebSocket backing this session. */
  readonly _ws: WebSocket;
}
|
|
53
|
+
|
|
54
|
+
/** PCM16 sample rates (Hz) accepted by the Rime `ws2` endpoint. */
const RIME_PCM16_RATES = [
  8000, 16_000, 22_050, 24_000, 44_100, 48_000,
] as const satisfies readonly number[];

/**
 * Validate a requested sample rate against {@link RIME_PCM16_RATES}.
 *
 * @param rate - Sample rate in Hz requested by the caller.
 * @returns `rate` unchanged when it is one of the supported rates.
 * @throws A `tts_connect_failed` error (built by `makeTtsError`) listing the
 *   supported rates when `rate` is not one of them.
 */
function assertSupportedSampleRate(rate: number): number {
  const allowed: readonly number[] = RIME_PCM16_RATES;
  if (!allowed.includes(rate)) {
    const supportedList = RIME_PCM16_RATES.join(", ");
    throw makeTtsError(
      "tts_connect_failed",
      `Rime TTS: unsupported sample rate ${rate}. Supported: ${supportedList}.`,
    );
  }
  return rate;
}
|
|
66
|
+
|
|
67
|
+
/**
 * Decode a base64 string from Rime into an `Int16Array` of PCM16 samples.
 *
 * Rime's `ws2` endpoint returns base64-encoded PCM16 LE in each chunk.
 * `Buffer.from(base64, "base64")` yields a Node.js Buffer (a Uint8Array
 * subclass). For small payloads that Buffer may be carved out of Node's
 * shared allocation pool, so its `byteOffset` is NOT guaranteed to be 0 and
 * `bytes.buffer` may be larger than the payload — the view below is always
 * constructed with the explicit offset and length.
 *
 * The fast path is zero-copy. `Int16Array` requires a 2-byte-aligned offset,
 * so if the Buffer ever starts on an odd offset we copy the bytes into a
 * fresh buffer instead of letting the constructor throw a `RangeError`.
 *
 * The samples are interpreted in platform byte order (the view does no
 * byte swapping).
 *
 * @param data - Base64-encoded PCM16 payload.
 * @returns The decoded samples. A trailing odd byte is dropped; an empty or
 *   single-byte payload yields an empty array.
 */
function base64ToPcm(data: string): Int16Array {
  const bytes = Buffer.from(data, "base64");
  // Defensive: drop a trailing odd byte rather than throwing on misalignment.
  const sampleCount = Math.floor(bytes.byteLength / Int16Array.BYTES_PER_ELEMENT);
  if (sampleCount === 0) return new Int16Array(0);

  if (bytes.byteOffset % Int16Array.BYTES_PER_ELEMENT !== 0) {
    // Misaligned view into the pool: fall back to a private, aligned copy.
    const aligned = new Uint8Array(sampleCount * Int16Array.BYTES_PER_ELEMENT);
    aligned.set(bytes.subarray(0, aligned.byteLength));
    return new Int16Array(aligned.buffer);
  }

  return new Int16Array(bytes.buffer, bytes.byteOffset, sampleCount);
}
|
|
82
|
+
|
|
83
|
+
/**
 * JSON frame received from Rime's `ws2` endpoint.
 *
 * Audio arrives only in `chunk` frames. `timestamps` frames are
 * informational and are ignored for the current use case; `error` frames
 * carry a human-readable `message`.
 */
interface RimeMessage {
  type: "chunk" | "timestamps" | "error" | string;
  /** Base64-encoded PCM16 LE audio. Present when `type === "chunk"`. */
  data?: string;
  /** Context discriminator for the in-flight utterance. May be null. */
  contextId?: string | null;
  /** Error description. Present when `type === "error"`. */
  message?: string;
}
|
|
98
|
+
|
|
99
|
+
/**
 * Quiescence window in ms: once audio is flowing, `done` is emitted when no
 * further chunk has arrived for this long.
 */
const QUIESCENCE_MS = 500;

/**
 * Upper bound in ms on the wait for the FIRST audio chunk after `flush()`
 * before giving up and emitting `done`.
 *
 * Greetings and short replies take this path: `flush()` runs immediately
 * after `sendText()`, so time-to-first-byte can exceed the 500 ms quiescence
 * window. As soon as the first chunk arrives the shorter
 * {@link QUIESCENCE_MS} window takes over.
 */
const FIRST_AUDIO_TIMEOUT_MS = 5_000;
|
|
110
|
+
|
|
111
|
+
/**
 * Wait for the WebSocket to finish connecting.
 *
 * Resolves when `ws` emits `open`; rejects with a `tts_connect_failed` error
 * if `error` is emitted first. Whichever event wins detaches the other
 * event's one-shot listener.
 *
 * @param ws - Socket that is still connecting.
 */
function waitForOpen(ws: WebSocket): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    function handleOpen(): void {
      ws.removeListener("error", handleError);
      resolve();
    }

    function handleError(err: Error): void {
      ws.removeListener("open", handleOpen);
      const detail = err?.message ?? String(err);
      reject(makeTtsError("tts_connect_failed", `Rime TTS: connect failed: ${detail}`));
    }

    ws.once("open", handleOpen);
    ws.once("error", handleError);
  });
}
|
|
131
|
+
|
|
132
|
+
/**
 * Handle one incoming WebSocket message frame.
 *
 * Extracted into a top-level function to keep `open()` under the cognitive
 * complexity limit; session state is reached through the callbacks.
 *
 * Behavior by frame:
 * - unparseable JSON, or JSON that is not an object (e.g. `null`, a number):
 *   ignored;
 * - `chunk` with string `data`: decoded and emitted as `audio` when it holds
 *   at least one sample; if a done-timer is currently armed it is reset to
 *   the short quiescence window;
 * - `error`: emitted as a `tts_stream_error`;
 * - anything else (including `timestamps`): ignored.
 *
 * @param raw - Frame payload as delivered by the `ws` `message` event.
 * @param emitter - Session emitter that receives `audio` / `error` events.
 * @param armQuiescence - Restarts the short quiescence timer.
 * @param isActiveTimer - Whether a done-timer is currently armed.
 */
function handleRimeMessage(
  raw: WebSocket.Data,
  emitter: Emitter<TtsEvents>,
  armQuiescence: () => void,
  isActiveTimer: () => boolean,
): void {
  // Decode every payload shape `ws` can deliver. Calling `toString()` on a
  // fragment array or an ArrayBuffer would not yield the frame's text.
  let text: string;
  if (typeof raw === "string") {
    text = raw;
  } else if (Array.isArray(raw)) {
    text = Buffer.concat(raw).toString("utf8");
  } else if (raw instanceof ArrayBuffer) {
    text = Buffer.from(raw).toString("utf8");
  } else {
    text = raw.toString();
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(text);
  } catch {
    // Unparseable frame — ignore.
    return;
  }

  // `JSON.parse` happily returns `null` and primitives; reading `.type` off
  // `null` would throw inside the socket's message listener.
  if (typeof parsed !== "object" || parsed === null) return;
  const msg = parsed as RimeMessage;

  if (msg.type === "chunk" && typeof msg.data === "string") {
    const pcm = base64ToPcm(msg.data);
    if (pcm.length > 0) {
      emitter.emit("audio", pcm);
      // While we're waiting on a flush (long timer for first audio, or
      // short timer between chunks), each chunk resets to the short
      // quiescence window — so `done` fires only after audio stops.
      if (isActiveTimer()) armQuiescence();
    }
    return;
  }

  if (msg.type === "error") {
    emitter.emit(
      "error",
      makeTtsError("tts_stream_error", `Rime TTS: ${msg.message ?? "unknown error"}`),
    );
  }
  // Ignore `type === "timestamps"` and unknown message types.
}
|
|
171
|
+
|
|
172
|
+
/**
 * Build a {@link TtsOpener} from resolved Rime descriptor options.
 *
 * The returned opener's `open()`:
 * 1. resolves the API key (`openOpts.apiKey`, else `RIME_API_KEY`) and throws
 *    `tts_auth_failed` when neither is set;
 * 2. validates `openOpts.sampleRate` (throws `tts_connect_failed` if
 *    unsupported);
 * 3. connects one WebSocket to the `ws2` endpoint and waits for it to open;
 * 4. returns a session whose `sendText` / `flush` / `cancel` / `close` drive
 *    that socket and whose `on()` subscribes to `audio` / `done` / `error`.
 *
 * The session closes itself when `openOpts.signal` aborts.
 *
 * @param opts - Descriptor options; `model`, `language` and `voice` fall back
 *   to `"mistv2"`, `"eng"` and `RIME_DEFAULT_VOICE` respectively.
 */
export function openRime(opts: RimeOptions): TtsOpener {
  return {
    name: "rime",
    async open(openOpts: TtsOpenOptions): Promise<TtsSession> {
      // `||` on purpose: an empty-string key is treated as missing.
      const apiKey = openOpts.apiKey || process.env.RIME_API_KEY;
      if (!apiKey) {
        throw makeTtsError(
          "tts_auth_failed",
          "Rime TTS: missing API key. Set RIME_API_KEY in the agent env.",
        );
      }

      const sampleRate = assertSupportedSampleRate(openOpts.sampleRate);
      const model = opts.model ?? "mistv2";
      const lang = opts.language ?? "eng";
      const voice = opts.voice ?? RIME_DEFAULT_VOICE;

      // Construct the ws2 URL with query parameters.
      const url = `wss://users-ws.rime.ai/ws2?speaker=${encodeURIComponent(voice)}&modelId=${encodeURIComponent(model)}&audioFormat=pcm&samplingRate=${sampleRate}&lang=${encodeURIComponent(lang)}`;

      let ws: WebSocket;
      try {
        ws = new WebSocket(url, { headers: { Authorization: `Bearer ${apiKey}` } });
      } catch (cause) {
        throw makeTtsError(
          "tts_connect_failed",
          `Rime TTS: failed to create WebSocket: ${cause instanceof Error ? cause.message : String(cause)}`,
        );
      }

      await waitForOpen(ws);

      const emitter: Emitter<TtsEvents> = createNanoEvents<TtsEvents>();
      /** Set once by `close()`; every handler and method becomes a no-op afterwards. */
      let closed = false;
      /** True once `done` has been emitted for the current turn; reset by `sendText()`. */
      let doneEmitted = false;
      /**
       * After `flush()`, we arm a timer that fires `done`. Initial timeout is
       * `FIRST_AUDIO_TIMEOUT_MS` to give Rime headroom on TTFB; the first
       * chunk swaps it for a shorter `QUIESCENCE_MS` window that resets on
       * each subsequent chunk. `cancel()` emits `done` synchronously.
       */
      let quiescenceTimer: ReturnType<typeof setTimeout> | null = null;

      /** Disarm the pending done-timer, if any. */
      const clearQuiescence = () => {
        if (quiescenceTimer !== null) {
          clearTimeout(quiescenceTimer);
          quiescenceTimer = null;
        }
      };

      /** Emit `done` at most once per turn, and never after `close()`. */
      const emitDoneOnce = () => {
        clearQuiescence();
        if (doneEmitted || closed) return;
        doneEmitted = true;
        emitter.emit("done");
      };

      /** (Re)start the short between-chunks timer. */
      const armQuiescence = () => {
        clearQuiescence();
        quiescenceTimer = setTimeout(emitDoneOnce, QUIESCENCE_MS);
      };

      /** Start the long timer that bounds the wait for the first chunk. */
      const armFirstAudioTimer = () => {
        clearQuiescence();
        quiescenceTimer = setTimeout(emitDoneOnce, FIRST_AUDIO_TIMEOUT_MS);
      };

      ws.on("message", (raw: WebSocket.Data) => {
        if (closed) return;
        handleRimeMessage(raw, emitter, armQuiescence, () => quiescenceTimer !== null);
      });

      ws.on("error", (err: Error) => {
        if (closed) return;
        emitter.emit(
          "error",
          makeTtsError("tts_stream_error", `Rime TTS stream error: ${err?.message ?? String(err)}`),
        );
      });

      ws.on("close", () => {
        if (closed) return;
        // Unexpected server-side close — surface `done` so the pipeline
        // doesn't hang waiting for an utterance that will never complete.
        emitDoneOnce();
      });

      /** Abort-signal listener; kept as a named reference so `close()` can detach it. */
      const onAbort = () => void close();

      /** Idempotent teardown: stop timers, detach from the abort signal, close the socket. */
      const close = async (): Promise<void> => {
        if (closed) return;
        closed = true;
        clearQuiescence();
        // Without this the listener (and everything it closes over) stays
        // registered on the caller's signal until that signal aborts or is
        // collected, even though the session is already gone.
        openOpts.signal.removeEventListener("abort", onAbort);
        try {
          ws.close();
        } catch {
          // Swallow: caller has already decided to tear down.
        }
      };

      if (openOpts.signal.aborted) {
        void close();
      } else {
        openOpts.signal.addEventListener("abort", onAbort, { once: true });
      }

      const session: RimeSession = {
        /** Append text to Rime's synthesis buffer; starts a new turn. */
        sendText(text: string) {
          if (closed || text.length === 0) return;
          if (ws.readyState !== WebSocket.OPEN) return;
          // Reset done state at the start of a new turn.
          doneEmitted = false;
          ws.send(JSON.stringify({ text }));
        },

        /** Force synthesis of buffered text and start waiting for `done`. */
        flush() {
          if (closed) return;
          if (ws.readyState !== WebSocket.OPEN) return;
          // Force synthesis of any text buffered behind a missing terminal
          // punctuation: append a trailing `"."`. Sending the JSON `eos`
          // operation would close the connection, requiring a reconnect on
          // every turn — `"."` keeps the WS reusable. Use the longer
          // first-audio timer until the initial chunk arrives; the chunk
          // handler swaps it for short quiescence on each subsequent chunk.
          //
          // NOTE(review): `doneEmitted` is only reset by `sendText()`, so a
          // `flush()` that follows a completed turn without any non-empty
          // `sendText()` will never produce `done` — confirm the caller
          // never does that.
          ws.send(JSON.stringify({ text: "." }));
          armFirstAudioTimer();
        },

        /** Barge-in: drop Rime's buffer and end the turn immediately. */
        cancel() {
          if (closed) return;
          // Drop Rime's server-side buffer for barge-in.
          if (ws.readyState === WebSocket.OPEN) {
            ws.send(JSON.stringify({ operation: "clear" }));
          }
          // Emit `done` synchronously — the orchestrator's state machine
          // advances on `done`, and barge-in must not be microtask-deferred.
          emitDoneOnce();
        },

        on(event, fn) {
          return emitter.on(event, fn);
        },

        close,

        _ws: ws,
      };

      return session;
    },
  };
}
|
package/host/runtime-barrel.ts
CHANGED
|
@@ -17,8 +17,10 @@ export * from "./builtin-tools.ts";
|
|
|
17
17
|
export * from "./runtime.ts";
|
|
18
18
|
export * from "./runtime-config.ts";
|
|
19
19
|
export * from "./server.ts";
|
|
20
|
-
export * from "./session.ts";
|
|
21
|
-
export * from "./session-ctx.ts";
|
|
20
|
+
export * from "./session-core.ts";
|
|
22
21
|
export * from "./tool-executor.ts";
|
|
22
|
+
export * from "./transports/pipeline-transport.ts";
|
|
23
|
+
export * from "./transports/s2s-transport.ts";
|
|
24
|
+
export * from "./transports/types.ts";
|
|
23
25
|
export * from "./unstorage-kv.ts";
|
|
24
26
|
export * from "./ws-handler.ts";
|
package/host/runtime.test.ts
CHANGED
|
@@ -11,10 +11,10 @@ import {
|
|
|
11
11
|
createFakeTtsProvider,
|
|
12
12
|
} from "./_pipeline-test-fakes.ts";
|
|
13
13
|
import { CONFORMANCE_AGENT, testRuntime } from "./_runtime-conformance.ts";
|
|
14
|
-
import { flush, makeAgent,
|
|
14
|
+
import { flush, makeAgent, makeClientSink, makeMockHandle, silentLogger } from "./_test-utils.ts";
|
|
15
15
|
import { createRuntime } from "./runtime.ts";
|
|
16
|
-
import { _internals } from "./session.ts";
|
|
17
16
|
import { executeToolCall } from "./tool-executor.ts";
|
|
17
|
+
import { _internals } from "./transports/s2s-transport.ts";
|
|
18
18
|
import { createUnstorageKv } from "./unstorage-kv.ts";
|
|
19
19
|
|
|
20
20
|
describe("toAgentConfig", () => {
|
|
@@ -322,7 +322,9 @@ describe("executeToolCall", () => {
|
|
|
322
322
|
describe("createRuntime sandbox mode", () => {
|
|
323
323
|
test("uses provided executeTool and toolSchemas", async () => {
|
|
324
324
|
const mockExecuteTool = vi.fn(async () => "mocked-result");
|
|
325
|
-
const mockToolSchemas = [
|
|
325
|
+
const mockToolSchemas = [
|
|
326
|
+
{ type: "function" as const, name: "mock_tool", description: "A mock tool", parameters: {} },
|
|
327
|
+
];
|
|
326
328
|
|
|
327
329
|
const runtime = createRuntime({
|
|
328
330
|
agent: makeAgent(),
|
|
@@ -360,11 +362,7 @@ describe("createRuntime shutdown", () => {
|
|
|
360
362
|
|
|
361
363
|
test("shutdown stops active sessions gracefully", async () => {
|
|
362
364
|
const mockHandle = makeMockHandle();
|
|
363
|
-
const connectSpy = vi.spyOn(_internals, "connectS2s").
|
|
364
|
-
// Fire "ready" so session.start() resolves
|
|
365
|
-
setTimeout(() => mockHandle._fire("ready", { sessionId: "mock-sid" }), 0);
|
|
366
|
-
return mockHandle;
|
|
367
|
-
});
|
|
365
|
+
const connectSpy = vi.spyOn(_internals, "connectS2s").mockResolvedValue(mockHandle);
|
|
368
366
|
|
|
369
367
|
const agent = makeAgent();
|
|
370
368
|
const runtime = createRuntime({ agent, env: {}, logger: silentLogger });
|
|
@@ -392,10 +390,7 @@ describe("createRuntime shutdown", () => {
|
|
|
392
390
|
mockHandle.close = vi.fn(() => {
|
|
393
391
|
throw new Error("close failed");
|
|
394
392
|
});
|
|
395
|
-
const connectSpy = vi.spyOn(_internals, "connectS2s").
|
|
396
|
-
setTimeout(() => mockHandle._fire("ready", { sessionId: "mock-sid" }), 0);
|
|
397
|
-
return mockHandle;
|
|
398
|
-
});
|
|
393
|
+
const connectSpy = vi.spyOn(_internals, "connectS2s").mockResolvedValue(mockHandle);
|
|
399
394
|
|
|
400
395
|
const agent = makeAgent();
|
|
401
396
|
const runtime = createRuntime({ agent, env: {}, logger });
|
|
@@ -422,10 +417,7 @@ describe("createRuntime shutdown", () => {
|
|
|
422
417
|
mockHandle.close = vi.fn(() => {
|
|
423
418
|
// intentionally do nothing — session stop will hang
|
|
424
419
|
});
|
|
425
|
-
const connectSpy = vi.spyOn(_internals, "connectS2s").
|
|
426
|
-
setTimeout(() => mockHandle._fire("ready", { sessionId: "mock-sid" }), 0);
|
|
427
|
-
return mockHandle;
|
|
428
|
-
});
|
|
420
|
+
const connectSpy = vi.spyOn(_internals, "connectS2s").mockResolvedValue(mockHandle);
|
|
429
421
|
|
|
430
422
|
const agent = makeAgent();
|
|
431
423
|
const runtime = createRuntime({
|
|
@@ -483,12 +475,7 @@ describe("createRuntime createSession", () => {
|
|
|
483
475
|
test("createSession returns a Session object", () => {
|
|
484
476
|
const agent = makeAgent();
|
|
485
477
|
const runtime = createRuntime({ agent, env: {} });
|
|
486
|
-
const client =
|
|
487
|
-
open: true,
|
|
488
|
-
event: vi.fn(),
|
|
489
|
-
playAudioChunk: vi.fn(),
|
|
490
|
-
playAudioDone: vi.fn(),
|
|
491
|
-
};
|
|
478
|
+
const client = makeClientSink();
|
|
492
479
|
const session = runtime.createSession({
|
|
493
480
|
id: "test-session",
|
|
494
481
|
agent: agent.name,
|
|
@@ -506,12 +493,7 @@ describe("createRuntime createSession", () => {
|
|
|
506
493
|
test("createSession passes skipGreeting option", () => {
|
|
507
494
|
const agent = makeAgent();
|
|
508
495
|
const runtime = createRuntime({ agent, env: {} });
|
|
509
|
-
const client =
|
|
510
|
-
open: true,
|
|
511
|
-
event: vi.fn(),
|
|
512
|
-
playAudioChunk: vi.fn(),
|
|
513
|
-
playAudioDone: vi.fn(),
|
|
514
|
-
};
|
|
496
|
+
const client = makeClientSink();
|
|
515
497
|
// Should not throw when skipGreeting is set
|
|
516
498
|
const session = runtime.createSession({
|
|
517
499
|
id: "test-session",
|
|
@@ -525,12 +507,7 @@ describe("createRuntime createSession", () => {
|
|
|
525
507
|
test("createSession passes resumeFrom option", () => {
|
|
526
508
|
const agent = makeAgent();
|
|
527
509
|
const runtime = createRuntime({ agent, env: {} });
|
|
528
|
-
const client =
|
|
529
|
-
open: true,
|
|
530
|
-
event: vi.fn(),
|
|
531
|
-
playAudioChunk: vi.fn(),
|
|
532
|
-
playAudioDone: vi.fn(),
|
|
533
|
-
};
|
|
510
|
+
const client = makeClientSink();
|
|
534
511
|
const session = runtime.createSession({
|
|
535
512
|
id: "test-session",
|
|
536
513
|
agent: agent.name,
|
|
@@ -604,13 +581,8 @@ describe("createRuntime with custom options", () => {
|
|
|
604
581
|
agent,
|
|
605
582
|
env: { ASSEMBLYAI_API_KEY: "test-api-key" },
|
|
606
583
|
});
|
|
607
|
-
const client =
|
|
608
|
-
|
|
609
|
-
event: vi.fn(),
|
|
610
|
-
playAudioChunk: vi.fn(),
|
|
611
|
-
playAudioDone: vi.fn(),
|
|
612
|
-
};
|
|
613
|
-
// Should not throw — the API key gets passed to createS2sSession internally
|
|
584
|
+
const client = makeClientSink();
|
|
585
|
+
// Should not throw — the API key gets passed to createS2sTransport internally
|
|
614
586
|
const session = runtime.createSession({
|
|
615
587
|
id: "test-session",
|
|
616
588
|
agent: agent.name,
|
|
@@ -651,7 +623,7 @@ describe("Runtime — session routing", () => {
|
|
|
651
623
|
tts,
|
|
652
624
|
});
|
|
653
625
|
|
|
654
|
-
const client =
|
|
626
|
+
const client = makeClientSink();
|
|
655
627
|
const session = runtime.createSession({
|
|
656
628
|
id: "sess-pipeline",
|
|
657
629
|
agent: "test-agent",
|
|
@@ -677,10 +649,7 @@ describe("Runtime — session routing", () => {
|
|
|
677
649
|
|
|
678
650
|
test("manifest without stt/llm/tts routes to S2sSession (createWebSocket IS called)", async () => {
|
|
679
651
|
const mockHandle = makeMockHandle();
|
|
680
|
-
const connectSpy = vi.spyOn(_internals, "connectS2s").
|
|
681
|
-
setTimeout(() => mockHandle._fire("ready", { sessionId: "mock-sid" }), 0);
|
|
682
|
-
return mockHandle;
|
|
683
|
-
});
|
|
652
|
+
const connectSpy = vi.spyOn(_internals, "connectS2s").mockResolvedValue(mockHandle);
|
|
684
653
|
|
|
685
654
|
const createWebSocket = vi.fn();
|
|
686
655
|
const runtime = createRuntime({
|
|
@@ -690,7 +659,7 @@ describe("Runtime — session routing", () => {
|
|
|
690
659
|
createWebSocket,
|
|
691
660
|
});
|
|
692
661
|
|
|
693
|
-
const client =
|
|
662
|
+
const client = makeClientSink();
|
|
694
663
|
const session = runtime.createSession({
|
|
695
664
|
id: "sess-s2s",
|
|
696
665
|
agent: "test-agent",
|