@alexkroman1/aai 1.2.3 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +14 -12
- package/CHANGELOG.md +14 -0
- package/dist/host/_pipeline-test-fakes.d.ts +107 -0
- package/dist/host/pipeline-session-ctx.d.ts +24 -0
- package/dist/host/pipeline-session.d.ts +48 -0
- package/dist/host/providers/llm.d.ts +2 -0
- package/dist/host/providers/stt/assemblyai.d.ts +31 -0
- package/dist/host/providers/stt-barrel.d.ts +8 -0
- package/dist/host/providers/stt-barrel.js +92 -0
- package/dist/host/providers/stt.d.ts +2 -0
- package/dist/host/providers/tts/cartesia.d.ts +39 -0
- package/dist/host/providers/tts-barrel.d.ts +8 -0
- package/dist/host/providers/tts-barrel.js +182 -0
- package/dist/host/providers/tts.d.ts +2 -0
- package/dist/host/runtime-barrel.js +498 -80
- package/dist/host/runtime.d.ts +17 -0
- package/dist/host/s2s.d.ts +5 -0
- package/dist/host/session-ctx.d.ts +22 -4
- package/dist/host/to-vercel-tools.d.ts +44 -0
- package/dist/index.js +5 -0
- package/dist/sdk/_internal-types.d.ts +15 -1
- package/dist/sdk/define.d.ts +21 -0
- package/dist/sdk/manifest.d.ts +22 -0
- package/dist/sdk/protocol.d.ts +3 -3
- package/dist/sdk/providers.d.ts +70 -0
- package/dist/sdk/types.d.ts +16 -0
- package/exports-no-dev-deps.test.ts +39 -14
- package/host/_pipeline-test-fakes.ts +323 -0
- package/host/_test-utils.ts +1 -0
- package/host/integration/fixtures/README.md +49 -0
- package/host/integration/pipeline-reference.integration.test.ts +124 -0
- package/host/pipeline-session-ctx.test.ts +31 -0
- package/host/pipeline-session-ctx.ts +36 -0
- package/host/pipeline-session.test.ts +337 -0
- package/host/pipeline-session.ts +405 -0
- package/host/providers/llm.ts +3 -0
- package/host/providers/providers.test-d.ts +31 -0
- package/host/providers/stt/assemblyai.test.ts +100 -0
- package/host/providers/stt/assemblyai.ts +154 -0
- package/host/providers/stt/fixtures/assemblyai/basic-turn.json +30 -0
- package/host/providers/stt-barrel.ts +13 -0
- package/host/providers/stt.ts +3 -0
- package/host/providers/tts/cartesia.test.ts +210 -0
- package/host/providers/tts/cartesia.ts +251 -0
- package/host/providers/tts-barrel.ts +13 -0
- package/host/providers/tts.ts +3 -0
- package/host/runtime.test.ts +81 -1
- package/host/runtime.ts +61 -0
- package/host/s2s.test.ts +19 -0
- package/host/s2s.ts +10 -0
- package/host/session-ctx.ts +35 -8
- package/host/to-vercel-tools.test.ts +153 -0
- package/host/to-vercel-tools.ts +70 -0
- package/package.json +15 -1
- package/sdk/__snapshots__/exports.test.ts.snap +1 -0
- package/sdk/_internal-types.ts +16 -0
- package/sdk/define.test-d.ts +21 -0
- package/sdk/define.test.ts +33 -0
- package/sdk/define.ts +21 -0
- package/sdk/manifest.test-d.ts +14 -0
- package/sdk/manifest.test.ts +51 -0
- package/sdk/manifest.ts +39 -0
- package/sdk/providers.ts +90 -0
- package/sdk/types.ts +16 -0
- package/vitest.config.ts +1 -0
package/host/runtime.ts
CHANGED
|
@@ -14,9 +14,11 @@ import { DEFAULT_SHUTDOWN_TIMEOUT_MS } from "../sdk/constants.ts";
|
|
|
14
14
|
import type { Kv } from "../sdk/kv.ts";
|
|
15
15
|
import type { ClientSink } from "../sdk/protocol.ts";
|
|
16
16
|
import { buildReadyConfig, type ReadyConfig } from "../sdk/protocol.ts";
|
|
17
|
+
import type { LlmProvider, SttProvider, TtsProvider } from "../sdk/providers.ts";
|
|
17
18
|
import type { AgentDef } from "../sdk/types.ts";
|
|
18
19
|
import { toolError } from "../sdk/utils.ts";
|
|
19
20
|
import { resolveAllBuiltins } from "./builtin-tools.ts";
|
|
21
|
+
import { createPipelineSession } from "./pipeline-session.ts";
|
|
20
22
|
import type { Logger, S2SConfig } from "./runtime-config.ts";
|
|
21
23
|
import { consoleLogger, DEFAULT_S2S_CONFIG } from "./runtime-config.ts";
|
|
22
24
|
import type { CreateS2sWebSocket } from "./s2s.ts";
|
|
@@ -58,6 +60,18 @@ function createLocalKv(): Kv {
|
|
|
58
60
|
return createUnstorageKv({ storage: createStorage() });
|
|
59
61
|
}
|
|
60
62
|
|
|
63
|
+
/**
|
|
64
|
+
* Resolve an API key host-side for pipeline providers.
|
|
65
|
+
*
|
|
66
|
+
* Checks the agent's declared env first, then the host process env as a
|
|
67
|
+
* fallback. Returns `""` when absent — pipeline providers surface a clear
|
|
68
|
+
* `MissingCredentialsError` via their `open()` that the orchestrator
|
|
69
|
+
* converts to a `session.error` wire event.
|
|
70
|
+
*/
|
|
71
|
+
function resolveApiKey(envVar: string, env: Record<string, string>): string {
|
|
72
|
+
return env[envVar] ?? process.env[envVar] ?? "";
|
|
73
|
+
}
|
|
74
|
+
|
|
61
75
|
/**
|
|
62
76
|
* Configuration for {@link createRuntime}.
|
|
63
77
|
*
|
|
@@ -111,6 +125,22 @@ export type RuntimeOptions = {
|
|
|
111
125
|
* their own fetch wrapper.
|
|
112
126
|
*/
|
|
113
127
|
fetch?: typeof globalThis.fetch | undefined;
|
|
128
|
+
/**
|
|
129
|
+
* Pluggable STT provider. Must be set together with `llm` and `tts` to
|
|
130
|
+
* route sessions through the pipeline path; leave all three unset for
|
|
131
|
+
* the default AssemblyAI Streaming Speech-to-Speech (S2S) path.
|
|
132
|
+
*/
|
|
133
|
+
stt?: SttProvider | undefined;
|
|
134
|
+
/**
|
|
135
|
+
* Pluggable LLM provider (Vercel AI SDK `LanguageModel`). Must be set
|
|
136
|
+
* together with `stt` and `tts` to route sessions through the pipeline path.
|
|
137
|
+
*/
|
|
138
|
+
llm?: LlmProvider | undefined;
|
|
139
|
+
/**
|
|
140
|
+
* Pluggable TTS provider. Must be set together with `stt` and `llm` to
|
|
141
|
+
* route sessions through the pipeline path.
|
|
142
|
+
*/
|
|
143
|
+
tts?: TtsProvider | undefined;
|
|
114
144
|
};
|
|
115
145
|
|
|
116
146
|
/**
|
|
@@ -160,6 +190,14 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
|
|
|
160
190
|
sessionStartTimeoutMs,
|
|
161
191
|
shutdownTimeoutMs = DEFAULT_SHUTDOWN_TIMEOUT_MS,
|
|
162
192
|
} = opts;
|
|
193
|
+
// Derive session mode from the provider triple: all three set ⇒ pipeline,
|
|
194
|
+
// none set ⇒ s2s. Anything in-between is a configuration error.
|
|
195
|
+
const providerCount =
|
|
196
|
+
(opts.stt != null ? 1 : 0) + (opts.llm != null ? 1 : 0) + (opts.tts != null ? 1 : 0);
|
|
197
|
+
if (providerCount !== 0 && providerCount !== 3) {
|
|
198
|
+
throw new Error("stt, llm, and tts must be set together");
|
|
199
|
+
}
|
|
200
|
+
const mode: "s2s" | "pipeline" = providerCount === 3 ? "pipeline" : "s2s";
|
|
163
201
|
const agentConfig = toAgentConfig(agent);
|
|
164
202
|
const sessions = new Map<string, Session>();
|
|
165
203
|
const sinkMap = new Map<string, ClientSink>();
|
|
@@ -241,6 +279,29 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
|
|
|
241
279
|
resumeFrom?: string;
|
|
242
280
|
}): Session {
|
|
243
281
|
sinkMap.set(sessionOpts.id, sessionOpts.client);
|
|
282
|
+
if (mode === "pipeline") {
|
|
283
|
+
// biome-ignore lint/style/noNonNullAssertion: providerCount === 3 ⇒ all set
|
|
284
|
+
const stt = opts.stt!;
|
|
285
|
+
// biome-ignore lint/style/noNonNullAssertion: providerCount === 3 ⇒ all set
|
|
286
|
+
const llm = opts.llm!;
|
|
287
|
+
// biome-ignore lint/style/noNonNullAssertion: providerCount === 3 ⇒ all set
|
|
288
|
+
const tts = opts.tts!;
|
|
289
|
+
return createPipelineSession({
|
|
290
|
+
id: sessionOpts.id,
|
|
291
|
+
agent: sessionOpts.agent,
|
|
292
|
+
client: sessionOpts.client,
|
|
293
|
+
agentConfig,
|
|
294
|
+
toolSchemas,
|
|
295
|
+
toolGuidance,
|
|
296
|
+
executeTool,
|
|
297
|
+
stt,
|
|
298
|
+
llm,
|
|
299
|
+
tts,
|
|
300
|
+
sttApiKey: resolveApiKey("ASSEMBLYAI_API_KEY", env),
|
|
301
|
+
ttsApiKey: resolveApiKey("CARTESIA_API_KEY", env),
|
|
302
|
+
logger,
|
|
303
|
+
});
|
|
304
|
+
}
|
|
244
305
|
const apiKey = env.ASSEMBLYAI_API_KEY ?? "";
|
|
245
306
|
return createS2sSession({
|
|
246
307
|
id: sessionOpts.id,
|
package/host/s2s.test.ts
CHANGED
|
@@ -66,6 +66,7 @@ describe("connectS2s", () => {
|
|
|
66
66
|
expect(handle).toEqual(
|
|
67
67
|
expect.objectContaining({
|
|
68
68
|
sendAudio: expect.any(Function),
|
|
69
|
+
sendAudioRaw: expect.any(Function),
|
|
69
70
|
sendToolResult: expect.any(Function),
|
|
70
71
|
updateSession: expect.any(Function),
|
|
71
72
|
resumeSession: expect.any(Function),
|
|
@@ -125,6 +126,24 @@ describe("connectS2s", () => {
|
|
|
125
126
|
expect(raw.send).not.toHaveBeenCalled();
|
|
126
127
|
});
|
|
127
128
|
|
|
129
|
+
test("sendAudioRaw forwards the exact string to the socket", async () => {
|
|
130
|
+
const { raw, handle } = await setupHandle();
|
|
131
|
+
|
|
132
|
+
const frame = '{"type":"input.audio","audio":"abc"}';
|
|
133
|
+
handle.sendAudioRaw(frame);
|
|
134
|
+
|
|
135
|
+
expect(raw.send).toHaveBeenCalledOnce();
|
|
136
|
+
expect(raw.send.mock.calls[0]?.[0]).toBe(frame);
|
|
137
|
+
});
|
|
138
|
+
|
|
139
|
+
test("sendAudioRaw is no-op when ws is not open", async () => {
|
|
140
|
+
const { raw, handle } = await setupHandle();
|
|
141
|
+
raw.readyState = 3; // CLOSED
|
|
142
|
+
|
|
143
|
+
handle.sendAudioRaw('{"type":"input.audio","audio":"abc"}');
|
|
144
|
+
expect(raw.send).not.toHaveBeenCalled();
|
|
145
|
+
});
|
|
146
|
+
|
|
128
147
|
test("sendToolResult sends tool.result message", async () => {
|
|
129
148
|
const { raw, handle } = await setupHandle();
|
|
130
149
|
|
package/host/s2s.ts
CHANGED
|
@@ -159,6 +159,11 @@ export type S2sEvents = {
|
|
|
159
159
|
export type S2sHandle = {
|
|
160
160
|
on<K extends keyof S2sEvents>(event: K, cb: S2sEvents[K]): Unsubscribe;
|
|
161
161
|
sendAudio(audio: Uint8Array): void;
|
|
162
|
+
/**
|
|
163
|
+
* Send a pre-encoded audio wire frame. For perf-critical callers (load tests)
|
|
164
|
+
* that batch-encode up front. Skips logging; caller owns wire format.
|
|
165
|
+
*/
|
|
166
|
+
sendAudioRaw(jsonFrame: string): void;
|
|
162
167
|
sendToolResult(callId: string, result: string): void;
|
|
163
168
|
updateSession(config: S2sSessionConfig): void;
|
|
164
169
|
resumeSession(sessionId: string): void;
|
|
@@ -212,6 +217,11 @@ export function connectS2s(opts: ConnectS2sOptions): Promise<S2sHandle> {
|
|
|
212
217
|
ws.send(`{"type":"input.audio","audio":"${uint8ToBase64(audio)}"}`);
|
|
213
218
|
},
|
|
214
219
|
|
|
220
|
+
sendAudioRaw(jsonFrame: string): void {
|
|
221
|
+
if (ws.readyState !== WS_OPEN) return;
|
|
222
|
+
ws.send(jsonFrame);
|
|
223
|
+
},
|
|
224
|
+
|
|
215
225
|
sendToolResult(callId: string, result: string): void {
|
|
216
226
|
const msg = { type: "tool.result", call_id: callId, result };
|
|
217
227
|
log.info("S2S >> tool.result", { call_id: callId, resultLength: result.length });
|
package/host/session-ctx.ts
CHANGED
|
@@ -30,15 +30,18 @@ export type SessionDeps = {
|
|
|
30
30
|
};
|
|
31
31
|
|
|
32
32
|
/**
|
|
33
|
-
*
|
|
33
|
+
* Transport-agnostic session context shared by S2S and pipeline sessions.
|
|
34
|
+
*
|
|
35
|
+
* Owns reply lifecycle, conversation history (with sliding-window truncation),
|
|
36
|
+
* and per-turn tool-call step enforcement. Transport-specific fields (e.g.
|
|
37
|
+
* `s2s` for S2S, `stt`/`tts` for the pipeline) live on the extending types.
|
|
34
38
|
*
|
|
35
39
|
* Split into three layers:
|
|
36
40
|
* - {@link SessionDeps} — immutable dependencies (set once)
|
|
37
41
|
* - {@link ReplyState} via `reply` — per-reply mutable state (reset on beginReply/cancelReply)
|
|
38
|
-
* - Remaining fields —
|
|
42
|
+
* - Remaining fields — conversation and lifecycle methods
|
|
39
43
|
*/
|
|
40
|
-
export type S2sSessionCtx = SessionDeps & {
|
|
41
|
-
s2s: S2sHandle | null;
|
|
44
|
+
export type BaseSessionCtx = SessionDeps & {
|
|
42
45
|
reply: ReplyState;
|
|
43
46
|
turnPromise: Promise<void> | null;
|
|
44
47
|
conversationMessages: Message[];
|
|
@@ -50,7 +53,14 @@ export type S2sSessionCtx = SessionDeps & {
|
|
|
50
53
|
chainTurn(p: Promise<void>): void;
|
|
51
54
|
};
|
|
52
55
|
|
|
53
|
-
|
|
56
|
+
/**
|
|
57
|
+
* S2S session context — {@link BaseSessionCtx} plus the S2S WebSocket handle.
|
|
58
|
+
*/
|
|
59
|
+
export type S2sSessionCtx = BaseSessionCtx & {
|
|
60
|
+
s2s: S2sHandle | null;
|
|
61
|
+
};
|
|
62
|
+
|
|
63
|
+
export function _buildBaseCtx(opts: {
|
|
54
64
|
id: string;
|
|
55
65
|
agent: string;
|
|
56
66
|
client: ClientSink;
|
|
@@ -58,12 +68,11 @@ export function buildCtx(opts: {
|
|
|
58
68
|
executeTool: ExecuteTool;
|
|
59
69
|
log: Logger;
|
|
60
70
|
maxHistory?: number | undefined;
|
|
61
|
-
}): S2sSessionCtx {
|
|
71
|
+
}): BaseSessionCtx {
|
|
62
72
|
const { agentConfig, log } = opts;
|
|
63
73
|
const maxHistory = opts.maxHistory ?? DEFAULT_MAX_HISTORY;
|
|
64
|
-
const ctx: S2sSessionCtx = {
|
|
74
|
+
const ctx: BaseSessionCtx = {
|
|
65
75
|
...opts,
|
|
66
|
-
s2s: null,
|
|
67
76
|
reply: { pendingTools: [], toolCallCount: 0, currentReplyId: null },
|
|
68
77
|
turnPromise: null,
|
|
69
78
|
conversationMessages: [],
|
|
@@ -105,3 +114,21 @@ export function buildCtx(opts: {
|
|
|
105
114
|
};
|
|
106
115
|
return ctx;
|
|
107
116
|
}
|
|
117
|
+
|
|
118
|
+
export function buildCtx(opts: {
|
|
119
|
+
id: string;
|
|
120
|
+
agent: string;
|
|
121
|
+
client: ClientSink;
|
|
122
|
+
agentConfig: AgentConfig;
|
|
123
|
+
executeTool: ExecuteTool;
|
|
124
|
+
log: Logger;
|
|
125
|
+
maxHistory?: number | undefined;
|
|
126
|
+
}): S2sSessionCtx {
|
|
127
|
+
// Mutate the base ctx in place rather than spreading into a new object —
|
|
128
|
+
// the helper methods close over the base ctx reference, so spreading would
|
|
129
|
+
// leave them writing to an orphan object (e.g. `beginReply` would mutate
|
|
130
|
+
// the base `reply`, not the spread copy's `reply`).
|
|
131
|
+
const base = _buildBaseCtx(opts) as S2sSessionCtx;
|
|
132
|
+
base.s2s = null;
|
|
133
|
+
return base;
|
|
134
|
+
}
|
|
package/host/to-vercel-tools.test.ts
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
import { describe, expect, test, vi } from "vitest";
|
|
3
|
+
import type { ToolSchema } from "../sdk/_internal-types.ts";
|
|
4
|
+
import { toVercelTools } from "./to-vercel-tools.ts";
|
|
5
|
+
|
|
6
|
+
const schemas: ToolSchema[] = [
|
|
7
|
+
{
|
|
8
|
+
name: "get_weather",
|
|
9
|
+
description: "Look up the weather.",
|
|
10
|
+
parameters: {
|
|
11
|
+
type: "object",
|
|
12
|
+
properties: { city: { type: "string" } },
|
|
13
|
+
required: ["city"],
|
|
14
|
+
},
|
|
15
|
+
},
|
|
16
|
+
];
|
|
17
|
+
|
|
18
|
+
describe("toVercelTools", () => {
|
|
19
|
+
test("produces one Vercel AI SDK tool per schema, keyed by name", () => {
|
|
20
|
+
const executeTool = vi.fn(async () => "sunny");
|
|
21
|
+
const tools = toVercelTools(schemas, {
|
|
22
|
+
executeTool,
|
|
23
|
+
sessionId: "s1",
|
|
24
|
+
messages: () => [],
|
|
25
|
+
});
|
|
26
|
+
expect(Object.keys(tools)).toEqual(["get_weather"]);
|
|
27
|
+
expect(tools.get_weather).toMatchObject({
|
|
28
|
+
description: "Look up the weather.",
|
|
29
|
+
});
|
|
30
|
+
});
|
|
31
|
+
|
|
32
|
+
test("execute delegates to ctx.executeTool with (name, args, sessionId, messages)", async () => {
|
|
33
|
+
const executeTool = vi.fn(async () => "rainy");
|
|
34
|
+
const tools = toVercelTools(schemas, {
|
|
35
|
+
executeTool,
|
|
36
|
+
sessionId: "sess-42",
|
|
37
|
+
messages: () => [{ role: "user", content: "?" }],
|
|
38
|
+
});
|
|
39
|
+
const result = await tools.get_weather?.execute?.(
|
|
40
|
+
{ city: "SF" },
|
|
41
|
+
{ toolCallId: "tc-1", messages: [] },
|
|
42
|
+
);
|
|
43
|
+
expect(executeTool).toHaveBeenCalledWith(
|
|
44
|
+
"get_weather",
|
|
45
|
+
{ city: "SF" },
|
|
46
|
+
"sess-42",
|
|
47
|
+
[{ role: "user", content: "?" }],
|
|
48
|
+
{ toolCallId: "tc-1" },
|
|
49
|
+
);
|
|
50
|
+
expect(result).toBe("rainy");
|
|
51
|
+
});
|
|
52
|
+
|
|
53
|
+
test("execute passes through abort signal when provided", async () => {
|
|
54
|
+
const controller = new AbortController();
|
|
55
|
+
const executeTool = vi.fn(
|
|
56
|
+
async (
|
|
57
|
+
_n: string,
|
|
58
|
+
_a: Readonly<Record<string, unknown>>,
|
|
59
|
+
_s?: string,
|
|
60
|
+
_m?: readonly unknown[],
|
|
61
|
+
opts?: { signal?: AbortSignal },
|
|
62
|
+
) => {
|
|
63
|
+
expect(opts?.signal).toBe(controller.signal);
|
|
64
|
+
return "ok";
|
|
65
|
+
},
|
|
66
|
+
);
|
|
67
|
+
const tools = toVercelTools(schemas, {
|
|
68
|
+
executeTool,
|
|
69
|
+
sessionId: "s",
|
|
70
|
+
messages: () => [],
|
|
71
|
+
signal: controller.signal,
|
|
72
|
+
});
|
|
73
|
+
await tools.get_weather?.execute?.({ city: "NY" }, { toolCallId: "tc-2", messages: [] });
|
|
74
|
+
expect(executeTool).toHaveBeenCalledTimes(1);
|
|
75
|
+
});
|
|
76
|
+
|
|
77
|
+
test("execute prefers options.abortSignal over ctx.signal", async () => {
|
|
78
|
+
const ctxController = new AbortController();
|
|
79
|
+
const callController = new AbortController();
|
|
80
|
+
let receivedSignal: AbortSignal | undefined;
|
|
81
|
+
const executeTool = vi.fn(
|
|
82
|
+
async (
|
|
83
|
+
_n: string,
|
|
84
|
+
_a: Readonly<Record<string, unknown>>,
|
|
85
|
+
_s?: string,
|
|
86
|
+
_m?: readonly unknown[],
|
|
87
|
+
opts?: { signal?: AbortSignal },
|
|
88
|
+
) => {
|
|
89
|
+
receivedSignal = opts?.signal;
|
|
90
|
+
return "ok";
|
|
91
|
+
},
|
|
92
|
+
);
|
|
93
|
+
const tools = toVercelTools(schemas, {
|
|
94
|
+
executeTool,
|
|
95
|
+
sessionId: "s",
|
|
96
|
+
messages: () => [],
|
|
97
|
+
signal: ctxController.signal,
|
|
98
|
+
});
|
|
99
|
+
await tools.get_weather?.execute?.(
|
|
100
|
+
{ city: "NY" },
|
|
101
|
+
{ toolCallId: "tc-1", messages: [], abortSignal: callController.signal },
|
|
102
|
+
);
|
|
103
|
+
expect(receivedSignal).toBe(callController.signal);
|
|
104
|
+
});
|
|
105
|
+
|
|
106
|
+
test("execute falls back to ctx.signal when options.abortSignal is absent", async () => {
|
|
107
|
+
const ctxController = new AbortController();
|
|
108
|
+
let receivedSignal: AbortSignal | undefined;
|
|
109
|
+
const executeTool = vi.fn(
|
|
110
|
+
async (
|
|
111
|
+
_n: string,
|
|
112
|
+
_a: Readonly<Record<string, unknown>>,
|
|
113
|
+
_s?: string,
|
|
114
|
+
_m?: readonly unknown[],
|
|
115
|
+
opts?: { signal?: AbortSignal },
|
|
116
|
+
) => {
|
|
117
|
+
receivedSignal = opts?.signal;
|
|
118
|
+
return "ok";
|
|
119
|
+
},
|
|
120
|
+
);
|
|
121
|
+
const tools = toVercelTools(schemas, {
|
|
122
|
+
executeTool,
|
|
123
|
+
sessionId: "s",
|
|
124
|
+
messages: () => [],
|
|
125
|
+
signal: ctxController.signal,
|
|
126
|
+
});
|
|
127
|
+
await tools.get_weather?.execute?.({ city: "NY" }, { toolCallId: "tc-2", messages: [] });
|
|
128
|
+
expect(receivedSignal).toBe(ctxController.signal);
|
|
129
|
+
});
|
|
130
|
+
|
|
131
|
+
test("execute propagates toolCallId from options", async () => {
|
|
132
|
+
let receivedCallId: string | undefined;
|
|
133
|
+
const executeTool = vi.fn(
|
|
134
|
+
async (
|
|
135
|
+
_n: string,
|
|
136
|
+
_a: Readonly<Record<string, unknown>>,
|
|
137
|
+
_s?: string,
|
|
138
|
+
_m?: readonly unknown[],
|
|
139
|
+
opts?: { toolCallId?: string },
|
|
140
|
+
) => {
|
|
141
|
+
receivedCallId = opts?.toolCallId;
|
|
142
|
+
return "ok";
|
|
143
|
+
},
|
|
144
|
+
);
|
|
145
|
+
const tools = toVercelTools(schemas, {
|
|
146
|
+
executeTool,
|
|
147
|
+
sessionId: "s",
|
|
148
|
+
messages: () => [],
|
|
149
|
+
});
|
|
150
|
+
await tools.get_weather?.execute?.({ city: "NY" }, { toolCallId: "tc-3", messages: [] });
|
|
151
|
+
expect(receivedCallId).toBe("tc-3");
|
|
152
|
+
});
|
|
153
|
+
});
|
|
package/host/to-vercel-tools.ts
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
/**
|
|
3
|
+
* Converts agent {@link ToolSchema}[] to Vercel AI SDK tools with `execute`
|
|
4
|
+
* delegation to the agent's {@link ExecuteTool} function.
|
|
5
|
+
*
|
|
6
|
+
* The pipeline orchestrator passes the output to `streamText({ tools })`.
|
|
7
|
+
* Each produced tool's `execute` closure calls
|
|
8
|
+
* `ctx.executeTool(name, args, sessionId, messages(), { signal, toolCallId })`,
|
|
9
|
+
* so the existing agent tool infrastructure (argument validation, KV, hooks,
|
|
10
|
+
* timeout) remains the single source of truth for tool behavior.
|
|
11
|
+
*
|
|
12
|
+
* Per-call `options.abortSignal` (forwarded by `streamText` when the
|
|
13
|
+
* outer turn is aborted, e.g. barge-in) takes precedence over the
|
|
14
|
+
* bag-level `ctx.signal` so individual invocations respect streamText
|
|
15
|
+
* aborts.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { jsonSchema, type Tool, type ToolExecutionOptions, tool } from "ai";
|
|
19
|
+
import type { ExecuteTool, ExecuteToolOptions, ToolSchema } from "../sdk/_internal-types.ts";
|
|
20
|
+
import type { Message } from "../sdk/types.ts";
|
|
21
|
+
|
|
22
|
+
export interface ToVercelToolsContext {
|
|
23
|
+
/** The agent's tool-execution function (from the runtime). */
|
|
24
|
+
executeTool: ExecuteTool;
|
|
25
|
+
/** Session id threaded to {@link executeTool}. */
|
|
26
|
+
sessionId: string;
|
|
27
|
+
/**
|
|
28
|
+
* Returns the current conversation history at call-time. Called per
|
|
29
|
+
* tool invocation so late calls see fresh state instead of a snapshot
|
|
30
|
+
* captured when the tool bag was built.
|
|
31
|
+
*/
|
|
32
|
+
messages: () => readonly Message[];
|
|
33
|
+
/**
|
|
34
|
+
* Bag-level abort signal. Used as a fallback when the per-call
|
|
35
|
+
* `options.abortSignal` from Vercel's `ToolExecutionOptions` is absent.
|
|
36
|
+
*/
|
|
37
|
+
signal?: AbortSignal;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
/**
|
|
41
|
+
* Convert an array of {@link ToolSchema} to a Vercel AI SDK `ToolSet`
|
|
42
|
+
* (record keyed by tool name).
|
|
43
|
+
*
|
|
44
|
+
* Uses the v6 `tool()` helper with `inputSchema: jsonSchema(...)` wrapping
|
|
45
|
+
* the agent's JSON Schema `parameters`. Execution is delegated to
|
|
46
|
+
* `ctx.executeTool` so validation, KV, timeouts, and hooks keep working.
|
|
47
|
+
*/
|
|
48
|
+
export function toVercelTools(
|
|
49
|
+
schemas: readonly ToolSchema[],
|
|
50
|
+
ctx: ToVercelToolsContext,
|
|
51
|
+
): Record<string, Tool> {
|
|
52
|
+
const out: Record<string, Tool> = {};
|
|
53
|
+
for (const schema of schemas) {
|
|
54
|
+
out[schema.name] = tool({
|
|
55
|
+
description: schema.description,
|
|
56
|
+
inputSchema: jsonSchema(schema.parameters),
|
|
57
|
+
execute: async (args: unknown, options: ToolExecutionOptions) => {
|
|
58
|
+
const input = (args ?? {}) as Readonly<Record<string, unknown>>;
|
|
59
|
+
// Prefer the per-call abortSignal forwarded by streamText over the
|
|
60
|
+
// bag-level ctx.signal so individual invocations respect aborts.
|
|
61
|
+
const signal = options.abortSignal ?? ctx.signal;
|
|
62
|
+
const opts: ExecuteToolOptions = {};
|
|
63
|
+
if (signal !== undefined) opts.signal = signal;
|
|
64
|
+
if (options.toolCallId !== undefined) opts.toolCallId = options.toolCallId;
|
|
65
|
+
return ctx.executeTool(schema.name, input, ctx.sessionId, ctx.messages(), opts);
|
|
66
|
+
},
|
|
67
|
+
});
|
|
68
|
+
}
|
|
69
|
+
return out;
|
|
70
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@alexkroman1/aai",
|
|
3
|
-
"version": "1.2.3",
|
|
3
|
+
"version": "1.3.0",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"exports": {
|
|
6
6
|
".": {
|
|
@@ -22,9 +22,22 @@
|
|
|
22
22
|
"@dev/source": "./sdk/manifest-barrel.ts",
|
|
23
23
|
"types": "./dist/sdk/manifest-barrel.d.ts",
|
|
24
24
|
"import": "./dist/sdk/manifest-barrel.js"
|
|
25
|
+
},
|
|
26
|
+
"./stt": {
|
|
27
|
+
"@dev/source": "./host/providers/stt-barrel.ts",
|
|
28
|
+
"types": "./dist/host/providers/stt-barrel.d.ts",
|
|
29
|
+
"import": "./dist/host/providers/stt-barrel.js"
|
|
30
|
+
},
|
|
31
|
+
"./tts": {
|
|
32
|
+
"@dev/source": "./host/providers/tts-barrel.ts",
|
|
33
|
+
"types": "./dist/host/providers/tts-barrel.d.ts",
|
|
34
|
+
"import": "./dist/host/providers/tts-barrel.js"
|
|
25
35
|
}
|
|
26
36
|
},
|
|
27
37
|
"dependencies": {
|
|
38
|
+
"@cartesia/cartesia-js": "^3.0.0",
|
|
39
|
+
"ai": "^6.0.161",
|
|
40
|
+
"assemblyai": "^4.30.0",
|
|
28
41
|
"escape-html": "^1.0.3",
|
|
29
42
|
"html-to-text": "^9.0.5",
|
|
30
43
|
"mime-types": "^3.0.2",
|
|
@@ -35,6 +48,7 @@
|
|
|
35
48
|
"zod": "^4.3.6"
|
|
36
49
|
},
|
|
37
50
|
"devDependencies": {
|
|
51
|
+
"@ai-sdk/openai": "^3.0.0",
|
|
38
52
|
"@types/escape-html": "^1.0.4",
|
|
39
53
|
"@types/html-to-text": "^9.0.4",
|
|
40
54
|
"@types/json-schema": "^7.0.15",
|
package/sdk/_internal-types.ts
CHANGED
|
@@ -10,6 +10,21 @@ import { z } from "zod";
|
|
|
10
10
|
import type { Message } from "./types.ts";
|
|
11
11
|
import { BuiltinToolSchema, ToolChoiceSchema, type ToolDef } from "./types.ts";
|
|
12
12
|
|
|
13
|
+
/**
|
|
14
|
+
* Options forwarded to an {@link ExecuteTool} invocation.
|
|
15
|
+
*
|
|
16
|
+
* Primarily used by the pipeline orchestrator (streamText tool loop) to
|
|
17
|
+
* thread an {@link AbortSignal} into tool execution. The S2S voice path
|
|
18
|
+
* does not pass these options today — recipients must treat the whole
|
|
19
|
+
* bag as optional.
|
|
20
|
+
*/
|
|
21
|
+
export interface ExecuteToolOptions {
|
|
22
|
+
/** Abort signal bound to the enclosing LLM turn / request. */
|
|
23
|
+
signal?: AbortSignal;
|
|
24
|
+
/** Vercel AI SDK tool-call ID for this invocation. Useful for tracing and correlation. */
|
|
25
|
+
toolCallId?: string;
|
|
26
|
+
}
|
|
27
|
+
|
|
13
28
|
/**
|
|
14
29
|
* Function signature for executing a tool by name.
|
|
15
30
|
*
|
|
@@ -21,6 +36,7 @@ export type ExecuteTool = (
|
|
|
21
36
|
args: Readonly<Record<string, unknown>>,
|
|
22
37
|
sessionId?: string,
|
|
23
38
|
messages?: readonly Message[],
|
|
39
|
+
opts?: ExecuteToolOptions,
|
|
24
40
|
) => Promise<string>;
|
|
25
41
|
|
|
26
42
|
// ─── AgentConfig ────────────────────────────────────────────────────────────
|
|
package/sdk/define.test-d.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
import { expectTypeOf, test } from "vitest";
|
|
3
|
+
import { agent } from "./define.ts";
|
|
4
|
+
import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
|
|
5
|
+
|
|
6
|
+
test("agent() accepts stt/llm/tts optional fields", () => {
|
|
7
|
+
const stt = {} as SttProvider;
|
|
8
|
+
const llm = {} as LlmProvider;
|
|
9
|
+
const tts = {} as TtsProvider;
|
|
10
|
+
const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
|
|
11
|
+
expectTypeOf(def.stt).toEqualTypeOf<SttProvider | undefined>();
|
|
12
|
+
expectTypeOf(def.llm).toEqualTypeOf<LlmProvider | undefined>();
|
|
13
|
+
expectTypeOf(def.tts).toEqualTypeOf<TtsProvider | undefined>();
|
|
14
|
+
});
|
|
15
|
+
|
|
16
|
+
test("agent() without stt/llm/tts is still legal (s2s mode)", () => {
|
|
17
|
+
const def = agent({ name: "t", systemPrompt: "p" });
|
|
18
|
+
expectTypeOf(def.stt).toEqualTypeOf<SttProvider | undefined>();
|
|
19
|
+
expectTypeOf(def.llm).toEqualTypeOf<LlmProvider | undefined>();
|
|
20
|
+
expectTypeOf(def.tts).toEqualTypeOf<TtsProvider | undefined>();
|
|
21
|
+
});
|
package/sdk/define.test.ts
CHANGED
|
@@ -2,6 +2,8 @@
|
|
|
2
2
|
import { describe, expect, test } from "vitest";
|
|
3
3
|
import { z } from "zod";
|
|
4
4
|
import { agent, tool } from "./define.ts";
|
|
5
|
+
import { parseManifest } from "./manifest.ts";
|
|
6
|
+
import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
|
|
5
7
|
|
|
6
8
|
describe("tool()", () => {
|
|
7
9
|
test("returns the definition unchanged", () => {
|
|
@@ -54,4 +56,35 @@ describe("agent()", () => {
|
|
|
54
56
|
expect(def.tools.greet).toBe(greetTool);
|
|
55
57
|
expect(def.builtinTools).toEqual(["web_search"]);
|
|
56
58
|
});
|
|
59
|
+
|
|
60
|
+
test("preserves stt/llm/tts providers on the returned def", () => {
|
|
61
|
+
const stt = { name: "fake-stt", open: async () => ({}) } as unknown as SttProvider;
|
|
62
|
+
const tts = { name: "fake-tts", open: async () => ({}) } as unknown as TtsProvider;
|
|
63
|
+
const llm = {} as LlmProvider;
|
|
64
|
+
const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
|
|
65
|
+
expect(def.stt).toBe(stt);
|
|
66
|
+
expect(def.llm).toBe(llm);
|
|
67
|
+
expect(def.tts).toBe(tts);
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
test("stt/llm/tts flow through parseManifest to mode 'pipeline'", () => {
|
|
71
|
+
const stt = { name: "fake-stt", open: async () => ({}) } as unknown as SttProvider;
|
|
72
|
+
const tts = { name: "fake-tts", open: async () => ({}) } as unknown as TtsProvider;
|
|
73
|
+
const llm = {} as LlmProvider;
|
|
74
|
+
const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
|
|
75
|
+
const parsed = parseManifest(def);
|
|
76
|
+
expect(parsed.mode).toBe("pipeline");
|
|
77
|
+
expect(parsed.stt).toBe(stt);
|
|
78
|
+
expect(parsed.llm).toBe(llm);
|
|
79
|
+
expect(parsed.tts).toBe(tts);
|
|
80
|
+
});
|
|
81
|
+
|
|
82
|
+
test("agent without providers resolves to mode 's2s'", () => {
|
|
83
|
+
const def = agent({ name: "t", systemPrompt: "p" });
|
|
84
|
+
const parsed = parseManifest(def);
|
|
85
|
+
expect(parsed.mode).toBe("s2s");
|
|
86
|
+
expect(parsed.stt).toBeUndefined();
|
|
87
|
+
expect(parsed.llm).toBeUndefined();
|
|
88
|
+
expect(parsed.tts).toBeUndefined();
|
|
89
|
+
});
|
|
57
90
|
});
|
package/sdk/define.ts
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
*/
|
|
5
5
|
|
|
6
6
|
import type { z } from "zod";
|
|
7
|
+
import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
|
|
7
8
|
import {
|
|
8
9
|
type AgentDef,
|
|
9
10
|
type BuiltinTool,
|
|
@@ -65,6 +66,11 @@ export function tool<P extends z.ZodObject<z.ZodRawShape>>(def: {
|
|
|
65
66
|
* });
|
|
66
67
|
* ```
|
|
67
68
|
*
|
|
69
|
+
* @remarks
|
|
70
|
+
* Pipeline mode: pass `stt`, `llm`, and `tts` together to switch from the
|
|
71
|
+
* default AssemblyAI Streaming Speech-to-Speech path to a pluggable
|
|
72
|
+
* STT → LLM → TTS pipeline. All three must be set (or all left unset).
|
|
73
|
+
*
|
|
68
74
|
* @public
|
|
69
75
|
*/
|
|
70
76
|
export function agent(def: {
|
|
@@ -77,6 +83,21 @@ export function agent(def: {
|
|
|
77
83
|
toolChoice?: ToolChoice;
|
|
78
84
|
sttPrompt?: string;
|
|
79
85
|
idleTimeoutMs?: number;
|
|
86
|
+
/**
|
|
87
|
+
* Pluggable STT provider. Must be set together with `llm` and `tts` to
|
|
88
|
+
* enable pipeline mode; leave all three unset for S2S mode.
|
|
89
|
+
*/
|
|
90
|
+
stt?: SttProvider;
|
|
91
|
+
/**
|
|
92
|
+
* Pluggable LLM provider (Vercel AI SDK `LanguageModel`). Must be set
|
|
93
|
+
* together with `stt` and `tts` to enable pipeline mode.
|
|
94
|
+
*/
|
|
95
|
+
llm?: LlmProvider;
|
|
96
|
+
/**
|
|
97
|
+
* Pluggable TTS provider. Must be set together with `stt` and `llm` to
|
|
98
|
+
* enable pipeline mode.
|
|
99
|
+
*/
|
|
100
|
+
tts?: TtsProvider;
|
|
80
101
|
}): AgentDef {
|
|
81
102
|
return {
|
|
82
103
|
systemPrompt: DEFAULT_SYSTEM_PROMPT,
|
|
package/sdk/manifest.test-d.ts
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
// Copyright 2025 the AAI authors. MIT license.
|
|
2
|
+
import { expectTypeOf, test } from "vitest";
|
|
3
|
+
import type { Manifest } from "./manifest.ts";
|
|
4
|
+
|
|
5
|
+
test("Manifest.stt/llm/tts are optional", () => {
|
|
6
|
+
expectTypeOf<Manifest["stt"]>().toBeNullable();
|
|
7
|
+
expectTypeOf<Manifest["llm"]>().toBeNullable();
|
|
8
|
+
expectTypeOf<Manifest["tts"]>().toBeNullable();
|
|
9
|
+
});
|
|
10
|
+
|
|
11
|
+
test("parseManifest return includes mode", () => {
|
|
12
|
+
type Parsed = ReturnType<typeof import("./manifest.ts").parseManifest>;
|
|
13
|
+
expectTypeOf<Parsed["mode"]>().toEqualTypeOf<"s2s" | "pipeline">();
|
|
14
|
+
});
|