@alexkroman1/aai 1.2.3 → 1.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (65)
  1. package/.turbo/turbo-build.log +14 -12
  2. package/CHANGELOG.md +14 -0
  3. package/dist/host/_pipeline-test-fakes.d.ts +107 -0
  4. package/dist/host/pipeline-session-ctx.d.ts +24 -0
  5. package/dist/host/pipeline-session.d.ts +48 -0
  6. package/dist/host/providers/llm.d.ts +2 -0
  7. package/dist/host/providers/stt/assemblyai.d.ts +31 -0
  8. package/dist/host/providers/stt-barrel.d.ts +8 -0
  9. package/dist/host/providers/stt-barrel.js +92 -0
  10. package/dist/host/providers/stt.d.ts +2 -0
  11. package/dist/host/providers/tts/cartesia.d.ts +39 -0
  12. package/dist/host/providers/tts-barrel.d.ts +8 -0
  13. package/dist/host/providers/tts-barrel.js +182 -0
  14. package/dist/host/providers/tts.d.ts +2 -0
  15. package/dist/host/runtime-barrel.js +498 -80
  16. package/dist/host/runtime.d.ts +17 -0
  17. package/dist/host/s2s.d.ts +5 -0
  18. package/dist/host/session-ctx.d.ts +22 -4
  19. package/dist/host/to-vercel-tools.d.ts +44 -0
  20. package/dist/index.js +5 -0
  21. package/dist/sdk/_internal-types.d.ts +15 -1
  22. package/dist/sdk/define.d.ts +21 -0
  23. package/dist/sdk/manifest.d.ts +22 -0
  24. package/dist/sdk/protocol.d.ts +3 -3
  25. package/dist/sdk/providers.d.ts +70 -0
  26. package/dist/sdk/types.d.ts +16 -0
  27. package/exports-no-dev-deps.test.ts +39 -14
  28. package/host/_pipeline-test-fakes.ts +323 -0
  29. package/host/_test-utils.ts +1 -0
  30. package/host/integration/fixtures/README.md +49 -0
  31. package/host/integration/pipeline-reference.integration.test.ts +124 -0
  32. package/host/pipeline-session-ctx.test.ts +31 -0
  33. package/host/pipeline-session-ctx.ts +36 -0
  34. package/host/pipeline-session.test.ts +337 -0
  35. package/host/pipeline-session.ts +405 -0
  36. package/host/providers/llm.ts +3 -0
  37. package/host/providers/providers.test-d.ts +31 -0
  38. package/host/providers/stt/assemblyai.test.ts +100 -0
  39. package/host/providers/stt/assemblyai.ts +154 -0
  40. package/host/providers/stt/fixtures/assemblyai/basic-turn.json +30 -0
  41. package/host/providers/stt-barrel.ts +13 -0
  42. package/host/providers/stt.ts +3 -0
  43. package/host/providers/tts/cartesia.test.ts +210 -0
  44. package/host/providers/tts/cartesia.ts +251 -0
  45. package/host/providers/tts-barrel.ts +13 -0
  46. package/host/providers/tts.ts +3 -0
  47. package/host/runtime.test.ts +81 -1
  48. package/host/runtime.ts +61 -0
  49. package/host/s2s.test.ts +19 -0
  50. package/host/s2s.ts +10 -0
  51. package/host/session-ctx.ts +35 -8
  52. package/host/to-vercel-tools.test.ts +153 -0
  53. package/host/to-vercel-tools.ts +70 -0
  54. package/package.json +15 -1
  55. package/sdk/__snapshots__/exports.test.ts.snap +1 -0
  56. package/sdk/_internal-types.ts +16 -0
  57. package/sdk/define.test-d.ts +21 -0
  58. package/sdk/define.test.ts +33 -0
  59. package/sdk/define.ts +21 -0
  60. package/sdk/manifest.test-d.ts +14 -0
  61. package/sdk/manifest.test.ts +51 -0
  62. package/sdk/manifest.ts +39 -0
  63. package/sdk/providers.ts +90 -0
  64. package/sdk/types.ts +16 -0
  65. package/vitest.config.ts +1 -0
package/host/runtime.ts CHANGED
@@ -14,9 +14,11 @@ import { DEFAULT_SHUTDOWN_TIMEOUT_MS } from "../sdk/constants.ts";
14
14
  import type { Kv } from "../sdk/kv.ts";
15
15
  import type { ClientSink } from "../sdk/protocol.ts";
16
16
  import { buildReadyConfig, type ReadyConfig } from "../sdk/protocol.ts";
17
+ import type { LlmProvider, SttProvider, TtsProvider } from "../sdk/providers.ts";
17
18
  import type { AgentDef } from "../sdk/types.ts";
18
19
  import { toolError } from "../sdk/utils.ts";
19
20
  import { resolveAllBuiltins } from "./builtin-tools.ts";
21
+ import { createPipelineSession } from "./pipeline-session.ts";
20
22
  import type { Logger, S2SConfig } from "./runtime-config.ts";
21
23
  import { consoleLogger, DEFAULT_S2S_CONFIG } from "./runtime-config.ts";
22
24
  import type { CreateS2sWebSocket } from "./s2s.ts";
@@ -58,6 +60,18 @@ function createLocalKv(): Kv {
58
60
  return createUnstorageKv({ storage: createStorage() });
59
61
  }
60
62
 
63
+ /**
64
+ * Resolve an API key host-side for pipeline providers.
65
+ *
66
+ * Checks the agent's declared env first, then the host process env as a
67
+ * fallback. Returns `""` when absent — pipeline providers surface a clear
68
+ * `MissingCredentialsError` via their `open()` that the orchestrator
69
+ * converts to a `session.error` wire event.
70
+ */
71
+ function resolveApiKey(envVar: string, env: Record<string, string>): string {
72
+ return env[envVar] ?? process.env[envVar] ?? "";
73
+ }
74
+
61
75
  /**
62
76
  * Configuration for {@link createRuntime}.
63
77
  *
@@ -111,6 +125,22 @@ export type RuntimeOptions = {
111
125
  * their own fetch wrapper.
112
126
  */
113
127
  fetch?: typeof globalThis.fetch | undefined;
128
+ /**
129
+ * Pluggable STT provider. Must be set together with `llm` and `tts` to
130
+ * route sessions through the pipeline path; leave all three unset for
131
+ * the default AssemblyAI Streaming Speech-to-Speech (S2S) path.
132
+ */
133
+ stt?: SttProvider | undefined;
134
+ /**
135
+ * Pluggable LLM provider (Vercel AI SDK `LanguageModel`). Must be set
136
+ * together with `stt` and `tts` to route sessions through the pipeline path.
137
+ */
138
+ llm?: LlmProvider | undefined;
139
+ /**
140
+ * Pluggable TTS provider. Must be set together with `stt` and `llm` to
141
+ * route sessions through the pipeline path.
142
+ */
143
+ tts?: TtsProvider | undefined;
114
144
  };
115
145
 
116
146
  /**
@@ -160,6 +190,14 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
160
190
  sessionStartTimeoutMs,
161
191
  shutdownTimeoutMs = DEFAULT_SHUTDOWN_TIMEOUT_MS,
162
192
  } = opts;
193
+ // Derive session mode from the provider triple: all three set ⇒ pipeline,
194
+ // none set ⇒ s2s. Anything in-between is a configuration error.
195
+ const providerCount =
196
+ (opts.stt != null ? 1 : 0) + (opts.llm != null ? 1 : 0) + (opts.tts != null ? 1 : 0);
197
+ if (providerCount !== 0 && providerCount !== 3) {
198
+ throw new Error("stt, llm, and tts must be set together");
199
+ }
200
+ const mode: "s2s" | "pipeline" = providerCount === 3 ? "pipeline" : "s2s";
163
201
  const agentConfig = toAgentConfig(agent);
164
202
  const sessions = new Map<string, Session>();
165
203
  const sinkMap = new Map<string, ClientSink>();
@@ -241,6 +279,29 @@ export function createRuntime(opts: RuntimeOptions): Runtime {
241
279
  resumeFrom?: string;
242
280
  }): Session {
243
281
  sinkMap.set(sessionOpts.id, sessionOpts.client);
282
+ if (mode === "pipeline") {
283
+ // biome-ignore lint/style/noNonNullAssertion: providerCount === 3 ⇒ all set
284
+ const stt = opts.stt!;
285
+ // biome-ignore lint/style/noNonNullAssertion: providerCount === 3 ⇒ all set
286
+ const llm = opts.llm!;
287
+ // biome-ignore lint/style/noNonNullAssertion: providerCount === 3 ⇒ all set
288
+ const tts = opts.tts!;
289
+ return createPipelineSession({
290
+ id: sessionOpts.id,
291
+ agent: sessionOpts.agent,
292
+ client: sessionOpts.client,
293
+ agentConfig,
294
+ toolSchemas,
295
+ toolGuidance,
296
+ executeTool,
297
+ stt,
298
+ llm,
299
+ tts,
300
+ sttApiKey: resolveApiKey("ASSEMBLYAI_API_KEY", env),
301
+ ttsApiKey: resolveApiKey("CARTESIA_API_KEY", env),
302
+ logger,
303
+ });
304
+ }
244
305
  const apiKey = env.ASSEMBLYAI_API_KEY ?? "";
245
306
  return createS2sSession({
246
307
  id: sessionOpts.id,
package/host/s2s.test.ts CHANGED
@@ -66,6 +66,7 @@ describe("connectS2s", () => {
66
66
  expect(handle).toEqual(
67
67
  expect.objectContaining({
68
68
  sendAudio: expect.any(Function),
69
+ sendAudioRaw: expect.any(Function),
69
70
  sendToolResult: expect.any(Function),
70
71
  updateSession: expect.any(Function),
71
72
  resumeSession: expect.any(Function),
@@ -125,6 +126,24 @@ describe("connectS2s", () => {
125
126
  expect(raw.send).not.toHaveBeenCalled();
126
127
  });
127
128
 
129
+ test("sendAudioRaw forwards the exact string to the socket", async () => {
130
+ const { raw, handle } = await setupHandle();
131
+
132
+ const frame = '{"type":"input.audio","audio":"abc"}';
133
+ handle.sendAudioRaw(frame);
134
+
135
+ expect(raw.send).toHaveBeenCalledOnce();
136
+ expect(raw.send.mock.calls[0]?.[0]).toBe(frame);
137
+ });
138
+
139
+ test("sendAudioRaw is no-op when ws is not open", async () => {
140
+ const { raw, handle } = await setupHandle();
141
+ raw.readyState = 3; // CLOSED
142
+
143
+ handle.sendAudioRaw('{"type":"input.audio","audio":"abc"}');
144
+ expect(raw.send).not.toHaveBeenCalled();
145
+ });
146
+
128
147
  test("sendToolResult sends tool.result message", async () => {
129
148
  const { raw, handle } = await setupHandle();
130
149
 
package/host/s2s.ts CHANGED
@@ -159,6 +159,11 @@ export type S2sEvents = {
159
159
  export type S2sHandle = {
160
160
  on<K extends keyof S2sEvents>(event: K, cb: S2sEvents[K]): Unsubscribe;
161
161
  sendAudio(audio: Uint8Array): void;
162
+ /**
163
+ * Send a pre-encoded audio wire frame. For perf-critical callers (load tests)
164
+ * that batch-encode up front. Skips logging; caller owns wire format.
165
+ */
166
+ sendAudioRaw(jsonFrame: string): void;
162
167
  sendToolResult(callId: string, result: string): void;
163
168
  updateSession(config: S2sSessionConfig): void;
164
169
  resumeSession(sessionId: string): void;
@@ -212,6 +217,11 @@ export function connectS2s(opts: ConnectS2sOptions): Promise<S2sHandle> {
212
217
  ws.send(`{"type":"input.audio","audio":"${uint8ToBase64(audio)}"}`);
213
218
  },
214
219
 
220
+ sendAudioRaw(jsonFrame: string): void {
221
+ if (ws.readyState !== WS_OPEN) return;
222
+ ws.send(jsonFrame);
223
+ },
224
+
215
225
  sendToolResult(callId: string, result: string): void {
216
226
  const msg = { type: "tool.result", call_id: callId, result };
217
227
  log.info("S2S >> tool.result", { call_id: callId, resultLength: result.length });
package/host/session-ctx.ts CHANGED
@@ -30,15 +30,18 @@ export type SessionDeps = {
30
30
  };
31
31
 
32
32
  /**
33
- * Session context threaded through event handlers.
33
+ * Transport-agnostic session context shared by S2S and pipeline sessions.
34
+ *
35
+ * Owns reply lifecycle, conversation history (with sliding-window truncation),
36
+ * and per-turn tool-call step enforcement. Transport-specific fields (e.g.
37
+ * `s2s` for S2S, `stt`/`tts` for the pipeline) live on the extending types.
34
38
  *
35
39
  * Split into three layers:
36
40
  * - {@link SessionDeps} — immutable dependencies (set once)
37
41
  * - {@link ReplyState} via `reply` — per-reply mutable state (reset on beginReply/cancelReply)
38
- * - Remaining fields — connection, conversation, and lifecycle methods
42
+ * - Remaining fields — conversation and lifecycle methods
39
43
  */
40
- export type S2sSessionCtx = SessionDeps & {
41
- s2s: S2sHandle | null;
44
+ export type BaseSessionCtx = SessionDeps & {
42
45
  reply: ReplyState;
43
46
  turnPromise: Promise<void> | null;
44
47
  conversationMessages: Message[];
@@ -50,7 +53,14 @@ export type S2sSessionCtx = SessionDeps & {
50
53
  chainTurn(p: Promise<void>): void;
51
54
  };
52
55
 
53
- export function buildCtx(opts: {
56
+ /**
57
+ * S2S session context — {@link BaseSessionCtx} plus the S2S WebSocket handle.
58
+ */
59
+ export type S2sSessionCtx = BaseSessionCtx & {
60
+ s2s: S2sHandle | null;
61
+ };
62
+
63
+ export function _buildBaseCtx(opts: {
54
64
  id: string;
55
65
  agent: string;
56
66
  client: ClientSink;
@@ -58,12 +68,11 @@ export function buildCtx(opts: {
58
68
  executeTool: ExecuteTool;
59
69
  log: Logger;
60
70
  maxHistory?: number | undefined;
61
- }): S2sSessionCtx {
71
+ }): BaseSessionCtx {
62
72
  const { agentConfig, log } = opts;
63
73
  const maxHistory = opts.maxHistory ?? DEFAULT_MAX_HISTORY;
64
- const ctx: S2sSessionCtx = {
74
+ const ctx: BaseSessionCtx = {
65
75
  ...opts,
66
- s2s: null,
67
76
  reply: { pendingTools: [], toolCallCount: 0, currentReplyId: null },
68
77
  turnPromise: null,
69
78
  conversationMessages: [],
@@ -105,3 +114,21 @@ export function buildCtx(opts: {
105
114
  };
106
115
  return ctx;
107
116
  }
117
+
118
+ export function buildCtx(opts: {
119
+ id: string;
120
+ agent: string;
121
+ client: ClientSink;
122
+ agentConfig: AgentConfig;
123
+ executeTool: ExecuteTool;
124
+ log: Logger;
125
+ maxHistory?: number | undefined;
126
+ }): S2sSessionCtx {
127
+ // Mutate the base ctx in place rather than spreading into a new object —
128
+ // the helper methods close over the base ctx reference, so spreading would
129
+ // leave them writing to an orphan object (e.g. `beginReply` would mutate
130
+ // the base `reply`, not the spread copy's `reply`).
131
+ const base = _buildBaseCtx(opts) as S2sSessionCtx;
132
+ base.s2s = null;
133
+ return base;
134
+ }
package/host/to-vercel-tools.test.ts ADDED
@@ -0,0 +1,153 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ import { describe, expect, test, vi } from "vitest";
3
+ import type { ToolSchema } from "../sdk/_internal-types.ts";
4
+ import { toVercelTools } from "./to-vercel-tools.ts";
5
+
6
+ const schemas: ToolSchema[] = [
7
+ {
8
+ name: "get_weather",
9
+ description: "Look up the weather.",
10
+ parameters: {
11
+ type: "object",
12
+ properties: { city: { type: "string" } },
13
+ required: ["city"],
14
+ },
15
+ },
16
+ ];
17
+
18
+ describe("toVercelTools", () => {
19
+ test("produces one Vercel AI SDK tool per schema, keyed by name", () => {
20
+ const executeTool = vi.fn(async () => "sunny");
21
+ const tools = toVercelTools(schemas, {
22
+ executeTool,
23
+ sessionId: "s1",
24
+ messages: () => [],
25
+ });
26
+ expect(Object.keys(tools)).toEqual(["get_weather"]);
27
+ expect(tools.get_weather).toMatchObject({
28
+ description: "Look up the weather.",
29
+ });
30
+ });
31
+
32
+ test("execute delegates to ctx.executeTool with (name, args, sessionId, messages)", async () => {
33
+ const executeTool = vi.fn(async () => "rainy");
34
+ const tools = toVercelTools(schemas, {
35
+ executeTool,
36
+ sessionId: "sess-42",
37
+ messages: () => [{ role: "user", content: "?" }],
38
+ });
39
+ const result = await tools.get_weather?.execute?.(
40
+ { city: "SF" },
41
+ { toolCallId: "tc-1", messages: [] },
42
+ );
43
+ expect(executeTool).toHaveBeenCalledWith(
44
+ "get_weather",
45
+ { city: "SF" },
46
+ "sess-42",
47
+ [{ role: "user", content: "?" }],
48
+ { toolCallId: "tc-1" },
49
+ );
50
+ expect(result).toBe("rainy");
51
+ });
52
+
53
+ test("execute passes through abort signal when provided", async () => {
54
+ const controller = new AbortController();
55
+ const executeTool = vi.fn(
56
+ async (
57
+ _n: string,
58
+ _a: Readonly<Record<string, unknown>>,
59
+ _s?: string,
60
+ _m?: readonly unknown[],
61
+ opts?: { signal?: AbortSignal },
62
+ ) => {
63
+ expect(opts?.signal).toBe(controller.signal);
64
+ return "ok";
65
+ },
66
+ );
67
+ const tools = toVercelTools(schemas, {
68
+ executeTool,
69
+ sessionId: "s",
70
+ messages: () => [],
71
+ signal: controller.signal,
72
+ });
73
+ await tools.get_weather?.execute?.({ city: "NY" }, { toolCallId: "tc-2", messages: [] });
74
+ expect(executeTool).toHaveBeenCalledTimes(1);
75
+ });
76
+
77
+ test("execute prefers options.abortSignal over ctx.signal", async () => {
78
+ const ctxController = new AbortController();
79
+ const callController = new AbortController();
80
+ let receivedSignal: AbortSignal | undefined;
81
+ const executeTool = vi.fn(
82
+ async (
83
+ _n: string,
84
+ _a: Readonly<Record<string, unknown>>,
85
+ _s?: string,
86
+ _m?: readonly unknown[],
87
+ opts?: { signal?: AbortSignal },
88
+ ) => {
89
+ receivedSignal = opts?.signal;
90
+ return "ok";
91
+ },
92
+ );
93
+ const tools = toVercelTools(schemas, {
94
+ executeTool,
95
+ sessionId: "s",
96
+ messages: () => [],
97
+ signal: ctxController.signal,
98
+ });
99
+ await tools.get_weather?.execute?.(
100
+ { city: "NY" },
101
+ { toolCallId: "tc-1", messages: [], abortSignal: callController.signal },
102
+ );
103
+ expect(receivedSignal).toBe(callController.signal);
104
+ });
105
+
106
+ test("execute falls back to ctx.signal when options.abortSignal is absent", async () => {
107
+ const ctxController = new AbortController();
108
+ let receivedSignal: AbortSignal | undefined;
109
+ const executeTool = vi.fn(
110
+ async (
111
+ _n: string,
112
+ _a: Readonly<Record<string, unknown>>,
113
+ _s?: string,
114
+ _m?: readonly unknown[],
115
+ opts?: { signal?: AbortSignal },
116
+ ) => {
117
+ receivedSignal = opts?.signal;
118
+ return "ok";
119
+ },
120
+ );
121
+ const tools = toVercelTools(schemas, {
122
+ executeTool,
123
+ sessionId: "s",
124
+ messages: () => [],
125
+ signal: ctxController.signal,
126
+ });
127
+ await tools.get_weather?.execute?.({ city: "NY" }, { toolCallId: "tc-2", messages: [] });
128
+ expect(receivedSignal).toBe(ctxController.signal);
129
+ });
130
+
131
+ test("execute propagates toolCallId from options", async () => {
132
+ let receivedCallId: string | undefined;
133
+ const executeTool = vi.fn(
134
+ async (
135
+ _n: string,
136
+ _a: Readonly<Record<string, unknown>>,
137
+ _s?: string,
138
+ _m?: readonly unknown[],
139
+ opts?: { toolCallId?: string },
140
+ ) => {
141
+ receivedCallId = opts?.toolCallId;
142
+ return "ok";
143
+ },
144
+ );
145
+ const tools = toVercelTools(schemas, {
146
+ executeTool,
147
+ sessionId: "s",
148
+ messages: () => [],
149
+ });
150
+ await tools.get_weather?.execute?.({ city: "NY" }, { toolCallId: "tc-3", messages: [] });
151
+ expect(receivedCallId).toBe("tc-3");
152
+ });
153
+ });
package/host/to-vercel-tools.ts ADDED
@@ -0,0 +1,70 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ /**
3
+ * Converts agent {@link ToolSchema}[] to Vercel AI SDK tools with `execute`
4
+ * delegation to the agent's {@link ExecuteTool} function.
5
+ *
6
+ * The pipeline orchestrator passes the output to `streamText({ tools })`.
7
+ * Each produced tool's `execute` closure calls
8
+ * `ctx.executeTool(name, args, sessionId, messages(), { signal, toolCallId })`,
9
+ * so the existing agent tool infrastructure (argument validation, KV, hooks,
10
+ * timeout) remains the single source of truth for tool behavior.
11
+ *
12
+ * Per-call `options.abortSignal` (forwarded by `streamText` when the
13
+ * outer turn is aborted, e.g. barge-in) takes precedence over the
14
+ * bag-level `ctx.signal` so individual invocations respect streamText
15
+ * aborts.
16
+ */
17
+
18
+ import { jsonSchema, type Tool, type ToolExecutionOptions, tool } from "ai";
19
+ import type { ExecuteTool, ExecuteToolOptions, ToolSchema } from "../sdk/_internal-types.ts";
20
+ import type { Message } from "../sdk/types.ts";
21
+
22
+ export interface ToVercelToolsContext {
23
+ /** The agent's tool-execution function (from the runtime). */
24
+ executeTool: ExecuteTool;
25
+ /** Session id threaded to {@link executeTool}. */
26
+ sessionId: string;
27
+ /**
28
+ * Returns the current conversation history at call-time. Called per
29
+ * tool invocation so late calls see fresh state instead of a snapshot
30
+ * captured when the tool bag was built.
31
+ */
32
+ messages: () => readonly Message[];
33
+ /**
34
+ * Bag-level abort signal. Used as a fallback when the per-call
35
+ * `options.abortSignal` from Vercel's `ToolExecutionOptions` is absent.
36
+ */
37
+ signal?: AbortSignal;
38
+ }
39
+
40
+ /**
41
+ * Convert an array of {@link ToolSchema} to a Vercel AI SDK `ToolSet`
42
+ * (record keyed by tool name).
43
+ *
44
+ * Uses the v6 `tool()` helper with `inputSchema: jsonSchema(...)` wrapping
45
+ * the agent's JSON Schema `parameters`. Execution is delegated to
46
+ * `ctx.executeTool` so validation, KV, timeouts, and hooks keep working.
47
+ */
48
+ export function toVercelTools(
49
+ schemas: readonly ToolSchema[],
50
+ ctx: ToVercelToolsContext,
51
+ ): Record<string, Tool> {
52
+ const out: Record<string, Tool> = {};
53
+ for (const schema of schemas) {
54
+ out[schema.name] = tool({
55
+ description: schema.description,
56
+ inputSchema: jsonSchema(schema.parameters),
57
+ execute: async (args: unknown, options: ToolExecutionOptions) => {
58
+ const input = (args ?? {}) as Readonly<Record<string, unknown>>;
59
+ // Prefer the per-call abortSignal forwarded by streamText over the
60
+ // bag-level ctx.signal so individual invocations respect aborts.
61
+ const signal = options.abortSignal ?? ctx.signal;
62
+ const opts: ExecuteToolOptions = {};
63
+ if (signal !== undefined) opts.signal = signal;
64
+ if (options.toolCallId !== undefined) opts.toolCallId = options.toolCallId;
65
+ return ctx.executeTool(schema.name, input, ctx.sessionId, ctx.messages(), opts);
66
+ },
67
+ });
68
+ }
69
+ return out;
70
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@alexkroman1/aai",
3
- "version": "1.2.3",
3
+ "version": "1.3.0",
4
4
  "type": "module",
5
5
  "exports": {
6
6
  ".": {
@@ -22,9 +22,22 @@
22
22
  "@dev/source": "./sdk/manifest-barrel.ts",
23
23
  "types": "./dist/sdk/manifest-barrel.d.ts",
24
24
  "import": "./dist/sdk/manifest-barrel.js"
25
+ },
26
+ "./stt": {
27
+ "@dev/source": "./host/providers/stt-barrel.ts",
28
+ "types": "./dist/host/providers/stt-barrel.d.ts",
29
+ "import": "./dist/host/providers/stt-barrel.js"
30
+ },
31
+ "./tts": {
32
+ "@dev/source": "./host/providers/tts-barrel.ts",
33
+ "types": "./dist/host/providers/tts-barrel.d.ts",
34
+ "import": "./dist/host/providers/tts-barrel.js"
25
35
  }
26
36
  },
27
37
  "dependencies": {
38
+ "@cartesia/cartesia-js": "^3.0.0",
39
+ "ai": "^6.0.161",
40
+ "assemblyai": "^4.30.0",
28
41
  "escape-html": "^1.0.3",
29
42
  "html-to-text": "^9.0.5",
30
43
  "mime-types": "^3.0.2",
@@ -35,6 +48,7 @@
35
48
  "zod": "^4.3.6"
36
49
  },
37
50
  "devDependencies": {
51
+ "@ai-sdk/openai": "^3.0.0",
38
52
  "@types/escape-html": "^1.0.4",
39
53
  "@types/html-to-text": "^9.0.4",
40
54
  "@types/json-schema": "^7.0.15",
package/sdk/__snapshots__/exports.test.ts.snap CHANGED
@@ -63,6 +63,7 @@ exports[`export surface stability > @alexkroman1/aai/protocol export 1`] = `
63
63
  exports[`export surface stability > @alexkroman1/aai/runtime export 1`] = `
64
64
  [
65
65
  "DEFAULT_S2S_CONFIG",
66
+ "_buildBaseCtx",
66
67
  "_internals",
67
68
  "buildCtx",
68
69
  "consoleLogger",
package/sdk/_internal-types.ts CHANGED
@@ -10,6 +10,21 @@ import { z } from "zod";
10
10
  import type { Message } from "./types.ts";
11
11
  import { BuiltinToolSchema, ToolChoiceSchema, type ToolDef } from "./types.ts";
12
12
 
13
+ /**
14
+ * Options forwarded to an {@link ExecuteTool} invocation.
15
+ *
16
+ * Primarily used by the pipeline orchestrator (streamText tool loop) to
17
+ * thread an {@link AbortSignal} into tool execution. The S2S voice path
18
+ * does not pass these options today — recipients must treat the whole
19
+ * bag as optional.
20
+ */
21
+ export interface ExecuteToolOptions {
22
+ /** Abort signal bound to the enclosing LLM turn / request. */
23
+ signal?: AbortSignal;
24
+ /** Vercel AI SDK tool-call ID for this invocation. Useful for tracing and correlation. */
25
+ toolCallId?: string;
26
+ }
27
+
13
28
  /**
14
29
  * Function signature for executing a tool by name.
15
30
  *
@@ -21,6 +36,7 @@ export type ExecuteTool = (
21
36
  args: Readonly<Record<string, unknown>>,
22
37
  sessionId?: string,
23
38
  messages?: readonly Message[],
39
+ opts?: ExecuteToolOptions,
24
40
  ) => Promise<string>;
25
41
 
26
42
  // ─── AgentConfig ────────────────────────────────────────────────────────────
package/sdk/define.test-d.ts ADDED
@@ -0,0 +1,21 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ import { expectTypeOf, test } from "vitest";
3
+ import { agent } from "./define.ts";
4
+ import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
5
+
6
+ test("agent() accepts stt/llm/tts optional fields", () => {
7
+ const stt = {} as SttProvider;
8
+ const llm = {} as LlmProvider;
9
+ const tts = {} as TtsProvider;
10
+ const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
11
+ expectTypeOf(def.stt).toEqualTypeOf<SttProvider | undefined>();
12
+ expectTypeOf(def.llm).toEqualTypeOf<LlmProvider | undefined>();
13
+ expectTypeOf(def.tts).toEqualTypeOf<TtsProvider | undefined>();
14
+ });
15
+
16
+ test("agent() without stt/llm/tts is still legal (s2s mode)", () => {
17
+ const def = agent({ name: "t", systemPrompt: "p" });
18
+ expectTypeOf(def.stt).toEqualTypeOf<SttProvider | undefined>();
19
+ expectTypeOf(def.llm).toEqualTypeOf<LlmProvider | undefined>();
20
+ expectTypeOf(def.tts).toEqualTypeOf<TtsProvider | undefined>();
21
+ });
package/sdk/define.test.ts CHANGED
@@ -2,6 +2,8 @@
2
2
  import { describe, expect, test } from "vitest";
3
3
  import { z } from "zod";
4
4
  import { agent, tool } from "./define.ts";
5
+ import { parseManifest } from "./manifest.ts";
6
+ import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
5
7
 
6
8
  describe("tool()", () => {
7
9
  test("returns the definition unchanged", () => {
@@ -54,4 +56,35 @@ describe("agent()", () => {
54
56
  expect(def.tools.greet).toBe(greetTool);
55
57
  expect(def.builtinTools).toEqual(["web_search"]);
56
58
  });
59
+
60
+ test("preserves stt/llm/tts providers on the returned def", () => {
61
+ const stt = { name: "fake-stt", open: async () => ({}) } as unknown as SttProvider;
62
+ const tts = { name: "fake-tts", open: async () => ({}) } as unknown as TtsProvider;
63
+ const llm = {} as LlmProvider;
64
+ const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
65
+ expect(def.stt).toBe(stt);
66
+ expect(def.llm).toBe(llm);
67
+ expect(def.tts).toBe(tts);
68
+ });
69
+
70
+ test("stt/llm/tts flow through parseManifest to mode 'pipeline'", () => {
71
+ const stt = { name: "fake-stt", open: async () => ({}) } as unknown as SttProvider;
72
+ const tts = { name: "fake-tts", open: async () => ({}) } as unknown as TtsProvider;
73
+ const llm = {} as LlmProvider;
74
+ const def = agent({ name: "t", systemPrompt: "p", stt, llm, tts });
75
+ const parsed = parseManifest(def);
76
+ expect(parsed.mode).toBe("pipeline");
77
+ expect(parsed.stt).toBe(stt);
78
+ expect(parsed.llm).toBe(llm);
79
+ expect(parsed.tts).toBe(tts);
80
+ });
81
+
82
+ test("agent without providers resolves to mode 's2s'", () => {
83
+ const def = agent({ name: "t", systemPrompt: "p" });
84
+ const parsed = parseManifest(def);
85
+ expect(parsed.mode).toBe("s2s");
86
+ expect(parsed.stt).toBeUndefined();
87
+ expect(parsed.llm).toBeUndefined();
88
+ expect(parsed.tts).toBeUndefined();
89
+ });
57
90
  });
package/sdk/define.ts CHANGED
@@ -4,6 +4,7 @@
4
4
  */
5
5
 
6
6
  import type { z } from "zod";
7
+ import type { LlmProvider, SttProvider, TtsProvider } from "./providers.ts";
7
8
  import {
8
9
  type AgentDef,
9
10
  type BuiltinTool,
@@ -65,6 +66,11 @@ export function tool<P extends z.ZodObject<z.ZodRawShape>>(def: {
65
66
  * });
66
67
  * ```
67
68
  *
69
+ * @remarks
70
+ * Pipeline mode: pass `stt`, `llm`, and `tts` together to switch from the
71
+ * default AssemblyAI Streaming Speech-to-Speech path to a pluggable
72
+ * STT → LLM → TTS pipeline. All three must be set (or all left unset).
73
+ *
68
74
  * @public
69
75
  */
70
76
  export function agent(def: {
@@ -77,6 +83,21 @@ export function agent(def: {
77
83
  toolChoice?: ToolChoice;
78
84
  sttPrompt?: string;
79
85
  idleTimeoutMs?: number;
86
+ /**
87
+ * Pluggable STT provider. Must be set together with `llm` and `tts` to
88
+ * enable pipeline mode; leave all three unset for S2S mode.
89
+ */
90
+ stt?: SttProvider;
91
+ /**
92
+ * Pluggable LLM provider (Vercel AI SDK `LanguageModel`). Must be set
93
+ * together with `stt` and `tts` to enable pipeline mode.
94
+ */
95
+ llm?: LlmProvider;
96
+ /**
97
+ * Pluggable TTS provider. Must be set together with `stt` and `llm` to
98
+ * enable pipeline mode.
99
+ */
100
+ tts?: TtsProvider;
80
101
  }): AgentDef {
81
102
  return {
82
103
  systemPrompt: DEFAULT_SYSTEM_PROMPT,
package/sdk/manifest.test-d.ts ADDED
@@ -0,0 +1,14 @@
1
+ // Copyright 2025 the AAI authors. MIT license.
2
+ import { expectTypeOf, test } from "vitest";
3
+ import type { Manifest } from "./manifest.ts";
4
+
5
+ test("Manifest.stt/llm/tts are optional", () => {
6
+ expectTypeOf<Manifest["stt"]>().toBeNullable();
7
+ expectTypeOf<Manifest["llm"]>().toBeNullable();
8
+ expectTypeOf<Manifest["tts"]>().toBeNullable();
9
+ });
10
+
11
+ test("parseManifest return includes mode", () => {
12
+ type Parsed = ReturnType<typeof import("./manifest.ts").parseManifest>;
13
+ expectTypeOf<Parsed["mode"]>().toEqualTypeOf<"s2s" | "pipeline">();
14
+ });